From 9380478a0b292bcb0c11987a88803a37a064d618 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Fri, 1 Aug 2014 21:29:21 -0700
Subject: [PATCH 1/4] lib: strings: Fix the behavior of strcasecmp_m_handle()
 in the face of bad conversions.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When either string has a bad conversion, we fall back to
doing raw ascii byte comparisons using strcasecmp().

The problem is we've already stepped past the character
that failed the conversion, so we're not re-testing those
characters for comparison. This can have the effect of
causing strcasecmp_m_handle() to report that two strings
are identical when they are not, if the failed conversion
takes place at the end of the string.

The correct behavior is to step back to the point of
the string(s) that failed the conversion, and continue
the test from there.

Found by <lev@zadarastorage.com> when investigating bug
10716 - smbd constantly crashes when filename contains non-ascii character.

Given the normal character set of utf-8, and an on
disk filename of ISO-8859-1 of file-é on disk hex
value: 66 69 6c 65 2d e9, an incoming open given the
correct utf8 name of file-é will collide when it
should not.

Fixes:

Bug 10716 - smbd constantly crashes when filename contains non-ascii character

https://bugzilla.samba.org/show_bug.cgi?id=10716

Signed-off-by: Jeremy Allison <jra@samba.org>
---
 lib/util/charset/util_str.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c
index 688ab5a..1810e24 100644
--- a/lib/util/charset/util_str.c
+++ b/lib/util/charset/util_str.c
@@ -56,7 +56,17 @@ _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
 
 		if (c1 == INVALID_CODEPOINT ||
 		    c2 == INVALID_CODEPOINT) {
-			/* what else can we do?? */
+			/*
+			 * Fall back to byte
+			 * comparison. We must
+			 * step back by the codepoint
+			 * length we just incremented
+			 * - otherwise we are not
+			 * checking the bytes that
+			 * failed the conversion.
+			 */
+			s1 -= size1;
+			s2 -= size2;
 			return strcasecmp(s1, s2);
 		}
 
-- 
1.9.1


From dfe8dd87e15bec453c7d3a80c4c707d3f2c7b597 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Fri, 1 Aug 2014 21:38:59 -0700
Subject: [PATCH 2/4] lib: strings: Fix the behavior of strncasecmp_m_handle()
 in the face of bad conversions.

When either string has a bad conversion, we fall back to
doing raw ascii byte comparisons using strcasecmp(). This
is wrong - we should fall back to strncasecmp.

The problem is we've already stepped past the character
that failed the conversion, so we're not re-testing those
characters for comparison. This can have the effect of
causing strncasecmp_m_handle() to report that two strings
are identical when they are not, if the failed conversion
takes place at the end of the string.

The correct behavior is to step back to the point of
the string(s) that failed the conversion, and continue
the test from there.

This is a litle trickier than the previous fix, as
it requires converting the incoming n variable from
remaining characters to compare to remaining bytes to
compare.

As bytes are always the smallest character size
(1 byte) then it's safe to convert the remaining
characters to check by decrementing the source string
by the last character length (in bytes) and incrementing
the remaining bytes to scan by the same value, then
calling strncasecmp() with the stepped back strings
remaining.

Signed-off-by: Jeremy Allison <jra@samba.org>
---
 lib/util/charset/util_str.c | 29 +++++++++++++++++++++++++++--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c
index 1810e24..f62c999 100644
--- a/lib/util/charset/util_str.c
+++ b/lib/util/charset/util_str.c
@@ -116,8 +116,33 @@ _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
 
 		if (c1 == INVALID_CODEPOINT ||
 		    c2 == INVALID_CODEPOINT) {
-			/* what else can we do?? */
-			return strcasecmp(s1, s2);
+			/*
+			 * Fall back to byte
+			 * comparison. We must
+			 * step back by the codepoint
+			 * length we just incremented
+			 * by - otherwise we are not
+			 * checking the bytes that
+			 * failed the conversion.
+			 */
+			s1 -= size1;
+			s2 -= size2;
+			/*
+			 * n was specified in characters,
+			 * now we must convert it to bytes.
+			 * As bytes are the smallest
+			 * character unit, the following
+			 * increment and strncasecmp is always
+			 * safe.
+			 *
+			 * The source string was already known
+			 * to be n characters long, so we are
+			 * guaranteed to be able to look at the
+			 * (n remaining + size1) bytes from the
+			 * new (s1 - size1) position).
+			 */
+			n += size1;
+			return strncasecmp(s1, s2, n);
 		}
 
 		if (toupper_m(c1) != toupper_m(c2)) {
-- 
1.9.1


From ca1617121a2a6418c2e3ab7a7ce92ddaa1368ce2 Mon Sep 17 00:00:00 2001
From: Jeremy Allison <jra@samba.org>
Date: Mon, 4 Aug 2014 13:36:42 -0700
Subject: [PATCH 3/4] s4: tests: Added local.charset test for Bug 10716 - smbd
 constantly crashes when filename contains non-ascii character

https://bugzilla.samba.org/show_bug.cgi?id=10716

Signed-off-by: Jeremy Allison <jra@samba.org>
---
 lib/util/charset/tests/charset.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/lib/util/charset/tests/charset.c b/lib/util/charset/tests/charset.c
index 70b6741..a47670e 100644
--- a/lib/util/charset/tests/charset.c
+++ b/lib/util/charset/tests/charset.c
@@ -50,12 +50,18 @@ static bool test_codepoint_cmpi(struct torture_context *tctx)
 
 static bool test_strcasecmp_m(struct torture_context *tctx)
 {
+	/* file.{accented e} in iso8859-1 */
+	const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 };
+	/* file.{accented e} in utf8 */
+	const char file_utf8[8] =      { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 };
 	torture_assert(tctx, strcasecmp_m("foo", "bar") != 0, "different strings");
 	torture_assert(tctx, strcasecmp_m("foo", "foo") == 0, "same case strings");
 	torture_assert(tctx, strcasecmp_m("foo", "Foo") == 0, "different case strings");
 	torture_assert(tctx, strcasecmp_m(NULL, "Foo") != 0, "one NULL");
 	torture_assert(tctx, strcasecmp_m("foo", NULL) != 0, "other NULL");
 	torture_assert(tctx, strcasecmp_m(NULL, NULL) == 0, "both NULL");
+	torture_assert(tctx, strcasecmp_m(file_iso8859_1, file_utf8) != 0,
+		"file.{accented e} should differ");
 	return true;
 }
 
@@ -102,6 +108,10 @@ static bool test_string_replace_m(struct torture_context *tctx)
 
 static bool test_strncasecmp_m(struct torture_context *tctx)
 {
+	/* file.{accented e} in iso8859-1 */
+	const char file_iso8859_1[7] = { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xe9, 0 };
+	/* file.{accented e} in utf8 */
+	const char file_utf8[8] =      { 0x66, 0x69, 0x6c, 0x65, 0x2d, 0xc3, 0xa9, 0 };
 	torture_assert(tctx, strncasecmp_m("foo", "bar", 3) != 0, "different strings");
 	torture_assert(tctx, strncasecmp_m("foo", "foo", 3) == 0, "same case strings");
 	torture_assert(tctx, strncasecmp_m("foo", "Foo", 3) == 0, "different case strings");
@@ -111,6 +121,8 @@ static bool test_strncasecmp_m(struct torture_context *tctx)
 	torture_assert(tctx, strncasecmp_m(NULL, "Foo", 3) != 0, "one NULL");
 	torture_assert(tctx, strncasecmp_m("foo", NULL, 3) != 0, "other NULL");
 	torture_assert(tctx, strncasecmp_m(NULL, NULL, 3) == 0, "both NULL");
+	torture_assert(tctx, strncasecmp_m(file_iso8859_1, file_utf8, 6) != 0,
+		"file.{accented e} should differ");
 	return true;
 }
 
-- 
1.9.1


From 64271c493df66f1dc67fae7fa14ec75f49db65af Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl@samba.org>
Date: Tue, 5 Aug 2014 09:21:07 +0000
Subject: [PATCH 4/4] lib: strings: Simplify strcasecmp

This makes us fallback to strcasecmp early if any INVALID_CODEPOINT
appears. Without this patch we just continue to compare if both strings
happen to have an INVALID_CODEPOINT in the same spot.

Signed-off-by: Volker Lendecke <vl@samba.org>
Reviewed-by: Jeremy Allison <jra@samba.org>
---
 lib/util/charset/util_str.c | 48 ++++++++++++---------------------------------
 1 file changed, 13 insertions(+), 35 deletions(-)

diff --git a/lib/util/charset/util_str.c b/lib/util/charset/util_str.c
index f62c999..d2e6cbb 100644
--- a/lib/util/charset/util_str.c
+++ b/lib/util/charset/util_str.c
@@ -47,6 +47,11 @@ _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
 		c1 = next_codepoint_handle(iconv_handle, s1, &size1);
 		c2 = next_codepoint_handle(iconv_handle, s2, &size2);
 
+		if (c1 == INVALID_CODEPOINT ||
+		    c2 == INVALID_CODEPOINT) {
+			return strcasecmp(s1, s2);
+		}
+
 		s1 += size1;
 		s2 += size2;
 
@@ -54,22 +59,6 @@ _PUBLIC_ int strcasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
 			continue;
 		}
 
-		if (c1 == INVALID_CODEPOINT ||
-		    c2 == INVALID_CODEPOINT) {
-			/*
-			 * Fall back to byte
-			 * comparison. We must
-			 * step back by the codepoint
-			 * length we just incremented
-			 * - otherwise we are not
-			 * checking the bytes that
-			 * failed the conversion.
-			 */
-			s1 -= size1;
-			s2 -= size2;
-			return strcasecmp(s1, s2);
-		}
-
 		if (toupper_m(c1) != toupper_m(c2)) {
 			return c1 - c2;
 		}
@@ -107,27 +96,9 @@ _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
 		c1 = next_codepoint_handle(iconv_handle, s1, &size1);
 		c2 = next_codepoint_handle(iconv_handle, s2, &size2);
 
-		s1 += size1;
-		s2 += size2;
-
-		if (c1 == c2) {
-			continue;
-		}
-
 		if (c1 == INVALID_CODEPOINT ||
 		    c2 == INVALID_CODEPOINT) {
 			/*
-			 * Fall back to byte
-			 * comparison. We must
-			 * step back by the codepoint
-			 * length we just incremented
-			 * by - otherwise we are not
-			 * checking the bytes that
-			 * failed the conversion.
-			 */
-			s1 -= size1;
-			s2 -= size2;
-			/*
 			 * n was specified in characters,
 			 * now we must convert it to bytes.
 			 * As bytes are the smallest
@@ -139,12 +110,19 @@ _PUBLIC_ int strncasecmp_m_handle(struct smb_iconv_handle *iconv_handle,
 			 * to be n characters long, so we are
 			 * guaranteed to be able to look at the
 			 * (n remaining + size1) bytes from the
-			 * new (s1 - size1) position).
+			 * s1 position).
 			 */
 			n += size1;
 			return strncasecmp(s1, s2, n);
 		}
 
+		s1 += size1;
+		s2 += size2;
+
+		if (c1 == c2) {
+			continue;
+		}
+
 		if (toupper_m(c1) != toupper_m(c2)) {
 			return c1 - c2;
 		}
-- 
1.9.1