From 14512c2aaa190dc90aedd034625ecf7c383053ca Mon Sep 17 00:00:00 2001 From: Michael Adam Date: Mon, 1 Nov 2010 16:28:43 +0100 Subject: [PATCH 1/4] s3:lib/util_str: add strlen_m_ext() that takes the dest charset as a parameter. --- source3/include/proto.h | 1 + source3/lib/util_str.c | 64 ++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/source3/include/proto.h b/source3/include/proto.h index 5064fdb..348b8b2 100644 --- a/source3/include/proto.h +++ b/source3/include/proto.h @@ -1539,6 +1539,7 @@ char *strnrchr_m(const char *s, char c, unsigned int n); char *strstr_m(const char *src, const char *findstr); void strlower_m(char *s); void strupper_m(char *s); +size_t strlen_m_ext(const char *s, const charset_t dst_charset); size_t strlen_m(const char *s); size_t strlen_m_term(const char *s); size_t strlen_m_term_null(const char *s); diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index 9a0b12a..f0eb6e5 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -1454,12 +1454,12 @@ void strupper_m(char *s) } /** - Count the number of UCS2 characters in a string. Normally this will - be the same as the number of bytes in a string for single byte strings, - but will be different for multibyte. -**/ - -size_t strlen_m(const char *s) + * Calculate the number of units (8 or 16-bit, depending on the + * destination charset), that would be needed to convert the input + * string which is expected to be in CH_UNIX encoding to the + * destination charset (which should be a unicode charset). + */ +size_t strlen_m_ext(const char *s, const charset_t dst_charset) { size_t count = 0; @@ -1479,20 +1479,60 @@ size_t strlen_m(const char *s) while (*s) { size_t c_size; codepoint_t c = next_codepoint(s, &c_size); - if (c < 0x10000) { - /* Unicode char fits into 16 bits. 
*/ + s += c_size; + + switch(dst_charset) { + case CH_UTF16LE: + case CH_UTF16BE: + case CH_UTF16MUNGED: + if (c < 0x10000) { + /* Unicode char fits into 16 bits. */ + count += 1; + } else { + /* Double-width unicode char - 32 bits. */ + count += 2; + } + break; + case CH_UTF8: + /* + * this only checks ranges, and does not + * check for invalid codepoints (U+0800..U+FFFF need 3 bytes) + */ + if (c < 0x80) { + count += 1; + } else if (c < 0x800) { + count += 2; + } else if (c < 0x10000) { + count += 3; + } else { + count += 4; + } + break; + default: + /* + * non-unicode encoding: + * assume that each codepoint fits into + * one unit in the destination encoding. + */ count += 1; - } else { - /* Double-width unicode char - 32 bits. */ - count += 2; } - s += c_size; } return count; } /** + Count the number of UCS2 characters in a string. Normally this will + be the same as the number of bytes in a string for single byte strings, + but will be different for multibyte. +**/ + +size_t strlen_m(const char *s) +{ + return strlen_m_ext(s, CH_UTF16LE); +} + +/** Count the number of UCS2 characters in a string including the null terminator. 
**/ -- 1.6.3.3 From 4162c104a8cdb2e74c41dd5e93773fb5410bac53 Mon Sep 17 00:00:00 2001 From: Michael Adam Date: Sun, 31 Oct 2010 02:02:16 +0200 Subject: [PATCH 2/4] s3:lib/util_str: add strlen_m_ext_term() - variant of strlen_m_ext() counting terminator --- source3/include/proto.h | 1 + source3/lib/util_str.c | 8 ++++++++ 2 files changed, 9 insertions(+), 0 deletions(-) diff --git a/source3/include/proto.h b/source3/include/proto.h index 348b8b2..f7bfc2a 100644 --- a/source3/include/proto.h +++ b/source3/include/proto.h @@ -1540,6 +1540,7 @@ char *strstr_m(const char *src, const char *findstr); void strlower_m(char *s); void strupper_m(char *s); size_t strlen_m_ext(const char *s, const charset_t dst_charset); +size_t strlen_m_ext_term(const char *s, const charset_t dst_charset); size_t strlen_m(const char *s); size_t strlen_m_term(const char *s); size_t strlen_m_term_null(const char *s); diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c index f0eb6e5..3da2b83 100644 --- a/source3/lib/util_str.c +++ b/source3/lib/util_str.c @@ -1521,6 +1521,14 @@ size_t strlen_m_ext(const char *s, const charset_t dst_charset) return count; } +size_t strlen_m_ext_term(const char *s, const charset_t dst_charset) +{ + if (!s) { + return 0; + } + return strlen_m_ext(s, dst_charset) + 1; +} + /** Count the number of UCS2 characters in a string. Normally this will be the same as the number of bytes in a string for single byte strings, -- 1.6.3.3 From 37e8b132ac53c14052a1d86806a4e8252e1908b4 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 25 Aug 2010 10:05:15 +0200 Subject: [PATCH 3/4] librpc/ndr: correctly implement ndr_charset_length() Before we ignored the charset type. 
metze Signed-off-by: Michael Adam --- source3/librpc/ndr/ndr_string.c | 16 ++++++++++++++-- 1 files changed, 14 insertions(+), 2 deletions(-) diff --git a/source3/librpc/ndr/ndr_string.c b/source3/librpc/ndr/ndr_string.c index 519be7b..51b0d89 100644 --- a/source3/librpc/ndr/ndr_string.c +++ b/source3/librpc/ndr/ndr_string.c @@ -714,7 +714,19 @@ _PUBLIC_ enum ndr_err_code ndr_push_charset(struct ndr_push *ndr, int ndr_flags, /* Return number of elements in a string in the specified charset */ _PUBLIC_ uint32_t ndr_charset_length(const void *var, charset_t chset) { - /* FIXME: Treat special chars special here, taking chset into account */ - /* Also include 0 byte */ + switch (chset) { + /* case CH_UTF16: this has the same value as CH_UTF16LE */ + case CH_UTF16LE: + case CH_UTF16BE: + case CH_UTF16MUNGED: + return strlen_m_term((const char *)var); + case CH_DISPLAY: + case CH_DOS: + case CH_UNIX: + case CH_UTF8: + return strlen((const char *)var)+1; + } + + /* Fallback, this should never happen */ return strlen((const char *)var)+1; } -- 1.6.3.3 From dbf264a9f655eba4075ef0853db5749c07910d39 Mon Sep 17 00:00:00 2001 From: Michael Adam Date: Sun, 31 Oct 2010 02:04:25 +0200 Subject: [PATCH 4/4] s3:librpc/ndr: use new strlen_m_ext_term() in ndr_charset_length(): fix bug #7594 This fixes the calculation of needed space for destination unicode charset. 
--- source3/librpc/ndr/ndr_string.c | 4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/source3/librpc/ndr/ndr_string.c b/source3/librpc/ndr/ndr_string.c index 51b0d89..f0c3e47 100644 --- a/source3/librpc/ndr/ndr_string.c +++ b/source3/librpc/ndr/ndr_string.c @@ -719,11 +719,11 @@ _PUBLIC_ uint32_t ndr_charset_length(const void *var, charset_t chset) case CH_UTF16LE: case CH_UTF16BE: case CH_UTF16MUNGED: - return strlen_m_term((const char *)var); + case CH_UTF8: + return strlen_m_ext_term((const char *)var, chset); case CH_DISPLAY: case CH_DOS: case CH_UNIX: - case CH_UTF8: return strlen((const char *)var)+1; } -- 1.6.3.3