The Samba-Bugzilla – Attachment 6041 Details for
Bug 7594
"wbinfo -u" and "wbinfo -g" gives no output (log=>ndr_pull_error)
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
Proposed patchset for fixing the bug.
patchset-bug7594.mbox (text/plain), 15.58 KB, created by
Michael Adam
on 2010-11-01 09:07:43 UTC
(hide)
Description:
Proposed patchset for fixing the bug.
Filename:
MIME Type:
Creator:
Michael Adam
Created:
2010-11-01 09:07:43 UTC
Size:
15.58 KB
patch
obsolete
>From 4af322cad402fb19de7f77cd29f61f9200f14aba Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Fri, 29 Oct 2010 22:06:05 +0200 >Subject: [PATCH 1/7] s3:lib/charcnv: add next_codepoint_ext() that accepts input charset. > >next_codepoint() takes as string in CH_UNIX encoding and returns the >unicode codepoint of the next (possibly multibyte) character of the >input string. > >The new next_codepoint_ext() function adds the encoding of the input >string as a parameter. next_codepoint() now only calls next_codepoint_ext() >with CH_UNIX als src_charset argument. >(cherry picked from commit b887a7b33a855bc3ac6b06f497136b371340d46a) >--- > source3/include/proto.h | 2 + > source3/lib/charcnv.c | 74 +++++++++++++++++++++++++++++++--------------- > 2 files changed, 52 insertions(+), 24 deletions(-) > >diff --git a/source3/include/proto.h b/source3/include/proto.h >index 5064fdb..c9c0b26 100644 >--- a/source3/include/proto.h >+++ b/source3/include/proto.h >@@ -410,6 +410,8 @@ size_t pull_string_talloc_fn(const char *function, > size_t src_len, > int flags); > size_t align_string(const void *base_ptr, const char *p, int flags); >+codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, >+ size_t *bytes_consumed); > codepoint_t next_codepoint(const char *str, size_t *size); > > /* The following definitions come from lib/clobber.c */ >diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c >index 9ac9930..03d1031 100644 >--- a/source3/lib/charcnv.c >+++ b/source3/lib/charcnv.c >@@ -1793,17 +1793,23 @@ size_t align_string(const void *base_ptr, const char *p, int flags) > return 0; > } > >-/* >- Return the unicode codepoint for the next multi-byte CH_UNIX character >- in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value. >- >- Also return the number of bytes consumed (which tells the caller >- how many bytes to skip to get to the next CH_UNIX character). 
>- >- Return INVALID_CODEPOINT if the next character cannot be converted. >-*/ >+/** >+ * Return the unicode codepoint for the next character in the input >+ * string in the given src_charset. >+ * The unicode codepoint (codepoint_t) is an unsinged 32 bit value. >+ * >+ * Also return the number of bytes consumed (which tells the caller >+ * how many bytes to skip to get to the next src_charset-character). >+ * >+ * This is implemented (in the non-ascii-case) by first converting the >+ * next character in the input string to UTF16_LE and then calculating >+ * the unicode codepoint from that. >+ * >+ * Return INVALID_CODEPOINT if the next character cannot be converted. >+ */ > >-codepoint_t next_codepoint(const char *str, size_t *size) >+codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, >+ size_t *bytes_consumed) > { > /* It cannot occupy more than 4 bytes in UTF16 format */ > uint8_t buf[4]; >@@ -1813,41 +1819,46 @@ codepoint_t next_codepoint(const char *str, size_t *size) > size_t olen; > char *outbuf; > >+ /* fastpath if the character is ASCII */ > if ((str[0] & 0x80) == 0) { >- *size = 1; >+ *bytes_consumed = 1; > return (codepoint_t)str[0]; > } > >- /* We assume that no multi-byte character can take >- more than 5 bytes. This is OK as we only >- support codepoints up to 1M */ >+ /* >+ * We assume that no multi-byte character can take more than >+ * 5 bytes. This is OK as we only support codepoints up to 1M >+ */ > > ilen_orig = strnlen(str, 5); > ilen = ilen_orig; > >- lazy_initialize_conv(); >+ lazy_initialize_conv(); > >- descriptor = conv_handles[CH_UNIX][CH_UTF16LE]; >+ descriptor = conv_handles[src_charset][CH_UTF16LE]; > if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { >- *size = 1; >+ *bytes_consumed = 1; > return INVALID_CODEPOINT; > } > >- /* This looks a little strange, but it is needed to cope >- with codepoints above 64k which are encoded as per RFC2781. 
*/ >+ /* >+ * This looks a little strange, but it is needed to cope >+ * with codepoints above 64k which are encoded as per RFC2781. >+ */ > olen = 2; > outbuf = (char *)buf; > smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); > if (olen == 2) { >- /* We failed to convert to a 2 byte character. >- See if we can convert to a 4 UTF16-LE byte char encoding. >- */ >+ /* >+ * We failed to convert to a 2 byte character. >+ * See if we can convert to a 4 UTF16-LE byte char encoding. >+ */ > olen = 4; > outbuf = (char *)buf; > smb_iconv(descriptor, &str, &ilen, &outbuf, &olen); > if (olen == 4) { > /* We didn't convert any bytes */ >- *size = 1; >+ *bytes_consumed = 1; > return INVALID_CODEPOINT; > } > olen = 4 - olen; >@@ -1855,7 +1866,7 @@ codepoint_t next_codepoint(const char *str, size_t *size) > olen = 2 - olen; > } > >- *size = ilen_orig - ilen; >+ *bytes_consumed = ilen_orig - ilen; > > if (olen == 2) { > /* 2 byte, UTF16-LE encoded value. */ >@@ -1877,6 +1888,21 @@ codepoint_t next_codepoint(const char *str, size_t *size) > } > > /* >+ Return the unicode codepoint for the next multi-byte CH_UNIX character >+ in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value. >+ >+ Also return the number of bytes consumed (which tells the caller >+ how many bytes to skip to get to the next CH_UNIX character). >+ >+ Return INVALID_CODEPOINT if the next character cannot be converted. >+*/ >+ >+codepoint_t next_codepoint(const char *str, size_t *size) >+{ >+ return next_codepoint_ext(str, CH_UNIX, size); >+} >+ >+/* > push a single codepoint into a CH_UNIX string the target string must > be able to hold the full character, which is guaranteed if it is at > least 5 bytes in size. 
The caller may pass less than 5 bytes if they >-- >1.6.3.3 > > >From 111e2a3bd4663e22cae3b013a09a7426f8cc01ed Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Fri, 29 Oct 2010 22:11:30 +0200 >Subject: [PATCH 2/7] s3:lib/charcnv: clarify comments in next_codepoint_ext() > >(giving the unicod U+<hexnumber> notation of the codepoints > referred to in the comments) >(cherry picked from commit bd874fec1ca70cdb1d1551ffcc8be51bb95c8d26) >--- > source3/lib/charcnv.c | 4 ++-- > 1 files changed, 2 insertions(+), 2 deletions(-) > >diff --git a/source3/lib/charcnv.c b/source3/lib/charcnv.c >index 03d1031..9c76e1b 100644 >--- a/source3/lib/charcnv.c >+++ b/source3/lib/charcnv.c >@@ -1827,7 +1827,7 @@ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, > > /* > * We assume that no multi-byte character can take more than >- * 5 bytes. This is OK as we only support codepoints up to 1M >+ * 5 bytes. This is OK as we only support codepoints up to 1M (U+100000) > */ > > ilen_orig = strnlen(str, 5); >@@ -1843,7 +1843,7 @@ codepoint_t next_codepoint_ext(const char *str, charset_t src_charset, > > /* > * This looks a little strange, but it is needed to cope >- * with codepoints above 64k which are encoded as per RFC2781. >+ * with codepoints above 64k (U+10000) which are encoded as per RFC2781. > */ > olen = 2; > outbuf = (char *)buf; >-- >1.6.3.3 > > >From ab4474abd92b0b1134e04166e250edc03bbc8af4 Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Fri, 29 Oct 2010 22:21:47 +0200 >Subject: [PATCH 3/7] s3:util_str: clarify the comment header for strlen_m(). 
> (cherry picked from commit cd79c661994530e6bd26aae1a7977a3dc04d42c0) > >--- > source3/lib/util_str.c | 10 ++++++---- > 1 files changed, 6 insertions(+), 4 deletions(-) > >diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c >index 9a0b12a..7b2ee05 100644 >--- a/source3/lib/util_str.c >+++ b/source3/lib/util_str.c >@@ -1454,10 +1454,12 @@ void strupper_m(char *s) > } > > /** >- Count the number of UCS2 characters in a string. Normally this will >- be the same as the number of bytes in a string for single byte strings, >- but will be different for multibyte. >-**/ >+ * Calculate the number of 16-bit units that would be needed to convert >+ * the input string which is expected to be in CH_UNIX encoding to UTF16. >+ * >+ * This will be the same as the number of bytes in a string for single >+ * byte strings, but will be different for multibyte. >+ */ > > size_t strlen_m(const char *s) > { >-- >1.6.3.3 > > >From 2eb186e21fc5a0a73d4fbcc331eb79b50ad9d34e Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Sat, 30 Oct 2010 02:03:02 +0200 >Subject: [PATCH 4/7] s3:util_str: add strlen_m_ext that takes input and output charset > >The function calculates the number of units (8 or 16-bit, depending >on the destination charset), that would be needed to convert the >input string which is expected to be in src_charset encoding >to the dst_charset (which should be a unicode charset). 
>(cherry picked from commit 805f7331063db4a5e1156091bff4da0856daa2c2) >--- > source3/include/proto.h | 2 + > source3/lib/util_str.c | 70 ++++++++++++++++++++++++++++++++++++++--------- > 2 files changed, 59 insertions(+), 13 deletions(-) > >diff --git a/source3/include/proto.h b/source3/include/proto.h >index c9c0b26..3c36488 100644 >--- a/source3/include/proto.h >+++ b/source3/include/proto.h >@@ -1541,6 +1541,8 @@ char *strnrchr_m(const char *s, char c, unsigned int n); > char *strstr_m(const char *src, const char *findstr); > void strlower_m(char *s); > void strupper_m(char *s); >+size_t strlen_m_ext(const char *s, const charset_t src_charset, >+ const charset_t dst_charset); > size_t strlen_m(const char *s); > size_t strlen_m_term(const char *s); > size_t strlen_m_term_null(const char *s); >diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c >index 7b2ee05..5937c9d 100644 >--- a/source3/lib/util_str.c >+++ b/source3/lib/util_str.c >@@ -1454,14 +1454,14 @@ void strupper_m(char *s) > } > > /** >- * Calculate the number of 16-bit units that would be needed to convert >- * the input string which is expected to be in CH_UNIX encoding to UTF16. >- * >- * This will be the same as the number of bytes in a string for single >- * byte strings, but will be different for multibyte. >+ * Calculate the number of units (8 or 16-bit, depending on the >+ * destination charset), that would be needed to convert the input >+ * string which is expected to be in in src_charset encoding to the >+ * destination charset (which should be a unicode charset). > */ > >-size_t strlen_m(const char *s) >+size_t strlen_m_ext(const char *s, const charset_t src_charset, >+ const charset_t dst_charset) > { > size_t count = 0; > >@@ -1480,21 +1480,65 @@ size_t strlen_m(const char *s) > > while (*s) { > size_t c_size; >- codepoint_t c = next_codepoint(s, &c_size); >- if (c < 0x10000) { >- /* Unicode char fits into 16 bits. 
*/ >+ codepoint_t c = next_codepoint_ext(s, src_charset, &c_size); >+ s += c_size; >+ >+ switch (dst_charset) { >+ case CH_UTF16LE: >+ case CH_UTF16BE: >+ case CH_UTF16MUNGED: >+ if (c < 0x10000) { >+ /* Unicode char fits into 16 bits. */ >+ count += 1; >+ } else { >+ /* Double-width unicode char - 32 bits. */ >+ count += 2; >+ } >+ break; >+ case CH_UTF8: >+ /* >+ * this only checks ranges, and does not >+ * check for invalid codepoints >+ */ >+ if (c < 0x80) { >+ count += 1; >+ } else if (c < 0x800) { >+ count += 2; >+ } else if (c < 0x1000) { >+ count += 3; >+ } else { >+ count += 4; >+ } >+ break; >+ default: >+ /* non-unicode encoding - does not apply */ >+ return 0; >+ /* >+ * non-unicode encoding: >+ * assume that each codepoint fits into >+ * one unit in the destination encoding. >+ */ > count += 1; >- } else { >- /* Double-width unicode char - 32 bits. */ >- count += 2; > } >- s += c_size; > } > > return count; > } > > /** >+ * Calculate the number of 16-bit units that would bee needed to convert >+ * the input string which is expected to be in CH_UNIX encoding to UTF16. >+ * >+ * This will be the same as the number of bytes in a string for single >+ * byte strings, but will be different for multibyte. >+ */ >+ >+size_t strlen_m(const char *s) >+{ >+ return strlen_m_ext(s, CH_UNIX, CH_UTF16LE); >+} >+ >+/** > Count the number of UCS2 characters in a string including the null > terminator. 
> **/ >-- >1.6.3.3 > > >From 767cac2ba518d5bc1526dda519d7bbcec0853e7d Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Sun, 31 Oct 2010 02:02:16 +0200 >Subject: [PATCH 5/7] s3:util_str: add strlen_m_ext_term() - variant of strlen_m_ext() counting terminator > (cherry picked from commit f9cc1fa45dad2702ffdd155cec98ad4f51aac39a) > >--- > source3/include/proto.h | 2 ++ > source3/lib/util_str.c | 9 +++++++++ > 2 files changed, 11 insertions(+), 0 deletions(-) > >diff --git a/source3/include/proto.h b/source3/include/proto.h >index 3c36488..f817c2d 100644 >--- a/source3/include/proto.h >+++ b/source3/include/proto.h >@@ -1543,6 +1543,8 @@ void strlower_m(char *s); > void strupper_m(char *s); > size_t strlen_m_ext(const char *s, const charset_t src_charset, > const charset_t dst_charset); >+size_t strlen_m_ext_term(const char *s, const charset_t src_charset, >+ const charset_t dst_charset); > size_t strlen_m(const char *s); > size_t strlen_m_term(const char *s); > size_t strlen_m_term_null(const char *s); >diff --git a/source3/lib/util_str.c b/source3/lib/util_str.c >index 5937c9d..33c2bce 100644 >--- a/source3/lib/util_str.c >+++ b/source3/lib/util_str.c >@@ -1525,6 +1525,15 @@ size_t strlen_m_ext(const char *s, const charset_t src_charset, > return count; > } > >+size_t strlen_m_ext_term(const char *s, const charset_t src_charset, >+ const charset_t dst_charset) >+{ >+ if (!s) { >+ return 0; >+ } >+ return strlen_m_ext(s, src_charset, dst_charset) + 1; >+} >+ > /** > * Calculate the number of 16-bit units that would bee needed to convert > * the input string which is expected to be in CH_UNIX encoding to UTF16. >-- >1.6.3.3 > > >From 4bbd327651794becf5b0a67c6cab8589869faf28 Mon Sep 17 00:00:00 2001 >From: Stefan Metzmacher <metze@samba.org> >Date: Wed, 25 Aug 2010 10:05:15 +0200 >Subject: [PATCH 6/7] librpc/ndr: correctly implement ndr_charset_length() > >Before we ignored the charset type. 
> >metze > >Signed-off-by: Michael Adam <obnox@samba.org> >--- > source3/librpc/ndr/ndr_string.c | 16 ++++++++++++++-- > 1 files changed, 14 insertions(+), 2 deletions(-) > >diff --git a/source3/librpc/ndr/ndr_string.c b/source3/librpc/ndr/ndr_string.c >index 519be7b..51b0d89 100644 >--- a/source3/librpc/ndr/ndr_string.c >+++ b/source3/librpc/ndr/ndr_string.c >@@ -714,7 +714,19 @@ _PUBLIC_ enum ndr_err_code ndr_push_charset(struct ndr_push *ndr, int ndr_flags, > /* Return number of elements in a string in the specified charset */ > _PUBLIC_ uint32_t ndr_charset_length(const void *var, charset_t chset) > { >- /* FIXME: Treat special chars special here, taking chset into account */ >- /* Also include 0 byte */ >+ switch (chset) { >+ /* case CH_UTF16: this has the same value as CH_UTF16LE */ >+ case CH_UTF16LE: >+ case CH_UTF16BE: >+ case CH_UTF16MUNGED: >+ return strlen_m_term((const char *)var); >+ case CH_DISPLAY: >+ case CH_DOS: >+ case CH_UNIX: >+ case CH_UTF8: >+ return strlen((const char *)var)+1; >+ } >+ >+ /* Fallback, this should never happen */ > return strlen((const char *)var)+1; > } >-- >1.6.3.3 > > >From 7b9b874dc434763ef83397997840d203a656f97c Mon Sep 17 00:00:00 2001 >From: Michael Adam <obnox@samba.org> >Date: Sun, 31 Oct 2010 02:04:25 +0200 >Subject: [PATCH 7/7] s3:librpc/ndr: use new strlen_m_ext_term() in ndr_charset_length(): fix bug #7594 > >This fixes the calculation of needed space for destination unicode charset. 
>--- > source3/librpc/ndr/ndr_string.c | 4 ++-- > 1 files changed, 2 insertions(+), 2 deletions(-) > >diff --git a/source3/librpc/ndr/ndr_string.c b/source3/librpc/ndr/ndr_string.c >index 51b0d89..ec745e9 100644 >--- a/source3/librpc/ndr/ndr_string.c >+++ b/source3/librpc/ndr/ndr_string.c >@@ -719,11 +719,11 @@ _PUBLIC_ uint32_t ndr_charset_length(const void *var, charset_t chset) > case CH_UTF16LE: > case CH_UTF16BE: > case CH_UTF16MUNGED: >- return strlen_m_term((const char *)var); >+ case CH_UTF8: >+ return strlen_m_ext_term((const char *)var, CH_UNIX, chset); > case CH_DISPLAY: > case CH_DOS: > case CH_UNIX: >- case CH_UTF8: > return strlen((const char *)var)+1; > } > >-- >1.6.3.3 >
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page.
View Attachment As Raw
Actions:
View
Attachments on
bug 7594
:
5876
|
6041
|
6042