From 30a45aa33c72cef1477576268cc472a98bbc6db3 Mon Sep 17 00:00:00 2001 From: Noel Power Date: Mon, 12 Nov 2018 17:42:51 +0000 Subject: [PATCH 1/4] lib/ldb/tests/python: Add test to pass utf8 encoded bytes to ldb.Dn This test should demonstrate an error with the 'es' format in python where a 'str' byte-string is passed (containing utf8 encoded bytes) with some characters that cannot be decoded as ascii. The same code if run in python3 should generate an error (needs string not bytes) Signed-off-by: Noel Power --- lib/ldb/tests/python/api.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/lib/ldb/tests/python/api.py b/lib/ldb/tests/python/api.py index 0a883961c00..05e63544fda 100755 --- a/lib/ldb/tests/python/api.py +++ b/lib/ldb/tests/python/api.py @@ -9,6 +9,7 @@ import gc import time import ldb import shutil +from samba.compat import PY3 PY3 = sys.version_info > (3, 0) @@ -141,6 +142,19 @@ class SimpleLdb(LdbBaseTest): l = ldb.Ldb(self.url(), flags=self.flags()) dn = ldb.Dn(l, (b'a=' + b'\xc4\x85\xc4\x87\xc4\x99\xc5\x82\xc5\x84\xc3\xb3\xc5\x9b\xc5\xba\xc5\xbc').decode('utf8')) + def test_utf8_encoded_ldb_Dn(self): + l = ldb.Ldb(self.url(), flags=self.flags()) + dn_encoded_utf8 = b'a=' + b'\xc4\x85\xc4\x87\xc4\x99\xc5\x82\xc5\x84\xc3\xb3\xc5\x9b\xc5\xba\xc5\xbc' + try: + dn = ldb.Dn(l, dn_encoded_utf8) + except UnicodeDecodeError as e: + raise + except TypeError as te: + if PY3: + self.assertEqual(str(te), "argument 2 must be str, not bytes") + else: + raise + def test_search_attrs(self): l = ldb.Ldb(self.url(), flags=self.flags()) self.assertEqual(len(l.search(ldb.Dn(l, ""), ldb.SCOPE_SUBTREE, "(dc=*)", ["dc"])), 0) -- 2.16.4 From 8cfbe3a6ea49c8b0dceb394a441f6cea5006de03 Mon Sep 17 00:00:00 2001 From: Noel Power Date: Mon, 12 Nov 2018 17:49:30 +0000 Subject: [PATCH 2/4] selftest: Add knownfail for ldb.Dn passed utf8 encoded byte string Although we now have "es" as the format for ParseTuple this unexpectedly results in a UnicodeDecodeError. In python2 the 'es' format attempts to reencode byte str objects (unlike the 's' format) BUG: https://bugzilla.samba.org/show_bug.cgi?id=13616 Signed-off-by: Noel Power --- selftest/knownfail | 2 ++ 1 file changed, 2 insertions(+) diff --git a/selftest/knownfail b/selftest/knownfail index baf3d57a31a..f5d68d3ce46 100644 --- a/selftest/knownfail +++ b/selftest/knownfail @@ -349,3 +349,5 @@ # Disabling NTLM means you can't use samr to change the password ^samba.tests.ntlmdisabled.python\(ktest\).ntlmdisabled.NtlmDisabledTests.test_samr_change_password\(ktest\) ^samba.tests.ntlmdisabled.python\(ad_dc_no_ntlm\).ntlmdisabled.NtlmDisabledTests.test_ntlm_connection\(ad_dc_no_ntlm\) +# Ldb test api.SimpleLdb.test_utf8_ldb_Dn is expected to fail +^ldb.python.api.SimpleLdb.test_utf8_encoded_ldb_Dn(none) -- 2.16.4 From 52139aaeaa4c9fcad9088a83fff9ad93350af626 Mon Sep 17 00:00:00 2001 From: Noel Power Date: Mon, 12 Nov 2018 17:56:46 +0000 Subject: [PATCH 3/4] selftest: Enable ldb.python for PY3 Signed-off-by: Noel Power --- selftest/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/selftest/tests.py b/selftest/tests.py index 8c9102968e3..e9ea353475e 100644 --- a/selftest/tests.py +++ b/selftest/tests.py @@ -53,7 +53,7 @@ else: planpythontestsuite("none", "subunit.tests.test_suite") planpythontestsuite("none", "samba.tests.blackbox.ndrdump", py3_compatible=True) planpythontestsuite("none", "samba.tests.blackbox.check_output", py3_compatible=True) -planpythontestsuite("none", "api", name="ldb.python", extra_path=['lib/ldb/tests/python']) +planpythontestsuite("none", "api", name="ldb.python", extra_path=['lib/ldb/tests/python'], py3_compatible=True) planpythontestsuite("none", "samba.tests.credentials", py3_compatible=True) planpythontestsuite("none", "samba.tests.registry", py3_compatible=True) planpythontestsuite("ad_dc_ntvfs:local", "samba.tests.auth", py3_compatible=True) -- 2.16.4 From 253fce40dd15a3d62daeb1d045353bac96126b77 Mon Sep 17 00:00:00 2001 From: Noel Power Date: Mon, 12 Nov 2018 16:06:10 +0000 Subject: [PATCH 4/4] lib/ldb: Use new PYARG_ES format for parseTuple While 'es' format works great for unicode (in python2) and str (in python3) The behaviour with str (in python2) is unexpected. In python2 the str type is (re-encoded) with the specified encoding. In python2 the 'et' type would be a better match, that ensures 'str' type is treated like it was with 's' (no reencoding) and unicode is encoded with the specified encoding. However in pytho3 'et' allows byte (or bytearray) params to be accepted (with no reencoding), we don't want this. This patch adds a new PYARG_ES format code which is a hybrid, in python2 it evaluates to 'et' and in python3 'es' and so gives the desired behaviour for each python version. Additionally remove the associated known fail. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13616 Signed-off-by: Noel Power --- lib/ldb/pyldb.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/lib/ldb/pyldb.c b/lib/ldb/pyldb.c index a6290d9db09..109ae365443 100644 --- a/lib/ldb/pyldb.c +++ b/lib/ldb/pyldb.c @@ -89,6 +89,9 @@ static struct ldb_message_element *PyObject_AsMessageElement( #define PyStr_AsUTF8 PyUnicode_AsUTF8 #define PyStr_AsUTF8AndSize PyUnicode_AsUTF8AndSize #define PyInt_FromLong PyLong_FromLong + +#define PYARG_ES "es" + #else #define PyStr_Check PyString_Check #define PyStr_FromString PyString_FromString @@ -97,6 +100,16 @@ static struct ldb_message_element *PyObject_AsMessageElement( #define PyStr_FromFormatV PyString_FromFormatV #define PyStr_AsUTF8 PyString_AsString +/* + * We want a format that will ensure unicode is encoded using the + * specified encoding 'utf8' (to obtain the char* array) + * In python3 we use "es" but in python2 the specifiying 'es' will + * result in the any incomming 'str' type being decoded first to ascii + * then encoded to the specified 'utf8' encoding. In order to avoid that + * we use format 'et' in python2 instead. + */ +#define PYARG_ES "et" + const char *PyStr_AsUTF8AndSize(PyObject *pystr, Py_ssize_t *sizeptr); const char * PyStr_AsUTF8AndSize(PyObject *pystr, Py_ssize_t *sizeptr) @@ -865,11 +878,11 @@ static PyObject *py_ldb_dn_new(PyTypeObject *type, PyObject *args, PyObject *kwa PyLdbDnObject *py_ret = NULL; const char * const kwnames[] = { "ldb", "dn", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oes", + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O"PYARG_ES, discard_const_p(char *, kwnames), - &py_ldb, "utf8", &str)) + &py_ldb, "utf8", &str)) { goto out; - + } if (!PyLdb_Check(py_ldb)) { PyErr_SetString(PyExc_TypeError, "Expected Ldb"); goto out; -- 2.16.4