From 263296a2e172dce3d7be7e4e934a14f85a6cb6a2 Mon Sep 17 00:00:00 2001 From: Noel Power Date: Mon, 12 Nov 2018 17:42:51 +0000 Subject: [PATCH 1/3] lib/ldb/tests/python: Add test to pass utf8 encoded bytes to ldb.Dn This test should demonstrate an error with the 'es' format in python where a 'str' byte-string is passed (containing utf8 encoded bytes) with some characters that cannot be decoded as ascii. The same code if run in python3 should generate an error (needs string not bytes) Signed-off-by: Noel Power --- lib/ldb/tests/python/api.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/lib/ldb/tests/python/api.py b/lib/ldb/tests/python/api.py index 0a883961c00..7f530724ed1 100755 --- a/lib/ldb/tests/python/api.py +++ b/lib/ldb/tests/python/api.py @@ -141,6 +141,21 @@ class SimpleLdb(LdbBaseTest): l = ldb.Ldb(self.url(), flags=self.flags()) dn = ldb.Dn(l, (b'a=' + b'\xc4\x85\xc4\x87\xc4\x99\xc5\x82\xc5\x84\xc3\xb3\xc5\x9b\xc5\xba\xc5\xbc').decode('utf8')) + def test_utf8_encoded_ldb_Dn(self): + l = ldb.Ldb(self.url(), flags=self.flags()) + dn_encoded_utf8 = b'a=' + b'\xc4\x85\xc4\x87\xc4\x99\xc5\x82\xc5\x84\xc3\xb3\xc5\x9b\xc5\xba\xc5\xbc' + try: + dn = ldb.Dn(l, dn_encoded_utf8) + except UnicodeDecodeError as e: + raise + except TypeError as te: + if PY3: + p3errors = ["argument 2 must be str, not bytes", + "Can't convert 'bytes' object to str implicitly"] + self.assertIn(str(te), p3errors) + else: + raise + def test_search_attrs(self): l = ldb.Ldb(self.url(), flags=self.flags()) self.assertEqual(len(l.search(ldb.Dn(l, ""), ldb.SCOPE_SUBTREE, "(dc=*)", ["dc"])), 0) -- 2.16.4 From 6f71f25d04d1836ed25fcfed270dfd8bad1d2099 Mon Sep 17 00:00:00 2001 From: Noel Power Date: Mon, 12 Nov 2018 17:49:30 +0000 Subject: [PATCH 2/3] selftest: Add knownfail for ldb.Dn passed utf8 encoded byte string Although we now have "es" as the format for ParseTuple this unexpectedly results in a UnicodeDecodeError. In python2 the 'es' format attempts to reencode byte str objects (unlike the 's' format) BUG: https://bugzilla.samba.org/show_bug.cgi?id=13616 Signed-off-by: Noel Power --- selftest/knownfail | 2 ++ 1 file changed, 2 insertions(+) diff --git a/selftest/knownfail b/selftest/knownfail index baf3d57a31a..f5d68d3ce46 100644 --- a/selftest/knownfail +++ b/selftest/knownfail @@ -349,3 +349,5 @@ # Disabling NTLM means you can't use samr to change the password ^samba.tests.ntlmdisabled.python\(ktest\).ntlmdisabled.NtlmDisabledTests.test_samr_change_password\(ktest\) ^samba.tests.ntlmdisabled.python\(ad_dc_no_ntlm\).ntlmdisabled.NtlmDisabledTests.test_ntlm_connection\(ad_dc_no_ntlm\) +# Ldb test api.SimpleLdb.test_utf8_ldb_Dn is expected to fail +^ldb.python.api.SimpleLdb.test_utf8_encoded_ldb_Dn(none) -- 2.16.4 From 887b9bd6ae30c1d7336ec59f426beeb54bdd575e Mon Sep 17 00:00:00 2001 From: Noel Power Date: Mon, 12 Nov 2018 16:06:10 +0000 Subject: [PATCH 3/3] lib/ldb: Use new PYARG_ES format for parseTuple While 'es' format works great for unicode (in python2) and str (in python3) The behaviour with str (in python2) is unexpected. In python2 the str type is (re-encoded) with the specified encoding. In python2 the 'et' type would be a better match, that ensures 'str' type is treated like it was with 's' (no reencoding) and unicode is encoded with the specified encoding. However in pytho3 'et' allows byte (or bytearray) params to be accepted (with no reencoding), we don't want this. This patch adds a new PYARG_ES format code which is a hybrid, in python2 it evaluates to 'et' and in python3 'es' and so gives the desired behaviour for each python version. Additionally remove the associated known fail. BUG: https://bugzilla.samba.org/show_bug.cgi?id=13616 Signed-off-by: Noel Power --- lib/ldb/pyldb.c | 19 ++++++++++++++++--- selftest/knownfail | 2 -- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/lib/ldb/pyldb.c b/lib/ldb/pyldb.c index a6290d9db09..109ae365443 100644 --- a/lib/ldb/pyldb.c +++ b/lib/ldb/pyldb.c @@ -89,6 +89,9 @@ static struct ldb_message_element *PyObject_AsMessageElement( #define PyStr_AsUTF8 PyUnicode_AsUTF8 #define PyStr_AsUTF8AndSize PyUnicode_AsUTF8AndSize #define PyInt_FromLong PyLong_FromLong + +#define PYARG_ES "es" + #else #define PyStr_Check PyString_Check #define PyStr_FromString PyString_FromString @@ -97,6 +100,16 @@ static struct ldb_message_element *PyObject_AsMessageElement( #define PyStr_FromFormatV PyString_FromFormatV #define PyStr_AsUTF8 PyString_AsString +/* + * We want a format that will ensure unicode is encoded using the + * specified encoding 'utf8' (to obtain the char* array) + * In python3 we use "es" but in python2 the specifiying 'es' will + * result in the any incomming 'str' type being decoded first to ascii + * then encoded to the specified 'utf8' encoding. In order to avoid that + * we use format 'et' in python2 instead. + */ +#define PYARG_ES "et" + const char *PyStr_AsUTF8AndSize(PyObject *pystr, Py_ssize_t *sizeptr); const char * PyStr_AsUTF8AndSize(PyObject *pystr, Py_ssize_t *sizeptr) @@ -865,11 +878,11 @@ static PyObject *py_ldb_dn_new(PyTypeObject *type, PyObject *args, PyObject *kwa PyLdbDnObject *py_ret = NULL; const char * const kwnames[] = { "ldb", "dn", NULL }; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "Oes", + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O"PYARG_ES, discard_const_p(char *, kwnames), - &py_ldb, "utf8", &str)) + &py_ldb, "utf8", &str)) { goto out; - + } if (!PyLdb_Check(py_ldb)) { PyErr_SetString(PyExc_TypeError, "Expected Ldb"); goto out; diff --git a/selftest/knownfail b/selftest/knownfail index f5d68d3ce46..baf3d57a31a 100644 --- a/selftest/knownfail +++ b/selftest/knownfail @@ -349,5 +349,3 @@ # Disabling NTLM means you can't use samr to change the password ^samba.tests.ntlmdisabled.python\(ktest\).ntlmdisabled.NtlmDisabledTests.test_samr_change_password\(ktest\) ^samba.tests.ntlmdisabled.python\(ad_dc_no_ntlm\).ntlmdisabled.NtlmDisabledTests.test_ntlm_connection\(ad_dc_no_ntlm\) -# Ldb test api.SimpleLdb.test_utf8_ldb_Dn is expected to fail -^ldb.python.api.SimpleLdb.test_utf8_encoded_ldb_Dn(none) -- 2.16.4