From 5be35a6d013078991ad025ff641177af78ca953b Mon Sep 17 00:00:00 2001 From: Joe Guo Date: Wed, 30 Jan 2019 15:52:08 +1300 Subject: [PATCH] s4/scripting/bin: open unicode files with utf8 encoding and write unicode string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In files like `libcli/util/werror_err_table.txt` and `libcli/util/ntstatus_err_table.txt`, there are unicode quote symbols at line 6: ...(“this documentation”)... `libcli/util/wscript_build` runs `gen_werror.py` and `gen_ntstatus.py`, which `open` the above files, read content from them and write to other files. When the encoding is not specified, `open` in both python 2 and 3 will guess the encoding from the locale. When the locale is not set, it defaults to POSIX or C, and then python will use the encoding `ANSI_X3.4-1968`. So, on a system where the locale is not set, `make` will fail with an encoding error for both python 2 and 3: File "/home/ubuntu/samba/source4/scripting/bin/gen_werror.py", line 139, in main errors = parseErrorDescriptions(input_file, True, transformErrorName) File "/home/ubuntu/samba/source4/scripting/bin/gen_error_common.py", line 52, in parseErrorDescriptions for line in file_contents: File "/usr/lib/python3.5/encodings/ascii.py", line 26, in decode return codecs.ascii_decode(input, self.errors)[0] UnicodeDecodeError: 'ascii' codec can't decode byte 0xe2 in position 318: ordinal not in range(128) In this case, we have to use `io.open` with `encoding='utf8'`. However, we then get unicode strs and try to write them together with other strs into a new file, which means the new file must also be opened with utf-8 and all other strs have to be unicode, too. Instead of prefixing `u` to all strs, an easier and more elegant way is to enable unicode literals for the python scripts, which we normally don't do in samba. Since both `gen_werror.py` and `gen_ntstatus.py` are bin scripts and no other modules import them, it should be ok for this case. 
Signed-off-by: Joe Guo Autobuild-User(master): Douglas Bagnall Autobuild-Date(master): Fri Feb 8 06:34:47 CET 2019 on sn-devel-144 (cherry picked from commit 87149445af26b8577566dfe5e311b32e3650c6e6) --- source4/scripting/bin/gen_ntstatus.py | 11 +++++++---- source4/scripting/bin/gen_werror.py | 11 +++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/source4/scripting/bin/gen_ntstatus.py b/source4/scripting/bin/gen_ntstatus.py index e0d4fe3..7691e7b 100755 --- a/source4/scripting/bin/gen_ntstatus.py +++ b/source4/scripting/bin/gen_ntstatus.py @@ -20,6 +20,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . # +from __future__ import unicode_literals +# this file is a bin script and was not imported by any other modules +# so it should be fine to enable unicode string for python2 import sys, os.path, io, string from gen_error_common import parseErrorDescriptions, ErrorDef @@ -134,20 +137,20 @@ def main (): sys.exit() # read in the data - file_contents = open(input_file, "r") + file_contents = io.open(input_file, "rt", encoding='utf8') errors = parseErrorDescriptions(file_contents, False, transformErrorName) print("writing new header file: %s" % gen_headerfile_name) - out_file = open(gen_headerfile_name, "w") + out_file = io.open(gen_headerfile_name, "wt", encoding='utf8') generateHeaderFile(out_file, errors) out_file.close() print("writing new source file: %s" % gen_sourcefile_name) - out_file = open(gen_sourcefile_name, "w") + out_file = io.open(gen_sourcefile_name, "wt", encoding='utf8') generateSourceFile(out_file, errors) out_file.close() print("writing new python file: %s" % gen_pythonfile_name) - out_file = open(gen_pythonfile_name, "w") + out_file = io.open(gen_pythonfile_name, "wt", encoding='utf8') generatePythonFile(out_file, errors) out_file.close() diff --git a/source4/scripting/bin/gen_werror.py b/source4/scripting/bin/gen_werror.py index 700ccaf..6280d2d 100755 --- 
a/source4/scripting/bin/gen_werror.py +++ b/source4/scripting/bin/gen_werror.py @@ -20,6 +20,9 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . # +from __future__ import unicode_literals +# this file is a bin script and was not imported by any other modules +# so it should be fine to enable unicode string for python2 import sys, os.path, io, string from gen_error_common import parseErrorDescriptions, ErrorDef @@ -135,20 +138,20 @@ def main(): print("usage: %s winerrorfile headerfile sourcefile pythonfile" % sys.argv[0]) sys.exit() - input_file = open(input_file_name, "r") + input_file = io.open(input_file_name, "rt", encoding='utf8') errors = parseErrorDescriptions(input_file, True, transformErrorName) input_file.close() print("writing new header file: %s" % gen_headerfile_name) - out_file = open(gen_headerfile_name, "w") + out_file = io.open(gen_headerfile_name, "wt", encoding='utf8') generateHeaderFile(out_file, errors) out_file.close() print("writing new source file: %s" % gen_sourcefile_name) - out_file = open(gen_sourcefile_name, "w") + out_file = io.open(gen_sourcefile_name, "wt", encoding='utf8') generateSourceFile(out_file, errors) out_file.close() print("writing new python file: %s" % gen_pythonfile_name) - out_file = open(gen_pythonfile_name, "w") + out_file = io.open(gen_pythonfile_name, "wt", encoding='utf8') generatePythonFile(out_file, errors) out_file.close() -- 2.7.4