| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  | """
 | 
					
						
							|  |  |  | Test the implementation of the PEP 540: the UTF-8 Mode. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import locale | 
					
						
							|  |  |  | import sys | 
					
						
							|  |  |  | import textwrap | 
					
						
							|  |  |  | import unittest | 
					
						
							| 
									
										
										
										
											2017-12-16 04:54:22 +01:00
										 |  |  | from test import support | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  | from test.support.script_helper import assert_python_ok, assert_python_failure | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-26 02:11:06 +02:00
										 |  |  | MS_WINDOWS = (sys.platform == 'win32') | 
					
						
							| 
									
										
										
										
											2018-08-28 12:35:44 +02:00
										 |  |  | POSIX_LOCALES = ('C', 'POSIX') | 
					
						
							| 
									
										
										
										
											2018-06-26 02:11:06 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  | class UTF8ModeTests(unittest.TestCase): | 
					
						
							| 
									
										
										
										
											2017-12-16 04:54:22 +01:00
										 |  |  |     DEFAULT_ENV = { | 
					
						
							|  |  |  |         'PYTHONUTF8': '', | 
					
						
							|  |  |  |         'PYTHONLEGACYWINDOWSFSENCODING': '', | 
					
						
							|  |  |  |         'PYTHONCOERCECLOCALE': '0', | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def posix_locale(self): | 
					
						
							|  |  |  |         loc = locale.setlocale(locale.LC_CTYPE, None) | 
					
						
							| 
									
										
										
										
											2018-08-28 12:35:44 +02:00
										 |  |  |         return (loc in POSIX_LOCALES) | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def get_output(self, *args, failure=False, **kw): | 
					
						
							|  |  |  |         kw = dict(self.DEFAULT_ENV, **kw) | 
					
						
							|  |  |  |         if failure: | 
					
						
							|  |  |  |             out = assert_python_failure(*args, **kw) | 
					
						
							|  |  |  |             out = out[2] | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             out = assert_python_ok(*args, **kw) | 
					
						
							|  |  |  |             out = out[1] | 
					
						
							|  |  |  |         return out.decode().rstrip("\n\r") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-26 02:11:06 +02:00
										 |  |  |     @unittest.skipIf(MS_WINDOWS, 'Windows has no POSIX locale') | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  |     def test_posix_locale(self): | 
					
						
							|  |  |  |         code = 'import sys; print(sys.flags.utf8_mode)' | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-28 12:35:44 +02:00
										 |  |  |         for loc in POSIX_LOCALES: | 
					
						
							|  |  |  |             with self.subTest(LC_ALL=loc): | 
					
						
							|  |  |  |                 out = self.get_output('-c', code, LC_ALL=loc) | 
					
						
							|  |  |  |                 self.assertEqual(out, '1') | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_xoption(self): | 
					
						
							|  |  |  |         code = 'import sys; print(sys.flags.utf8_mode)' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8', '-c', code) | 
					
						
							|  |  |  |         self.assertEqual(out, '1') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # undocumented but accepted syntax: -X utf8=1 | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8=1', '-c', code) | 
					
						
							|  |  |  |         self.assertEqual(out, '1') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8=0', '-c', code) | 
					
						
							|  |  |  |         self.assertEqual(out, '0') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-26 02:11:06 +02:00
										 |  |  |         if MS_WINDOWS: | 
					
						
							| 
									
										
										
										
											2017-12-16 04:54:22 +01:00
										 |  |  |             # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 Mode | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  |             # and has the priority over -X utf8 | 
					
						
							|  |  |  |             out = self.get_output('-X', 'utf8', '-c', code, | 
					
						
							|  |  |  |                                   PYTHONLEGACYWINDOWSFSENCODING='1') | 
					
						
							|  |  |  |             self.assertEqual(out, '0') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_env_var(self): | 
					
						
							|  |  |  |         code = 'import sys; print(sys.flags.utf8_mode)' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         out = self.get_output('-c', code, PYTHONUTF8='1') | 
					
						
							|  |  |  |         self.assertEqual(out, '1') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         out = self.get_output('-c', code, PYTHONUTF8='0') | 
					
						
							|  |  |  |         self.assertEqual(out, '0') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # -X utf8 has the priority over PYTHONUTF8 | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8=0', '-c', code, PYTHONUTF8='1') | 
					
						
							|  |  |  |         self.assertEqual(out, '0') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-26 02:11:06 +02:00
										 |  |  |         if MS_WINDOWS: | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  |             # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode | 
					
						
							|  |  |  |             # and has the priority over PYTHONUTF8 | 
					
						
							|  |  |  |             out = self.get_output('-X', 'utf8', '-c', code, PYTHONUTF8='1', | 
					
						
							|  |  |  |                                   PYTHONLEGACYWINDOWSFSENCODING='1') | 
					
						
							|  |  |  |             self.assertEqual(out, '0') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Cannot test with the POSIX locale, since the POSIX locale enables | 
					
						
							|  |  |  |         # the UTF-8 mode | 
					
						
							|  |  |  |         if not self.posix_locale(): | 
					
						
							|  |  |  |             # PYTHONUTF8 should be ignored if -E is used | 
					
						
							|  |  |  |             out = self.get_output('-E', '-c', code, PYTHONUTF8='1') | 
					
						
							|  |  |  |             self.assertEqual(out, '0') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # invalid mode | 
					
						
							|  |  |  |         out = self.get_output('-c', code, PYTHONUTF8='xxx', failure=True) | 
					
						
							|  |  |  |         self.assertIn('invalid PYTHONUTF8 environment variable value', | 
					
						
							|  |  |  |                       out.rstrip()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_filesystemencoding(self): | 
					
						
							|  |  |  |         code = textwrap.dedent('''
 | 
					
						
							|  |  |  |             import sys | 
					
						
							|  |  |  |             print("{}/{}".format(sys.getfilesystemencoding(), | 
					
						
							|  |  |  |                                  sys.getfilesystemencodeerrors())) | 
					
						
							|  |  |  |         ''')
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-26 02:11:06 +02:00
										 |  |  |         if MS_WINDOWS: | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  |             expected = 'utf-8/surrogatepass' | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             expected = 'utf-8/surrogateescape' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8', '-c', code) | 
					
						
							|  |  |  |         self.assertEqual(out, expected) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-26 02:11:06 +02:00
										 |  |  |         if MS_WINDOWS: | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  |             # PYTHONLEGACYWINDOWSFSENCODING disables the UTF-8 mode | 
					
						
							|  |  |  |             # and has the priority over -X utf8 and PYTHONUTF8 | 
					
						
							|  |  |  |             out = self.get_output('-X', 'utf8', '-c', code, | 
					
						
							| 
									
										
										
										
											2018-09-19 14:56:36 -07:00
										 |  |  |                                   PYTHONUTF8='strict', | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  |                                   PYTHONLEGACYWINDOWSFSENCODING='1') | 
					
						
							|  |  |  |             self.assertEqual(out, 'mbcs/replace') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_stdio(self): | 
					
						
							|  |  |  |         code = textwrap.dedent('''
 | 
					
						
							|  |  |  |             import sys | 
					
						
							|  |  |  |             print(f"stdin: {sys.stdin.encoding}/{sys.stdin.errors}") | 
					
						
							|  |  |  |             print(f"stdout: {sys.stdout.encoding}/{sys.stdout.errors}") | 
					
						
							|  |  |  |             print(f"stderr: {sys.stderr.encoding}/{sys.stderr.errors}") | 
					
						
							|  |  |  |         ''')
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8', '-c', code, | 
					
						
							|  |  |  |                               PYTHONIOENCODING='') | 
					
						
							|  |  |  |         self.assertEqual(out.splitlines(), | 
					
						
							|  |  |  |                          ['stdin: utf-8/surrogateescape', | 
					
						
							|  |  |  |                           'stdout: utf-8/surrogateescape', | 
					
						
							|  |  |  |                           'stderr: utf-8/backslashreplace']) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # PYTHONIOENCODING has the priority over PYTHONUTF8 | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8', '-c', code, | 
					
						
							|  |  |  |                               PYTHONIOENCODING="latin1") | 
					
						
							|  |  |  |         self.assertEqual(out.splitlines(), | 
					
						
							| 
									
										
										
										
											2018-08-28 23:26:33 +02:00
										 |  |  |                          ['stdin: iso8859-1/strict', | 
					
						
							|  |  |  |                           'stdout: iso8859-1/strict', | 
					
						
							|  |  |  |                           'stderr: iso8859-1/backslashreplace']) | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8', '-c', code, | 
					
						
							|  |  |  |                               PYTHONIOENCODING=":namereplace") | 
					
						
							|  |  |  |         self.assertEqual(out.splitlines(), | 
					
						
							| 
									
										
										
										
											2018-08-28 23:26:33 +02:00
										 |  |  |                          ['stdin: utf-8/namereplace', | 
					
						
							|  |  |  |                           'stdout: utf-8/namereplace', | 
					
						
							|  |  |  |                           'stderr: utf-8/backslashreplace']) | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_io(self): | 
					
						
							|  |  |  |         code = textwrap.dedent('''
 | 
					
						
							|  |  |  |             import sys | 
					
						
							|  |  |  |             filename = sys.argv[1] | 
					
						
							|  |  |  |             with open(filename) as fp: | 
					
						
							|  |  |  |                 print(f"{fp.encoding}/{fp.errors}") | 
					
						
							|  |  |  |         ''')
 | 
					
						
							|  |  |  |         filename = __file__ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         out = self.get_output('-c', code, filename, PYTHONUTF8='1') | 
					
						
							|  |  |  |         self.assertEqual(out, 'UTF-8/strict') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _check_io_encoding(self, module, encoding=None, errors=None): | 
					
						
							|  |  |  |         filename = __file__ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Encoding explicitly set | 
					
						
							|  |  |  |         args = [] | 
					
						
							|  |  |  |         if encoding: | 
					
						
							|  |  |  |             args.append(f'encoding={encoding!r}') | 
					
						
							|  |  |  |         if errors: | 
					
						
							|  |  |  |             args.append(f'errors={errors!r}') | 
					
						
							|  |  |  |         code = textwrap.dedent('''
 | 
					
						
							|  |  |  |             import sys | 
					
						
							|  |  |  |             from %s import open | 
					
						
							|  |  |  |             filename = sys.argv[1] | 
					
						
							|  |  |  |             with open(filename, %s) as fp: | 
					
						
							|  |  |  |                 print(f"{fp.encoding}/{fp.errors}") | 
					
						
							|  |  |  |         ''') % (module, ', '.join(args))
 | 
					
						
							|  |  |  |         out = self.get_output('-c', code, filename, | 
					
						
							|  |  |  |                               PYTHONUTF8='1') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not encoding: | 
					
						
							|  |  |  |             encoding = 'UTF-8' | 
					
						
							|  |  |  |         if not errors: | 
					
						
							|  |  |  |             errors = 'strict' | 
					
						
							|  |  |  |         self.assertEqual(out, f'{encoding}/{errors}') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def check_io_encoding(self, module): | 
					
						
							|  |  |  |         self._check_io_encoding(module, encoding="latin1") | 
					
						
							|  |  |  |         self._check_io_encoding(module, errors="namereplace") | 
					
						
							|  |  |  |         self._check_io_encoding(module, | 
					
						
							|  |  |  |                                 encoding="latin1", errors="namereplace") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_io_encoding(self): | 
					
						
							|  |  |  |         self.check_io_encoding('io') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_io_encoding(self): | 
					
						
							|  |  |  |         self.check_io_encoding('_pyio') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_locale_getpreferredencoding(self): | 
					
						
							|  |  |  |         code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))' | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8', '-c', code) | 
					
						
							|  |  |  |         self.assertEqual(out, 'UTF-8 UTF-8') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-08-28 12:35:44 +02:00
										 |  |  |         for loc in POSIX_LOCALES: | 
					
						
							|  |  |  |             with self.subTest(LC_ALL=loc): | 
					
						
							|  |  |  |                 out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc) | 
					
						
							|  |  |  |                 self.assertEqual(out, 'UTF-8 UTF-8') | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-26 02:11:06 +02:00
										 |  |  |     @unittest.skipIf(MS_WINDOWS, 'test specific to Unix') | 
					
						
							| 
									
										
										
										
											2017-12-16 04:54:22 +01:00
										 |  |  |     def test_cmd_line(self): | 
					
						
							|  |  |  |         arg = 'h\xe9\u20ac'.encode('utf-8') | 
					
						
							|  |  |  |         arg_utf8 = arg.decode('utf-8') | 
					
						
							|  |  |  |         arg_ascii = arg.decode('ascii', 'surrogateescape') | 
					
						
							|  |  |  |         code = 'import locale, sys; print("%s:%s" % (locale.getpreferredencoding(), ascii(sys.argv[1:])))' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def check(utf8_opt, expected, **kw): | 
					
						
							|  |  |  |             out = self.get_output('-X', utf8_opt, '-c', code, arg, **kw) | 
					
						
							|  |  |  |             args = out.partition(':')[2].rstrip() | 
					
						
							|  |  |  |             self.assertEqual(args, ascii(expected), out) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         check('utf8', [arg_utf8]) | 
					
						
							| 
									
										
										
										
											2018-08-28 12:35:44 +02:00
										 |  |  |         for loc in POSIX_LOCALES: | 
					
						
							|  |  |  |             with self.subTest(LC_ALL=loc): | 
					
						
							|  |  |  |                 check('utf8', [arg_utf8], LC_ALL=loc) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-06-26 02:11:06 +02:00
										 |  |  |         if sys.platform == 'darwin' or support.is_android: | 
					
						
							| 
									
										
										
										
											2017-12-16 04:54:22 +01:00
										 |  |  |             c_arg = arg_utf8 | 
					
						
							| 
									
										
										
										
											2018-08-27 15:40:17 +02:00
										 |  |  |         elif sys.platform.startswith("aix"): | 
					
						
							|  |  |  |             c_arg = arg.decode('iso-8859-1') | 
					
						
							| 
									
										
										
										
											2017-12-16 04:54:22 +01:00
										 |  |  |         else: | 
					
						
							|  |  |  |             c_arg = arg_ascii | 
					
						
							| 
									
										
										
										
											2018-08-28 12:35:44 +02:00
										 |  |  |         for loc in POSIX_LOCALES: | 
					
						
							|  |  |  |             with self.subTest(LC_ALL=loc): | 
					
						
							|  |  |  |                 check('utf8=0', [c_arg], LC_ALL=loc) | 
					
						
							| 
									
										
										
										
											2017-12-16 04:54:22 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2018-01-25 09:18:36 +01:00
										 |  |  |     def test_optim_level(self): | 
					
						
							|  |  |  |         # CPython: check that Py_Main() doesn't increment Py_OptimizeFlag | 
					
						
							|  |  |  |         # twice when -X utf8 requires to parse the configuration twice (when | 
					
						
							|  |  |  |         # the encoding changes after reading the configuration, the | 
					
						
							|  |  |  |         # configuration is read again with the new encoding). | 
					
						
							|  |  |  |         code = 'import sys; print(sys.flags.optimize)' | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8', '-O', '-c', code) | 
					
						
							|  |  |  |         self.assertEqual(out, '1') | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8', '-OO', '-c', code) | 
					
						
							|  |  |  |         self.assertEqual(out, '2') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         code = 'import sys; print(sys.flags.ignore_environment)' | 
					
						
							|  |  |  |         out = self.get_output('-X', 'utf8', '-E', '-c', code) | 
					
						
							|  |  |  |         self.assertEqual(out, '1') | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-12-13 12:29:09 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     unittest.main() |