mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	This commit is contained in:
		
							parent
							
								
									37bfa4e7ec
								
							
						
					
					
						commit
						e667e98faa
					
				
					 4 changed files with 105 additions and 10 deletions
				
			
		|  | @ -603,6 +603,49 @@ def _is_ipv6_enabled(): | ||||||
| # module name. | # module name. | ||||||
| TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid()) | TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid()) | ||||||
| 
 | 
 | ||||||
|  | # FS_NONASCII: non-ASCII character encodable by os.fsencode(), | ||||||
|  | # or None if there is no such character. | ||||||
|  | FS_NONASCII = None | ||||||
|  | for character in ( | ||||||
|  |     # First try printable and common characters to have a readable filename. | ||||||
|  |     # For each character, the listed encodings are just examples of encodings | ||||||
|  |     # able to encode the character (the list is not exhaustive). | ||||||
|  | 
 | ||||||
|  |     # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1 | ||||||
|  |     '\u00E6', | ||||||
|  |     # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3 | ||||||
|  |     '\u0130', | ||||||
|  |     # U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257 | ||||||
|  |     '\u0141', | ||||||
|  |     # U+03C6 (Greek Small Letter Phi): cp1253 | ||||||
|  |     '\u03C6', | ||||||
|  |     # U+041A (Cyrillic Capital Letter Ka): cp1251 | ||||||
|  |     '\u041A', | ||||||
|  |     # U+05D0 (Hebrew Letter Alef): cp424 | ||||||
|  |     '\u05D0', | ||||||
|  |     # U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic | ||||||
|  |     '\u060C', | ||||||
|  |     # U+062A (Arabic Letter Teh): cp720 | ||||||
|  |     '\u062A', | ||||||
|  |     # U+0E01 (Thai Character Ko Kai): cp874 | ||||||
|  |     '\u0E01', | ||||||
|  | 
 | ||||||
|  |     # Then try more "special" characters. "special" because they may be | ||||||
|  |     # interpreted or displayed differently depending on the exact locale | ||||||
|  |     # encoding and the font. | ||||||
|  | 
 | ||||||
|  |     # U+00A0 (No-Break Space) | ||||||
|  |     '\u00A0', | ||||||
|  |     # U+20AC (Euro Sign) | ||||||
|  |     '\u20AC', | ||||||
|  | ): | ||||||
|  |     try: | ||||||
|  |         os.fsdecode(os.fsencode(character)) | ||||||
|  |     except UnicodeError: | ||||||
|  |         pass | ||||||
|  |     else: | ||||||
|  |         FS_NONASCII = character | ||||||
|  |         break | ||||||
| 
 | 
 | ||||||
| # TESTFN_UNICODE is a non-ascii filename | # TESTFN_UNICODE is a non-ascii filename | ||||||
| TESTFN_UNICODE = TESTFN + "-\xe0\xf2\u0258\u0141\u011f" | TESTFN_UNICODE = TESTFN + "-\xe0\xf2\u0258\u0141\u011f" | ||||||
|  | @ -647,6 +690,38 @@ def _is_ipv6_enabled(): | ||||||
|         # the byte 0xff. Skip some unicode filename tests. |         # the byte 0xff. Skip some unicode filename tests. | ||||||
|         pass |         pass | ||||||
| 
 | 
 | ||||||
|  | # TESTFN_UNDECODABLE is a filename (bytes type) that should *not* be able to be | ||||||
|  | # decoded from the filesystem encoding (in strict mode). It can be None if we | ||||||
|  | # cannot generate such a filename (e.g. the latin1 encoding can decode any byte | ||||||
|  | # sequence). On UNIX, TESTFN_UNDECODABLE can be decoded by os.fsdecode() thanks | ||||||
|  | # to the surrogateescape error handler (PEP 383), but not from the filesystem | ||||||
|  | # encoding in strict mode. | ||||||
|  | TESTFN_UNDECODABLE = None | ||||||
|  | for name in ( | ||||||
|  |     # b'\xff' is not decodable by os.fsdecode() with code page 932. Windows | ||||||
|  |     # accepts it to create a file or a directory, or doesn't accept entering | ||||||
|  |     # such a directory (when the bytes name is used). So test b'\xe7' first: it is | ||||||
|  |     # not decodable from cp932. | ||||||
|  |     b'\xe7w\xf0', | ||||||
|  |     # undecodable from ASCII, UTF-8 | ||||||
|  |     b'\xff', | ||||||
|  |     # undecodable from iso8859-3, iso8859-6, iso8859-7, cp424, iso8859-8, cp856 | ||||||
|  |     # and cp857 | ||||||
|  |     b'\xae\xd5' | ||||||
|  |     # undecodable from UTF-8 (UNIX and Mac OS X) | ||||||
|  |     b'\xed\xb2\x80', b'\xed\xb4\x80', | ||||||
|  | ): | ||||||
|  |     try: | ||||||
|  |         name.decode(TESTFN_ENCODING) | ||||||
|  |     except UnicodeDecodeError: | ||||||
|  |         TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name | ||||||
|  |         break | ||||||
|  | 
 | ||||||
|  | if FS_NONASCII: | ||||||
|  |     TESTFN_NONASCII = TESTFN + '-' + FS_NONASCII | ||||||
|  | else: | ||||||
|  |     TESTFN_NONASCII = None | ||||||
|  | 
 | ||||||
| # Save the initial cwd | # Save the initial cwd | ||||||
| SAVEDCWD = os.getcwd() | SAVEDCWD = os.getcwd() | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -93,15 +93,15 @@ def test_run_code(self): | ||||||
|         # All good if execution is successful |         # All good if execution is successful | ||||||
|         assert_python_ok('-c', 'pass') |         assert_python_ok('-c', 'pass') | ||||||
| 
 | 
 | ||||||
|     @unittest.skipIf(sys.getfilesystemencoding() == 'ascii', |     @unittest.skipUnless(test.support.FS_NONASCII, 'need support.FS_NONASCII') | ||||||
|                      'need a filesystem encoding different than ASCII') |  | ||||||
|     def test_non_ascii(self): |     def test_non_ascii(self): | ||||||
|         # Test handling of non-ascii data |         # Test handling of non-ascii data | ||||||
|         if test.support.verbose: |         if test.support.verbose: | ||||||
|             import locale |             import locale | ||||||
|             print('locale encoding = %s, filesystem encoding = %s' |             print('locale encoding = %s, filesystem encoding = %s' | ||||||
|                   % (locale.getpreferredencoding(), sys.getfilesystemencoding())) |                   % (locale.getpreferredencoding(), sys.getfilesystemencoding())) | ||||||
|         command = "assert(ord('\xe9') == 0xe9)" |         command = ("assert(ord(%r) == %s)" | ||||||
|  |                    % (test.support.FS_NONASCII, ord(test.support.FS_NONASCII))) | ||||||
|         assert_python_ok('-c', command) |         assert_python_ok('-c', command) | ||||||
| 
 | 
 | ||||||
|     # On Windows, pass bytes to subprocess doesn't test how Python decodes the |     # On Windows, pass bytes to subprocess doesn't test how Python decodes the | ||||||
|  |  | ||||||
|  | @ -363,14 +363,30 @@ def test_pep_409_verbiage(self): | ||||||
|             self.assertTrue(text[1].startswith('  File ')) |             self.assertTrue(text[1].startswith('  File ')) | ||||||
|             self.assertTrue(text[3].startswith('NameError')) |             self.assertTrue(text[3].startswith('NameError')) | ||||||
| 
 | 
 | ||||||
|     def test_non_utf8(self): |     def test_non_ascii(self): | ||||||
|  |         # Mac OS X denies the creation of a file with an invalid UTF-8 name. | ||||||
|  |         # Windows allows creating a file with an arbitrary bytes name, but | ||||||
|  |         # Python cannot pass an undecodable bytes argument to a subprocess. | ||||||
|  |         #if (support.TESTFN_UNDECODABLE | ||||||
|  |         #and sys.platform not in ('win32', 'darwin')): | ||||||
|  |         #    name = os.fsdecode(support.TESTFN_UNDECODABLE) | ||||||
|  |         #elif support.TESTFN_NONASCII: | ||||||
|  |         if support.TESTFN_NONASCII: | ||||||
|  |             name = support.TESTFN_NONASCII | ||||||
|  |         else: | ||||||
|  |             self.skipTest("need support.TESTFN_NONASCII") | ||||||
|  | 
 | ||||||
|         # Issue #16218 |         # Issue #16218 | ||||||
|         with temp_dir() as script_dir: |         source = 'print(ascii(__file__))\n' | ||||||
|             script_name = _make_test_script(script_dir, |         script_name = _make_test_script(os.curdir, name, source) | ||||||
|                     '\udcf1\udcea\udcf0\udce8\udcef\udcf2') |         self.addCleanup(support.unlink, script_name) | ||||||
|             self._check_script(script_name, script_name, script_name, |         rc, stdout, stderr = assert_python_ok(script_name) | ||||||
|                                script_dir, None, |         self.assertEqual( | ||||||
|                                importlib.machinery.SourceFileLoader) |             ascii(script_name), | ||||||
|  |             stdout.rstrip().decode('ascii'), | ||||||
|  |             'stdout=%r stderr=%r' % (stdout, stderr)) | ||||||
|  |         self.assertEqual(0, rc) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| def test_main(): | def test_main(): | ||||||
|     support.run_unittest(CmdLineTest) |     support.run_unittest(CmdLineTest) | ||||||
|  |  | ||||||
|  | @ -1243,6 +1243,8 @@ class Pep383Tests(unittest.TestCase): | ||||||
|         def setUp(self): |         def setUp(self): | ||||||
|             if support.TESTFN_UNENCODABLE: |             if support.TESTFN_UNENCODABLE: | ||||||
|                 self.dir = support.TESTFN_UNENCODABLE |                 self.dir = support.TESTFN_UNENCODABLE | ||||||
|  |             elif support.TESTFN_NONASCII: | ||||||
|  |                 self.dir = support.TESTFN_NONASCII | ||||||
|             else: |             else: | ||||||
|                 self.dir = support.TESTFN |                 self.dir = support.TESTFN | ||||||
|             self.bdir = os.fsencode(self.dir) |             self.bdir = os.fsencode(self.dir) | ||||||
|  | @ -1257,6 +1259,8 @@ def add_filename(fn): | ||||||
|             add_filename(support.TESTFN_UNICODE) |             add_filename(support.TESTFN_UNICODE) | ||||||
|             if support.TESTFN_UNENCODABLE: |             if support.TESTFN_UNENCODABLE: | ||||||
|                 add_filename(support.TESTFN_UNENCODABLE) |                 add_filename(support.TESTFN_UNENCODABLE) | ||||||
|  |             if support.TESTFN_NONASCII: | ||||||
|  |                 add_filename(support.TESTFN_NONASCII) | ||||||
|             if not bytesfn: |             if not bytesfn: | ||||||
|                 self.skipTest("couldn't create any non-ascii filename") |                 self.skipTest("couldn't create any non-ascii filename") | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Victor Stinner
						Victor Stinner