mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	gh-63161: Add more tests for source encoding (#139440)
This commit is contained in:
		
							parent
							
								
									bc172ee830
								
							
						
					
					
						commit
						b2f5ad0c6d
					
				
					 2 changed files with 179 additions and 22 deletions
				
			
		| 
						 | 
					@ -172,6 +172,8 @@ def test_tokenizer_fstring_warning_in_first_line(self):
 | 
				
			||||||
            os.unlink(TESTFN)
 | 
					            os.unlink(TESTFN)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					BUFSIZ = 2**13
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class AbstractSourceEncodingTest:
 | 
					class AbstractSourceEncodingTest:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_default_coding(self):
 | 
					    def test_default_coding(self):
 | 
				
			||||||
| 
						 | 
					@ -184,14 +186,20 @@ def test_first_coding_line(self):
 | 
				
			||||||
        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
					        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_second_coding_line(self):
 | 
					    def test_second_coding_line(self):
 | 
				
			||||||
        src = (b'#\n'
 | 
					        src = (b'#!/usr/bin/python\n'
 | 
				
			||||||
 | 
					               b'#coding:iso8859-15\n'
 | 
				
			||||||
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
 | 
					        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_second_coding_line_empty_first_line(self):
 | 
				
			||||||
 | 
					        src = (b'\n'
 | 
				
			||||||
               b'#coding:iso8859-15\n'
 | 
					               b'#coding:iso8859-15\n'
 | 
				
			||||||
               b'print(ascii("\xc3\xa4"))\n')
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
					        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_third_coding_line(self):
 | 
					    def test_third_coding_line(self):
 | 
				
			||||||
        # Only first two lines are tested for a magic comment.
 | 
					        # Only first two lines are tested for a magic comment.
 | 
				
			||||||
        src = (b'#\n'
 | 
					        src = (b'#!/usr/bin/python\n'
 | 
				
			||||||
               b'#\n'
 | 
					               b'#\n'
 | 
				
			||||||
               b'#coding:iso8859-15\n'
 | 
					               b'#coding:iso8859-15\n'
 | 
				
			||||||
               b'print(ascii("\xc3\xa4"))\n')
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
| 
						 | 
					@ -209,13 +217,52 @@ def test_double_coding_same_line(self):
 | 
				
			||||||
               b'print(ascii("\xc3\xa4"))\n')
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
					        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_double_coding_utf8(self):
 | 
				
			||||||
 | 
					        src = (b'#coding:utf-8\n'
 | 
				
			||||||
 | 
					               b'#coding:latin1\n'
 | 
				
			||||||
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
 | 
					        self.check_script_output(src, br"'\xe4'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_long_first_coding_line(self):
 | 
				
			||||||
 | 
					        src = (b'#' + b' '*BUFSIZ + b'coding:iso8859-15\n'
 | 
				
			||||||
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
 | 
					        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_long_second_coding_line(self):
 | 
				
			||||||
 | 
					        src = (b'#!/usr/bin/python\n'
 | 
				
			||||||
 | 
					               b'#' + b' '*BUFSIZ + b'coding:iso8859-15\n'
 | 
				
			||||||
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
 | 
					        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_long_coding_line(self):
 | 
				
			||||||
 | 
					        src = (b'#coding:iso-8859-15' + b' '*BUFSIZ + b'\n'
 | 
				
			||||||
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
 | 
					        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_long_coding_name(self):
 | 
				
			||||||
 | 
					        src = (b'#coding:iso-8859-1-' + b'x'*BUFSIZ + b'\n'
 | 
				
			||||||
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
 | 
					        self.check_script_output(src, br"'\xc3\xa4'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_long_first_utf8_line(self):
 | 
				
			||||||
 | 
					        src = b'#' + b'\xc3\xa4'*(BUFSIZ//2) + b'\n'
 | 
				
			||||||
 | 
					        self.check_script_output(src, b'')
 | 
				
			||||||
 | 
					        src = b'# ' + b'\xc3\xa4'*(BUFSIZ//2) + b'\n'
 | 
				
			||||||
 | 
					        self.check_script_output(src, b'')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_long_second_utf8_line(self):
 | 
				
			||||||
 | 
					        src = b'\n#' + b'\xc3\xa4'*(BUFSIZ//2) + b'\n'
 | 
				
			||||||
 | 
					        self.check_script_output(src, b'')
 | 
				
			||||||
 | 
					        src = b'\n# ' + b'\xc3\xa4'*(BUFSIZ//2) + b'\n'
 | 
				
			||||||
 | 
					        self.check_script_output(src, b'')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_first_non_utf8_coding_line(self):
 | 
					    def test_first_non_utf8_coding_line(self):
 | 
				
			||||||
        src = (b'#coding:iso-8859-15 \xa4\n'
 | 
					        src = (b'#coding:iso-8859-15 \xa4\n'
 | 
				
			||||||
               b'print(ascii("\xc3\xa4"))\n')
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
					        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_second_non_utf8_coding_line(self):
 | 
					    def test_second_non_utf8_coding_line(self):
 | 
				
			||||||
        src = (b'\n'
 | 
					        src = (b'#!/usr/bin/python\n'
 | 
				
			||||||
               b'#coding:iso-8859-15 \xa4\n'
 | 
					               b'#coding:iso-8859-15 \xa4\n'
 | 
				
			||||||
               b'print(ascii("\xc3\xa4"))\n')
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
					        self.check_script_output(src, br"'\xc3\u20ac'")
 | 
				
			||||||
| 
						 | 
					@ -224,27 +271,56 @@ def test_utf8_bom(self):
 | 
				
			||||||
        src = (b'\xef\xbb\xbfprint(ascii("\xc3\xa4"))\n')
 | 
					        src = (b'\xef\xbb\xbfprint(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
        self.check_script_output(src, br"'\xe4'")
 | 
					        self.check_script_output(src, br"'\xe4'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_utf8_bom_utf8_comments(self):
 | 
				
			||||||
 | 
					        src = (b'\xef\xbb\xbf#\xc3\xa4\n'
 | 
				
			||||||
 | 
					               b'#\xc3\xa4\n'
 | 
				
			||||||
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
 | 
					        self.check_script_output(src, br"'\xe4'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_utf8_bom_and_utf8_coding_line(self):
 | 
					    def test_utf8_bom_and_utf8_coding_line(self):
 | 
				
			||||||
        src = (b'\xef\xbb\xbf#coding:utf-8\n'
 | 
					        src = (b'\xef\xbb\xbf#coding:utf-8\n'
 | 
				
			||||||
               b'print(ascii("\xc3\xa4"))\n')
 | 
					               b'print(ascii("\xc3\xa4"))\n')
 | 
				
			||||||
        self.check_script_output(src, br"'\xe4'")
 | 
					        self.check_script_output(src, br"'\xe4'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_utf8_non_utf8_comment_line_error(self):
 | 
				
			||||||
 | 
					        src = (b'#coding: utf8\n'
 | 
				
			||||||
 | 
					               b'#\n'
 | 
				
			||||||
 | 
					               b'#\xa4\n'
 | 
				
			||||||
 | 
					               b'raise RuntimeError\n')
 | 
				
			||||||
 | 
					        self.check_script_error(src,
 | 
				
			||||||
 | 
					                br"'utf-8' codec can't decode byte|"
 | 
				
			||||||
 | 
					                br"encoding problem: utf8")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_crlf(self):
 | 
					    def test_crlf(self):
 | 
				
			||||||
        src = (b'print(ascii("""\r\n"""))\n')
 | 
					        src = (b'print(ascii("""\r\n"""))\n')
 | 
				
			||||||
        out = self.check_script_output(src, br"'\n'")
 | 
					        self.check_script_output(src, br"'\n'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_crcrlf(self):
 | 
					    def test_crcrlf(self):
 | 
				
			||||||
        src = (b'print(ascii("""\r\r\n"""))\n')
 | 
					        src = (b'print(ascii("""\r\r\n"""))\n')
 | 
				
			||||||
        out = self.check_script_output(src, br"'\n\n'")
 | 
					        self.check_script_output(src, br"'\n\n'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_crcrcrlf(self):
 | 
					    def test_crcrcrlf(self):
 | 
				
			||||||
        src = (b'print(ascii("""\r\r\r\n"""))\n')
 | 
					        src = (b'print(ascii("""\r\r\r\n"""))\n')
 | 
				
			||||||
        out = self.check_script_output(src, br"'\n\n\n'")
 | 
					        self.check_script_output(src, br"'\n\n\n'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_crcrcrlf2(self):
 | 
					    def test_crcrcrlf2(self):
 | 
				
			||||||
        src = (b'#coding:iso-8859-1\n'
 | 
					        src = (b'#coding:iso-8859-1\n'
 | 
				
			||||||
               b'print(ascii("""\r\r\r\n"""))\n')
 | 
					               b'print(ascii("""\r\r\r\n"""))\n')
 | 
				
			||||||
        out = self.check_script_output(src, br"'\n\n\n'")
 | 
					        self.check_script_output(src, br"'\n\n\n'")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_nul_in_first_coding_line(self):
 | 
				
			||||||
 | 
					        src = (b'#coding:iso8859-15\x00\n'
 | 
				
			||||||
 | 
					               b'\n'
 | 
				
			||||||
 | 
					               b'\n'
 | 
				
			||||||
 | 
					               b'raise RuntimeError\n')
 | 
				
			||||||
 | 
					        self.check_script_error(src, br"source code (string )?cannot contain null bytes")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_nul_in_second_coding_line(self):
 | 
				
			||||||
 | 
					        src = (b'#!/usr/bin/python\n'
 | 
				
			||||||
 | 
					               b'#coding:iso8859-15\x00\n'
 | 
				
			||||||
 | 
					               b'\n'
 | 
				
			||||||
 | 
					               b'raise RuntimeError\n')
 | 
				
			||||||
 | 
					        self.check_script_error(src, br"source code (string )?cannot contain null bytes")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class UTF8ValidatorTest(unittest.TestCase):
 | 
					class UTF8ValidatorTest(unittest.TestCase):
 | 
				
			||||||
| 
						 | 
					@ -324,6 +400,10 @@ def check_script_output(self, src, expected):
 | 
				
			||||||
        out = stdout.getvalue().encode('latin1')
 | 
					        out = stdout.getvalue().encode('latin1')
 | 
				
			||||||
        self.assertEqual(out.rstrip(), expected)
 | 
					        self.assertEqual(out.rstrip(), expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def check_script_error(self, src, expected):
 | 
				
			||||||
 | 
					        with self.assertRaisesRegex(SyntaxError, expected.decode()) as cm:
 | 
				
			||||||
 | 
					            exec(src)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class FileSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
 | 
					class FileSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -335,6 +415,14 @@ def check_script_output(self, src, expected):
 | 
				
			||||||
            res = script_helper.assert_python_ok(fn)
 | 
					            res = script_helper.assert_python_ok(fn)
 | 
				
			||||||
        self.assertEqual(res.out.rstrip(), expected)
 | 
					        self.assertEqual(res.out.rstrip(), expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def check_script_error(self, src, expected):
 | 
				
			||||||
 | 
					        with tempfile.TemporaryDirectory() as tmpd:
 | 
				
			||||||
 | 
					            fn = os.path.join(tmpd, 'test.py')
 | 
				
			||||||
 | 
					            with open(fn, 'wb') as fp:
 | 
				
			||||||
 | 
					                fp.write(src)
 | 
				
			||||||
 | 
					            res = script_helper.assert_python_failure(fn)
 | 
				
			||||||
 | 
					        self.assertRegex(res.err.rstrip().splitlines()[-1], b'SyntaxError.*?' + expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == "__main__":
 | 
					if __name__ == "__main__":
 | 
				
			||||||
    unittest.main()
 | 
					    unittest.main()
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1346,7 +1346,8 @@ def readline():
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_no_bom_no_encoding_cookie(self):
 | 
					    def test_no_bom_no_encoding_cookie(self):
 | 
				
			||||||
        lines = (
 | 
					        lines = (
 | 
				
			||||||
            b'# something\n',
 | 
					            b'#!/home/\xc3\xa4/bin/python\n',
 | 
				
			||||||
 | 
					            b'# something \xe2\x82\xac\n',
 | 
				
			||||||
            b'print(something)\n',
 | 
					            b'print(something)\n',
 | 
				
			||||||
            b'do_something(else)\n'
 | 
					            b'do_something(else)\n'
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
| 
						 | 
					@ -1354,16 +1355,54 @@ def test_no_bom_no_encoding_cookie(self):
 | 
				
			||||||
        self.assertEqual(encoding, 'utf-8')
 | 
					        self.assertEqual(encoding, 'utf-8')
 | 
				
			||||||
        self.assertEqual(consumed_lines, list(lines[:2]))
 | 
					        self.assertEqual(consumed_lines, list(lines[:2]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_no_bom_no_encoding_cookie_first_line_error(self):
 | 
				
			||||||
 | 
					        lines = (
 | 
				
			||||||
 | 
					            b'#!/home/\xa4/bin/python\n\n',
 | 
				
			||||||
 | 
					            b'print(something)\n',
 | 
				
			||||||
 | 
					            b'do_something(else)\n'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        with self.assertRaises(SyntaxError):
 | 
				
			||||||
 | 
					            tokenize.detect_encoding(self.get_readline(lines))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_no_bom_no_encoding_cookie_second_line_error(self):
 | 
				
			||||||
 | 
					        lines = (
 | 
				
			||||||
 | 
					            b'#!/usr/bin/python\n',
 | 
				
			||||||
 | 
					            b'# something \xe2\n',
 | 
				
			||||||
 | 
					            b'print(something)\n',
 | 
				
			||||||
 | 
					            b'do_something(else)\n'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        with self.assertRaises(SyntaxError):
 | 
				
			||||||
 | 
					            tokenize.detect_encoding(self.get_readline(lines))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_bom_no_cookie(self):
 | 
					    def test_bom_no_cookie(self):
 | 
				
			||||||
        lines = (
 | 
					        lines = (
 | 
				
			||||||
            b'\xef\xbb\xbf# something\n',
 | 
					            b'\xef\xbb\xbf#!/home/\xc3\xa4/bin/python\n',
 | 
				
			||||||
            b'print(something)\n',
 | 
					            b'print(something)\n',
 | 
				
			||||||
            b'do_something(else)\n'
 | 
					            b'do_something(else)\n'
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
 | 
					        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
 | 
				
			||||||
        self.assertEqual(encoding, 'utf-8-sig')
 | 
					        self.assertEqual(encoding, 'utf-8-sig')
 | 
				
			||||||
        self.assertEqual(consumed_lines,
 | 
					        self.assertEqual(consumed_lines,
 | 
				
			||||||
                         [b'# something\n', b'print(something)\n'])
 | 
					                         [b'#!/home/\xc3\xa4/bin/python\n', b'print(something)\n'])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_bom_no_cookie_first_line_error(self):
 | 
				
			||||||
 | 
					        lines = (
 | 
				
			||||||
 | 
					            b'\xef\xbb\xbf#!/home/\xa4/bin/python\n',
 | 
				
			||||||
 | 
					            b'print(something)\n',
 | 
				
			||||||
 | 
					            b'do_something(else)\n'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        with self.assertRaises(SyntaxError):
 | 
				
			||||||
 | 
					            tokenize.detect_encoding(self.get_readline(lines))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_bom_no_cookie_second_line_error(self):
 | 
				
			||||||
 | 
					        lines = (
 | 
				
			||||||
 | 
					            b'\xef\xbb\xbf#!/usr/bin/python\n',
 | 
				
			||||||
 | 
					            b'# something \xe2\n',
 | 
				
			||||||
 | 
					            b'print(something)\n',
 | 
				
			||||||
 | 
					            b'do_something(else)\n'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        with self.assertRaises(SyntaxError):
 | 
				
			||||||
 | 
					            tokenize.detect_encoding(self.get_readline(lines))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_cookie_first_line_no_bom(self):
 | 
					    def test_cookie_first_line_no_bom(self):
 | 
				
			||||||
        lines = (
 | 
					        lines = (
 | 
				
			||||||
| 
						 | 
					@ -1439,17 +1478,6 @@ def test_cookie_second_line_noncommented_first_line(self):
 | 
				
			||||||
        expected = [b"print('\xc2\xa3')\n"]
 | 
					        expected = [b"print('\xc2\xa3')\n"]
 | 
				
			||||||
        self.assertEqual(consumed_lines, expected)
 | 
					        self.assertEqual(consumed_lines, expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_cookie_second_line_commented_first_line(self):
 | 
					 | 
				
			||||||
        lines = (
 | 
					 | 
				
			||||||
            b"#print('\xc2\xa3')\n",
 | 
					 | 
				
			||||||
            b'# vim: set fileencoding=iso8859-15 :\n',
 | 
					 | 
				
			||||||
            b"print('\xe2\x82\xac')\n"
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
 | 
					 | 
				
			||||||
        self.assertEqual(encoding, 'iso8859-15')
 | 
					 | 
				
			||||||
        expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
 | 
					 | 
				
			||||||
        self.assertEqual(consumed_lines, expected)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def test_cookie_second_line_empty_first_line(self):
 | 
					    def test_cookie_second_line_empty_first_line(self):
 | 
				
			||||||
        lines = (
 | 
					        lines = (
 | 
				
			||||||
            b'\n',
 | 
					            b'\n',
 | 
				
			||||||
| 
						 | 
					@ -1461,6 +1489,48 @@ def test_cookie_second_line_empty_first_line(self):
 | 
				
			||||||
        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
 | 
					        expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
 | 
				
			||||||
        self.assertEqual(consumed_lines, expected)
 | 
					        self.assertEqual(consumed_lines, expected)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_cookie_third_line(self):
 | 
				
			||||||
 | 
					        lines = (
 | 
				
			||||||
 | 
					            b'#!/home/\xc3\xa4/bin/python\n',
 | 
				
			||||||
 | 
					            b'# something\n',
 | 
				
			||||||
 | 
					            b'# vim: set fileencoding=ascii :\n',
 | 
				
			||||||
 | 
					            b'print(something)\n',
 | 
				
			||||||
 | 
					            b'do_something(else)\n'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
 | 
				
			||||||
 | 
					        self.assertEqual(encoding, 'utf-8')
 | 
				
			||||||
 | 
					        self.assertEqual(consumed_lines, list(lines[:2]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_double_coding_line(self):
 | 
				
			||||||
 | 
					        # If the first line matches the second line is ignored.
 | 
				
			||||||
 | 
					        lines = (
 | 
				
			||||||
 | 
					            b'#coding:iso8859-15\n',
 | 
				
			||||||
 | 
					            b'#coding:latin1\n',
 | 
				
			||||||
 | 
					            b'print(something)\n'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
 | 
				
			||||||
 | 
					        self.assertEqual(encoding, 'iso8859-15')
 | 
				
			||||||
 | 
					        self.assertEqual(consumed_lines, list(lines[:1]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_double_coding_same_line(self):
 | 
				
			||||||
 | 
					        lines = (
 | 
				
			||||||
 | 
					            b'#coding:iso8859-15 coding:latin1\n',
 | 
				
			||||||
 | 
					            b'print(something)\n'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
 | 
				
			||||||
 | 
					        self.assertEqual(encoding, 'iso8859-15')
 | 
				
			||||||
 | 
					        self.assertEqual(consumed_lines, list(lines[:1]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def test_double_coding_utf8(self):
 | 
				
			||||||
 | 
					        lines = (
 | 
				
			||||||
 | 
					            b'#coding:utf-8\n',
 | 
				
			||||||
 | 
					            b'#coding:latin1\n',
 | 
				
			||||||
 | 
					            b'print(something)\n'
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					        encoding, consumed_lines = tokenize.detect_encoding(self.get_readline(lines))
 | 
				
			||||||
 | 
					        self.assertEqual(encoding, 'utf-8')
 | 
				
			||||||
 | 
					        self.assertEqual(consumed_lines, list(lines[:1]))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def test_latin1_normalization(self):
 | 
					    def test_latin1_normalization(self):
 | 
				
			||||||
        # See get_normal_name() in Parser/tokenizer/helpers.c.
 | 
					        # See get_normal_name() in Parser/tokenizer/helpers.c.
 | 
				
			||||||
        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
 | 
					        encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
 | 
				
			||||||
| 
						 | 
					@ -1485,7 +1555,6 @@ def test_syntaxerror_latin1(self):
 | 
				
			||||||
        readline = self.get_readline(lines)
 | 
					        readline = self.get_readline(lines)
 | 
				
			||||||
        self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
 | 
					        self.assertRaises(SyntaxError, tokenize.detect_encoding, readline)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
    def test_utf8_normalization(self):
 | 
					    def test_utf8_normalization(self):
 | 
				
			||||||
        # See get_normal_name() in Parser/tokenizer/helpers.c.
 | 
					        # See get_normal_name() in Parser/tokenizer/helpers.c.
 | 
				
			||||||
        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
 | 
					        encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue