mirror of
https://github.com/python/cpython.git
synced 2026-04-15 08:11:10 +00:00
[3.13] gh-145234: Normalize decoded CR in string tokenizer (GH-145281) (#145312)
This commit is contained in:
parent
67f6368e47
commit
2daece9903
3 changed files with 26 additions and 0 deletions
|
|
@ -207,6 +207,14 @@ def test_quiet(self):
|
|||
with self.assertRaises(py_compile.PyCompileError):
|
||||
py_compile.compile(bad_coding, doraise=True, quiet=1)
|
||||
|
||||
def test_utf7_decoded_cr_compiles(self):
    """Check that a source decoding to a carriage return still compiles.

    Regression test for gh-145234.  The file declares the ``U7`` (UTF-7)
    codec in its coding cookie; under UTF-7 the ``+AA0`` sequence that
    follows decodes to U+000D, so the decoded source contains a bare
    ``\\r`` that was not present in the raw bytes.
    """
    # Raw bytes: coding cookie, then UTF-7 "+AA0" (CR), then an empty
    # string literal and a real newline.
    payload = b"#coding=U7+AA0''\n"
    with open(self.source_path, 'wb') as source_file:
        source_file.write(payload)

    # doraise=True turns any compilation failure into an exception
    # instead of a message on stderr.
    compiled_path = py_compile.compile(
        self.source_path, self.pyc_path, doraise=True)

    self.assertEqual(compiled_path, self.pyc_path)
    self.assertTrue(os.path.exists(self.pyc_path))
|
||||
|
||||
|
||||
class PyCompileTestsWithSourceEpoch(PyCompileTestsBase,
|
||||
unittest.TestCase,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,5 @@
|
|||
Fixed a ``SystemError`` in the parser when source code declared with an encoding cookie (for example,
|
||||
UTF-7) decodes to text containing carriage returns (``\r``). Newlines are now normalized after
|
||||
decoding in the string tokenizer.
|
||||
|
||||
Patch by Pablo Galindo.
|
||||
|
|
@ -102,6 +102,19 @@ decode_str(const char *input, int single, struct tok_state *tok, int preserve_cr
|
|||
return _PyTokenizer_error_ret(tok);
|
||||
str = PyBytes_AS_STRING(utf8);
|
||||
}
|
||||
if (utf8 != NULL) {
|
||||
char *translated = _PyTokenizer_translate_newlines(
|
||||
str, single, preserve_crlf, tok);
|
||||
if (translated == NULL) {
|
||||
Py_DECREF(utf8);
|
||||
return _PyTokenizer_error_ret(tok);
|
||||
}
|
||||
PyMem_Free(tok->input);
|
||||
tok->input = translated;
|
||||
str = translated;
|
||||
Py_CLEAR(utf8);
|
||||
}
|
||||
tok->str = str;
|
||||
assert(tok->decoding_buffer == NULL);
|
||||
tok->decoding_buffer = utf8; /* CAUTION */
|
||||
return str;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue