mirror of
https://github.com/python/cpython.git
synced 2025-12-08 06:10:17 +00:00
[3.14] gh-63161: Fix PEP 263 support (GH-139481) (GH-139898)
* Support non-UTF-8 shebang and comments if a non-UTF-8 encoding is specified.
* Detect decoding errors in comments for UTF-8 encoding.
* Include the decoding error position for the default encoding in SyntaxError.
(cherry picked from commit 5c942f11cd)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
69d263cfe1
commit
9ff705c6c5
9 changed files with 211 additions and 46 deletions
|
|
@ -86,15 +86,18 @@ decode_str(const char *input, int single, struct tok_state *tok, int preserve_cr
|
|||
/* need to check line 1 and 2 separately since check_coding_spec
|
||||
assumes a single line as input */
|
||||
if (newl[0]) {
|
||||
tok->lineno = 1;
|
||||
if (!_PyTokenizer_check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) {
|
||||
return NULL;
|
||||
}
|
||||
if (tok->enc == NULL && tok->decoding_state != STATE_NORMAL && newl[1]) {
|
||||
tok->lineno = 2;
|
||||
if (!_PyTokenizer_check_coding_spec(newl[0]+1, newl[1] - newl[0],
|
||||
tok, buf_setreadl))
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
tok->lineno = 0;
|
||||
if (tok->enc != NULL) {
|
||||
assert(utf8 == NULL);
|
||||
utf8 = _PyTokenizer_translate_into_utf8(str, tok->enc);
|
||||
|
|
@ -102,6 +105,9 @@ decode_str(const char *input, int single, struct tok_state *tok, int preserve_cr
|
|||
return _PyTokenizer_error_ret(tok);
|
||||
str = PyBytes_AS_STRING(utf8);
|
||||
}
|
||||
else if (!_PyTokenizer_ensure_utf8(str, tok, 1)) {
|
||||
return _PyTokenizer_error_ret(tok);
|
||||
}
|
||||
assert(tok->decoding_buffer == NULL);
|
||||
tok->decoding_buffer = utf8; /* CAUTION */
|
||||
return str;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue