[3.14] gh-63161: Fix PEP 263 support (GH-139481) (GH-139898)

* Support non-UTF-8 shebang and comments if non-UTF-8 encoding is specified.
* Detect decoding error in comments for UTF-8 encoding.
* Include the decoding error position for default encoding in SyntaxError.
(cherry picked from commit 5c942f11cd)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Miss Islington (bot) 2025-10-19 21:16:33 +02:00 committed by GitHub
parent 69d263cfe1
commit 9ff705c6c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 211 additions and 46 deletions

View file

@@ -2,6 +2,7 @@
#include <errcode.h>
#include "pycore_pyerrors.h" // _PyErr_ProgramDecodedTextObject()
#include "pycore_runtime.h" // _Py_ID()
#include "lexer/state.h"
#include "lexer/lexer.h"
#include "pegen.h"
@@ -23,6 +24,13 @@ _PyPegen_raise_tokenizer_init_error(PyObject *filename)
PyObject *value;
PyObject *tback;
PyErr_Fetch(&type, &value, &tback);
if (PyErr_GivenExceptionMatches(value, PyExc_SyntaxError)) {
if (PyObject_SetAttr(value, &_Py_ID(filename), filename)) {
goto error;
}
PyErr_Restore(type, value, tback);
return;
}
errstr = PyObject_Str(value);
if (!errstr) {
goto error;