mirror of
https://github.com/python/cpython.git
synced 2025-10-23 18:03:48 +00:00
gh-107450: Check for overflow in the tokenizer and fix overflow test (#110832)
Co-authored-by: Filipe Laíns <lains@riseup.net>
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
b3c9faf056
commit
a1ac5590e0
4 changed files with 40 additions and 22 deletions
|
@ -37,6 +37,7 @@ extern "C" {
|
|||
#define E_LINECONT 25 /* Unexpected characters after a line continuation */
|
||||
#define E_BADSINGLE 27 /* Ill-formed single statement input */
|
||||
#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */
|
||||
#define E_COLUMNOVERFLOW 29 /* Column offset overflow */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -18,6 +18,12 @@
|
|||
from test.support.warnings_helper import check_warnings
|
||||
from test import support
|
||||
|
||||
try:
|
||||
from _testcapi import INT_MAX
|
||||
except ImportError:
|
||||
INT_MAX = 2**31 - 1
|
||||
|
||||
|
||||
|
||||
class NaiveException(Exception):
|
||||
def __init__(self, x):
|
||||
|
@ -318,11 +324,13 @@ def baz():
|
|||
check('(yield i) = 2', 1, 2)
|
||||
check('def f(*):\n pass', 1, 7)
|
||||
|
||||
@unittest.skipIf(INT_MAX >= sys.maxsize, "Downcasting to int is safe for col_offset")
|
||||
@support.requires_resource('cpu')
|
||||
@support.bigmemtest(support._2G, memuse=1.5)
|
||||
def testMemoryErrorBigSource(self, _size):
|
||||
with self.assertRaises(OverflowError):
|
||||
exec(f"if True:\n {' ' * 2**31}print('hello world')")
|
||||
@support.bigmemtest(INT_MAX, memuse=2, dry_run=False)
|
||||
def testMemoryErrorBigSource(self, size):
|
||||
src = b"if True:\n%*s" % (size, b"pass")
|
||||
with self.assertRaisesRegex(OverflowError, "Parser column offset overflow"):
|
||||
compile(src, '<fragment>', 'exec')
|
||||
|
||||
@cpython_only
|
||||
def testSettingException(self):
|
||||
|
|
|
@ -59,6 +59,10 @@ tok_nextc(struct tok_state *tok)
|
|||
int rc;
|
||||
for (;;) {
|
||||
if (tok->cur != tok->inp) {
|
||||
if ((unsigned int) tok->col_offset >= (unsigned int) INT_MAX) {
|
||||
tok->done = E_COLUMNOVERFLOW;
|
||||
return EOF;
|
||||
}
|
||||
tok->col_offset++;
|
||||
return Py_CHARMASK(*tok->cur++); /* Fast path */
|
||||
}
|
||||
|
|
|
@ -68,6 +68,7 @@ _Pypegen_tokenizer_error(Parser *p)
|
|||
const char *msg = NULL;
|
||||
PyObject* errtype = PyExc_SyntaxError;
|
||||
Py_ssize_t col_offset = -1;
|
||||
p->error_indicator = 1;
|
||||
switch (p->tok->done) {
|
||||
case E_TOKEN:
|
||||
msg = "invalid token";
|
||||
|
@ -103,6 +104,10 @@ _Pypegen_tokenizer_error(Parser *p)
|
|||
msg = "unexpected character after line continuation character";
|
||||
break;
|
||||
}
|
||||
case E_COLUMNOVERFLOW:
|
||||
PyErr_SetString(PyExc_OverflowError,
|
||||
"Parser column offset overflow - source line is too big");
|
||||
return -1;
|
||||
default:
|
||||
msg = "unknown parsing error";
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue