[3.13] gh-144759: Fix undefined behavior from NULL pointer arithmetic in lexer (GH-144788) (#145355)

This commit is contained in:
Stan Ulbrych 2026-02-28 13:49:37 +00:00 committed by GitHub
parent 2daece9903
commit 1a2b0fb3e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 24 additions and 4 deletions

View file

@ -143,6 +143,22 @@ def test_multiline_string_parsing(self):
output = kill_python(p)
self.assertEqual(p.returncode, 0)
@cpython_only
def test_lexer_buffer_realloc_with_null_start(self):
    # gh-144759: the start and multi_line_start pointers are NULL
    # (uninitialized in tok_mode_stack[0]); when the lexer buffer gets
    # reallocated while parsing long input, the lexer used to perform
    # pointer arithmetic on those NULL pointers.
    long_value = "a" * 2000
    source = dedent(f"""\
        x = f'{{{long_value!r}}}'
        print(x)
        """)

    repl = spawn_repl()
    repl.stdin.write(source)
    captured = kill_python(repl)

    # The interpreter must exit cleanly and must have echoed the long
    # string back through print().
    self.assertEqual(repl.returncode, 0)
    self.assertIn(long_value, captured)
def test_close_stdin(self):
user_input = dedent('''
import os

View file

@ -0,0 +1,4 @@
Fix undefined behavior in the lexer when ``start`` and ``multi_line_start``
pointers are ``NULL`` in ``_PyLexer_remember_fstring_buffers()`` and
``_PyLexer_restore_fstring_buffers()``. The ``NULL`` pointer arithmetic
(``NULL - valid_pointer``) is now guarded with explicit ``NULL`` checks.

View file

@ -13,8 +13,8 @@ _PyLexer_remember_fstring_buffers(struct tok_state *tok)
for (index = tok->tok_mode_stack_index; index >= 0; --index) {
mode = &(tok->tok_mode_stack[index]);
mode->f_string_start_offset = mode->f_string_start - tok->buf;
mode->f_string_multi_line_start_offset = mode->f_string_multi_line_start - tok->buf;
mode->f_string_start_offset = mode->f_string_start == NULL ? -1 : mode->f_string_start - tok->buf;
mode->f_string_multi_line_start_offset = mode->f_string_multi_line_start == NULL ? -1 : mode->f_string_multi_line_start - tok->buf;
}
}
@ -27,8 +27,8 @@ _PyLexer_restore_fstring_buffers(struct tok_state *tok)
for (index = tok->tok_mode_stack_index; index >= 0; --index) {
mode = &(tok->tok_mode_stack[index]);
mode->f_string_start = tok->buf + mode->f_string_start_offset;
mode->f_string_multi_line_start = tok->buf + mode->f_string_multi_line_start_offset;
mode->f_string_start = mode->f_string_start_offset < 0 ? NULL : tok->buf + mode->f_string_start_offset;
mode->f_string_multi_line_start = mode->f_string_multi_line_start_offset < 0 ? NULL : tok->buf + mode->f_string_multi_line_start_offset;
}
}