[3.11] gh-98744: Prevent column-level decoding crashes on traceback module (#98850)

Co-authored-by: Batuhan Taskaya <isidentical@gmail.com>
This commit is contained in:
Batuhan Taskaya 2022-10-29 17:12:15 +03:00 committed by GitHub
parent 12957d7cbd
commit 751da28feb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 63 additions and 14 deletions

View file

@ -778,6 +778,56 @@ def f():
]
self.assertEqual(actual, expected)
def test_wide_characters_unicode_with_problematic_byte_offset(self):
def f():
actual = self.get_exception(f)
expected = [
f"Traceback (most recent call last):",
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
f" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
f" ",
]
self.assertEqual(actual, expected)
def test_byte_offset_with_wide_characters_middle(self):
def f():
= 1
raise ValueError()
actual = self.get_exception(f)
expected = [
f"Traceback (most recent call last):",
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
f" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 2}, in f",
f" raise ValueError()",
]
self.assertEqual(actual, expected)
def test_byte_offset_multiline(self):
def f():
= 1
= 0
print(1, (
))
actual = self.get_exception(f)
expected = [
f"Traceback (most recent call last):",
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
f" callable()",
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
f" print(1, (",
f" ^^^^",
]
self.assertEqual(actual, expected)
@cpython_only
@requires_debug_ranges()
class CPythonTracebackErrorCaretTests(TracebackErrorLocationCaretTests):

View file

@ -475,32 +475,32 @@ def format_frame_summary(self, frame_summary):
frame_summary.colno is not None
and frame_summary.end_colno is not None
):
colno = _byte_offset_to_character_offset(
frame_summary._original_line, frame_summary.colno)
end_colno = _byte_offset_to_character_offset(
frame_summary._original_line, frame_summary.end_colno)
start_offset = _byte_offset_to_character_offset(
frame_summary._original_line, frame_summary.colno) + 1
end_offset = _byte_offset_to_character_offset(
frame_summary._original_line, frame_summary.end_colno) + 1
anchors = None
if frame_summary.lineno == frame_summary.end_lineno:
with suppress(Exception):
anchors = _extract_caret_anchors_from_line_segment(
frame_summary._original_line[colno - 1:end_colno - 1]
frame_summary._original_line[start_offset - 1:end_offset - 1]
)
else:
end_colno = stripped_characters + len(stripped_line)
end_offset = stripped_characters + len(stripped_line)
# show indicators if primary char doesn't span the frame line
if end_colno - colno < len(stripped_line) or (
if end_offset - start_offset < len(stripped_line) or (
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
row.append(' ')
row.append(' ' * (colno - stripped_characters))
row.append(' ' * (start_offset - stripped_characters))
if anchors:
row.append(anchors.primary_char * (anchors.left_end_offset))
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
row.append(anchors.primary_char * (end_colno - colno - anchors.right_start_offset))
row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
else:
row.append('^' * (end_colno - colno))
row.append('^' * (end_offset - start_offset))
row.append('\n')
@ -560,10 +560,7 @@ def format(self):
def _byte_offset_to_character_offset(str, offset):
as_utf8 = str.encode('utf-8')
if offset > len(as_utf8):
offset = len(as_utf8)
return len(as_utf8[:offset + 1].decode("utf-8"))
return len(as_utf8[:offset].decode("utf-8", errors="replace"))
_Anchors = collections.namedtuple(

View file

@ -0,0 +1,2 @@
Prevent a crash in :mod:`traceback` when converting byte offsets to
character offsets for source lines that contain certain Unicode characters.