mirror of
https://github.com/python/cpython.git
synced 2026-04-20 02:40:59 +00:00
gh-130273: Fix traceback color output with unicode characters (GH-142529)
Account for the display width of Unicode characters so that colors and underlining in traceback output are correct. Co-authored-by: Łukasz Langa <lukasz@langa.pl> Co-authored-by: Victor Stinner <vstinner@python.org>
This commit is contained in:
parent
cf59bf7647
commit
dfeb160bc3
4 changed files with 144 additions and 15 deletions
|
|
@ -16,6 +16,7 @@
|
|||
from .types import CharBuffer, CharWidths
|
||||
from .trace import trace
|
||||
|
||||
|
||||
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
|
||||
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
|
||||
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
|
||||
|
|
|
|||
|
|
@ -1790,6 +1790,7 @@ def f():
|
|||
]
|
||||
self.assertEqual(result_lines, expected)
|
||||
|
||||
|
||||
class TestKeywordTypoSuggestions(unittest.TestCase):
|
||||
TYPO_CASES = [
|
||||
("with block ad something:\n pass", "and"),
|
||||
|
|
@ -5414,6 +5415,92 @@ def expected(t, m, fn, l, f, E, e, z, n):
|
|||
]
|
||||
self.assertEqual(actual, expected(**colors))
|
||||
|
||||
def test_colorized_traceback_unicode(self):
    # Checks lines 2-3 of a colorized traceback (the source line and the
    # caret line beneath it) for source text containing non-ASCII
    # identifiers.
    #
    # NOTE(review): the leading whitespace inside the expected f-strings
    # below is reproduced exactly as it appears in this view; it looks
    # like runs of spaces were collapsed during extraction -- confirm
    # against the upstream test before relying on these literals.

    # Case 1: identifiers made of characters that occupy two columns each.
    try:
        啊哈=1; 啊哈/0####
    except Exception as e:
        exc = traceback.TracebackException.from_exception(e)

    actual = "".join(exc.format(colorize=True)).splitlines()
    def expected(t, m, fn, l, f, E, e, z, n):
        return [
            f" 啊哈=1; {e}啊哈{z}{E}/{z}{e}0{z}####",
            f" {e}~~~~{z}{E}^{z}{e}~{z}",
        ]
    self.assertEqual(actual[2:4], expected(**colors))

    # Case 2: a non-ASCII identifier whose characters are one column wide;
    # the whole name is highlighted with one caret per character.
    try:
        ééééé/0
    except Exception as e:
        exc = traceback.TracebackException.from_exception(e)

    actual = "".join(exc.format(colorize=True)).splitlines()
    def expected(t, m, fn, l, f, E, e, z, n):
        return [
            f" {E}ééééé{z}/0",
            f" {E}^^^^^{z}",
        ]
    self.assertEqual(actual[2:4], expected(**colors))
|
||||
|
||||
def test_colorized_syntax_error_ascii_display_width(self):
    """Caret alignment for ASCII edge cases handled by _wlen.

    The old ASCII fast track in _display_width returned the raw character
    offset for ASCII strings, which is wrong for CTRL-Z (display width 2)
    and ANSI escape sequences (display width 0).
    """
    # NOTE(review): the leading whitespace inside the expected f-strings
    # below is reproduced exactly as it appears in this view; it looks
    # like runs of spaces were collapsed during extraction -- confirm
    # against the upstream test before relying on these literals.
    E = colors["E"]
    z = colors["z"]
    t = colors["t"]
    m = colors["m"]
    fn = colors["fn"]
    l = colors["l"]

    def _make_syntax_error(text, offset, end_offset):
        # Build a single-line SyntaxError by hand so the offsets can be
        # chosen freely, independent of what the parser would report.
        err = SyntaxError("invalid syntax")
        err.filename = "<string>"
        err.lineno = 1
        err.end_lineno = 1
        err.text = text
        err.offset = offset
        err.end_offset = end_offset
        return err

    # CTRL-Z (\x1a) is ASCII but displayed as ^Z (2 columns).
    # Verify caret aligns when CTRL-Z precedes the error.
    err = _make_syntax_error("a\x1a$\n", offset=3, end_offset=4)
    exc = traceback.TracebackException.from_exception(err)
    actual = "".join(exc.format(colorize=True))
    # 'a' (1 col) + '\x1a' (2 cols) = 3 cols before '$'
    self.assertIn(
        f' File {fn}"<string>"{z}, line {l}1{z}\n'
        f' a\x1a{E}${z}\n'
        f' {" " * 3}{E}^{z}\n'
        f'{t}SyntaxError{z}: {m}invalid syntax{z}\n',
        actual,
    )

    # CTRL-Z in the highlighted (error) region counts as 2 columns.
    err = _make_syntax_error("$\x1a\n", offset=1, end_offset=3)
    exc = traceback.TracebackException.from_exception(err)
    actual = "".join(exc.format(colorize=True))
    # '$' (1 col) + '\x1a' (2 cols) = 3 columns of carets
    self.assertIn(
        f' {E}$\x1a{z}\n'
        f' {E}{"^" * 3}{z}\n',
        actual,
    )

    # ANSI escape sequences are ASCII but take 0 display columns.
    err = _make_syntax_error("a\x1b[1mb$\n", offset=7, end_offset=8)
    exc = traceback.TracebackException.from_exception(err)
    actual = "".join(exc.format(colorize=True))
    # 'a' (1 col) + '\x1b[1m' (0 cols) + 'b' (1 col) = 2 before '$'
    self.assertIn(
        f' a\x1b[1mb{E}${z}\n'
        f' {" " * 2}{E}^{z}\n',
        actual,
    )
|
||||
|
||||
class TestLazyImportSuggestions(unittest.TestCase):
|
||||
"""Test that lazy imports are not reified when computing AttributeError suggestions."""
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
"""Extract, format and print information about Python stack traces."""
|
||||
|
||||
import collections.abc
|
||||
import functools
|
||||
import itertools
|
||||
import linecache
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import textwrap
|
||||
import types
|
||||
|
|
@ -684,12 +686,12 @@ def output_line(lineno):
|
|||
colorized_line_parts = []
|
||||
colorized_carets_parts = []
|
||||
|
||||
for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]):
|
||||
for color, group in itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]):
|
||||
caret_group = list(group)
|
||||
if color == "^":
|
||||
if "^" in color:
|
||||
colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in caret_group) + theme.reset)
|
||||
colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, caret in caret_group) + theme.reset)
|
||||
elif color == "~":
|
||||
elif "~" in color:
|
||||
colorized_line_parts.append(theme.error_range + "".join(char for char, _ in caret_group) + theme.reset)
|
||||
colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in caret_group) + theme.reset)
|
||||
else:
|
||||
|
|
@ -971,7 +973,54 @@ def setup_positions(expr, force_valid=True):
|
|||
|
||||
return None
|
||||
|
||||
_WIDE_CHAR_SPECIFIERS = "WF"
|
||||
|
||||
def _zip_display_width(line, carets):
|
||||
carets = iter(carets)
|
||||
if line.isascii() and '\x1a' not in line:
|
||||
for char in line:
|
||||
yield char, next(carets, "")
|
||||
return
|
||||
|
||||
import unicodedata
|
||||
for char in unicodedata.iter_graphemes(line):
|
||||
char = str(char)
|
||||
char_width = _display_width(char)
|
||||
yield char, "".join(itertools.islice(carets, char_width))
|
||||
|
||||
|
||||
@functools.cache
|
||||
def _str_width(c: str) -> int:
|
||||
# copied from _pyrepl.utils to fix gh-130273
|
||||
|
||||
if ord(c) < 128:
|
||||
return 1
|
||||
import unicodedata
|
||||
# gh-139246 for zero-width joiner and combining characters
|
||||
if unicodedata.combining(c):
|
||||
return 0
|
||||
category = unicodedata.category(c)
|
||||
if category == "Cf" and c != "\u00ad":
|
||||
return 0
|
||||
w = unicodedata.east_asian_width(c)
|
||||
if w in ("N", "Na", "H", "A"):
|
||||
return 1
|
||||
return 2
|
||||
|
||||
|
||||
_ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
|
||||
|
||||
|
||||
def _wlen(s: str) -> int:
    """Return the display width of *s* in columns.

    Sums the per-character widths from ``_str_width``, subtracts the length
    of every match of ``_ANSI_ESCAPE_SEQUENCE`` (so matched escape sequences
    contribute nothing), and adds one extra column for each CTRL-Z
    (``\\x1a``), which is counted as two columns.
    """
    # copied from _pyrepl.utils to fix gh-130273

    # Single-character shortcut; CTRL-Z is excluded so that it still gets
    # its extra column from the general computation below.
    if len(s) == 1 and s != "\x1a":
        return _str_width(s)
    length = sum(_str_width(i) for i in s)
    # remove lengths of any escape sequences
    sequence = _ANSI_ESCAPE_SEQUENCE.findall(s)
    ctrl_z_cnt = s.count("\x1a")
    return length - sum(len(i) for i in sequence) + ctrl_z_cnt
|
||||
|
||||
|
||||
def _display_width(line, offset=None):
|
||||
"""Calculate the extra amount of width space the given source
|
||||
|
|
@ -979,18 +1028,9 @@ def _display_width(line, offset=None):
|
|||
width output device. Supports wide unicode characters and emojis."""
|
||||
|
||||
if offset is None:
|
||||
offset = len(line)
|
||||
return _wlen(line)
|
||||
|
||||
# Fast track for ASCII-only strings
|
||||
if line.isascii():
|
||||
return offset
|
||||
|
||||
import unicodedata
|
||||
|
||||
return sum(
|
||||
2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
|
||||
for char in line[:offset]
|
||||
)
|
||||
return _wlen(line[:offset])
|
||||
|
||||
|
||||
def _format_note(note, indent, theme):
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
Fix traceback color output with Unicode characters.
|
||||
Loading…
Add table
Add a link
Reference in a new issue