gh-148991: Add colour to tokenize CLI output (#148992)

Co-authored-by: Stan Ulbrych <stan@python.org>
Hugo van Kemenade 2026-04-26 22:14:33 +03:00 committed by GitHub
parent 5d416324c5
commit 1e7dfbce93
6 changed files with 85 additions and 12 deletions

Doc/library/tokenize.rst

@@ -28,7 +28,7 @@ type can be determined by checking the ``exact_type`` property on the
 **undefined** when providing invalid Python code and it can change at any
 point.
 
-Tokenizing Input
+Tokenizing input
 ----------------
 
 The primary entry point is a :term:`generator`:
@@ -146,7 +146,7 @@ function it uses to do this is available:
 
 .. _tokenize-cli:
 
-Command-Line Usage
+Command-line usage
 ------------------
 
 .. versionadded:: 3.3
@@ -173,8 +173,12 @@ The following options are accepted:
 If :file:`filename.py` is specified its contents are tokenized to stdout.
 Otherwise, tokenization is performed on stdin.
 
+.. versionadded:: next
+   Output is in color by default and can be
+   :ref:`controlled using environment variables <using-on-controlling-color>`.
+
 Examples
-------------------
+--------
 
 Example of a script rewriter that transforms float literals into Decimal
 objects::
@@ -227,7 +231,7 @@ Example of tokenizing from the command line. The script::
 
 will be tokenized to the following output where the first column is the range
 of the line/column coordinates where the token is found, the second column is
-the name of the token, and the final column is the value of the token (if any)
+the name of the token, and the final column is the value of the token (if any):
 
 .. code-block:: shell-session
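The new color behavior follows the interpreter-wide conventions, so the
``PYTHON_COLORS``, ``NO_COLOR`` and ``FORCE_COLOR`` environment variables all
apply. As a minimal sketch of exercising the CLI with color forced off
(``hello.py`` is a placeholder file name, not part of this commit)::

    import os
    import subprocess
    import sys

    # Run the tokenize CLI with color disabled via the documented
    # PYTHON_COLORS environment variable; NO_COLOR / FORCE_COLOR work too.
    result = subprocess.run(
        [sys.executable, "-m", "tokenize", "hello.py"],
        env={**os.environ, "PYTHON_COLORS": "0"},
        capture_output=True,
        text=True,
    )
    print(result.stdout)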

Doc/whatsnew/3.15.rst

@@ -1244,6 +1244,15 @@ tkinter
 (Contributed by Matthias Kievernagel and Serhiy Storchaka in :gh:`47655`.)
 
+
+tokenize
+--------
+
+* The output of the :mod:`tokenize` :ref:`command-line interface
+  <tokenize-cli>` is colored by default. This can be controlled with
+  :ref:`environment variables <using-on-controlling-color>`.
+  (Contributed by Hugo van Kemenade in :gh:`148991`.)
+
 .. _whatsnew315-tomllib-1-1-0:
 
 tomllib

Lib/_colorize.py

@@ -386,6 +386,14 @@ class Timeit(ThemeSection):
     reset: str = ANSIColors.RESET
 
 
+@dataclass(frozen=True, kw_only=True)
+class Tokenize(ThemeSection):
+    whitespace: str = ANSIColors.GREY
+    error: str = ANSIColors.BOLD_RED
+    position: str = ANSIColors.GREY
+    delimiter: str = ANSIColors.RESET
+
+
 @dataclass(frozen=True, kw_only=True)
 class Traceback(ThemeSection):
     type: str = ANSIColors.BOLD_MAGENTA
@@ -423,6 +431,7 @@ class Theme:
     live_profiler: LiveProfiler = field(default_factory=LiveProfiler)
     syntax: Syntax = field(default_factory=Syntax)
     timeit: Timeit = field(default_factory=Timeit)
+    tokenize: Tokenize = field(default_factory=Tokenize)
     traceback: Traceback = field(default_factory=Traceback)
     unittest: Unittest = field(default_factory=Unittest)
@@ -437,6 +446,7 @@ def copy_with(
         live_profiler: LiveProfiler | None = None,
         syntax: Syntax | None = None,
         timeit: Timeit | None = None,
+        tokenize: Tokenize | None = None,
         traceback: Traceback | None = None,
         unittest: Unittest | None = None,
     ) -> Self:
@@ -454,6 +464,7 @@ def copy_with(
             live_profiler=live_profiler or self.live_profiler,
             syntax=syntax or self.syntax,
             timeit=timeit or self.timeit,
+            tokenize=tokenize or self.tokenize,
             traceback=traceback or self.traceback,
             unittest=unittest or self.unittest,
         )
@@ -475,6 +486,7 @@ def no_colors(cls) -> Self:
             live_profiler=LiveProfiler.no_colors(),
             syntax=Syntax.no_colors(),
             timeit=Timeit.no_colors(),
+            tokenize=Tokenize.no_colors(),
             traceback=Traceback.no_colors(),
             unittest=Unittest.no_colors(),
         )
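For orientation, here is a sketch of how the new theme section is reached
through the module's existing ``get_theme()`` helper. ``_colorize`` is a
private module and ``theme.tokenize`` only exists with this change applied,
so this is illustrative, not a public API::

    import _colorize

    # Force a colored theme regardless of whether stdout is a terminal.
    theme = _colorize.get_theme(force_color=True)
    print(repr(theme.tokenize.error))     # bold-red escape, e.g. '\x1b[1;31m'
    print(repr(theme.tokenize.position))  # grey, used for line/column numbers

    # A no-colors theme carries empty strings, so formatting stays plain.
    plain = _colorize.get_theme(force_no_color=True)
    assert plain.tokenize.error == ""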

Lib/test/test_tokenize.py

@@ -3326,6 +3326,7 @@ def test_newline_at_the_end_of_buffer(self):
         run_test_script(file_name)
 
 
+@support.force_not_colorized_test_class
 class CommandLineTest(unittest.TestCase):
     def setUp(self):
         self.filename = tempfile.mktemp()
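``force_not_colorized_test_class`` keeps the CLI tests' expected output
stable by neutralizing color-related environment state for every test in the
class. A hypothetical sketch of the same pattern in an unrelated test file::

    import unittest
    from test import support

    @support.force_not_colorized_test_class
    class ExampleTest(unittest.TestCase):
        def test_plain_output(self):
            # Within this class, variables such as PYTHON_COLORS and
            # FORCE_COLOR are suppressed, so captured output contains
            # no ANSI escape sequences.
            ...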

Lib/tokenize.py

@@ -35,6 +35,7 @@
 from token import *
 from token import EXACT_TOKEN_TYPES
 import _tokenize
+lazy import _colorize
 
 cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
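The ``lazy import`` statement is the explicit lazy-import syntax proposed in
PEP 810: the name is bound immediately, but the module body only executes on
first use, which keeps ``import tokenize`` cheap when no colored output is
ever produced. A sketch of the semantics, assuming an interpreter that
supports the syntax::

    lazy import json  # nothing is imported yet

    def encode(obj):
        # The first attribute access triggers the actual import of json.
        return json.dumps(obj)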
@@ -505,6 +506,56 @@ def generate_tokens(readline):
     """
     return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
 
 
+def _get_token_colors(syntax, tokenize):
+    """Map token type numbers to theme colors."""
+    # The mapping is built once per _format_tokens() call and only read
+    # from, so a plain dict is sufficient.
+    return {
+        COMMENT: syntax.comment,
+        DEDENT: tokenize.whitespace,
+        ENCODING: tokenize.whitespace,
+        ENDMARKER: tokenize.whitespace,
+        ERRORTOKEN: tokenize.error,
+        FSTRING_START: syntax.string,
+        FSTRING_MIDDLE: syntax.string,
+        FSTRING_END: syntax.string,
+        INDENT: tokenize.whitespace,
+        NAME: syntax.reset,
+        NEWLINE: tokenize.whitespace,
+        NL: tokenize.whitespace,
+        NUMBER: syntax.number,
+        OP: syntax.op,
+        SOFT_KEYWORD: syntax.soft_keyword,
+        STRING: syntax.string,
+        TSTRING_START: syntax.string,
+        TSTRING_MIDDLE: syntax.string,
+        TSTRING_END: syntax.string,
+    }
+
+
+def _format_tokens(tokens, *, color=False, exact=False):
+    theme = _colorize.get_theme(force_no_color=not color)
+    s = theme.syntax
+    t = theme.tokenize
+    token_colors = _get_token_colors(s, t)
+    for token in tokens:
+        token_range = (
+            f"{t.position}{token.start[0]}"
+            f"{t.delimiter},{t.position}{token.start[1]}"
+            f"{t.delimiter}-"
+            f"{t.position}{token.end[0]}"
+            f"{t.delimiter},{t.position}{token.end[1]}"
+            f"{t.delimiter}:"
+        )
+        token_color = token_colors.get(token.type, s.reset)
+        token_name = tok_name[token.exact_type if exact else token.type]
+        # Pad using the uncolored range text: the ANSI escapes embedded in
+        # token_range have no visible width, so len(token_range) would
+        # over-count.
+        visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
+        yield (
+            f"{token_range}{' ' * (20 - len(visible_range))}"
+            f"{token_color}{token_name:<15}"
+            f"{s.reset}{token.string!r:<15}"
+        )
+
+
 def _main(args=None):
     import argparse
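A quick way to see what ``_format_tokens()`` yields, on an interpreter with
this change applied (it is a private helper, shown here only to illustrate
the diff; ``exact=True`` selects ``exact_type`` names such as ``EQUAL``)::

    import io
    import tokenize

    src = "x = 1\n"
    tokens = tokenize.generate_tokens(io.StringIO(src).readline)
    for line in tokenize._format_tokens(tokens, color=False, exact=True):
        print(line)
    # Output, roughly:
    # 1,0-1,1:            NAME           'x'
    # 1,2-1,3:            EQUAL          '='
    # 1,4-1,5:            NUMBER         '1'
    # ...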
@@ -524,7 +575,7 @@ def error(message, filename=None, location=None):
         sys.exit(1)
 
     # Parse the arguments and options
-    parser = argparse.ArgumentParser(color=True)
+    parser = argparse.ArgumentParser()
     parser.add_argument(dest='filename', nargs='?',
                         metavar='filename.py',
                         help='the file to tokenize; defaults to stdin')
@@ -545,13 +596,8 @@ def error(message, filename=None, location=None):
 
         # Output the tokenization
-        for token in tokens:
-            token_type = token.type
-            if args.exact:
-                token_type = token.exact_type
-            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
-            print("%-20s%-15s%-15r" %
-                  (token_range, tok_name[token_type], token.string))
+        for line in _format_tokens(tokens, color=True, exact=args.exact):
+            print(line)
     except IndentationError as err:
         line, column = err.args[1][1:3]
         error(err.args[0], filename, (line, column))

Misc/NEWS.d news entry (new file; exact blurb filename not shown)

@@ -0,0 +1 @@
+Add colour to :mod:`tokenize` CLI output. Patch by Hugo van Kemenade.