gh-148991: Add colour to tokenize CLI output (#148992)

Co-authored-by: Stan Ulbrych <stan@python.org>
Hugo van Kemenade 2026-04-26 22:14:33 +03:00 committed by GitHub
parent 5d416324c5
commit 1e7dfbce93
6 changed files with 85 additions and 12 deletions

Doc/library/tokenize.rst

@@ -28,7 +28,7 @@ type can be determined by checking the ``exact_type`` property on the
 **undefined** when providing invalid Python code and it can change at any
 point.
 
-Tokenizing Input
+Tokenizing input
 ----------------
 
 The primary entry point is a :term:`generator`:
@@ -146,7 +146,7 @@ function it uses to do this is available:
 
 .. _tokenize-cli:
 
-Command-Line Usage
+Command-line usage
 ------------------
 
 .. versionadded:: 3.3
@@ -173,8 +173,12 @@ The following options are accepted:
 If :file:`filename.py` is specified its contents are tokenized to stdout.
 Otherwise, tokenization is performed on stdin.
 
+.. versionadded:: next
+   Output is in color by default and can be
+   :ref:`controlled using environment variables <using-on-controlling-color>`.
+
 Examples
-------------------
+--------
 
 Example of a script rewriter that transforms float literals into Decimal
 objects::
@@ -227,7 +231,7 @@ Example of tokenizing from the command line. The script::
 
 will be tokenized to the following output where the first column is the range
 of the line/column coordinates where the token is found, the second column is
-the name of the token, and the final column is the value of the token (if any)
+the name of the token, and the final column is the value of the token (if any):
 
 .. code-block:: shell-session
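The new color behavior follows the interpreter-wide conventions, so the
``PYTHON_COLORS``, ``NO_COLOR`` and ``FORCE_COLOR`` environment variables all
apply. As a minimal sketch of exercising the CLI with color forced off
(``hello.py`` is a placeholder file name, not part of this commit)::

    import os
    import subprocess
    import sys

    # Run the tokenize CLI with color disabled via the documented
    # PYTHON_COLORS environment variable; NO_COLOR / FORCE_COLOR work too.
    result = subprocess.run(
        [sys.executable, "-m", "tokenize", "hello.py"],
        env={**os.environ, "PYTHON_COLORS": "0"},
        capture_output=True,
        text=True,
    )
    print(result.stdout)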

Doc/whatsnew/3.15.rst

@@ -1244,6 +1244,15 @@ tkinter
 (Contributed by Matthias Kievernagel and Serhiy Storchaka in :gh:`47655`.)
 
+
+tokenize
+--------
+
+* The output of the :mod:`tokenize` :ref:`command-line interface
+  <tokenize-cli>` is colored by default. This can be controlled with
+  :ref:`environment variables <using-on-controlling-color>`.
+  (Contributed by Hugo van Kemenade in :gh:`148991`.)
+
 .. _whatsnew315-tomllib-1-1-0:
 
 tomllib

Lib/_colorize.py

@@ -386,6 +386,14 @@ class Timeit(ThemeSection):
     reset: str = ANSIColors.RESET
 
 
+@dataclass(frozen=True, kw_only=True)
+class Tokenize(ThemeSection):
+    whitespace: str = ANSIColors.GREY
+    error: str = ANSIColors.BOLD_RED
+    position: str = ANSIColors.GREY
+    delimiter: str = ANSIColors.RESET
+
+
 @dataclass(frozen=True, kw_only=True)
 class Traceback(ThemeSection):
     type: str = ANSIColors.BOLD_MAGENTA
@@ -423,6 +431,7 @@ class Theme:
     live_profiler: LiveProfiler = field(default_factory=LiveProfiler)
     syntax: Syntax = field(default_factory=Syntax)
     timeit: Timeit = field(default_factory=Timeit)
+    tokenize: Tokenize = field(default_factory=Tokenize)
     traceback: Traceback = field(default_factory=Traceback)
     unittest: Unittest = field(default_factory=Unittest)
@@ -437,6 +446,7 @@ def copy_with(
         live_profiler: LiveProfiler | None = None,
         syntax: Syntax | None = None,
         timeit: Timeit | None = None,
+        tokenize: Tokenize | None = None,
         traceback: Traceback | None = None,
         unittest: Unittest | None = None,
     ) -> Self:
@@ -454,6 +464,7 @@ def copy_with(
             live_profiler=live_profiler or self.live_profiler,
             syntax=syntax or self.syntax,
             timeit=timeit or self.timeit,
+            tokenize=tokenize or self.tokenize,
             traceback=traceback or self.traceback,
             unittest=unittest or self.unittest,
         )
@@ -475,6 +486,7 @@ def no_colors(cls) -> Self:
             live_profiler=LiveProfiler.no_colors(),
             syntax=Syntax.no_colors(),
             timeit=Timeit.no_colors(),
+            tokenize=Tokenize.no_colors(),
             traceback=Traceback.no_colors(),
             unittest=Unittest.no_colors(),
         )
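For orientation, here is a sketch of how the new theme section is reached
through the module's existing ``get_theme()`` helper. ``_colorize`` is a
private module and ``theme.tokenize`` only exists with this change applied,
so this is illustrative, not a public API::

    import _colorize

    # Force a colored theme regardless of whether stdout is a terminal.
    theme = _colorize.get_theme(force_color=True)
    print(repr(theme.tokenize.error))     # bold-red escape, e.g. '\x1b[1;31m'
    print(repr(theme.tokenize.position))  # grey, used for line/column numbers

    # A no-colors theme carries empty strings, so formatting stays plain.
    plain = _colorize.get_theme(force_no_color=True)
    assert plain.tokenize.error == ""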

Lib/test/test_tokenize.py

@@ -3326,6 +3326,7 @@ def test_newline_at_the_end_of_buffer(self):
         run_test_script(file_name)
 
 
+@support.force_not_colorized_test_class
 class CommandLineTest(unittest.TestCase):
     def setUp(self):
         self.filename = tempfile.mktemp()
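``force_not_colorized_test_class`` keeps the CLI tests' expected output
stable by neutralizing color-related environment state for every test in the
class. A hypothetical sketch of the same pattern in an unrelated test file::

    import unittest
    from test import support

    @support.force_not_colorized_test_class
    class ExampleTest(unittest.TestCase):
        def test_plain_output(self):
            # Within this class, variables such as PYTHON_COLORS and
            # FORCE_COLOR are suppressed, so captured output contains
            # no ANSI escape sequences.
            ...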

Lib/tokenize.py

@@ -35,6 +35,7 @@
 from token import *
 from token import EXACT_TOKEN_TYPES
 import _tokenize
+lazy import _colorize
 
 cookie_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII)
 blank_re = re.compile(br'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII)
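The ``lazy import`` statement is the explicit lazy-import syntax proposed in
PEP 810: the name is bound immediately, but the module body only executes on
first use, which keeps ``import tokenize`` cheap when no colored output is
ever produced. A sketch of the semantics, assuming an interpreter that
supports the syntax::

    lazy import json  # nothing is imported yet

    def encode(obj):
        # The first attribute access triggers the actual import of json.
        return json.dumps(obj)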
@@ -505,6 +506,56 @@ def generate_tokens(readline):
     """
     return _generate_tokens_from_c_tokenizer(readline, extra_tokens=True)
 
 
+def _get_token_colors(syntax, tokenize):
+    """Map token type numbers to theme colors."""
+    # The mapping is built once per _format_tokens() call and only read
+    # from, so a plain dict is sufficient.
+    return {
+        COMMENT: syntax.comment,
+        DEDENT: tokenize.whitespace,
+        ENCODING: tokenize.whitespace,
+        ENDMARKER: tokenize.whitespace,
+        ERRORTOKEN: tokenize.error,
+        FSTRING_START: syntax.string,
+        FSTRING_MIDDLE: syntax.string,
+        FSTRING_END: syntax.string,
+        INDENT: tokenize.whitespace,
+        NAME: syntax.reset,
+        NEWLINE: tokenize.whitespace,
+        NL: tokenize.whitespace,
+        NUMBER: syntax.number,
+        OP: syntax.op,
+        SOFT_KEYWORD: syntax.soft_keyword,
+        STRING: syntax.string,
+        TSTRING_START: syntax.string,
+        TSTRING_MIDDLE: syntax.string,
+        TSTRING_END: syntax.string,
+    }
+
+
+def _format_tokens(tokens, *, color=False, exact=False):
+    theme = _colorize.get_theme(force_no_color=not color)
+    s = theme.syntax
+    t = theme.tokenize
+    token_colors = _get_token_colors(s, t)
+    for token in tokens:
+        token_range = (
+            f"{t.position}{token.start[0]}"
+            f"{t.delimiter},{t.position}{token.start[1]}"
+            f"{t.delimiter}-"
+            f"{t.position}{token.end[0]}"
+            f"{t.delimiter},{t.position}{token.end[1]}"
+            f"{t.delimiter}:"
+        )
+        token_color = token_colors.get(token.type, s.reset)
+        token_name = tok_name[token.exact_type if exact else token.type]
+        # Pad using the uncolored range text: the ANSI escapes embedded in
+        # token_range have no visible width, so len(token_range) would
+        # over-count.
+        visible_range = f"{token.start[0]},{token.start[1]}-{token.end[0]},{token.end[1]}:"
+        yield (
+            f"{token_range}{' ' * (20 - len(visible_range))}"
+            f"{token_color}{token_name:<15}"
+            f"{s.reset}{token.string!r:<15}"
+        )
+
+
 def _main(args=None):
     import argparse
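A quick way to see what ``_format_tokens()`` yields, on an interpreter with
this change applied (it is a private helper, shown here only to illustrate
the diff; ``exact=True`` selects ``exact_type`` names such as ``EQUAL``)::

    import io
    import tokenize

    src = "x = 1\n"
    tokens = tokenize.generate_tokens(io.StringIO(src).readline)
    for line in tokenize._format_tokens(tokens, color=False, exact=True):
        print(line)
    # Output, roughly:
    # 1,0-1,1:            NAME           'x'
    # 1,2-1,3:            EQUAL          '='
    # 1,4-1,5:            NUMBER         '1'
    # ...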
@@ -524,7 +575,7 @@ def error(message, filename=None, location=None):
         sys.exit(1)
 
     # Parse the arguments and options
-    parser = argparse.ArgumentParser(color=True)
+    parser = argparse.ArgumentParser()
     parser.add_argument(dest='filename', nargs='?',
                         metavar='filename.py',
                         help='the file to tokenize; defaults to stdin')
@@ -545,13 +596,8 @@ def error(message, filename=None, location=None):
 
         # Output the tokenization
-        for token in tokens:
-            token_type = token.type
-            if args.exact:
-                token_type = token.exact_type
-            token_range = "%d,%d-%d,%d:" % (token.start + token.end)
-            print("%-20s%-15s%-15r" %
-                  (token_range, tok_name[token_type], token.string))
+        for line in _format_tokens(tokens, color=True, exact=args.exact):
+            print(line)
     except IndentationError as err:
         line, column = err.args[1][1:3]
         error(err.args[0], filename, (line, column))

Misc/NEWS.d news entry (new file; exact blurb filename not shown)

@@ -0,0 +1 @@
+Add colour to :mod:`tokenize` CLI output. Patch by Hugo van Kemenade.