mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			364 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			364 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from __future__ import annotations
 | 
						|
import builtins
 | 
						|
import functools
 | 
						|
import keyword
 | 
						|
import re
 | 
						|
import token as T
 | 
						|
import tokenize
 | 
						|
import unicodedata
 | 
						|
import _colorize
 | 
						|
 | 
						|
from collections import deque
 | 
						|
from io import StringIO
 | 
						|
from tokenize import TokenInfo as TI
 | 
						|
from typing import Iterable, Iterator, Match, NamedTuple, Self
 | 
						|
 | 
						|
from .types import CharBuffer, CharWidths
 | 
						|
from .trace import trace
 | 
						|
 | 
						|
# Matches a terminal escape of the form ESC "[" + any run of characters in the
# range " ".."@" + one final character in the range "A".."~".
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")

# Matches a \x01 ... \x02 bracketed region (shortest match; "." does not cross
# a newline because the pattern is compiled without flags).
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")

# Translation table mapping the \x01 and \x02 markers themselves to "".
ZERO_WIDTH_TRANS = str.maketrans(dict.fromkeys(("\x01", "\x02"), ""))

# Hard keywords after which the next NAME token is tagged as a "definition".
IDENTIFIERS_AFTER = {"class", "def"}

# Names exposed by the `builtins` module that do not start with an underscore.
BUILTINS = {str(name) for name in dir(builtins) if not name.startswith("_")}
 | 
						|
 | 
						|
 | 
						|
def THEME(**kwargs):
    """Return the `syntax` attribute of the theme from `_colorize.get_theme()`.

    All keyword arguments are forwarded unchanged to `_colorize.get_theme()`.
    """
    # Looked up afresh on every call instead of being cached: the user can
    # modify the theme inside the interactive session.
    current_theme = _colorize.get_theme(**kwargs)
    return current_theme.syntax
 | 
						|
 | 
						|
 | 
						|
class Span(NamedTuple):
 | 
						|
    """Span indexing that's inclusive on both ends."""
 | 
						|
 | 
						|
    start: int
 | 
						|
    end: int
 | 
						|
 | 
						|
    @classmethod
 | 
						|
    def from_re(cls, m: Match[str], group: int | str) -> Self:
 | 
						|
        re_span = m.span(group)
 | 
						|
        return cls(re_span[0], re_span[1] - 1)
 | 
						|
 | 
						|
    @classmethod
 | 
						|
    def from_token(cls, token: TI, line_len: list[int]) -> Self:
 | 
						|
        return cls(
 | 
						|
            line_len[token.start[0] - 1] + token.start[1],
 | 
						|
            line_len[token.end[0] - 1] + token.end[1] - 1,
 | 
						|
        )
 | 
						|
 | 
						|
 | 
						|
class ColorSpan(NamedTuple):
    """A `Span` paired with the name of the color tag to apply to it."""

    # Range of indexes to color.
    span: Span
    # Name of the color; `disp_str()` uses it as a key into the theme.
    tag: str
 | 
						|
 | 
						|
 | 
						|
@functools.cache
def str_width(c: str) -> int:
    """Return the display width (1 or 2 columns) of the single character `c`.

    ASCII characters are always 1 wide. Any other character is 1 wide when its
    East Asian Width class is "N", "Na", "H" or "A", and 2 wide otherwise.
    """
    single_column_classes = ("N", "Na", "H", "A")
    if ord(c) >= 128 and unicodedata.east_asian_width(c) not in single_column_classes:
        return 2
    return 1
 | 
						|
 | 
						|
 | 
						|
def wlen(s: str) -> int:
    """Return the display width of `s`.

    Characters matched by `ANSI_ESCAPE_SEQUENCE` contribute nothing to the
    result, and every "\\x1a" character counts as two columns.
    """
    if len(s) == 1 and s != "\x1a":
        return str_width(s)

    raw_width = 0
    for ch in s:
        raw_width += str_width(ch)

    # Escape sequences were counted character by character above; take their
    # combined length back out.
    escape_chars = sum(len(seq) for seq in ANSI_ESCAPE_SEQUENCE.findall(s))
    # Each "\x1a" was counted once by str_width(); add one more per occurrence.
    extra_ctrl_z = s.count("\x1a")
    return raw_width - escape_chars + extra_ctrl_z
 | 
						|
 | 
						|
 | 
						|
def unbracket(s: str, including_content: bool = False) -> str:
    r"""Strip the \001 and \002 marker characters from `s`.

    When `including_content` is true, whatever sits between a \001 and the
    following \002 is removed together with the markers.
    """
    if not including_content:
        return s.translate(ZERO_WIDTH_TRANS)
    return ZERO_WIDTH_BRACKET.sub("", s)
 | 
						|
 | 
						|
 | 
						|
def gen_colors(buffer: str) -> Iterator[ColorSpan]:
    """Yield the index spans of `buffer` to color, each with its color tag.

    The input `buffer` should be a valid start of a Python code block, i.e.
    it cannot be a block starting in the middle of a multiline string.

    If tokenizing stops with a `tokenize.TokenError`, the spans produced by
    `recover_unterminated_string()` are yielded after those already emitted.
    """
    sio = StringIO(buffer)

    # line_lengths[k] is the combined length of the first k lines, i.e. the
    # index in `buffer` at which line k + 1 begins.
    line_lengths = [0]
    for line in sio.readlines():
        line_lengths.append(line_lengths[-1] + len(line))

    sio.seek(0)
    tokens = tokenize.generate_tokens(sio.readline)
    last_emitted: ColorSpan | None = None
    try:
        for color in gen_colors_from_token_stream(tokens, line_lengths):
            last_emitted = color
            yield color
    except tokenize.TokenError as te:
        yield from recover_unterminated_string(
            te, line_lengths, last_emitted, buffer
        )
 | 
						|
 | 
						|
 | 
						|
def recover_unterminated_string(
    exc: tokenize.TokenError,
    line_lengths: list[int],
    last_emitted: ColorSpan | None,
    buffer: str,
) -> Iterator[ColorSpan]:
    """Yield a "string" span for the unterminated string reported by `exc`.

    `exc.args` is unpacked as `(message, location)`. Nothing is yielded when
    the location is None or when the message is not one of the recognized
    unterminated-string errors (the latter is only traced). Otherwise a single
    span is yielded that runs from the reported location to the last index
    covered by `line_lengths`.
    """
    message, location = exc.args
    if location is None:
        return

    row, col = location

    string_error_prefixes = (
        "unterminated string literal",
        "unterminated f-string literal",
        "unterminated t-string literal",
        "EOF in multi-line string",
        "unterminated triple-quoted f-string literal",
        "unterminated triple-quoted t-string literal",
    )
    if not message.startswith(string_error_prefixes):
        trace(
            "unhandled token error({buffer}) = {te}",
            buffer=repr(buffer),
            te=str(exc),
        )
        return

    first = line_lengths[row - 1] + col - 1
    last = line_lengths[-1] - 1

    # in case FSTRING_START was already emitted
    if last_emitted and first <= last_emitted.span.start:
        trace("before last emitted = {s}", s=first)
        first = last_emitted.span.end + 1

    span = Span(first, last)
    trace("yielding span {a} -> {b}", a=span.start, b=span.end)
    yield ColorSpan(span, "string")
 | 
						|
 | 
						|
 | 
						|
def gen_colors_from_token_stream(
 | 
						|
    token_generator: Iterator[TI],
 | 
						|
    line_lengths: list[int],
 | 
						|
) -> Iterator[ColorSpan]:
 | 
						|
    token_window = prev_next_window(token_generator)
 | 
						|
 | 
						|
    is_def_name = False
 | 
						|
    bracket_level = 0
 | 
						|
    for prev_token, token, next_token in token_window:
 | 
						|
        assert token is not None
 | 
						|
        if token.start == token.end:
 | 
						|
            continue
 | 
						|
 | 
						|
        match token.type:
 | 
						|
            case (
 | 
						|
                T.STRING
 | 
						|
                | T.FSTRING_START | T.FSTRING_MIDDLE | T.FSTRING_END
 | 
						|
                | T.TSTRING_START | T.TSTRING_MIDDLE | T.TSTRING_END
 | 
						|
            ):
 | 
						|
                span = Span.from_token(token, line_lengths)
 | 
						|
                yield ColorSpan(span, "string")
 | 
						|
            case T.COMMENT:
 | 
						|
                span = Span.from_token(token, line_lengths)
 | 
						|
                yield ColorSpan(span, "comment")
 | 
						|
            case T.NUMBER:
 | 
						|
                span = Span.from_token(token, line_lengths)
 | 
						|
                yield ColorSpan(span, "number")
 | 
						|
            case T.OP:
 | 
						|
                if token.string in "([{":
 | 
						|
                    bracket_level += 1
 | 
						|
                elif token.string in ")]}":
 | 
						|
                    bracket_level -= 1
 | 
						|
                span = Span.from_token(token, line_lengths)
 | 
						|
                yield ColorSpan(span, "op")
 | 
						|
            case T.NAME:
 | 
						|
                if is_def_name:
 | 
						|
                    is_def_name = False
 | 
						|
                    span = Span.from_token(token, line_lengths)
 | 
						|
                    yield ColorSpan(span, "definition")
 | 
						|
                elif keyword.iskeyword(token.string):
 | 
						|
                    span = Span.from_token(token, line_lengths)
 | 
						|
                    yield ColorSpan(span, "keyword")
 | 
						|
                    if token.string in IDENTIFIERS_AFTER:
 | 
						|
                        is_def_name = True
 | 
						|
                elif (
 | 
						|
                    keyword.issoftkeyword(token.string)
 | 
						|
                    and bracket_level == 0
 | 
						|
                    and is_soft_keyword_used(prev_token, token, next_token)
 | 
						|
                ):
 | 
						|
                    span = Span.from_token(token, line_lengths)
 | 
						|
                    yield ColorSpan(span, "soft_keyword")
 | 
						|
                elif token.string in BUILTINS:
 | 
						|
                    span = Span.from_token(token, line_lengths)
 | 
						|
                    yield ColorSpan(span, "builtin")
 | 
						|
 | 
						|
 | 
						|
# Hard keywords that may directly follow a `case` soft keyword for it to still
# be treated as a keyword by is_soft_keyword_used().
keyword_first_sets_case = {"False", "None", "True"}
# Same for `match`: everything allowed after `case`, plus three more.
keyword_first_sets_match = keyword_first_sets_case | {"await", "lambda", "not"}
 | 
						|
 | 
						|
 | 
						|
def is_soft_keyword_used(*tokens: TI | None) -> bool:
 | 
						|
    """Returns True if the current token is a keyword in this context.
 | 
						|
 | 
						|
    For the `*tokens` to match anything, they have to be a three-tuple of
 | 
						|
    (previous, current, next).
 | 
						|
    """
 | 
						|
    trace("is_soft_keyword_used{t}", t=tokens)
 | 
						|
    match tokens:
 | 
						|
        case (
 | 
						|
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
 | 
						|
            TI(string="match"),
 | 
						|
            TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START)
 | 
						|
            | TI(T.OP, string="(" | "*" | "[" | "{" | "~" | "...")
 | 
						|
        ):
 | 
						|
            return True
 | 
						|
        case (
 | 
						|
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
 | 
						|
            TI(string="match"),
 | 
						|
            TI(T.NAME, string=s)
 | 
						|
        ):
 | 
						|
            if keyword.iskeyword(s):
 | 
						|
                return s in keyword_first_sets_match
 | 
						|
            return True
 | 
						|
        case (
 | 
						|
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
 | 
						|
            TI(string="case"),
 | 
						|
            TI(T.NUMBER | T.STRING | T.FSTRING_START | T.TSTRING_START)
 | 
						|
            | TI(T.OP, string="(" | "*" | "-" | "[" | "{")
 | 
						|
        ):
 | 
						|
            return True
 | 
						|
        case (
 | 
						|
            None | TI(T.NEWLINE) | TI(T.INDENT) | TI(string=":"),
 | 
						|
            TI(string="case"),
 | 
						|
            TI(T.NAME, string=s)
 | 
						|
        ):
 | 
						|
            if keyword.iskeyword(s):
 | 
						|
                return s in keyword_first_sets_case
 | 
						|
            return True
 | 
						|
        case (TI(string="case"), TI(string="_"), TI(string=":")):
 | 
						|
            return True
 | 
						|
        case _:
 | 
						|
            return False
 | 
						|
 | 
						|
 | 
						|
def disp_str(
    buffer: str,
    colors: list[ColorSpan] | None = None,
    start_index: int = 0,
    force_color: bool = False,
) -> tuple[CharBuffer, CharWidths]:
    r"""Decompose the input buffer into a printable variant with applied colors.

    Returns a tuple of two lists:
    - the first list is the input buffer, character by character, with color
      escape codes added (while those codes contain multiple ASCII characters,
      each code is considered atomic *and is attached for the corresponding
      visible character*);
    - the second list is the visible width of each character in the input
      buffer.

    Note on colors:
    - The `colors` list, if provided, is partially consumed within. We're using
      a list and not a generator since we need to hold onto the current
      unfinished span between calls to disp_str in case of multiline strings.
    - The `colors` list is computed from the start of the input block. `buffer`
      is only a subset of that input block, a single line within. This is why
      we need `start_index` to inform us which position is the start of `buffer`
      actually within user input. This allows us to match color spans correctly.
    - A span that is still open at the end of `buffer` (including one that
      starts on the very last character) is closed with a reset code; the next
      call revives it.

    `force_color` is forwarded to `THEME()`.

    Examples:
    >>> utils.disp_str("a = 9")
    (['a', ' ', '=', ' ', '9'], [1, 1, 1, 1, 1])

    >>> line = "while 1:"
    >>> colors = list(utils.gen_colors(line))
    >>> utils.disp_str(line, colors=colors)
    (['\x1b[1;34mw', 'h', 'i', 'l', 'e\x1b[0m', ' ', '1', ':'], [1, 1, 1, 1, 1, 1, 1, 1])

    """
    chars: CharBuffer = []
    char_widths: CharWidths = []

    if not buffer:
        return chars, char_widths

    while colors and colors[0].span.end < start_index:
        # move past irrelevant spans
        colors.pop(0)

    theme = THEME(force_color=force_color)
    pre_color = ""
    post_color = ""
    if colors and colors[0].span.start < start_index:
        # looks like we're continuing a previous color (e.g. a multiline str)
        pre_color = theme[colors[0].tag]

    for i, c in enumerate(buffer, start_index):
        if colors and colors[0].span.start == i:  # new color starts now
            pre_color = theme[colors[0].tag]

        if c == "\x1a":  # CTRL-Z on Windows
            chars.append(c)
            char_widths.append(2)
        elif ord(c) < 128:
            chars.append(c)
            char_widths.append(1)
        elif unicodedata.category(c).startswith("C"):
            # Non-ASCII characters in a "C*" Unicode category are shown as an
            # escaped code point, whose width is the length of that text.
            c = r"\u%04x" % ord(c)
            chars.append(c)
            char_widths.append(len(c))
        else:
            chars.append(c)
            char_widths.append(str_width(c))

        if colors and colors[0].span.end == i:  # current color ends now
            post_color = theme.reset
            colors.pop(0)

        chars[-1] = pre_color + chars[-1] + post_color
        pre_color = ""
        post_color = ""

    # `buffer` is non-empty here, so this is the index of its last character.
    last_index = start_index + len(buffer) - 1
    if (
        colors
        and colors[0].span.start <= last_index
        and colors[0].span.end > last_index
    ):
        # even though the current color should be continued, reset it for now.
        # the next call to `disp_str()` will revive it.
        # `<=` (not `<`): a span that begins exactly on the last character has
        # just been opened above and must be closed as well.
        chars[-1] += theme.reset

    return chars, char_widths
 | 
						|
 | 
						|
 | 
						|
def prev_next_window[T](
 | 
						|
    iterable: Iterable[T]
 | 
						|
) -> Iterator[tuple[T | None, ...]]:
 | 
						|
    """Generates three-tuples of (previous, current, next) items.
 | 
						|
 | 
						|
    On the first iteration previous is None. On the last iteration next
 | 
						|
    is None. In case of exception next is None and the exception is re-raised
 | 
						|
    on a subsequent next() call.
 | 
						|
 | 
						|
    Inspired by `sliding_window` from `itertools` recipes.
 | 
						|
    """
 | 
						|
 | 
						|
    iterator = iter(iterable)
 | 
						|
    window = deque((None, next(iterator)), maxlen=3)
 | 
						|
    try:
 | 
						|
        for x in iterator:
 | 
						|
            window.append(x)
 | 
						|
            yield tuple(window)
 | 
						|
    except Exception:
 | 
						|
        raise
 | 
						|
    finally:
 | 
						|
        window.append(None)
 | 
						|
        yield tuple(window)
 |