| """Generate the main interpreter switch.
 | |
| 
 | |
| Reads the instruction definitions from bytecodes.c.
 | |
| Writes the cases to generated_cases.c.h, which is #included in ceval.c.
 | |
| """
 | |
| 
 | |
| import argparse
 | |
| import contextlib
 | |
| import dataclasses
 | |
| import os
 | |
| import re
 | |
| import sys
 | |
| import typing
 | |
| 
 | |
| import parser
 | |
| from parser import StackEffect
 | |
| 
 | |
| HERE = os.path.dirname(__file__)
 | |
| ROOT = os.path.join(HERE, "../..")
 | |
| THIS = os.path.relpath(__file__, ROOT)
 | |
| 
 | |
| DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c"))
 | |
| DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h"))
 | |
| DEFAULT_METADATA_OUTPUT = os.path.relpath(
 | |
|     os.path.join(ROOT, "Python/opcode_metadata.h")
 | |
| )
 | |
| BEGIN_MARKER = "// BEGIN BYTECODES //"
 | |
| END_MARKER = "// END BYTECODES //"
 | |
| RE_PREDICTED = (
 | |
|     r"^\s*(?:PREDICT\(|GO_TO_INSTRUCTION\(|DEOPT_IF\(.*?,\s*)(\w+)\);\s*(?://.*)?$"
 | |
| )
 | |
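# RE_PREDICTED matches lines of the form "PREDICT(NAME);",
# "GO_TO_INSTRUCTION(NAME);" or "DEOPT_IF(<cond>, NAME);" (optionally followed
# by a // comment) and captures the predicted instruction name.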
UNUSED = "unused"
BITS_PER_CODE_UNIT = 16

arg_parser = argparse.ArgumentParser(
    description="Generate the code for the interpreter switch.",
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
arg_parser.add_argument(
    "-i", "--input", type=str, help="Instruction definitions", default=DEFAULT_INPUT
)
arg_parser.add_argument(
    "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT
)
arg_parser.add_argument(
    "-m",
    "--metadata",
    action="store_true",
    help=f"Generate metadata instead, changes output default to {DEFAULT_METADATA_OUTPUT}",
)
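
# A rough usage sketch (assuming this script sits two directories below the
# CPython root, as ROOT above implies, e.g. Tools/cases_generator/):
#
#   python generate_cases.py -i Python/bytecodes.c -o Python/generated_cases.c.h
#   python generate_cases.py --metadata    # writes opcode_metadata.h instead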


def effect_size(effect: StackEffect) -> tuple[int, str]:
    """Return the 'size' impact of a stack effect.

    Returns a tuple (numeric, symbolic) where:

    - numeric is an int giving the statically analyzable size of the effect
    - symbolic is a string representing a variable effect (e.g. 'oparg*2')

    At most one of these will be non-zero / non-empty.
    """
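    # For example (following the branches below): an effect whose .size is
    # "oparg" gives (0, "oparg"); one whose .cond is "oparg & 1" gives
    # (0, "(oparg & 1) ? 1 : 0"); a plain effect gives (1, "").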
    if effect.size:
        assert not effect.cond, "Array effects cannot have a condition"
        return 0, effect.size
    elif effect.cond:
        return 0, f"{maybe_parenthesize(effect.cond)} ? 1 : 0"
    else:
        return 1, ""


def maybe_parenthesize(sym: str) -> str:
    """Add parentheses around a string if it contains an operator.

    An exception is made for '*' which is common and harmless
    in the context where the symbolic size is used.
    """
    if re.match(r"^[\s\w*]+$", sym):
        return sym
    else:
        return f"({sym})"


def list_effect_size(effects: list[StackEffect]) -> tuple[int, str]:
    numeric = 0
    symbolic: list[str] = []
    for effect in effects:
        diff, sym = effect_size(effect)
        numeric += diff
        if sym:
            symbolic.append(maybe_parenthesize(sym))
    return numeric, " + ".join(symbolic)


def string_effect_size(arg: tuple[int, str]) -> str:
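    # For example, list_effect_size() on three effects of which one has
    # size "oparg" returns (2, "oparg"), which this function renders as
    # "2 + oparg".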
    numeric, symbolic = arg
    if numeric and symbolic:
        return f"{numeric} + {symbolic}"
    elif symbolic:
        return symbolic
    else:
        return str(numeric)


class Formatter:
    """Wraps an output stream with the ability to indent etc."""

    stream: typing.TextIO
    prefix: str

    def __init__(self, stream: typing.TextIO, indent: int) -> None:
        self.stream = stream
        self.prefix = " " * indent

    def write_raw(self, s: str) -> None:
        self.stream.write(s)

    def emit(self, arg: str) -> None:
        if arg:
            self.write_raw(f"{self.prefix}{arg}\n")
        else:
            self.write_raw("\n")

    @contextlib.contextmanager
    def indent(self):
        self.prefix += "    "
        yield
        self.prefix = self.prefix[:-4]

    @contextlib.contextmanager
    def block(self, head: str):
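        # For example, block("if (x)") emits "if (x) {", indents whatever is
        # emitted inside the with-block by four spaces, and finally emits "}".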
        if head:
            self.emit(head + " {")
        else:
            self.emit("{")
        with self.indent():
            yield
        self.emit("}")

    def stack_adjust(
        self,
        diff: int,
        input_effects: list[StackEffect],
        output_effects: list[StackEffect],
    ):
        # TODO: Get rid of 'diff' parameter
        shrink, isym = list_effect_size(input_effects)
        grow, osym = list_effect_size(output_effects)
        diff += grow - shrink
        if isym and isym != osym:
            self.emit(f"STACK_SHRINK({isym});")
        if diff < 0:
            self.emit(f"STACK_SHRINK({-diff});")
        if diff > 0:
            self.emit(f"STACK_GROW({diff});")
        if osym and osym != isym:
            self.emit(f"STACK_GROW({osym});")

    def declare(self, dst: StackEffect, src: StackEffect | None):
        if dst.name == UNUSED:
            return
        typ = f"{dst.type}" if dst.type else "PyObject *"
        if src:
            cast = self.cast(dst, src)
            init = f" = {cast}{src.name}"
        elif dst.cond:
            init = " = NULL"
        else:
            init = ""
        sepa = "" if typ.endswith("*") else " "
        self.emit(f"{typ}{sepa}{dst.name}{init};")

    def assign(self, dst: StackEffect, src: StackEffect):
        if src.name == UNUSED:
            return
        cast = self.cast(dst, src)
        if m := re.match(r"^PEEK\((.*)\)$", dst.name):
            stmt = f"POKE({m.group(1)}, {cast}{src.name});"
            if src.cond:
                stmt = f"if ({src.cond}) {{ {stmt} }}"
            self.emit(stmt)
        elif m := re.match(r"^&PEEK\(.*\)$", dst.name):
            # The user code is responsible for writing to the output array.
            pass
        elif m := re.match(r"^REG\(oparg(\d+)\)$", dst.name):
            self.emit(f"Py_XSETREF({dst.name}, {cast}{src.name});")
        else:
            self.emit(f"{dst.name} = {cast}{src.name};")

    def cast(self, dst: StackEffect, src: StackEffect) -> str:
        return f"({dst.type or 'PyObject *'})" if src.type != dst.type else ""


@dataclasses.dataclass
class Instruction:
    """An instruction with additional data and code."""

    # Parts of the underlying instruction definition
    inst: parser.InstDef
    register: bool
    kind: typing.Literal["inst", "op", "legacy"]  # Legacy means no (input -- output)
    name: str
    block: parser.Block
    block_text: list[str]  # Block.text, less curlies, less PREDICT() calls
    predictions: list[str]  # Prediction targets (instruction names)

    # Computed by constructor
    always_exits: bool
    cache_offset: int
    cache_effects: list[parser.CacheEffect]
    input_effects: list[StackEffect]
    output_effects: list[StackEffect]
    unmoved_names: frozenset[str]
    instr_fmt: str

    # Parallel to input_effects; set later
    input_registers: list[str] = dataclasses.field(repr=False)
    output_registers: list[str] = dataclasses.field(repr=False)

    # Set later
    family: parser.Family | None = None
    predicted: bool = False

    def __init__(self, inst: parser.InstDef):
        self.inst = inst
        self.register = inst.register
        self.kind = inst.kind
        self.name = inst.name
        self.block = inst.block
        self.block_text, self.check_eval_breaker, self.predictions = \
            extract_block_text(self.block)
        self.always_exits = always_exits(self.block_text)
        self.cache_effects = [
            effect for effect in inst.inputs if isinstance(effect, parser.CacheEffect)
        ]
        self.cache_offset = sum(c.size for c in self.cache_effects)
        self.input_effects = [
            effect for effect in inst.inputs if isinstance(effect, StackEffect)
        ]
        self.output_effects = inst.outputs  # For consistency/completeness
        unmoved_names: set[str] = set()
        for ieffect, oeffect in zip(self.input_effects, self.output_effects):
            if ieffect.name == oeffect.name:
                unmoved_names.add(ieffect.name)
            else:
                break
        self.unmoved_names = frozenset(unmoved_names)
        if self.register:
            num_regs = len(self.input_effects) + len(self.output_effects)
            num_dummies = (num_regs // 2) * 2 + 1 - num_regs
            fmt = "I" + "B" * num_regs + "X" * num_dummies
        else:
            if variable_used(inst, "oparg"):
                fmt = "IB"
            else:
                fmt = "IX"
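        # The rest of the format string describes the cache: the first cache
        # code unit contributes "C" and any further units contribute "0", so
        # e.g. an oparg-using instruction with a two-unit cache gets "IBC0".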
        cache = "C"
        for ce in self.cache_effects:
            for _ in range(ce.size):
                fmt += cache
                cache = "0"
        self.instr_fmt = fmt

    def analyze_registers(self, a: "Analyzer") -> None:
        regs = iter(("REG(oparg1)", "REG(oparg2)", "REG(oparg3)"))
        try:
            self.input_registers = [
                next(regs) for ieff in self.input_effects if ieff.name != UNUSED
            ]
            self.output_registers = [
                next(regs) for oeff in self.output_effects if oeff.name != UNUSED
            ]
        except StopIteration:  # Running out of registers
            a.error(
                f"Instruction {self.name} has too many register effects", node=self.inst
            )

    def write(self, out: Formatter) -> None:
        """Write one instruction, sans prologue and epilogue."""
        # Write a static assertion that a family's cache size is correct
        if family := self.family:
            if self.name == family.members[0]:
                if cache_size := family.size:
                    out.emit(
                        f"static_assert({cache_size} == "
                        f'{self.cache_offset}, "incorrect cache size");'
                    )

        if not self.register:
            # Write input stack effect variable declarations and initializations
            ieffects = list(reversed(self.input_effects))
            for i, ieffect in enumerate(ieffects):
                isize = string_effect_size(
                    list_effect_size([ieff for ieff in ieffects[: i + 1]])
                )
                if ieffect.size:
                    src = StackEffect(f"&PEEK({isize})", "PyObject **")
                elif ieffect.cond:
                    src = StackEffect(f"({ieffect.cond}) ? PEEK({isize}) : NULL", "")
                else:
                    src = StackEffect(f"PEEK({isize})", "")
                out.declare(ieffect, src)
        else:
            # Write input register variable declarations and initializations
            for ieffect, reg in zip(self.input_effects, self.input_registers):
                src = StackEffect(reg, "")
                out.declare(ieffect, src)

        # Write output stack effect variable declarations
        isize = string_effect_size(list_effect_size(self.input_effects))
        input_names = {ieffect.name for ieffect in self.input_effects}
        for i, oeffect in enumerate(self.output_effects):
            if oeffect.name not in input_names:
                if oeffect.size:
                    osize = string_effect_size(
                        list_effect_size([oeff for oeff in self.output_effects[:i]])
                    )
                    offset = "stack_pointer"
                    if isize != osize:
                        if isize != "0":
                            offset += f" - ({isize})"
                        if osize != "0":
                            offset += f" + {osize}"
                    src = StackEffect(offset, "PyObject **")
                    out.declare(oeffect, src)
                else:
                    out.declare(oeffect, None)

        # out.emit(f"JUMPBY(OPSIZE({self.inst.name}) - 1);")

        self.write_body(out, 0)

        # Skip the rest if the block always exits
        if self.always_exits:
            return

        if not self.register:
            # Write net stack growth/shrinkage
            out.stack_adjust(
                0,
                [ieff for ieff in self.input_effects],
                [oeff for oeff in self.output_effects],
            )

            # Write output stack effect assignments
            oeffects = list(reversed(self.output_effects))
            for i, oeffect in enumerate(oeffects):
                if oeffect.name in self.unmoved_names:
                    continue
                osize = string_effect_size(
                    list_effect_size([oeff for oeff in oeffects[: i + 1]])
                )
                if oeffect.size:
                    dst = StackEffect(f"&PEEK({osize})", "PyObject **")
                else:
                    dst = StackEffect(f"PEEK({osize})", "")
                out.assign(dst, oeffect)
        else:
            # Write output register assignments
            for oeffect, reg in zip(self.output_effects, self.output_registers):
                dst = StackEffect(reg, "")
                out.assign(dst, oeffect)

        # Write cache effect
        if self.cache_offset:
            out.emit(f"JUMPBY({self.cache_offset});")

    def write_body(self, out: Formatter, dedent: int, cache_adjust: int = 0) -> None:
        """Write the instruction body."""
        # Write cache effect variable declarations and initializations
        cache_offset = cache_adjust
        for ceffect in self.cache_effects:
            if ceffect.name != UNUSED:
                bits = ceffect.size * BITS_PER_CODE_UNIT
                if bits == 64:
                    # NOTE: We assume that 64-bit data in the cache
                    # is always an object pointer.
                    # If this becomes false, we need a way to specify
                    # syntactically what type the cache data is.
                    typ = "PyObject *"
                    func = "read_obj"
                else:
                    typ = f"uint{bits}_t "
                    func = f"read_u{bits}"
                out.emit(
                    f"{typ}{ceffect.name} = {func}(&next_instr[{cache_offset}].cache);"
                )
            cache_offset += ceffect.size
        assert cache_offset == self.cache_offset + cache_adjust

        # Write the body, substituting a goto for ERROR_IF() and other stuff
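        # For example, "ERROR_IF(res == NULL, error);" in a stack instruction
        # that still has two inputs to pop becomes
        # "if (res == NULL) goto pop_2_error;", and "DECREF_INPUTS();" expands
        # to one Py_DECREF() per input effect that is not an unmoved name.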
        assert dedent <= 0
        extra = " " * -dedent
        for line in self.block_text:
            if m := re.match(r"(\s*)ERROR_IF\((.+), (\w+)\);\s*(?://.*)?$", line):
                space, cond, label = m.groups()
                # ERROR_IF() must pop the inputs from the stack.
                # The code block is responsible for DECREF()ing them.
                # NOTE: If the label doesn't exist, just add it to ceval.c.
                if not self.register:
                    # Don't pop common input/output effects at the bottom!
                    # These aren't DECREF'ed so they can stay.
                    ieffs = list(self.input_effects)
                    oeffs = list(self.output_effects)
                    while ieffs and oeffs and ieffs[0] == oeffs[0]:
                        ieffs.pop(0)
                        oeffs.pop(0)
                    ninputs, symbolic = list_effect_size(ieffs)
                    if ninputs:
                        label = f"pop_{ninputs}_{label}"
                else:
                    symbolic = ""
                if symbolic:
                    out.write_raw(
                        f"{extra}{space}if ({cond}) {{ STACK_SHRINK({symbolic}); goto {label}; }}\n"
                    )
                else:
                    out.write_raw(f"{extra}{space}if ({cond}) goto {label};\n")
            elif m := re.match(r"(\s*)DECREF_INPUTS\(\);\s*(?://.*)?$", line):
                if not self.register:
                    space = m.group(1)
                    for ieff in self.input_effects:
                        if ieff.name not in self.unmoved_names:
                            out.write_raw(f"{extra}{space}Py_DECREF({ieff.name});\n")
            else:
                out.write_raw(extra + line)


InstructionOrCacheEffect = Instruction | parser.CacheEffect
StackEffectMapping = list[tuple[StackEffect, StackEffect]]


@dataclasses.dataclass
class Component:
    instr: Instruction
    input_mapping: StackEffectMapping
    output_mapping: StackEffectMapping

    def write_body(self, out: Formatter, cache_adjust: int) -> None:
        with out.block(""):
            input_names = {ieffect.name for _, ieffect in self.input_mapping}
            for var, ieffect in self.input_mapping:
                out.declare(ieffect, var)
            for _, oeffect in self.output_mapping:
                if oeffect.name not in input_names:
                    out.declare(oeffect, None)

            self.instr.write_body(out, dedent=-4, cache_adjust=cache_adjust)

            for var, oeffect in self.output_mapping:
                out.assign(var, oeffect)


@dataclasses.dataclass
class SuperOrMacroInstruction:
    """Common fields for super- and macro instructions."""

    name: str
    stack: list[StackEffect]
    initial_sp: int
    final_sp: int
    instr_fmt: str


@dataclasses.dataclass
class SuperInstruction(SuperOrMacroInstruction):
    """A super-instruction."""

    super: parser.Super
    parts: list[Component]


@dataclasses.dataclass
class MacroInstruction(SuperOrMacroInstruction):
    """A macro instruction."""

    macro: parser.Macro
    parts: list[Component | parser.CacheEffect]


AnyInstruction = Instruction | SuperInstruction | MacroInstruction
INSTR_FMT_PREFIX = "INSTR_FMT_"


class Analyzer:
    """Parse input, analyze it, and write to output."""

    filename: str
    output_filename: str
    src: str
    errors: int = 0

    def __init__(self, filename: str, output_filename: str):
        """Read the input file."""
        self.filename = filename
        self.output_filename = output_filename
        with open(filename) as f:
            self.src = f.read()

    def error(self, msg: str, node: parser.Node) -> None:
        lineno = 0
        if context := node.context:
            # Use line number of first non-comment in the node
            for token in context.owner.tokens[context.begin : context.end]:
                lineno = token.line
                if token.kind != "COMMENT":
                    break
        print(f"{self.filename}:{lineno}: {msg}", file=sys.stderr)
        self.errors += 1

    everything: list[parser.InstDef | parser.Super | parser.Macro]
    instrs: dict[str, Instruction]  # Includes ops
    supers: dict[str, parser.Super]
    super_instrs: dict[str, SuperInstruction]
    macros: dict[str, parser.Macro]
    macro_instrs: dict[str, MacroInstruction]
    families: dict[str, parser.Family]

    def parse(self) -> None:
        """Parse the source text.

        We only want the parser to see the stuff between the
        begin and end markers.
        """
        psr = parser.Parser(self.src, filename=self.filename)

        # Skip until begin marker
        while tkn := psr.next(raw=True):
            if tkn.text == BEGIN_MARKER:
                break
        else:
            raise psr.make_syntax_error(
                f"Couldn't find {BEGIN_MARKER!r} in {psr.filename}"
            )
        start = psr.getpos()

        # Find end marker, then delete everything after it
        while tkn := psr.next(raw=True):
            if tkn.text == END_MARKER:
                break
        del psr.tokens[psr.getpos() - 1 :]

        # Parse from start
        psr.setpos(start)
        self.everything = []
        self.instrs = {}
        self.supers = {}
        self.macros = {}
        self.families = {}
        thing: parser.InstDef | parser.Super | parser.Macro | parser.Family | None
        while thing := psr.definition():
            match thing:
                case parser.InstDef(name=name):
                    self.instrs[name] = Instruction(thing)
                    self.everything.append(thing)
                case parser.Super(name):
                    self.supers[name] = thing
                    self.everything.append(thing)
                case parser.Macro(name):
                    self.macros[name] = thing
                    self.everything.append(thing)
                case parser.Family(name):
                    self.families[name] = thing
                case _:
                    typing.assert_never(thing)
        if not psr.eof():
            raise psr.make_syntax_error("Extra stuff at the end")

        print(
            f"Read {len(self.instrs)} instructions/ops, "
            f"{len(self.supers)} supers, {len(self.macros)} macros, "
            f"and {len(self.families)} families from {self.filename}",
            file=sys.stderr,
        )

    def analyze(self) -> None:
        """Analyze the inputs.

        Raises SystemExit if there is an error.
        """
        self.find_predictions()
        self.analyze_register_instrs()
        self.analyze_supers_and_macros()
        self.map_families()
        self.check_families()

    def find_predictions(self) -> None:
        """Find the instructions that need PREDICTED() labels."""
        for instr in self.instrs.values():
            targets = set(instr.predictions)
            for line in instr.block_text:
                if m := re.match(RE_PREDICTED, line):
                    targets.add(m.group(1))
            for target in targets:
                if target_instr := self.instrs.get(target):
                    target_instr.predicted = True
                else:
                    self.error(
                        f"Unknown instruction {target!r} predicted in {instr.name!r}",
                        instr.inst,  # TODO: Use better location
                    )

    def map_families(self) -> None:
        """Link instruction names back to their family, if they have one."""
        for family in self.families.values():
            for member in family.members:
                if member_instr := self.instrs.get(member):
                    if member_instr.family not in (family, None):
                        self.error(
                            f"Instruction {member} is a member of multiple families "
                            f"({member_instr.family.name}, {family.name}).",
                            family,
                        )
                    else:
                        member_instr.family = family
                elif member_macro := self.macro_instrs.get(member):
                    for part in member_macro.parts:
                        if isinstance(part, Component):
                            if part.instr.family not in (family, None):
                                self.error(
                                    f"Component {part.instr.name} of macro {member} "
                                    f"is a member of multiple families "
                                    f"({part.instr.family.name}, {family.name}).",
                                    family,
                                )
                            else:
                                part.instr.family = family
                else:
                    self.error(
                        f"Unknown instruction {member!r} referenced in family {family.name!r}",
                        family,
                    )

    def check_families(self) -> None:
        """Check each family:

        - Must have at least 2 members
        - All members must be known instructions
        - All members must have the same cache, input and output effects
        """
        for family in self.families.values():
            if len(family.members) < 2:
                self.error(f"Family {family.name!r} has insufficient members", family)
            members = [
                member
                for member in family.members
                if member in self.instrs or member in self.macro_instrs
            ]
            if members != family.members:
                unknown = set(family.members) - set(members)
                self.error(
                    f"Family {family.name!r} has unknown members: {unknown}", family
                )
            if len(members) < 2:
                continue
            expected_effects = self.effect_counts(members[0])
            for member in members[1:]:
                member_effects = self.effect_counts(member)
                if member_effects != expected_effects:
                    self.error(
                        f"Family {family.name!r} has inconsistent "
                        f"(cache, input, output) effects:\n"
                        f"  {family.members[0]} = {expected_effects}; "
                        f"{member} = {member_effects}",
                        family,
                    )

    def effect_counts(self, name: str) -> tuple[int, int, int]:
        if instr := self.instrs.get(name):
            cache = instr.cache_offset
            input = len(instr.input_effects)
            output = len(instr.output_effects)
        elif macro := self.macro_instrs.get(name):
            cache, input, output = 0, 0, 0
            for part in macro.parts:
                if isinstance(part, Component):
                    cache += part.instr.cache_offset
                    # A component may pop what the previous component pushed,
                    # so we offset the input/output counts by that.
                    delta_i = len(part.instr.input_effects)
                    delta_o = len(part.instr.output_effects)
                    offset = min(delta_i, output)
                    input += delta_i - offset
                    output += delta_o - offset
                else:
                    assert isinstance(part, parser.CacheEffect), part
                    cache += part.size
        else:
            assert False, f"Unknown instruction {name!r}"
        return cache, input, output

    def analyze_register_instrs(self) -> None:
        for instr in self.instrs.values():
            if instr.register:
                instr.analyze_registers(self)

    def analyze_supers_and_macros(self) -> None:
        """Analyze each super- and macro instruction."""
        self.super_instrs = {}
        self.macro_instrs = {}
        for name, super in self.supers.items():
            self.super_instrs[name] = self.analyze_super(super)
        for name, macro in self.macros.items():
            self.macro_instrs[name] = self.analyze_macro(macro)

    def analyze_super(self, super: parser.Super) -> SuperInstruction:
        components = self.check_super_components(super)
        stack, initial_sp = self.stack_analysis(components)
        sp = initial_sp
        parts: list[Component] = []
        format = ""
        for instr in components:
            part, sp = self.analyze_instruction(instr, stack, sp)
            parts.append(part)
            format += instr.instr_fmt
        final_sp = sp
        return SuperInstruction(
            super.name, stack, initial_sp, final_sp, format, super, parts
        )

    def analyze_macro(self, macro: parser.Macro) -> MacroInstruction:
        components = self.check_macro_components(macro)
        stack, initial_sp = self.stack_analysis(components)
        sp = initial_sp
        parts: list[Component | parser.CacheEffect] = []
        format = "IB"  # Macros don't support register instructions yet
        cache = "C"
        for component in components:
            match component:
                case parser.CacheEffect() as ceffect:
                    parts.append(ceffect)
                    for _ in range(ceffect.size):
                        format += cache
                        cache = "0"
                case Instruction() as instr:
                    part, sp = self.analyze_instruction(instr, stack, sp)
                    parts.append(part)
                    for ce in instr.cache_effects:
                        for _ in range(ce.size):
                            format += cache
                            cache = "0"
                case _:
                    typing.assert_never(component)
        final_sp = sp
        return MacroInstruction(
            macro.name, stack, initial_sp, final_sp, format, macro, parts
        )

    def analyze_instruction(
        self, instr: Instruction, stack: list[StackEffect], sp: int
    ) -> tuple[Component, int]:
        input_mapping: StackEffectMapping = []
        for ieffect in reversed(instr.input_effects):
            sp -= 1
            input_mapping.append((stack[sp], ieffect))
        output_mapping: StackEffectMapping = []
        for oeffect in instr.output_effects:
            output_mapping.append((stack[sp], oeffect))
            sp += 1
        return Component(instr, input_mapping, output_mapping), sp

    def check_super_components(self, super: parser.Super) -> list[Instruction]:
        components: list[Instruction] = []
        for op in super.ops:
            if op.name not in self.instrs:
                self.error(f"Unknown instruction {op.name!r}", super)
            else:
                components.append(self.instrs[op.name])
        return components

    def check_macro_components(
        self, macro: parser.Macro
    ) -> list[InstructionOrCacheEffect]:
        components: list[InstructionOrCacheEffect] = []
        for uop in macro.uops:
            match uop:
                case parser.OpName(name):
                    if name not in self.instrs:
                        self.error(f"Unknown instruction {name!r}", macro)
                    components.append(self.instrs[name])
                case parser.CacheEffect():
                    components.append(uop)
                case _:
                    typing.assert_never(uop)
        return components

    def stack_analysis(
        self, components: typing.Iterable[InstructionOrCacheEffect]
    ) -> tuple[list[StackEffect], int]:
        """Analyze a super-instruction or macro.

        Ignore cache effects.

        Return the list of variable names and the initial stack pointer.
        """
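        # For example, two parts with stack effects (a -- b c) and (b c -- d)
        # give lowest == -1 and highest == 1, so two temporaries are created
        # and the initial stack pointer returned is 1.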
        lowest = current = highest = 0
        for thing in components:
            match thing:
                case Instruction() as instr:
                    if any(
                        eff.size for eff in instr.input_effects + instr.output_effects
                    ):
                        # TODO: Eventually this will be needed, at least for macros.
                        self.error(
                            f"Instruction {instr.name!r} has variable-sized stack effect, "
                            "which are not supported in super- or macro instructions",
                            instr.inst,  # TODO: Pass name+location of super/macro
                        )
                    current -= len(instr.input_effects)
                    lowest = min(lowest, current)
                    current += len(instr.output_effects)
                    highest = max(highest, current)
                case parser.CacheEffect():
                    pass
                case _:
                    typing.assert_never(thing)
        # At this point, 'current' is the net stack effect,
        # and 'lowest' and 'highest' are the extremes.
        # Note that 'lowest' may be negative.
        # TODO: Reverse the numbering.
        stack = [
            StackEffect(f"_tmp_{i+1}", "") for i in reversed(range(highest - lowest))
        ]
        return stack, -lowest

    def get_stack_effect_info(
        self, thing: parser.InstDef | parser.Super | parser.Macro
    ) -> tuple[AnyInstruction | None, str, str]:
        def effect_str(effects: list[StackEffect]) -> str:
            if getattr(thing, "kind", None) == "legacy":
                return str(-1)
            n_effect, sym_effect = list_effect_size(effects)
            if sym_effect:
                return f"{sym_effect} + {n_effect}" if n_effect else sym_effect
            return str(n_effect)

        instr: AnyInstruction | None
        match thing:
            case parser.InstDef():
                if thing.kind != "op":
                    instr = self.instrs[thing.name]
                    popped = effect_str(instr.input_effects)
                    pushed = effect_str(instr.output_effects)
                else:
                    instr = None
                    popped = ""
                    pushed = ""
            case parser.Super():
                instr = self.super_instrs[thing.name]
                popped = "+".join(
                    effect_str(comp.instr.input_effects) for comp in instr.parts
                )
                pushed = "+".join(
                    effect_str(comp.instr.output_effects) for comp in instr.parts
                )
            case parser.Macro():
                instr = self.macro_instrs[thing.name]
                parts = [comp for comp in instr.parts if isinstance(comp, Component)]
                popped = "+".join(
                    effect_str(comp.instr.input_effects) for comp in parts
                )
                pushed = "+".join(
                    effect_str(comp.instr.output_effects) for comp in parts
                )
            case _:
                typing.assert_never(thing)
        return instr, popped, pushed

    def write_stack_effect_functions(self) -> None:
        popped_data: list[tuple[AnyInstruction, str]] = []
        pushed_data: list[tuple[AnyInstruction, str]] = []
        for thing in self.everything:
            instr, popped, pushed = self.get_stack_effect_info(thing)
            if instr is not None:
                popped_data.append((instr, popped))
                pushed_data.append((instr, pushed))

        def write_function(
            direction: str, data: list[tuple[AnyInstruction, str]]
        ) -> None:
            self.out.emit("\n#ifndef NDEBUG")
            self.out.emit("static int")
            self.out.emit(f"_PyOpcode_num_{direction}(int opcode, int oparg, bool jump) {{")
            self.out.emit("    switch(opcode) {")
            for instr, effect in data:
                self.out.emit(f"        case {instr.name}:")
                self.out.emit(f"            return {effect};")
            self.out.emit("        default:")
            self.out.emit("            Py_UNREACHABLE();")
            self.out.emit("    }")
            self.out.emit("}")
            self.out.emit("#endif")

        write_function("popped", popped_data)
        write_function("pushed", pushed_data)

    def write_metadata(self) -> None:
        """Write instruction metadata to output file."""

        # Compute the set of all instruction formats.
        all_formats: set[str] = set()
        for thing in self.everything:
            match thing:
                case parser.InstDef():
                    format = self.instrs[thing.name].instr_fmt
                case parser.Super():
                    format = self.super_instrs[thing.name].instr_fmt
                case parser.Macro():
                    format = self.macro_instrs[thing.name].instr_fmt
                case _:
                    typing.assert_never(thing)
            all_formats.add(format)
        # Turn it into a list of enum definitions.
        format_enums = [INSTR_FMT_PREFIX + format for format in sorted(all_formats)]

        with open(self.output_filename, "w") as f:
            # Write provenance header
            f.write(f"// This file is generated by {THIS} --metadata\n")
            f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
            f.write(f"// Do not edit!\n")

            # Create formatter; the rest of the code uses this
            self.out = Formatter(f, 0)

            self.write_stack_effect_functions()

            # Write variable definition
            self.out.emit("enum Direction { DIR_NONE, DIR_READ, DIR_WRITE };")
            self.out.emit(f"enum InstructionFormat {{ {', '.join(format_enums)} }};")
            self.out.emit("struct opcode_metadata {")
            with self.out.indent():
                self.out.emit("enum Direction dir_op1;")
                self.out.emit("enum Direction dir_op2;")
                self.out.emit("enum Direction dir_op3;")
                self.out.emit("bool valid_entry;")
                self.out.emit("enum InstructionFormat instr_format;")
            self.out.emit("} _PyOpcode_opcode_metadata[256] = {")

            # Write metadata for each instruction
            for thing in self.everything:
                match thing:
                    case parser.InstDef():
                        if thing.kind != "op":
                            self.write_metadata_for_inst(self.instrs[thing.name])
                    case parser.Super():
                        self.write_metadata_for_super(self.super_instrs[thing.name])
                    case parser.Macro():
                        self.write_metadata_for_macro(self.macro_instrs[thing.name])
                    case _:
                        typing.assert_never(thing)

            # Write end of array
            self.out.emit("};")

    def write_metadata_for_inst(self, instr: Instruction) -> None:
        """Write metadata for a single instruction."""
        dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
        if instr.kind == "legacy":
            assert not instr.register
        else:
            if instr.register:
                directions: list[str] = []
                directions.extend("DIR_READ" for _ in instr.input_effects)
                directions.extend("DIR_WRITE" for _ in instr.output_effects)
                directions.extend("DIR_NONE" for _ in range(3))
                dir_op1, dir_op2, dir_op3 = directions[:3]
        self.out.emit(
            f"    [{instr.name}] = {{ {dir_op1}, {dir_op2}, {dir_op3}, true, {INSTR_FMT_PREFIX}{instr.instr_fmt} }},"
        )

    def write_metadata_for_super(self, sup: SuperInstruction) -> None:
        """Write metadata for a super-instruction."""
        dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
        self.out.emit(
            f"    [{sup.name}] = {{ {dir_op1}, {dir_op2}, {dir_op3}, true, {INSTR_FMT_PREFIX}{sup.instr_fmt} }},"
        )

    def write_metadata_for_macro(self, mac: MacroInstruction) -> None:
        """Write metadata for a macro-instruction."""
        dir_op1 = dir_op2 = dir_op3 = "DIR_NONE"
        self.out.emit(
            f"    [{mac.name}] = {{ {dir_op1}, {dir_op2}, {dir_op3}, true, {INSTR_FMT_PREFIX}{mac.instr_fmt} }},"
        )

    def write_instructions(self) -> None:
        """Write instructions to output file."""
        with open(self.output_filename, "w") as f:
            # Write provenance header
            f.write(f"// This file is generated by {THIS}\n")
            f.write(f"// from {os.path.relpath(self.filename, ROOT)}\n")
            f.write(f"// Do not edit!\n")

            # Create formatter; the rest of the code uses this
            self.out = Formatter(f, 8)

            # Write and count instructions of all kinds
            n_instrs = 0
            n_supers = 0
            n_macros = 0
            for thing in self.everything:
                match thing:
                    case parser.InstDef():
                        if thing.kind != "op":
                            n_instrs += 1
                            self.write_instr(self.instrs[thing.name])
                    case parser.Super():
                        n_supers += 1
                        self.write_super(self.super_instrs[thing.name])
                    case parser.Macro():
                        n_macros += 1
                        self.write_macro(self.macro_instrs[thing.name])
                    case _:
                        typing.assert_never(thing)

        print(
            f"Wrote {n_instrs} instructions, {n_supers} supers, "
            f"and {n_macros} macros to {self.output_filename}",
            file=sys.stderr,
        )

    def write_instr(self, instr: Instruction) -> None:
        name = instr.name
        self.out.emit("")
        with self.out.block(f"TARGET({name})"):
            if instr.predicted:
                self.out.emit(f"PREDICTED({name});")
            instr.write(self.out)
            if not instr.always_exits:
                for prediction in instr.predictions:
                    self.out.emit(f"PREDICT({prediction});")
                if instr.check_eval_breaker:
                    self.out.emit("CHECK_EVAL_BREAKER();")
                self.out.emit(f"DISPATCH();")

    def write_super(self, sup: SuperInstruction) -> None:
        """Write code for a super-instruction."""
        with self.wrap_super_or_macro(sup):
            first = True
            for comp in sup.parts:
                if first:
                    pass
                    # self.out.emit("JUMPBY(OPSIZE(opcode) - 1);")
                else:
                    self.out.emit("NEXTOPARG();")
                    self.out.emit("JUMPBY(1);")
                    # self.out.emit("JUMPBY(OPSIZE(opcode));")
                first = False
                comp.write_body(self.out, 0)
                if comp.instr.cache_offset:
                    self.out.emit(f"JUMPBY({comp.instr.cache_offset});")

    def write_macro(self, mac: MacroInstruction) -> None:
        """Write code for a macro instruction."""
        last_instr: Instruction | None = None
        with self.wrap_super_or_macro(mac):
            cache_adjust = 0
            for part in mac.parts:
                match part:
                    case parser.CacheEffect(size=size):
                        cache_adjust += size
                    case Component() as comp:
                        last_instr = comp.instr
                        comp.write_body(self.out, cache_adjust)
                        cache_adjust += comp.instr.cache_offset

            if cache_adjust:
                self.out.emit(f"JUMPBY({cache_adjust});")

            if (
                last_instr
                and (family := last_instr.family)
                and mac.name == family.members[0]
                and (cache_size := family.size)
            ):
                self.out.emit(
                    f"static_assert({cache_size} == "
                    f'{cache_adjust}, "incorrect cache size");'
                )

    @contextlib.contextmanager
    def wrap_super_or_macro(self, up: SuperOrMacroInstruction):
        """Shared boilerplate for super- and macro instructions."""
        # TODO: Somewhere (where?) make it so that if one instruction
        # has an output that is input to another, and the variable names
        # and types match and don't conflict with other instructions,
        # that variable is declared with the right name and type in the
        # outer block, rather than trusting the compiler to optimize it.
        self.out.emit("")
        with self.out.block(f"TARGET({up.name})"):
            for i, var in reversed(list(enumerate(up.stack))):
                src = None
                if i < up.initial_sp:
                    src = StackEffect(f"PEEK({up.initial_sp - i})", "")
                self.out.declare(var, src)

            yield

            # TODO: Use slices of up.stack instead of numeric values
            self.out.stack_adjust(up.final_sp - up.initial_sp, [], [])

            for i, var in enumerate(reversed(up.stack[: up.final_sp]), 1):
                dst = StackEffect(f"PEEK({i})", "")
                self.out.assign(dst, var)

            self.out.emit(f"DISPATCH();")


def extract_block_text(block: parser.Block) -> tuple[list[str], bool, list[str]]:
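    # Returns (body lines, whether a trailing CHECK_EVAL_BREAKER() call was
    # stripped, names of trailing PREDICT() targets that were stripped).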
    # Get lines of text with proper dedent
    blocklines = block.text.splitlines(True)

    # Remove blank lines from both ends
    while blocklines and not blocklines[0].strip():
        blocklines.pop(0)
    while blocklines and not blocklines[-1].strip():
        blocklines.pop()

    # Remove leading and trailing braces
    assert blocklines and blocklines[0].strip() == "{"
    assert blocklines and blocklines[-1].strip() == "}"
    blocklines.pop()
    blocklines.pop(0)

    # Remove trailing blank lines
    while blocklines and not blocklines[-1].strip():
        blocklines.pop()

    # Separate CHECK_EVAL_BREAKER() macro from end
    check_eval_breaker = \
        blocklines != [] and blocklines[-1].strip() == "CHECK_EVAL_BREAKER();"
    if check_eval_breaker:
        del blocklines[-1]

    # Separate PREDICT(...) macros from end
    predictions: list[str] = []
    while blocklines and (
        m := re.match(r"^\s*PREDICT\((\w+)\);\s*(?://.*)?$", blocklines[-1])
    ):
        predictions.insert(0, m.group(1))
        blocklines.pop()

    return blocklines, check_eval_breaker, predictions


def always_exits(lines: list[str]) -> bool:
    """Determine whether a block always ends in a return/goto/etc."""
    if not lines:
        return False
    line = lines[-1].rstrip()
    # Indent must match exactly (TODO: Do something better)
    if line[:12] != " " * 12:
        return False
    line = line[12:]
    return line.startswith(
        (
            "goto ",
            "return ",
            "DISPATCH",
            "GO_TO_",
            "Py_UNREACHABLE()",
            "ERROR_IF(true, ",
        )
    )


def variable_used(node: parser.Node, name: str) -> bool:
    """Determine whether a variable with a given name is used in a node."""
    return any(
        token.kind == "IDENTIFIER" and token.text == name for token in node.tokens
    )


def main():
    """Parse command line, parse input, analyze, write output."""
    args = arg_parser.parse_args()  # Prints message and sys.exit(2) on error
    if args.metadata:
        if args.output == DEFAULT_OUTPUT:
            args.output = DEFAULT_METADATA_OUTPUT
    a = Analyzer(args.input, args.output)  # Raises OSError if input unreadable
    a.parse()  # Raises SyntaxError on failure
    a.analyze()  # Prints messages and sets a.errors on failure
    if a.errors:
        sys.exit(f"Found {a.errors} errors")
    if args.metadata:
        a.write_metadata()
    else:
        a.write_instructions()  # Raises OSError if output can't be written


if __name__ == "__main__":
    main()