mirror of
https://github.com/python/cpython.git
synced 2025-11-09 18:11:38 +00:00
This PR sets up tagged pointers for CPython. The general idea is to create a separate struct _PyStackRef for everything on the evaluation stack to store the bits. This forces the C compiler to warn us if we try to cast things or pull things out of the struct directly. Only for free threading: We tag the low bit if something is deferred - that means we skip incref and decref operations on it. This behavior may change in the future if Mark's plans to defer all objects in the interpreter loop pan out. This implies a strict stack reference discipline is required. ALL incref and decref operations on stackrefs must use the stackref variants. It is unsafe to untag something and then do normal incref/decref ops on it. The new incref and decref variants are called dup and close. They mimic a "handle" API operating on these stackrefs. Please read Include/internal/pycore_stackref.h for more information! --------- Co-authored-by: Mark Shannon <9448417+markshannon@users.noreply.github.com>
484 lines
15 KiB
Python
484 lines
15 KiB
Python
"""Parser for bytecodes.inst."""
|
|
|
|
import functools
from dataclasses import dataclass, field
from typing import NamedTuple, Callable, TypeVar, Literal, cast

import lexer as lx
from plexer import PLexer
|
|
|
|
|
|
# Type variable for the parser instance a grammar method receives
# (bound to `Parser`).
P = TypeVar("P", bound="Parser")
# Type variable for the node type a grammar method returns (bound to `Node`).
N = TypeVar("N", bound="Node")
|
|
|
|
|
|
def contextual(func: Callable[[P], N | None]) -> Callable[[P], N | None]:
    """Decorate a grammar method so it backtracks on failure and records context.

    The wrapper saves the parser position before calling `func`.  If `func`
    returns None, the position is reset to the saved value and None is
    returned.  Otherwise the returned node's `context` attribute is set to a
    `Context` built from the positions before and after the call, with the
    parser itself as owner.

    An exception raised by `func` propagates unchanged; the position is not
    reset in that case.
    """

    # Preserve the grammar method's name and docstring on the wrapper.
    @functools.wraps(func)
    def contextual_wrapper(self: P) -> N | None:
        begin = self.getpos()
        res = func(self)
        if res is None:
            # No match: undo whatever `func` consumed.
            self.setpos(begin)
            return None
        end = self.getpos()
        res.context = Context(begin, end, self)
        return res

    return contextual_wrapper
|
|
|
|
|
|
class Context(NamedTuple):
    """Position range `begin`..`end` recorded for a node, plus the lexer it came from.

    `Node.tokens` slices `owner.tokens[begin:end]`, so `end` is exclusive.
    """

    begin: int
    end: int
    owner: PLexer

    def __repr__(self) -> str:
        """Return `<filename: begin-end>` using the owner's filename."""
        filename = self.owner.filename
        return f"<{filename}: {self.begin}-{self.end}>"
|
|
|
|
|
|
@dataclass
class Node:
    """Base class for parse-tree nodes; carries an optional source `Context`."""

    # Not an __init__ argument and excluded from __eq__; None until assigned.
    context: Context | None = field(init=False, compare=False, default=None)

    @property
    def text(self) -> str:
        """The node's text, i.e. `to_text()` with the default dedent of 0."""
        return self.to_text()

    def to_text(self, dedent: int = 0) -> str:
        """Render this node's tokens with `lx.to_text`; "" if there is no context."""
        if not self.context:
            return ""
        return lx.to_text(self.tokens, dedent)

    @property
    def tokens(self) -> list[lx.Token]:
        """The owner's tokens in `[context.begin:context.end]`; [] if no context."""
        ctx = self.context
        if not ctx:
            return []
        return ctx.owner.tokens[ctx.begin : ctx.end]
|
|
|
|
|
|
@dataclass
class Block(Node):
    """Node with no fields of its own; `Parser.block` creates it after a C blob."""

    # This just holds a context which has the list of tokens.
    pass
|
|
|
|
|
|
@dataclass
class StackEffect(Node):
    """A named stack item with optional type, condition and array size texts."""

    name: str = field(compare=False)  # __eq__ only uses type, cond, size
    type: str = ""  # Optional `:type`
    cond: str = ""  # Optional `if (cond)`
    size: str = ""  # Optional `[size]`
    # Note: size cannot be combined with type or cond

    def __repr__(self) -> str:
        """Show the fields as positional arguments, omitting trailing empty ones."""
        parts = [self.name, self.type, self.cond, self.size]
        # Strip empty strings from the right; empty values in the middle stay.
        while parts and parts[-1] == "":
            parts.pop()
        shown = ", ".join(map(repr, parts))
        return f"StackEffect({shown})"
|
|
|
|
|
|
@dataclass
class Expression(Node):
    """Node for an expression; `Parser.expression` fills it with stripped token text."""

    size: str  # Text of the expression
|
|
|
|
|
|
@dataclass
class CacheEffect(Node):
    """Node for the `IDENTIFIER '/' NUMBER` form: a name and an integer size."""

    name: str  # The identifier before the `/`
    size: int  # The number after the `/`, converted with int()
|
|
|
|
|
|
@dataclass
class OpName(Node):
    """Node holding a bare identifier used as an op name."""

    name: str
|
|
|
|
|
|
# An input is either a stack effect or a cache effect.
InputEffect = StackEffect | CacheEffect
# An output is always a stack effect.
OutputEffect = StackEffect
# A macro component is either an op name or a cache effect.
UOp = OpName | CacheEffect
|
|
|
|
|
|
@dataclass
class InstHeader(Node):
    """Header of an `inst(...)` or `op(...)` definition, without its block."""

    # Annotation texts; a "replicate" annotation is stored as "replicate(N)".
    annotations: list[str]
    kind: Literal["inst", "op"]  # Text of the introducing keyword
    name: str
    inputs: list[InputEffect]
    outputs: list[OutputEffect]
|
|
|
|
|
|
@dataclass
class InstDef(Node):
    """A complete `inst`/`op` definition: the header fields plus the body block."""

    annotations: list[str]
    kind: Literal["inst", "op"]
    name: str
    inputs: list[InputEffect]
    outputs: list[OutputEffect]
    block: Block  # The block parsed after the header
|
|
|
|
|
|
@dataclass
class Macro(Node):
    """A `macro(NAME) = uop + uop + ...;` definition."""

    name: str
    uops: list[UOp]  # Components in source order
|
|
|
|
|
|
@dataclass
class Family(Node):
    """A `family(NAME[, SIZE]) = { member, ... };` definition."""

    name: str
    size: str  # Variable giving the cache size in code units
    members: list[str]  # Identifiers listed between the braces
|
|
|
|
|
|
@dataclass
class Pseudo(Node):
    """A `pseudo(NAME, (inputs -- outputs)[, (flags)]) = { target, ... };` definition."""

    name: str
    inputs: list[InputEffect]
    outputs: list[OutputEffect]
    flags: list[str]  # instr flags to set on the pseudo instruction
    targets: list[str]  # opcodes this can be replaced by
|
|
|
|
|
|
# The node types that `Parser.definition` can return.
AstNode = InstDef | Macro | Pseudo | Family
|
|
|
|
|
|
class Parser(PLexer):
    """Recursive-descent parser for instruction definitions.

    Grammar methods return a node (or list) on success and None when the
    input does not match.  Methods decorated with `@contextual` have their
    position restored by the decorator when they return None; undecorated
    helpers that backtrack do so explicitly with `getpos()`/`setpos()`.
    Input that matches far enough to be unambiguous but is then malformed
    raises the error built by `make_syntax_error()`.
    """

    @contextual
    def definition(self) -> AstNode | None:
        """Parse one definition, trying macro, family, pseudo, then inst/op."""
        if macro := self.macro_def():
            return macro
        if family := self.family_def():
            return family
        if pseudo := self.pseudo_def():
            return pseudo
        if inst := self.inst_def():
            return inst
        return None

    @contextual
    def inst_def(self) -> InstDef | None:
        """Parse an instruction header followed by its block.

        Raises a syntax error if a header is found but no block follows.
        """
        if hdr := self.inst_header():
            if block := self.block():
                return InstDef(
                    hdr.annotations,
                    hdr.kind,
                    hdr.name,
                    hdr.inputs,
                    hdr.outputs,
                    block,
                )
            raise self.make_syntax_error("Expected block")
        return None

    @contextual
    def inst_header(self) -> InstHeader | None:
        """Parse the header of an `inst` or `op` definition.

        The header only matches when the token after the closing paren is a
        left brace; that brace is peeked at, not consumed.
        """
        # annotation* inst(NAME, (inputs -- outputs))
        # | annotation* op(NAME, (inputs -- outputs))
        annotations = []
        while anno := self.expect(lx.ANNOTATION):
            if anno.text == "replicate":
                # `replicate` takes a parenthesized count: replicate(N)
                self.require(lx.LPAREN)
                times = self.require(lx.NUMBER)
                self.require(lx.RPAREN)
                annotations.append(f"replicate({times.text})")
            else:
                annotations.append(anno.text)
        tkn = self.expect(lx.INST)
        if not tkn:
            tkn = self.expect(lx.OP)
        if tkn:
            kind = cast(Literal["inst", "op"], tkn.text)
            if self.expect(lx.LPAREN) and (tkn := self.expect(lx.IDENTIFIER)):
                name = tkn.text
                if self.expect(lx.COMMA):
                    inp, outp = self.io_effect()
                    if self.expect(lx.RPAREN):
                        if (tkn := self.peek()) and tkn.kind == lx.LBRACE:
                            return InstHeader(annotations, kind, name, inp, outp)
        return None

    def io_effect(self) -> tuple[list[InputEffect], list[OutputEffect]]:
        """Parse a parenthesized stack effect and return (inputs, outputs).

        Either list may be empty.  Raises a syntax error when the input does
        not have the `( ... -- ... )` shape.
        """
        # '(' [inputs] '--' [outputs] ')'
        if self.expect(lx.LPAREN):
            inputs = self.inputs() or []
            if self.expect(lx.MINUSMINUS):
                outputs = self.outputs() or []
                if self.expect(lx.RPAREN):
                    return inputs, outputs
        raise self.make_syntax_error("Expected stack effect")

    def inputs(self) -> list[InputEffect] | None:
        """Parse a comma-separated list of inputs; None if there is none."""
        # input (',' input)*
        here = self.getpos()
        if inp := self.input():
            inp = cast(InputEffect, inp)
            near = self.getpos()
            if self.expect(lx.COMMA):
                if rest := self.inputs():
                    return [inp] + rest
            # No further input after this one: un-consume any comma.
            self.setpos(near)
            return [inp]
        self.setpos(here)
        return None

    @contextual
    def input(self) -> InputEffect | None:
        """Parse a single input: a cache effect if possible, else a stack effect."""
        return self.cache_effect() or self.stack_effect()

    def outputs(self) -> list[OutputEffect] | None:
        """Parse a comma-separated list of outputs; None if there is none."""
        # output (, output)*
        here = self.getpos()
        if outp := self.output():
            near = self.getpos()
            if self.expect(lx.COMMA):
                if rest := self.outputs():
                    return [outp] + rest
            # No further output after this one: un-consume any comma.
            self.setpos(near)
            return [outp]
        self.setpos(here)
        return None

    @contextual
    def output(self) -> OutputEffect | None:
        """Parse a single output, which is always a stack effect."""
        return self.stack_effect()

    @contextual
    def cache_effect(self) -> CacheEffect | None:
        """Parse `NAME/SIZE`; None if the identifier is not followed by `/`.

        Raises a syntax error if the number's text is not accepted by int().
        """
        # IDENTIFIER '/' NUMBER
        if tkn := self.expect(lx.IDENTIFIER):
            if self.expect(lx.DIVIDE):
                num = self.require(lx.NUMBER).text
                try:
                    size = int(num)
                except ValueError:
                    raise self.make_syntax_error(f"Expected integer, got {num!r}")
                else:
                    return CacheEffect(tkn.text, size)
        return None

    @contextual
    def stack_effect(self) -> StackEffect | None:
        """Parse a stack effect: a name with optional type, condition or size.

        The array form `name[size]` may not be combined with a type or a
        condition; its type is set to "_PyStackRef *".
        """
        # IDENTIFIER [':' IDENTIFIER [TIMES]] ['if' '(' expression ')']
        # | IDENTIFIER '[' expression ']'
        if tkn := self.expect(lx.IDENTIFIER):
            type_text = ""
            if self.expect(lx.COLON):
                type_text = self.require(lx.IDENTIFIER).text.strip()
                if self.expect(lx.TIMES):
                    type_text += " *"
            cond_text = ""
            if self.expect(lx.IF):
                self.require(lx.LPAREN)
                if not (cond := self.expression()):
                    raise self.make_syntax_error("Expected condition")
                self.require(lx.RPAREN)
                cond_text = cond.text.strip()
            size_text = ""
            if self.expect(lx.LBRACKET):
                if type_text or cond_text:
                    raise self.make_syntax_error("Unexpected [")
                if not (size := self.expression()):
                    raise self.make_syntax_error("Expected expression")
                self.require(lx.RBRACKET)
                type_text = "_PyStackRef *"
                size_text = size.text.strip()
            return StackEffect(tkn.text, type_text, cond_text, size_text)
        return None

    @contextual
    def expression(self) -> Expression | None:
        """Collect tokens up to the bracket that closes the caller's opener.

        Callers consume the opening `(` or `[` first, so nesting starts at 1.
        The closing token itself is left unconsumed.  Returns None if no
        tokens were collected.
        """
        tokens: list[lx.Token] = []
        level = 1
        while tkn := self.peek():
            if tkn.kind in (lx.LBRACKET, lx.LPAREN):
                level += 1
            elif tkn.kind in (lx.RBRACKET, lx.RPAREN):
                level -= 1
                if level == 0:
                    break
            tokens.append(tkn)
            self.next()
        if not tokens:
            return None
        return Expression(lx.to_text(tokens).strip())

    @contextual
    def op(self) -> OpName | None:
        """Parse a single identifier as an op name."""
        if tkn := self.expect(lx.IDENTIFIER):
            return OpName(tkn.text)
        return None

    @contextual
    def macro_def(self) -> Macro | None:
        """Parse `macro(NAME) = uop + uop + ... ;`."""
        if self.expect(lx.MACRO):
            if self.expect(lx.LPAREN):
                if tkn := self.expect(lx.IDENTIFIER):
                    if self.expect(lx.RPAREN):
                        if self.expect(lx.EQUALS):
                            if uops := self.uops():
                                self.require(lx.SEMI)
                                return Macro(tkn.text, uops)
        return None

    def uops(self) -> list[UOp] | None:
        """Parse one or more uops separated by `+`; None if there is none.

        Raises a syntax error if a `+` is not followed by a uop.
        """
        if uop := self.uop():
            uop = cast(UOp, uop)
            uops = [uop]
            while self.expect(lx.PLUS):
                if uop := self.uop():
                    uop = cast(UOp, uop)
                    uops.append(uop)
                else:
                    raise self.make_syntax_error("Expected op name or cache effect")
            return uops
        return None

    @contextual
    def uop(self) -> UOp | None:
        """Parse `NAME` as an op name or `NAME/SIZE` as a cache effect.

        Raises a syntax error if `/` is not followed by an integer.
        """
        if tkn := self.expect(lx.IDENTIFIER):
            if self.expect(lx.DIVIDE):
                if num := self.expect(lx.NUMBER):
                    try:
                        size = int(num.text)
                    except ValueError:
                        raise self.make_syntax_error(
                            f"Expected integer, got {num.text!r}"
                        )
                    else:
                        return CacheEffect(tkn.text, size)
                raise self.make_syntax_error("Expected integer")
            else:
                return OpName(tkn.text)
        return None

    @contextual
    def family_def(self) -> Family | None:
        """Parse `family(NAME[, SIZE]) = { member, ... };`.

        SIZE may be an identifier or a number; when it is omitted the
        resulting `Family.size` is the empty string.
        """
        if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "family":
            size = None
            if self.expect(lx.LPAREN):
                if tkn := self.expect(lx.IDENTIFIER):
                    if self.expect(lx.COMMA):
                        if not (size := self.expect(lx.IDENTIFIER)):
                            if not (size := self.expect(lx.NUMBER)):
                                raise self.make_syntax_error(
                                    "Expected identifier or number"
                                )
                    if self.expect(lx.RPAREN):
                        if self.expect(lx.EQUALS):
                            if not self.expect(lx.LBRACE):
                                raise self.make_syntax_error("Expected {")
                            if members := self.members():
                                if self.expect(lx.RBRACE) and self.expect(lx.SEMI):
                                    return Family(
                                        tkn.text, size.text if size else "", members
                                    )
        return None

    def flags(self) -> list[str]:
        """Parse a parenthesized, comma-separated list of flag identifiers.

        Returns [] (with the position restored) if there is no such list.
        Raises a syntax error if the list is not closed by a right paren.
        """
        here = self.getpos()
        if self.expect(lx.LPAREN):
            if tkn := self.expect(lx.IDENTIFIER):
                flags = [tkn.text]
                while self.expect(lx.COMMA):
                    if tkn := self.expect(lx.IDENTIFIER):
                        flags.append(tkn.text)
                    else:
                        break
                if not self.expect(lx.RPAREN):
                    raise self.make_syntax_error("Expected comma or right paren")
                return flags
        self.setpos(here)
        return []

    @contextual
    def pseudo_def(self) -> Pseudo | None:
        """Parse `pseudo(NAME, (inputs -- outputs)[, (flags)]) = { target, ... };`."""
        if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "pseudo":
            if self.expect(lx.LPAREN):
                if tkn := self.expect(lx.IDENTIFIER):
                    if self.expect(lx.COMMA):
                        inp, outp = self.io_effect()
                        if self.expect(lx.COMMA):
                            flags = self.flags()
                        else:
                            flags = []
                        if self.expect(lx.RPAREN):
                            if self.expect(lx.EQUALS):
                                if not self.expect(lx.LBRACE):
                                    raise self.make_syntax_error("Expected {")
                                if members := self.members():
                                    if self.expect(lx.RBRACE) and self.expect(lx.SEMI):
                                        return Pseudo(
                                            tkn.text, inp, outp, flags, members
                                        )
        return None

    def members(self) -> list[str] | None:
        """Parse a comma-separated list of identifiers that ends at a right brace.

        The closing brace is peeked at, not consumed.  Returns None (with the
        position restored) if the list does not start with an identifier.
        Raises a syntax error if the list is not followed by a right brace.
        """
        here = self.getpos()
        if tkn := self.expect(lx.IDENTIFIER):
            members = [tkn.text]
            while self.expect(lx.COMMA):
                if tkn := self.expect(lx.IDENTIFIER):
                    members.append(tkn.text)
                else:
                    break
            peek = self.peek()
            if not peek or peek.kind != lx.RBRACE:
                # The list lives inside `{ ... }`, so a brace is what is missing.
                raise self.make_syntax_error("Expected comma or right brace")
            return members
        self.setpos(here)
        return None

    @contextual
    def block(self) -> Block | None:
        """Parse a C blob into a `Block`; the tokens are reachable via its context."""
        if self.c_blob():
            return Block()
        return None

    def c_blob(self) -> list[lx.Token]:
        """Consume tokens through the end of a bracketed blob and return them.

        Tokens are read with `next(raw=True)`.  Nesting of braces, parens and
        brackets is tracked; reading stops after the closing token that
        brings the nesting level to zero or below, or at end of input.
        """
        tokens: list[lx.Token] = []
        level = 0
        while tkn := self.next(raw=True):
            tokens.append(tkn)
            if tkn.kind in (lx.LBRACE, lx.LPAREN, lx.LBRACKET):
                level += 1
            elif tkn.kind in (lx.RBRACE, lx.RPAREN, lx.RBRACKET):
                level -= 1
                if level <= 0:
                    break
        return tokens
|
|
|
|
|
|
# Debugging entry point: parse a single definition and print the result.
#   <file>      parse the text between the BEGIN/END BYTECODES marker lines
#   -c <source> parse the source given on the command line
#   (no args)   parse a small built-in sample
if __name__ == "__main__":
    import sys

    if sys.argv[1:]:
        filename = sys.argv[1]
        if filename == "-c" and sys.argv[2:]:
            src = sys.argv[2]
            filename = "<string>"
        else:
            # Read with an explicit encoding so the result does not depend on
            # the locale's default.
            with open(filename, "r", encoding="utf-8") as f:
                src = f.read()
            srclines = src.splitlines()
            # list.index raises ValueError if a marker line is missing.
            begin = srclines.index("// BEGIN BYTECODES //")
            end = srclines.index("// END BYTECODES //")
            src = "\n".join(srclines[begin + 1 : end])
    else:
        filename = "<default>"
        src = "if (x) { x.foo; // comment\n}"
    parser = Parser(src, filename)
    x = parser.definition()
    print(x)
|