cpython/Tools/cases_generator/parser.py

307 lines
9.2 KiB
Python
Raw Normal View History

"""Parser for bytecodes.inst."""
from dataclasses import dataclass, field
from typing import NamedTuple, Callable, TypeVar
import lexer as lx
from plexer import PLexer
P = TypeVar("P", bound="Parser")
N = TypeVar("N", bound="Node")
def contextual(func: Callable[[P], N|None]) -> Callable[[P], N|None]:
# Decorator to wrap grammar methods.
# Resets position if `func` returns None.
def contextual_wrapper(self: P) -> N|None:
begin = self.getpos()
res = func(self)
if res is None:
self.setpos(begin)
return
end = self.getpos()
res.context = Context(begin, end, self)
return res
return contextual_wrapper
class Context(NamedTuple):
    """Token span attached to a parsed node.

    `begin` and `end` are parser positions (as returned by `getpos()`);
    `owner` is the parser whose token list those positions index into.
    """

    begin: int
    end: int
    owner: PLexer

    def __repr__(self) -> str:
        # Only the span is shown; the owner is left out of the repr.
        return f"<{self.begin}-{self.end}>"
@dataclass
class Node:
    """Base class for parse-tree nodes.

    `context` is not a constructor argument: it defaults to None and is
    filled in by the `contextual` decorator after a successful parse.
    """

    # compare=False keeps the source span out of the generated __eq__.
    # Two nodes with the same fields parsed at different token positions
    # must compare equal; otherwise equality checks between nodes (such
    # as Parser.check_overlaps) can never succeed.
    context: Context|None = field(init=False, compare=False, default=None)

    @property
    def text(self) -> str:
        """Source text of this node, with no dedent applied."""
        return self.to_text()

    def to_text(self, dedent: int = 0) -> str:
        """Return the text of the tokens spanned by this node.

        Returns "" when no context has been attached.  `dedent` is
        passed through unchanged to `lx.to_text`.
        """
        context = self.context
        if context is None:
            return ""
        tokens = context.owner.tokens
        return lx.to_text(tokens[context.begin:context.end], dedent)
@dataclass
class Block(Node):
    """Node holding the list of tokens collected for a block."""

    tokens: list[lx.Token]
@dataclass
class Effect(Node):
    """Common base class for StackEffect and CacheEffect; adds no fields."""
@dataclass
class StackEffect(Effect):
    """Effect written as a bare identifier in an input or output list."""

    name: str
    # TODO: type, condition
@dataclass
class CacheEffect(Effect):
    """Effect written as `name/size` in an input list."""

    name: str
    size: int  # The integer that follows the '/'
@dataclass
class InstHeader(Node):
    """Parsed `inst(NAME)` or `inst(NAME, (inputs -- outputs))` header."""

    name: str
    inputs: list[Effect]  # Empty when the header has no stack effect
    outputs: list[Effect]  # Empty when the header has no stack effect
@dataclass
class InstDef(Node):
    """An instruction definition: a header together with its block.

    The properties forward to the header so callers do not have to reach
    through `header` themselves.
    """

    header: InstHeader
    block: Block

    @property
    def name(self) -> str:
        """Instruction name, taken from the header."""
        return self.header.name

    @property
    def inputs(self) -> list[Effect]:
        """Input effects, exactly as stored on the header."""
        return self.header.inputs

    @property
    def outputs(self) -> list[StackEffect]:
        """Output effects of the header that are StackEffect instances."""
        # The isinstance filter narrows list[Effect] to list[StackEffect].
        return [
            effect
            for effect in self.header.outputs
            if isinstance(effect, StackEffect)
        ]
@dataclass
class Super(Node):
    """Parsed `super(NAME) = OP + OP ... ;` definition."""

    name: str
    ops: list[str]  # Identifiers joined by '+' on the right-hand side
@dataclass
class Family(Node):
    """Parsed `family(NAME[, SIZE]) = { MEMBER, ... };` definition."""

    name: str
    size: str  # Variable giving the cache size in code units
    members: list[str]  # Identifiers listed between the braces
class Parser(PLexer):
    """Parser for instruction, super-instruction and family definitions.

    Grammar methods return a node (or list) on success and None when the
    input does not match.  Methods decorated with `@contextual` have the
    token position restored when they return None, and get a `Context`
    attached to the node they return otherwise.
    """

    @contextual
    def inst_def(self) -> InstDef | None:
        """Parse an instruction header followed by its block.

        Raises a syntax error when a header is present but no block
        follows it.
        """
        if header := self.inst_header():
            if block := self.block():
                return InstDef(header, block)
            raise self.make_syntax_error("Expected block")
        return None

    @contextual
    def inst_header(self) -> InstHeader | None:
        """Parse `inst(NAME)` or `inst(NAME, (inputs -- outputs))`.

        The second form is only accepted when the next token is '{'.
        """
        # TODO: Error out when there is something unexpected.
        # TODO: Make INST a keyword in the lexer.
        if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "inst":
            if (self.expect(lx.LPAREN)
                    and (tkn := self.expect(lx.IDENTIFIER))):
                name = tkn.text
                if self.expect(lx.COMMA):
                    inp, outp = self.stack_effect()
                    if self.expect(lx.RPAREN):
                        # Peek only: the '{' belongs to the block.
                        if ((tkn := self.peek())
                                and tkn.kind == lx.LBRACE):
                            self.check_overlaps(inp, outp)
                            return InstHeader(name, inp, outp)
                elif self.expect(lx.RPAREN):
                    return InstHeader(name, [], [])
        return None

    def check_overlaps(self, inp: list[Effect], outp: list[Effect]):
        """Reject a stack value named in both lists at different positions.

        Stack effects are matched by name instead of with `==`, so the
        check does not depend on whether node equality takes the
        per-position `context` into account.

        Raises a syntax error on the first mismatch found.
        """
        for i, effect in enumerate(inp):
            if not isinstance(effect, StackEffect):
                continue
            for j, other in enumerate(outp):
                if isinstance(other, StackEffect) and other.name == effect.name:
                    if i != j:
                        raise self.make_syntax_error(
                            f"Input {effect!r} at pos {i} repeated in output at different pos {j}")
                    # Only the first output with this name is examined.
                    break

    def stack_effect(self) -> tuple[list[Effect], list[Effect]]:
        """Parse '(' [inputs] '--' [outputs] ')' into two lists.

        Either list may be empty.  Raises a syntax error if the input
        does not have this shape.
        """
        if self.expect(lx.LPAREN):
            inp = self.inputs() or []
            if self.expect(lx.MINUSMINUS):
                outp = self.outputs() or []
                if self.expect(lx.RPAREN):
                    return inp, outp
        raise self.make_syntax_error("Expected stack effect")

    def inputs(self) -> list[Effect] | None:
        """Parse `input (',' input)*`; None if there is no first input."""
        here = self.getpos()
        if inp := self.input():
            near = self.getpos()
            if self.expect(lx.COMMA):
                if rest := self.inputs():
                    return [inp] + rest
            # No further input: give back a consumed comma, if any.
            self.setpos(near)
            return [inp]
        self.setpos(here)
        return None

    @contextual
    def input(self) -> Effect | None:
        """Parse one input effect.

        IDENTIFIER '/' INTEGER gives a CacheEffect; a bare IDENTIFIER
        gives a StackEffect.  Raises a syntax error when '/' is not
        followed by an integer.
        """
        if (tkn := self.expect(lx.IDENTIFIER)):
            if self.expect(lx.DIVIDE):
                if num := self.expect(lx.NUMBER):
                    try:
                        size = int(num.text)
                    except ValueError:
                        # The message already carries the offending
                        # text, so the ValueError context is dropped.
                        raise self.make_syntax_error(
                            f"Expected integer, got {num.text!r}") from None
                    else:
                        return CacheEffect(tkn.text, size)
                raise self.make_syntax_error("Expected integer")
            else:
                return StackEffect(tkn.text)
        return None

    def outputs(self) -> list[Effect] | None:
        """Parse `output (',' output)*`; None if there is no first output."""
        here = self.getpos()
        if outp := self.output():
            near = self.getpos()
            if self.expect(lx.COMMA):
                if rest := self.outputs():
                    return [outp] + rest
            # No further output: give back a consumed comma, if any.
            self.setpos(near)
            return [outp]
        self.setpos(here)
        return None

    @contextual
    def output(self) -> Effect | None:
        """Parse one output effect: a bare IDENTIFIER (StackEffect)."""
        if (tkn := self.expect(lx.IDENTIFIER)):
            return StackEffect(tkn.text)
        return None

    @contextual
    def super_def(self) -> Super | None:
        """Parse `super(NAME) = OP ('+' OP)* ';'`."""
        if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "super":
            if self.expect(lx.LPAREN):
                if (tkn := self.expect(lx.IDENTIFIER)):
                    if self.expect(lx.RPAREN):
                        if self.expect(lx.EQUALS):
                            if ops := self.ops():
                                res = Super(tkn.text, ops)
                                return res
        return None

    def ops(self) -> list[str] | None:
        """Parse `OP ('+' OP)* ';'` and return the op names.

        Returns None when there is no leading identifier.  After that
        point `require` is used for the remaining tokens.
        """
        if tkn := self.expect(lx.IDENTIFIER):
            ops = [tkn.text]
            while self.expect(lx.PLUS):
                if tkn := self.require(lx.IDENTIFIER):
                    ops.append(tkn.text)
            self.require(lx.SEMI)
            return ops
        return None

    @contextual
    def family_def(self) -> Family | None:
        """Parse `family(NAME[, SIZE]) = { MEMBER, ... };`.

        SIZE is optional; the Family gets "" for it when absent.  Raises
        a syntax error when the comma is not followed by an identifier
        or when '=' is not followed by '{'.
        """
        if (tkn := self.expect(lx.IDENTIFIER)) and tkn.text == "family":
            size = None
            if self.expect(lx.LPAREN):
                if (tkn := self.expect(lx.IDENTIFIER)):
                    if self.expect(lx.COMMA):
                        if not (size := self.expect(lx.IDENTIFIER)):
                            raise self.make_syntax_error(
                                "Expected identifier")
                    if self.expect(lx.RPAREN):
                        if self.expect(lx.EQUALS):
                            if not self.expect(lx.LBRACE):
                                raise self.make_syntax_error("Expected {")
                            if members := self.members():
                                if self.expect(lx.RBRACE) and self.expect(lx.SEMI):
                                    return Family(tkn.text, size.text if size else "", members)
        return None

    def members(self) -> list[str] | None:
        """Parse a comma-separated identifier list that ends before '}'.

        A trailing comma is accepted.  Returns None when there is no
        first identifier; raises a syntax error when the list is not
        followed by '}'.
        """
        here = self.getpos()
        if tkn := self.expect(lx.IDENTIFIER):
            members = [tkn.text]
            while self.expect(lx.COMMA):
                if tkn := self.expect(lx.IDENTIFIER):
                    members.append(tkn.text)
                else:
                    break
            peek = self.peek()
            if not peek or peek.kind != lx.RBRACE:
                # The list is terminated by '}', so name the brace here.
                raise self.make_syntax_error("Expected comma or right brace")
            return members
        self.setpos(here)
        return None

    @contextual
    def block(self) -> Block | None:
        """Parse a block; None when no tokens could be collected.

        Returning None (rather than an empty, always-truthy Block) lets
        inst_def report "Expected block".
        """
        tokens = self.c_blob()
        if not tokens:
            return None
        return Block(tokens)

    def c_blob(self) -> list[lx.Token]:
        """Collect tokens up to the closer that balances the brackets.

        Tokens are read with `next(raw=True)`.  The nesting level goes
        up on '{', '(' or '[' and down on the matching closers.  The
        loop stops at the closer that brings the level to zero or below;
        that closer is consumed but not added to the result.
        """
        tokens: list[lx.Token] = []
        level = 0
        while tkn := self.next(raw=True):
            if tkn.kind in (lx.LBRACE, lx.LPAREN, lx.LBRACKET):
                level += 1
            elif tkn.kind in (lx.RBRACE, lx.RPAREN, lx.RBRACKET):
                level -= 1
                if level <= 0:
                    break
            tokens.append(tkn)
        return tokens
if __name__ == "__main__":
    # Ad-hoc driver: parse one definition and print the resulting node.
    #   parser.py             -> parse a built-in sample string
    #   parser.py -c SOURCE   -> parse SOURCE given on the command line
    #   parser.py FILE        -> parse the part of FILE between the
    #                            BEGIN/END BYTECODES marker lines
    import sys

    if not sys.argv[1:]:
        filename = None
        src = "if (x) { x.foo; // comment\n}"
    elif sys.argv[1] == "-c" and sys.argv[2:]:
        filename = None
        src = sys.argv[2]
    else:
        filename = sys.argv[1]
        with open(filename) as f:
            src = f.read()
        srclines = src.splitlines()
        # list.index raises ValueError if a marker line is missing.
        begin = srclines.index("// BEGIN BYTECODES //")
        end = srclines.index("// END BYTECODES //")
        src = "\n".join(srclines[begin+1 : end])

    parser = Parser(src, filename)
    # Try each top-level rule in turn; the first match wins.
    x = parser.inst_def() or parser.super_def() or parser.family_def()
    print(x)