mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			152 lines
		
	
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			152 lines
		
	
	
	
		
			4.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
#!/usr/bin/env python3.8
 | 
						|
 | 
						|
import argparse
 | 
						|
import pprint
 | 
						|
import sys
 | 
						|
from typing import Set, Dict
 | 
						|
 | 
						|
from pegen.build import build_parser
 | 
						|
from pegen.grammar import (
 | 
						|
    Alt,
 | 
						|
    Cut,
 | 
						|
    Gather,
 | 
						|
    Grammar,
 | 
						|
    GrammarVisitor,
 | 
						|
    Group,
 | 
						|
    Leaf,
 | 
						|
    Lookahead,
 | 
						|
    NamedItem,
 | 
						|
    NameLeaf,
 | 
						|
    NegativeLookahead,
 | 
						|
    Opt,
 | 
						|
    Repeat,
 | 
						|
    Repeat0,
 | 
						|
    Repeat1,
 | 
						|
    Rhs,
 | 
						|
    Rule,
 | 
						|
    StringLeaf,
 | 
						|
    PositiveLookahead,
 | 
						|
)
 | 
						|
 | 
						|
argparser = argparse.ArgumentParser(
 | 
						|
    prog="calculate_first_sets", description="Calculate the first sets of a grammar",
 | 
						|
)
 | 
						|
argparser.add_argument("grammar_file", help="The grammar file")
 | 
						|
 | 
						|
 | 
						|
class FirstSetCalculator(GrammarVisitor):
 | 
						|
    def __init__(self, rules: Dict[str, Rule]) -> None:
 | 
						|
        self.rules = rules
 | 
						|
        for rule in rules.values():
 | 
						|
            rule.nullable_visit(rules)
 | 
						|
        self.first_sets: Dict[str, Set[str]] = dict()
 | 
						|
        self.in_process: Set[str] = set()
 | 
						|
 | 
						|
    def calculate(self) -> Dict[str, Set[str]]:
 | 
						|
        for name, rule in self.rules.items():
 | 
						|
            self.visit(rule)
 | 
						|
        return self.first_sets
 | 
						|
 | 
						|
    def visit_Alt(self, item: Alt) -> Set[str]:
 | 
						|
        result: Set[str] = set()
 | 
						|
        to_remove: Set[str] = set()
 | 
						|
        for other in item.items:
 | 
						|
            new_terminals = self.visit(other)
 | 
						|
            if isinstance(other.item, NegativeLookahead):
 | 
						|
                to_remove |= new_terminals
 | 
						|
            result |= new_terminals
 | 
						|
            if to_remove:
 | 
						|
                result -= to_remove
 | 
						|
 | 
						|
            # If the set of new terminals can start with the empty string,
 | 
						|
            # it means that the item is completelly nullable and we should
 | 
						|
            # also considering at least the next item in case the current
 | 
						|
            # one fails to parse.
 | 
						|
 | 
						|
            if "" in new_terminals:
 | 
						|
                continue
 | 
						|
 | 
						|
            if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):
 | 
						|
                break
 | 
						|
 | 
						|
        # Do not allow the empty string to propagate.
 | 
						|
        result.discard("")
 | 
						|
 | 
						|
        return result
 | 
						|
 | 
						|
    def visit_Cut(self, item: Cut) -> Set[str]:
 | 
						|
        return set()
 | 
						|
 | 
						|
    def visit_Group(self, item: Group) -> Set[str]:
 | 
						|
        return self.visit(item.rhs)
 | 
						|
 | 
						|
    def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
 | 
						|
        return self.visit(item.node)
 | 
						|
 | 
						|
    def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
 | 
						|
        return self.visit(item.node)
 | 
						|
 | 
						|
    def visit_NamedItem(self, item: NamedItem) -> Set[str]:
 | 
						|
        return self.visit(item.item)
 | 
						|
 | 
						|
    def visit_Opt(self, item: Opt) -> Set[str]:
 | 
						|
        return self.visit(item.node)
 | 
						|
 | 
						|
    def visit_Gather(self, item: Gather) -> Set[str]:
 | 
						|
        return self.visit(item.node)
 | 
						|
 | 
						|
    def visit_Repeat0(self, item: Repeat0) -> Set[str]:
 | 
						|
        return self.visit(item.node)
 | 
						|
 | 
						|
    def visit_Repeat1(self, item: Repeat1) -> Set[str]:
 | 
						|
        return self.visit(item.node)
 | 
						|
 | 
						|
    def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
 | 
						|
        if item.value not in self.rules:
 | 
						|
            return {item.value}
 | 
						|
 | 
						|
        if item.value not in self.first_sets:
 | 
						|
            self.first_sets[item.value] = self.visit(self.rules[item.value])
 | 
						|
            return self.first_sets[item.value]
 | 
						|
        elif item.value in self.in_process:
 | 
						|
            return set()
 | 
						|
 | 
						|
        return self.first_sets[item.value]
 | 
						|
 | 
						|
    def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
 | 
						|
        return {item.value}
 | 
						|
 | 
						|
    def visit_Rhs(self, item: Rhs) -> Set[str]:
 | 
						|
        result: Set[str] = set()
 | 
						|
        for alt in item.alts:
 | 
						|
            result |= self.visit(alt)
 | 
						|
        return result
 | 
						|
 | 
						|
    def visit_Rule(self, item: Rule) -> Set[str]:
 | 
						|
        if item.name in self.in_process:
 | 
						|
            return set()
 | 
						|
        elif item.name not in self.first_sets:
 | 
						|
            self.in_process.add(item.name)
 | 
						|
            terminals = self.visit(item.rhs)
 | 
						|
            if item.nullable:
 | 
						|
                terminals.add("")
 | 
						|
            self.first_sets[item.name] = terminals
 | 
						|
            self.in_process.remove(item.name)
 | 
						|
        return self.first_sets[item.name]
 | 
						|
 | 
						|
 | 
						|
def main() -> None:
 | 
						|
    args = argparser.parse_args()
 | 
						|
 | 
						|
    try:
 | 
						|
        grammar, parser, tokenizer = build_parser(args.grammar_file)
 | 
						|
    except Exception as err:
 | 
						|
        print("ERROR: Failed to parse grammar file", file=sys.stderr)
 | 
						|
        sys.exit(1)
 | 
						|
 | 
						|
    firs_sets = FirstSetCalculator(grammar.rules).calculate()
 | 
						|
    pprint.pprint(firs_sets)
 | 
						|
 | 
						|
 | 
						|
if __name__ == "__main__":
 | 
						|
    main()
 |