mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	
		
			
	
	
		
			226 lines
		
	
	
	
		
			6.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			226 lines
		
	
	
	
		
			6.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| 
								 | 
							
								import unittest
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from test import test_tools
							 | 
						||
| 
								 | 
							
								from typing import Dict, Set
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								test_tools.skip_if_missing('peg_generator')
							 | 
						||
| 
								 | 
							
								with test_tools.imports_under_tool('peg_generator'):
							 | 
						||
| 
								 | 
							
								    from pegen.grammar_parser import GeneratedParser as GrammarParser
							 | 
						||
| 
								 | 
							
								    from pegen.testutil import parse_string
							 | 
						||
| 
								 | 
							
								    from pegen.first_sets import FirstSetCalculator
							 | 
						||
| 
								 | 
							
								    from pegen.grammar import Grammar
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class TestFirstSets(unittest.TestCase):
							 | 
						||
| 
								 | 
							
								    def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]:
							 | 
						||
| 
								 | 
							
								        grammar: Grammar = parse_string(grammar_source, GrammarParser)
							 | 
						||
| 
								 | 
							
								        return FirstSetCalculator(grammar.rules).calculate()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_alternatives(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								            start: expr NEWLINE? ENDMARKER
							 | 
						||
| 
								 | 
							
								            expr: A | B
							 | 
						||
| 
								 | 
							
								            A: 'a' | '-'
							 | 
						||
| 
								 | 
							
								            B: 'b' | '+'
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {
							 | 
						||
| 
								 | 
							
								            "A": {"'a'", "'-'"},
							 | 
						||
| 
								 | 
							
								            "B": {"'+'", "'b'"},
							 | 
						||
| 
								 | 
							
								            "expr": {"'+'", "'a'", "'b'", "'-'"},
							 | 
						||
| 
								 | 
							
								            "start": {"'+'", "'a'", "'b'", "'-'"},
							 | 
						||
| 
								 | 
							
								        })
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_optionals(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								            start: expr NEWLINE
							 | 
						||
| 
								 | 
							
								            expr: ['a'] ['b'] 'c'
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {
							 | 
						||
| 
								 | 
							
								            "expr": {"'c'", "'a'", "'b'"},
							 | 
						||
| 
								 | 
							
								            "start": {"'c'", "'a'", "'b'"},
							 | 
						||
| 
								 | 
							
								        })
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_repeat_with_separator(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: ','.thing+ NEWLINE
							 | 
						||
| 
								 | 
							
								        thing: NUMBER
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_optional_operator(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: sum NEWLINE
							 | 
						||
| 
								 | 
							
								        sum: (term)? 'b'
							 | 
						||
| 
								 | 
							
								        term: NUMBER
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {
							 | 
						||
| 
								 | 
							
								            "term": {"NUMBER"},
							 | 
						||
| 
								 | 
							
								            "sum": {"NUMBER", "'b'"},
							 | 
						||
| 
								 | 
							
								            "start": {"'b'", "NUMBER"},
							 | 
						||
| 
								 | 
							
								        })
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_optional_literal(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: sum NEWLINE
							 | 
						||
| 
								 | 
							
								        sum: '+' ? term
							 | 
						||
| 
								 | 
							
								        term: NUMBER
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {
							 | 
						||
| 
								 | 
							
								            "term": {"NUMBER"},
							 | 
						||
| 
								 | 
							
								            "sum": {"'+'", "NUMBER"},
							 | 
						||
| 
								 | 
							
								            "start": {"'+'", "NUMBER"},
							 | 
						||
| 
								 | 
							
								        })
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_optional_after(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: term NEWLINE
							 | 
						||
| 
								 | 
							
								        term: NUMBER ['+']
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_optional_before(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: term NEWLINE
							 | 
						||
| 
								 | 
							
								        term: ['+'] NUMBER
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_repeat_0(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: thing* "+" NEWLINE
							 | 
						||
| 
								 | 
							
								        thing: NUMBER
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_repeat_0_with_group(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: ('+' '-')* term NEWLINE
							 | 
						||
| 
								 | 
							
								        term: NUMBER
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_repeat_1(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: thing+ '-' NEWLINE
							 | 
						||
| 
								 | 
							
								        thing: NUMBER
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_repeat_1_with_group(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: ('+' term)+ term NEWLINE
							 | 
						||
| 
								 | 
							
								        term: NUMBER
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_gather(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: ','.thing+ NEWLINE
							 | 
						||
| 
								 | 
							
								        thing: NUMBER
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_positive_lookahead(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: expr NEWLINE
							 | 
						||
| 
								 | 
							
								        expr: &'a' opt
							 | 
						||
| 
								 | 
							
								        opt: 'a' | 'b' | 'c'
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {
							 | 
						||
| 
								 | 
							
								            "expr": {"'a'"},
							 | 
						||
| 
								 | 
							
								            "start": {"'a'"},
							 | 
						||
| 
								 | 
							
								            "opt": {"'b'", "'c'", "'a'"},
							 | 
						||
| 
								 | 
							
								        })
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_negative_lookahead(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: expr NEWLINE
							 | 
						||
| 
								 | 
							
								        expr: !'a' opt
							 | 
						||
| 
								 | 
							
								        opt: 'a' | 'b' | 'c'
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {
							 | 
						||
| 
								 | 
							
								            "opt": {"'b'", "'a'", "'c'"},
							 | 
						||
| 
								 | 
							
								            "expr": {"'b'", "'c'"},
							 | 
						||
| 
								 | 
							
								            "start": {"'b'", "'c'"},
							 | 
						||
| 
								 | 
							
								        })
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_left_recursion(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: expr NEWLINE
							 | 
						||
| 
								 | 
							
								        expr: ('-' term | expr '+' term | term)
							 | 
						||
| 
								 | 
							
								        term: NUMBER
							 | 
						||
| 
								 | 
							
								        foo: 'foo'
							 | 
						||
| 
								 | 
							
								        bar: 'bar'
							 | 
						||
| 
								 | 
							
								        baz: 'baz'
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {
							 | 
						||
| 
								 | 
							
								            "expr": {"NUMBER", "'-'"},
							 | 
						||
| 
								 | 
							
								            "term": {"NUMBER"},
							 | 
						||
| 
								 | 
							
								            "start": {"NUMBER", "'-'"},
							 | 
						||
| 
								 | 
							
								            "foo": {"'foo'"},
							 | 
						||
| 
								 | 
							
								            "bar": {"'bar'"},
							 | 
						||
| 
								 | 
							
								            "baz": {"'baz'"},
							 | 
						||
| 
								 | 
							
								        })
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_advance_left_recursion(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: NUMBER | sign start
							 | 
						||
| 
								 | 
							
								        sign: ['-']
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_mutual_left_recursion(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: foo 'E'
							 | 
						||
| 
								 | 
							
								        foo: bar 'A' | 'B'
							 | 
						||
| 
								 | 
							
								        bar: foo 'C' | 'D'
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {
							 | 
						||
| 
								 | 
							
								            "foo": {"'D'", "'B'"},
							 | 
						||
| 
								 | 
							
								            "bar": {"'D'"},
							 | 
						||
| 
								 | 
							
								            "start": {"'D'", "'B'"},
							 | 
						||
| 
								 | 
							
								        })
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_nasty_left_recursion(self) -> None:
							 | 
						||
| 
								 | 
							
								        # TODO: Validate this
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: target '='
							 | 
						||
| 
								 | 
							
								        target: maybe '+' | NAME
							 | 
						||
| 
								 | 
							
								        maybe: maybe '-' | target
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_nullable_rule(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: sign thing $
							 | 
						||
| 
								 | 
							
								        sign: ['-']
							 | 
						||
| 
								 | 
							
								        thing: NUMBER
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {
							 | 
						||
| 
								 | 
							
								            "sign": {"", "'-'"},
							 | 
						||
| 
								 | 
							
								            "thing": {"NUMBER"},
							 | 
						||
| 
								 | 
							
								            "start": {"NUMBER", "'-'"},
							 | 
						||
| 
								 | 
							
								        })
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_epsilon_production_in_start_rule(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: ['-'] $
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def test_multiple_nullable_rules(self) -> None:
							 | 
						||
| 
								 | 
							
								        grammar = """
							 | 
						||
| 
								 | 
							
								        start: sign thing other another $
							 | 
						||
| 
								 | 
							
								        sign: ['-']
							 | 
						||
| 
								 | 
							
								        thing: ['+']
							 | 
						||
| 
								 | 
							
								        other: '*'
							 | 
						||
| 
								 | 
							
								        another: '/'
							 | 
						||
| 
								 | 
							
								        """
							 | 
						||
| 
								 | 
							
								        self.assertEqual(self.calculate_first_sets(grammar), {
							 | 
						||
| 
								 | 
							
								            "sign": {"", "'-'"},
							 | 
						||
| 
								 | 
							
								            "thing": {"'+'", ""},
							 | 
						||
| 
								 | 
							
								            "start": {"'+'", "'-'", "'*'"},
							 | 
						||
| 
								 | 
							
								            "other": {"'*'"},
							 | 
						||
| 
								 | 
							
								            "another": {"'/'"},
							 | 
						||
| 
								 | 
							
								        })
							 |