mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			286 lines
		
	
	
	
		
			7.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			286 lines
		
	
	
	
		
			7.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import unittest
 | |
| 
 | |
| from test import test_tools
 | |
| from typing import Dict, Set
 | |
| 
 | |
| test_tools.skip_if_missing("peg_generator")
 | |
| with test_tools.imports_under_tool("peg_generator"):
 | |
|     from pegen.grammar_parser import GeneratedParser as GrammarParser
 | |
|     from pegen.testutil import parse_string
 | |
|     from pegen.first_sets import FirstSetCalculator
 | |
|     from pegen.grammar import Grammar
 | |
| 
 | |
| 
 | |
| class TestFirstSets(unittest.TestCase):
 | |
|     def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]:
 | |
|         grammar: Grammar = parse_string(grammar_source, GrammarParser)
 | |
|         return FirstSetCalculator(grammar.rules).calculate()
 | |
| 
 | |
|     def test_alternatives(self) -> None:
 | |
|         grammar = """
 | |
|             start: expr NEWLINE? ENDMARKER
 | |
|             expr: A | B
 | |
|             A: 'a' | '-'
 | |
|             B: 'b' | '+'
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {
 | |
|                 "A": {"'a'", "'-'"},
 | |
|                 "B": {"'+'", "'b'"},
 | |
|                 "expr": {"'+'", "'a'", "'b'", "'-'"},
 | |
|                 "start": {"'+'", "'a'", "'b'", "'-'"},
 | |
|             },
 | |
|         )
 | |
| 
 | |
|     def test_optionals(self) -> None:
 | |
|         grammar = """
 | |
|             start: expr NEWLINE
 | |
|             expr: ['a'] ['b'] 'c'
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {
 | |
|                 "expr": {"'c'", "'a'", "'b'"},
 | |
|                 "start": {"'c'", "'a'", "'b'"},
 | |
|             },
 | |
|         )
 | |
| 
 | |
|     def test_repeat_with_separator(self) -> None:
 | |
|         grammar = """
 | |
|         start: ','.thing+ NEWLINE
 | |
|         thing: NUMBER
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {"thing": {"NUMBER"}, "start": {"NUMBER"}},
 | |
|         )
 | |
| 
 | |
|     def test_optional_operator(self) -> None:
 | |
|         grammar = """
 | |
|         start: sum NEWLINE
 | |
|         sum: (term)? 'b'
 | |
|         term: NUMBER
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {
 | |
|                 "term": {"NUMBER"},
 | |
|                 "sum": {"NUMBER", "'b'"},
 | |
|                 "start": {"'b'", "NUMBER"},
 | |
|             },
 | |
|         )
 | |
| 
 | |
|     def test_optional_literal(self) -> None:
 | |
|         grammar = """
 | |
|         start: sum NEWLINE
 | |
|         sum: '+' ? term
 | |
|         term: NUMBER
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {
 | |
|                 "term": {"NUMBER"},
 | |
|                 "sum": {"'+'", "NUMBER"},
 | |
|                 "start": {"'+'", "NUMBER"},
 | |
|             },
 | |
|         )
 | |
| 
 | |
|     def test_optional_after(self) -> None:
 | |
|         grammar = """
 | |
|         start: term NEWLINE
 | |
|         term: NUMBER ['+']
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {"term": {"NUMBER"}, "start": {"NUMBER"}},
 | |
|         )
 | |
| 
 | |
|     def test_optional_before(self) -> None:
 | |
|         grammar = """
 | |
|         start: term NEWLINE
 | |
|         term: ['+'] NUMBER
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}},
 | |
|         )
 | |
| 
 | |
|     def test_repeat_0(self) -> None:
 | |
|         grammar = """
 | |
|         start: thing* "+" NEWLINE
 | |
|         thing: NUMBER
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}},
 | |
|         )
 | |
| 
 | |
|     def test_repeat_0_with_group(self) -> None:
 | |
|         grammar = """
 | |
|         start: ('+' '-')* term NEWLINE
 | |
|         term: NUMBER
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}},
 | |
|         )
 | |
| 
 | |
|     def test_repeat_1(self) -> None:
 | |
|         grammar = """
 | |
|         start: thing+ '-' NEWLINE
 | |
|         thing: NUMBER
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {"thing": {"NUMBER"}, "start": {"NUMBER"}},
 | |
|         )
 | |
| 
 | |
|     def test_repeat_1_with_group(self) -> None:
 | |
|         grammar = """
 | |
|         start: ('+' term)+ term NEWLINE
 | |
|         term: NUMBER
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}
 | |
|         )
 | |
| 
 | |
|     def test_gather(self) -> None:
 | |
|         grammar = """
 | |
|         start: ','.thing+ NEWLINE
 | |
|         thing: NUMBER
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {"thing": {"NUMBER"}, "start": {"NUMBER"}},
 | |
|         )
 | |
| 
 | |
|     def test_positive_lookahead(self) -> None:
 | |
|         grammar = """
 | |
|         start: expr NEWLINE
 | |
|         expr: &'a' opt
 | |
|         opt: 'a' | 'b' | 'c'
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {
 | |
|                 "expr": {"'a'"},
 | |
|                 "start": {"'a'"},
 | |
|                 "opt": {"'b'", "'c'", "'a'"},
 | |
|             },
 | |
|         )
 | |
| 
 | |
|     def test_negative_lookahead(self) -> None:
 | |
|         grammar = """
 | |
|         start: expr NEWLINE
 | |
|         expr: !'a' opt
 | |
|         opt: 'a' | 'b' | 'c'
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {
 | |
|                 "opt": {"'b'", "'a'", "'c'"},
 | |
|                 "expr": {"'b'", "'c'"},
 | |
|                 "start": {"'b'", "'c'"},
 | |
|             },
 | |
|         )
 | |
| 
 | |
|     def test_left_recursion(self) -> None:
 | |
|         grammar = """
 | |
|         start: expr NEWLINE
 | |
|         expr: ('-' term | expr '+' term | term)
 | |
|         term: NUMBER
 | |
|         foo: 'foo'
 | |
|         bar: 'bar'
 | |
|         baz: 'baz'
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {
 | |
|                 "expr": {"NUMBER", "'-'"},
 | |
|                 "term": {"NUMBER"},
 | |
|                 "start": {"NUMBER", "'-'"},
 | |
|                 "foo": {"'foo'"},
 | |
|                 "bar": {"'bar'"},
 | |
|                 "baz": {"'baz'"},
 | |
|             },
 | |
|         )
 | |
| 
 | |
|     def test_advance_left_recursion(self) -> None:
 | |
|         grammar = """
 | |
|         start: NUMBER | sign start
 | |
|         sign: ['-']
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}},
 | |
|         )
 | |
| 
 | |
|     def test_mutual_left_recursion(self) -> None:
 | |
|         grammar = """
 | |
|         start: foo 'E'
 | |
|         foo: bar 'A' | 'B'
 | |
|         bar: foo 'C' | 'D'
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {
 | |
|                 "foo": {"'D'", "'B'"},
 | |
|                 "bar": {"'D'"},
 | |
|                 "start": {"'D'", "'B'"},
 | |
|             },
 | |
|         )
 | |
| 
 | |
|     def test_nasty_left_recursion(self) -> None:
 | |
|         # TODO: Validate this
 | |
|         grammar = """
 | |
|         start: target '='
 | |
|         target: maybe '+' | NAME
 | |
|         maybe: maybe '-' | target
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}},
 | |
|         )
 | |
| 
 | |
|     def test_nullable_rule(self) -> None:
 | |
|         grammar = """
 | |
|         start: sign thing $
 | |
|         sign: ['-']
 | |
|         thing: NUMBER
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {
 | |
|                 "sign": {"", "'-'"},
 | |
|                 "thing": {"NUMBER"},
 | |
|                 "start": {"NUMBER", "'-'"},
 | |
|             },
 | |
|         )
 | |
| 
 | |
|     def test_epsilon_production_in_start_rule(self) -> None:
 | |
|         grammar = """
 | |
|         start: ['-'] $
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}
 | |
|         )
 | |
| 
 | |
|     def test_multiple_nullable_rules(self) -> None:
 | |
|         grammar = """
 | |
|         start: sign thing other another $
 | |
|         sign: ['-']
 | |
|         thing: ['+']
 | |
|         other: '*'
 | |
|         another: '/'
 | |
|         """
 | |
|         self.assertEqual(
 | |
|             self.calculate_first_sets(grammar),
 | |
|             {
 | |
|                 "sign": {"", "'-'"},
 | |
|                 "thing": {"'+'", ""},
 | |
|                 "start": {"'+'", "'-'", "'*'"},
 | |
|                 "other": {"'*'"},
 | |
|                 "another": {"'/'"},
 | |
|             },
 | |
|         )
 | 
