| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | import unittest | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | from test import test_tools | 
					
						
							|  |  |  | from typing import Dict, Set | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  | test_tools.skip_if_missing("peg_generator") | 
					
						
							|  |  |  | with test_tools.imports_under_tool("peg_generator"): | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  |     from pegen.grammar_parser import GeneratedParser as GrammarParser | 
					
						
							|  |  |  |     from pegen.testutil import parse_string | 
					
						
							|  |  |  |     from pegen.first_sets import FirstSetCalculator | 
					
						
							|  |  |  |     from pegen.grammar import Grammar | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class TestFirstSets(unittest.TestCase): | 
					
						
							|  |  |  |     def calculate_first_sets(self, grammar_source: str) -> Dict[str, Set[str]]: | 
					
						
							|  |  |  |         grammar: Grammar = parse_string(grammar_source, GrammarParser) | 
					
						
							|  |  |  |         return FirstSetCalculator(grammar.rules).calculate() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def test_alternatives(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |             start: expr NEWLINE? ENDMARKER | 
					
						
							|  |  |  |             expr: A | B | 
					
						
							|  |  |  |             A: 'a' | '-' | 
					
						
							|  |  |  |             B: 'b' | '+' | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "A": {"'a'", "'-'"}, | 
					
						
							|  |  |  |                 "B": {"'+'", "'b'"}, | 
					
						
							|  |  |  |                 "expr": {"'+'", "'a'", "'b'", "'-'"}, | 
					
						
							|  |  |  |                 "start": {"'+'", "'a'", "'b'", "'-'"}, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_optionals(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |             start: expr NEWLINE | 
					
						
							|  |  |  |             expr: ['a'] ['b'] 'c' | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "expr": {"'c'", "'a'", "'b'"}, | 
					
						
							|  |  |  |                 "start": {"'c'", "'a'", "'b'"}, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_repeat_with_separator(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: ','.thing+ NEWLINE | 
					
						
							|  |  |  |         thing: NUMBER | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             {"thing": {"NUMBER"}, "start": {"NUMBER"}}, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_optional_operator(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: sum NEWLINE | 
					
						
							|  |  |  |         sum: (term)? 'b' | 
					
						
							|  |  |  |         term: NUMBER | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "term": {"NUMBER"}, | 
					
						
							|  |  |  |                 "sum": {"NUMBER", "'b'"}, | 
					
						
							|  |  |  |                 "start": {"'b'", "NUMBER"}, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_optional_literal(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: sum NEWLINE | 
					
						
							|  |  |  |         sum: '+' ? term | 
					
						
							|  |  |  |         term: NUMBER | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "term": {"NUMBER"}, | 
					
						
							|  |  |  |                 "sum": {"'+'", "NUMBER"}, | 
					
						
							|  |  |  |                 "start": {"'+'", "NUMBER"}, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_optional_after(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: term NEWLINE | 
					
						
							|  |  |  |         term: NUMBER ['+'] | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             {"term": {"NUMBER"}, "start": {"NUMBER"}}, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_optional_before(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: term NEWLINE | 
					
						
							|  |  |  |         term: ['+'] NUMBER | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}}, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_repeat_0(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: thing* "+" NEWLINE | 
					
						
							|  |  |  |         thing: NUMBER | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}}, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_repeat_0_with_group(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: ('+' '-')* term NEWLINE | 
					
						
							|  |  |  |         term: NUMBER | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}}, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_repeat_1(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: thing+ '-' NEWLINE | 
					
						
							|  |  |  |         thing: NUMBER | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             {"thing": {"NUMBER"}, "start": {"NUMBER"}}, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_repeat_1_with_group(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: ('+' term)+ term NEWLINE | 
					
						
							|  |  |  |         term: NUMBER | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}} | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_gather(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: ','.thing+ NEWLINE | 
					
						
							|  |  |  |         thing: NUMBER | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             {"thing": {"NUMBER"}, "start": {"NUMBER"}}, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_positive_lookahead(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: expr NEWLINE | 
					
						
							|  |  |  |         expr: &'a' opt | 
					
						
							|  |  |  |         opt: 'a' | 'b' | 'c' | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "expr": {"'a'"}, | 
					
						
							|  |  |  |                 "start": {"'a'"}, | 
					
						
							|  |  |  |                 "opt": {"'b'", "'c'", "'a'"}, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_negative_lookahead(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: expr NEWLINE | 
					
						
							|  |  |  |         expr: !'a' opt | 
					
						
							|  |  |  |         opt: 'a' | 'b' | 'c' | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "opt": {"'b'", "'a'", "'c'"}, | 
					
						
							|  |  |  |                 "expr": {"'b'", "'c'"}, | 
					
						
							|  |  |  |                 "start": {"'b'", "'c'"}, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_left_recursion(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: expr NEWLINE | 
					
						
							|  |  |  |         expr: ('-' term | expr '+' term | term) | 
					
						
							|  |  |  |         term: NUMBER | 
					
						
							|  |  |  |         foo: 'foo' | 
					
						
							|  |  |  |         bar: 'bar' | 
					
						
							|  |  |  |         baz: 'baz' | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "expr": {"NUMBER", "'-'"}, | 
					
						
							|  |  |  |                 "term": {"NUMBER"}, | 
					
						
							|  |  |  |                 "start": {"NUMBER", "'-'"}, | 
					
						
							|  |  |  |                 "foo": {"'foo'"}, | 
					
						
							|  |  |  |                 "bar": {"'bar'"}, | 
					
						
							|  |  |  |                 "baz": {"'baz'"}, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_advance_left_recursion(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: NUMBER | sign start | 
					
						
							|  |  |  |         sign: ['-'] | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}}, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_mutual_left_recursion(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: foo 'E' | 
					
						
							|  |  |  |         foo: bar 'A' | 'B' | 
					
						
							|  |  |  |         bar: foo 'C' | 'D' | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "foo": {"'D'", "'B'"}, | 
					
						
							|  |  |  |                 "bar": {"'D'"}, | 
					
						
							|  |  |  |                 "start": {"'D'", "'B'"}, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_nasty_left_recursion(self) -> None: | 
					
						
							|  |  |  |         # TODO: Validate this | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: target '=' | 
					
						
							|  |  |  |         target: maybe '+' | NAME | 
					
						
							|  |  |  |         maybe: maybe '-' | target | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}}, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_nullable_rule(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: sign thing $ | 
					
						
							|  |  |  |         sign: ['-'] | 
					
						
							|  |  |  |         thing: NUMBER | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "sign": {"", "'-'"}, | 
					
						
							|  |  |  |                 "thing": {"NUMBER"}, | 
					
						
							|  |  |  |                 "start": {"NUMBER", "'-'"}, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_epsilon_production_in_start_rule(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: ['-'] $ | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}} | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2020-04-22 23:29:27 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     def test_multiple_nullable_rules(self) -> None: | 
					
						
							|  |  |  |         grammar = """
 | 
					
						
							|  |  |  |         start: sign thing other another $ | 
					
						
							|  |  |  |         sign: ['-'] | 
					
						
							|  |  |  |         thing: ['+'] | 
					
						
							|  |  |  |         other: '*' | 
					
						
							|  |  |  |         another: '/' | 
					
						
							|  |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2021-08-12 17:37:30 +01:00
										 |  |  |         self.assertEqual( | 
					
						
							|  |  |  |             self.calculate_first_sets(grammar), | 
					
						
							|  |  |  |             { | 
					
						
							|  |  |  |                 "sign": {"", "'-'"}, | 
					
						
							|  |  |  |                 "thing": {"'+'", ""}, | 
					
						
							|  |  |  |                 "start": {"'+'", "'-'", "'*'"}, | 
					
						
							|  |  |  |                 "other": {"'*'"}, | 
					
						
							|  |  |  |                 "another": {"'/'"}, | 
					
						
							|  |  |  |             }, | 
					
						
							|  |  |  |         ) |