mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			1108 lines
		
	
	
	
		
			38 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			1108 lines
		
	
	
	
		
			38 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import ast
 | |
| import difflib
 | |
| import io
 | |
| import textwrap
 | |
| import unittest
 | |
| 
 | |
| from test import test_tools
 | |
| from typing import Dict, Any
 | |
| from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP
 | |
| 
 | |
| test_tools.skip_if_missing("peg_generator")
 | |
| with test_tools.imports_under_tool("peg_generator"):
 | |
|     from pegen.grammar_parser import GeneratedParser as GrammarParser
 | |
|     from pegen.testutil import parse_string, generate_parser, make_parser
 | |
|     from pegen.grammar import GrammarVisitor, GrammarError, Grammar
 | |
|     from pegen.grammar_visualizer import ASTGrammarPrinter
 | |
|     from pegen.parser import Parser
 | |
|     from pegen.parser_generator import compute_nullables, compute_left_recursives
 | |
|     from pegen.python_generator import PythonParserGenerator
 | |
| 
 | |
| 
 | |
| class TestPegen(unittest.TestCase):
 | |
|     def test_parse_grammar(self) -> None:
 | |
|         grammar_source = """
 | |
|         start: sum NEWLINE
 | |
|         sum: t1=term '+' t2=term { action } | term
 | |
|         term: NUMBER
 | |
|         """
 | |
|         expected = """
 | |
|         start: sum NEWLINE
 | |
|         sum: term '+' term | term
 | |
|         term: NUMBER
 | |
|         """
 | |
|         grammar: Grammar = parse_string(grammar_source, GrammarParser)
 | |
|         rules = grammar.rules
 | |
|         self.assertEqual(str(grammar), textwrap.dedent(expected).strip())
 | |
|         # Check the str() and repr() of a few rules; AST nodes don't support ==.
 | |
|         self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
 | |
|         self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
 | |
|         expected_repr = (
 | |
|             "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
 | |
|         )
 | |
|         self.assertEqual(repr(rules["term"]), expected_repr)
 | |
| 
 | |
|     def test_repeated_rules(self) -> None:
 | |
|         grammar_source = """
 | |
|         start: the_rule NEWLINE
 | |
|         the_rule: 'b' NEWLINE
 | |
|         the_rule: 'a' NEWLINE
 | |
|         """
 | |
|         with self.assertRaisesRegex(GrammarError, "Repeated rule 'the_rule'"):
 | |
|             parse_string(grammar_source, GrammarParser)
 | |
| 
 | |
|     def test_long_rule_str(self) -> None:
 | |
|         grammar_source = """
 | |
|         start: zero | one | one zero | one one | one zero zero | one zero one | one one zero | one one one
 | |
|         """
 | |
|         expected = """
 | |
|         start:
 | |
|             | zero
 | |
|             | one
 | |
|             | one zero
 | |
|             | one one
 | |
|             | one zero zero
 | |
|             | one zero one
 | |
|             | one one zero
 | |
|             | one one one
 | |
|         """
 | |
|         grammar: Grammar = parse_string(grammar_source, GrammarParser)
 | |
|         self.assertEqual(str(grammar.rules["start"]), textwrap.dedent(expected).strip())
 | |
| 
 | |
|     def test_typed_rules(self) -> None:
 | |
|         grammar = """
 | |
|         start[int]: sum NEWLINE
 | |
|         sum[int]: t1=term '+' t2=term { action } | term
 | |
|         term[int]: NUMBER
 | |
|         """
 | |
|         rules = parse_string(grammar, GrammarParser).rules
 | |
|         # Check the str() and repr() of a few rules; AST nodes don't support ==.
 | |
|         self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
 | |
|         self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
 | |
|         self.assertEqual(
 | |
|             repr(rules["term"]),
 | |
|             "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))",
 | |
|         )
 | |
| 
 | |
|     def test_gather(self) -> None:
 | |
|         grammar = """
 | |
|         start: ','.thing+ NEWLINE
 | |
|         thing: NUMBER
 | |
|         """
 | |
|         rules = parse_string(grammar, GrammarParser).rules
 | |
|         self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
 | |
|         self.assertStartsWith(repr(rules["start"]),
 | |
|             "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
 | |
|         )
 | |
|         self.assertEqual(str(rules["thing"]), "thing: NUMBER")
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("42\n", parser_class)
 | |
|         node = parse_string("1, 2\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n"
 | |
|                     ),
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n"
 | |
|                     ),
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_expr_grammar(self) -> None:
 | |
|         grammar = """
 | |
|         start: sum NEWLINE
 | |
|         sum: term '+' term | term
 | |
|         term: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("42\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"),
 | |
|                 TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_optional_operator(self) -> None:
 | |
|         grammar = """
 | |
|         start: sum NEWLINE
 | |
|         sum: term ('+' term)?
 | |
|         term: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("1 + 2\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
 | |
|                     ),
 | |
|                     [
 | |
|                         TokenInfo(
 | |
|                             OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
 | |
|                         ),
 | |
|                         TokenInfo(
 | |
|                             NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
 | |
|                         ),
 | |
|                     ],
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
|         node = parse_string("1\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
 | |
|                     None,
 | |
|                 ],
 | |
|                 TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_optional_literal(self) -> None:
 | |
|         grammar = """
 | |
|         start: sum NEWLINE
 | |
|         sum: term '+' ?
 | |
|         term: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("1+\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"
 | |
|                     ),
 | |
|                     TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
 | |
|                 ],
 | |
|                 TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
 | |
|             ],
 | |
|         )
 | |
|         node = parse_string("1\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
 | |
|                     None,
 | |
|                 ],
 | |
|                 TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_alt_optional_operator(self) -> None:
 | |
|         grammar = """
 | |
|         start: sum NEWLINE
 | |
|         sum: term ['+' term]
 | |
|         term: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("1 + 2\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
 | |
|                     ),
 | |
|                     [
 | |
|                         TokenInfo(
 | |
|                             OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
 | |
|                         ),
 | |
|                         TokenInfo(
 | |
|                             NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
 | |
|                         ),
 | |
|                     ],
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
|         node = parse_string("1\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
 | |
|                     None,
 | |
|                 ],
 | |
|                 TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_repeat_0_simple(self) -> None:
 | |
|         grammar = """
 | |
|         start: thing thing* NEWLINE
 | |
|         thing: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("1 2 3\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
 | |
|                 [
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
 | |
|                     ),
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
 | |
|                     ),
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
|         node = parse_string("1\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
 | |
|                 [],
 | |
|                 TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_repeat_0_complex(self) -> None:
 | |
|         grammar = """
 | |
|         start: term ('+' term)* NEWLINE
 | |
|         term: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("1 + 2 + 3\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 TokenInfo(
 | |
|                     NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
 | |
|                 ),
 | |
|                 [
 | |
|                     [
 | |
|                         TokenInfo(
 | |
|                             OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
 | |
|                         ),
 | |
|                         TokenInfo(
 | |
|                             NUMBER,
 | |
|                             string="2",
 | |
|                             start=(1, 4),
 | |
|                             end=(1, 5),
 | |
|                             line="1 + 2 + 3\n",
 | |
|                         ),
 | |
|                     ],
 | |
|                     [
 | |
|                         TokenInfo(
 | |
|                             OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
 | |
|                         ),
 | |
|                         TokenInfo(
 | |
|                             NUMBER,
 | |
|                             string="3",
 | |
|                             start=(1, 8),
 | |
|                             end=(1, 9),
 | |
|                             line="1 + 2 + 3\n",
 | |
|                         ),
 | |
|                     ],
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_repeat_1_simple(self) -> None:
 | |
|         grammar = """
 | |
|         start: thing thing+ NEWLINE
 | |
|         thing: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("1 2 3\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
 | |
|                 [
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
 | |
|                     ),
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
 | |
|                     ),
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
|         with self.assertRaises(SyntaxError):
 | |
|             parse_string("1\n", parser_class)
 | |
| 
 | |
|     def test_repeat_1_complex(self) -> None:
 | |
|         grammar = """
 | |
|         start: term ('+' term)+ NEWLINE
 | |
|         term: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("1 + 2 + 3\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 TokenInfo(
 | |
|                     NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
 | |
|                 ),
 | |
|                 [
 | |
|                     [
 | |
|                         TokenInfo(
 | |
|                             OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
 | |
|                         ),
 | |
|                         TokenInfo(
 | |
|                             NUMBER,
 | |
|                             string="2",
 | |
|                             start=(1, 4),
 | |
|                             end=(1, 5),
 | |
|                             line="1 + 2 + 3\n",
 | |
|                         ),
 | |
|                     ],
 | |
|                     [
 | |
|                         TokenInfo(
 | |
|                             OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
 | |
|                         ),
 | |
|                         TokenInfo(
 | |
|                             NUMBER,
 | |
|                             string="3",
 | |
|                             start=(1, 8),
 | |
|                             end=(1, 9),
 | |
|                             line="1 + 2 + 3\n",
 | |
|                         ),
 | |
|                     ],
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
|         with self.assertRaises(SyntaxError):
 | |
|             parse_string("1\n", parser_class)
 | |
| 
 | |
|     def test_repeat_with_sep_simple(self) -> None:
 | |
|         grammar = """
 | |
|         start: ','.thing+ NEWLINE
 | |
|         thing: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("1, 2, 3\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"
 | |
|                     ),
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"
 | |
|                     ),
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"
 | |
|                     ),
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_left_recursive(self) -> None:
 | |
|         grammar_source = """
 | |
|         start: expr NEWLINE
 | |
|         expr: ('-' term | expr '+' term | term)
 | |
|         term: NUMBER
 | |
|         foo: NAME+
 | |
|         bar: NAME*
 | |
|         baz: NAME?
 | |
|         """
 | |
|         grammar: Grammar = parse_string(grammar_source, GrammarParser)
 | |
|         parser_class = generate_parser(grammar)
 | |
|         rules = grammar.rules
 | |
|         self.assertFalse(rules["start"].left_recursive)
 | |
|         self.assertTrue(rules["expr"].left_recursive)
 | |
|         self.assertFalse(rules["term"].left_recursive)
 | |
|         self.assertFalse(rules["foo"].left_recursive)
 | |
|         self.assertFalse(rules["bar"].left_recursive)
 | |
|         self.assertFalse(rules["baz"].left_recursive)
 | |
|         node = parse_string("1 + 2 + 3\n", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     [
 | |
|                         TokenInfo(
 | |
|                             NUMBER,
 | |
|                             string="1",
 | |
|                             start=(1, 0),
 | |
|                             end=(1, 1),
 | |
|                             line="1 + 2 + 3\n",
 | |
|                         ),
 | |
|                         TokenInfo(
 | |
|                             OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
 | |
|                         ),
 | |
|                         TokenInfo(
 | |
|                             NUMBER,
 | |
|                             string="2",
 | |
|                             start=(1, 4),
 | |
|                             end=(1, 5),
 | |
|                             line="1 + 2 + 3\n",
 | |
|                         ),
 | |
|                     ],
 | |
|                     TokenInfo(
 | |
|                         OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
 | |
|                     ),
 | |
|                     TokenInfo(
 | |
|                         NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"
 | |
|                     ),
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_python_expr(self) -> None:
 | |
|         grammar = """
 | |
|         start: expr NEWLINE? $ { ast.Expression(expr) }
 | |
|         expr: ( expr '+' term { ast.BinOp(expr, ast.Add(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
 | |
|             | expr '-' term { ast.BinOp(expr, ast.Sub(), term, lineno=expr.lineno, col_offset=expr.col_offset, end_lineno=term.end_lineno, end_col_offset=term.end_col_offset) }
 | |
|             | term { term }
 | |
|             )
 | |
|         term: ( l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
 | |
|             | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, lineno=l.lineno, col_offset=l.col_offset, end_lineno=r.end_lineno, end_col_offset=r.end_col_offset) }
 | |
|             | factor { factor }
 | |
|             )
 | |
|         factor: ( '(' expr ')' { expr }
 | |
|                 | atom { atom }
 | |
|                 )
 | |
|         atom: ( n=NAME { ast.Name(id=n.string, ctx=ast.Load(), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
 | |
|             | n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), lineno=n.start[0], col_offset=n.start[1], end_lineno=n.end[0], end_col_offset=n.end[1]) }
 | |
|             )
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("(1 + 2*3 + 5)/(6 - 2)\n", parser_class)
 | |
|         code = compile(node, "", "eval")
 | |
|         val = eval(code)
 | |
|         self.assertEqual(val, 3.0)
 | |
| 
 | |
|     def test_f_string_in_action(self) -> None:
 | |
|         grammar = """
 | |
|         start: n=NAME NEWLINE? $ { f"name -> {n.string}" }
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("a", parser_class)
 | |
|         self.assertEqual(node.strip(), "name ->  a")
 | |
| 
 | |
|     def test_nullable(self) -> None:
 | |
|         grammar_source = """
 | |
|         start: sign NUMBER
 | |
|         sign: ['-' | '+']
 | |
|         """
 | |
|         grammar: Grammar = parse_string(grammar_source, GrammarParser)
 | |
|         rules = grammar.rules
 | |
|         nullables = compute_nullables(rules)
 | |
|         self.assertNotIn(rules["start"], nullables)  # Not None!
 | |
|         self.assertIn(rules["sign"], nullables)
 | |
| 
 | |
|     def test_advanced_left_recursive(self) -> None:
 | |
|         grammar_source = """
 | |
|         start: NUMBER | sign start
 | |
|         sign: ['-']
 | |
|         """
 | |
|         grammar: Grammar = parse_string(grammar_source, GrammarParser)
 | |
|         rules = grammar.rules
 | |
|         nullables = compute_nullables(rules)
 | |
|         compute_left_recursives(rules)
 | |
|         self.assertNotIn(rules["start"], nullables)  # Not None!
 | |
|         self.assertIn(rules["sign"], nullables)
 | |
|         self.assertTrue(rules["start"].left_recursive)
 | |
|         self.assertFalse(rules["sign"].left_recursive)
 | |
| 
 | |
|     def test_mutually_left_recursive(self) -> None:
 | |
|         grammar_source = """
 | |
|         start: foo 'E'
 | |
|         foo: bar 'A' | 'B'
 | |
|         bar: foo 'C' | 'D'
 | |
|         """
 | |
|         grammar: Grammar = parse_string(grammar_source, GrammarParser)
 | |
|         out = io.StringIO()
 | |
|         genr = PythonParserGenerator(grammar, out)
 | |
|         rules = grammar.rules
 | |
|         self.assertFalse(rules["start"].left_recursive)
 | |
|         self.assertTrue(rules["foo"].left_recursive)
 | |
|         self.assertTrue(rules["bar"].left_recursive)
 | |
|         genr.generate("<string>")
 | |
|         ns: Dict[str, Any] = {}
 | |
|         exec(out.getvalue(), ns)
 | |
|         parser_class: Type[Parser] = ns["GeneratedParser"]
 | |
|         node = parse_string("D A C A E", parser_class)
 | |
| 
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     [
 | |
|                         [
 | |
|                             TokenInfo(
 | |
|                                 type=NAME,
 | |
|                                 string="D",
 | |
|                                 start=(1, 0),
 | |
|                                 end=(1, 1),
 | |
|                                 line="D A C A E",
 | |
|                             ),
 | |
|                             TokenInfo(
 | |
|                                 type=NAME,
 | |
|                                 string="A",
 | |
|                                 start=(1, 2),
 | |
|                                 end=(1, 3),
 | |
|                                 line="D A C A E",
 | |
|                             ),
 | |
|                         ],
 | |
|                         TokenInfo(
 | |
|                             type=NAME,
 | |
|                             string="C",
 | |
|                             start=(1, 4),
 | |
|                             end=(1, 5),
 | |
|                             line="D A C A E",
 | |
|                         ),
 | |
|                     ],
 | |
|                     TokenInfo(
 | |
|                         type=NAME,
 | |
|                         string="A",
 | |
|                         start=(1, 6),
 | |
|                         end=(1, 7),
 | |
|                         line="D A C A E",
 | |
|                     ),
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
|         node = parse_string("B C A E", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 [
 | |
|                     [
 | |
|                         TokenInfo(
 | |
|                             type=NAME,
 | |
|                             string="B",
 | |
|                             start=(1, 0),
 | |
|                             end=(1, 1),
 | |
|                             line="B C A E",
 | |
|                         ),
 | |
|                         TokenInfo(
 | |
|                             type=NAME,
 | |
|                             string="C",
 | |
|                             start=(1, 2),
 | |
|                             end=(1, 3),
 | |
|                             line="B C A E",
 | |
|                         ),
 | |
|                     ],
 | |
|                     TokenInfo(
 | |
|                         type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
 | |
|                     ),
 | |
|                 ],
 | |
|                 TokenInfo(
 | |
|                     type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
 | |
|                 ),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_nasty_mutually_left_recursive(self) -> None:
 | |
|         # This grammar does not recognize 'x - + =', much to my chagrin.
 | |
|         # But that's the way PEG works.
 | |
|         # [Breathlessly]
 | |
|         # The problem is that the toplevel target call
 | |
|         # recurses into maybe, which recognizes 'x - +',
 | |
|         # and then the toplevel target looks for another '+',
 | |
|         # which fails, so it retreats to NAME,
 | |
|         # which succeeds, so we end up just recognizing 'x',
 | |
|         # and then start fails because there's no '=' after that.
 | |
|         grammar_source = """
 | |
|         start: target '='
 | |
|         target: maybe '+' | NAME
 | |
|         maybe: maybe '-' | target
 | |
|         """
 | |
|         grammar: Grammar = parse_string(grammar_source, GrammarParser)
 | |
|         out = io.StringIO()
 | |
|         genr = PythonParserGenerator(grammar, out)
 | |
|         genr.generate("<string>")
 | |
|         ns: Dict[str, Any] = {}
 | |
|         exec(out.getvalue(), ns)
 | |
|         parser_class = ns["GeneratedParser"]
 | |
|         with self.assertRaises(SyntaxError):
 | |
|             parse_string("x - + =", parser_class)
 | |
| 
 | |
|     def test_lookahead(self) -> None:
 | |
|         grammar = """
 | |
|         start: (expr_stmt | assign_stmt) &'.'
 | |
|         expr_stmt: !(target '=') expr
 | |
|         assign_stmt: target '=' expr
 | |
|         expr: term ('+' term)*
 | |
|         target: NAME
 | |
|         term: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("foo = 12 + 12 .", parser_class)
 | |
|         self.maxDiff = None
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 TokenInfo(
 | |
|                     NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
 | |
|                 ),
 | |
|                 TokenInfo(
 | |
|                     OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
 | |
|                 ),
 | |
|                 [
 | |
|                     TokenInfo(
 | |
|                         NUMBER,
 | |
|                         string="12",
 | |
|                         start=(1, 6),
 | |
|                         end=(1, 8),
 | |
|                         line="foo = 12 + 12 .",
 | |
|                     ),
 | |
|                     [
 | |
|                         [
 | |
|                             TokenInfo(
 | |
|                                 OP,
 | |
|                                 string="+",
 | |
|                                 start=(1, 9),
 | |
|                                 end=(1, 10),
 | |
|                                 line="foo = 12 + 12 .",
 | |
|                             ),
 | |
|                             TokenInfo(
 | |
|                                 NUMBER,
 | |
|                                 string="12",
 | |
|                                 start=(1, 11),
 | |
|                                 end=(1, 13),
 | |
|                                 line="foo = 12 + 12 .",
 | |
|                             ),
 | |
|                         ]
 | |
|                     ],
 | |
|                 ],
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_named_lookahead_error(self) -> None:
 | |
|         grammar = """
 | |
|         start: foo=!'x' NAME
 | |
|         """
 | |
|         with self.assertRaises(SyntaxError):
 | |
|             make_parser(grammar)
 | |
| 
 | |
|     def test_start_leader(self) -> None:
 | |
|         grammar = """
 | |
|         start: attr | NAME
 | |
|         attr: start '.' NAME
 | |
|         """
 | |
|         # Would assert False without a special case in compute_left_recursives().
 | |
|         make_parser(grammar)
 | |
| 
 | |
|     def test_opt_sequence(self) -> None:
 | |
|         grammar = """
 | |
|         start: [NAME*]
 | |
|         """
 | |
|         # This case was failing because of a double trailing comma at the end
 | |
|         # of a line in the generated source. See bpo-41044
 | |
|         make_parser(grammar)
 | |
| 
 | |
|     def test_left_recursion_too_complex(self) -> None:
 | |
|         grammar = """
 | |
|         start: foo
 | |
|         foo: bar '+' | baz '+' | '+'
 | |
|         bar: baz '-' | foo '-' | '-'
 | |
|         baz: foo '*' | bar '*' | '*'
 | |
|         """
 | |
|         with self.assertRaises(ValueError) as errinfo:
 | |
|             make_parser(grammar)
 | |
|             self.assertTrue("no leader" in str(errinfo.exception.value))
 | |
| 
 | |
|     def test_cut(self) -> None:
 | |
|         grammar = """
 | |
|         start: '(' ~ expr ')'
 | |
|         expr: NUMBER
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         node = parse_string("(1)", parser_class)
 | |
|         self.assertEqual(
 | |
|             node,
 | |
|             [
 | |
|                 TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
 | |
|                 TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"),
 | |
|                 TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
 | |
|             ],
 | |
|         )
 | |
| 
 | |
|     def test_dangling_reference(self) -> None:
 | |
|         grammar = """
 | |
|         start: foo ENDMARKER
 | |
|         foo: bar NAME
 | |
|         """
 | |
|         with self.assertRaises(GrammarError):
 | |
|             parser_class = make_parser(grammar)
 | |
| 
 | |
|     def test_bad_token_reference(self) -> None:
 | |
|         grammar = """
 | |
|         start: foo
 | |
|         foo: NAMEE
 | |
|         """
 | |
|         with self.assertRaises(GrammarError):
 | |
|             parser_class = make_parser(grammar)
 | |
| 
 | |
|     def test_missing_start(self) -> None:
 | |
|         grammar = """
 | |
|         foo: NAME
 | |
|         """
 | |
|         with self.assertRaises(GrammarError):
 | |
|             parser_class = make_parser(grammar)
 | |
| 
 | |
|     def test_invalid_rule_name(self) -> None:
 | |
|         grammar = """
 | |
|         start: _a b
 | |
|         _a: 'a'
 | |
|         b: 'b'
 | |
|         """
 | |
|         with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_a'"):
 | |
|             parser_class = make_parser(grammar)
 | |
| 
 | |
|     def test_invalid_variable_name(self) -> None:
 | |
|         grammar = """
 | |
|         start: a b
 | |
|         a: _x='a'
 | |
|         b: 'b'
 | |
|         """
 | |
|         with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
 | |
|             parser_class = make_parser(grammar)
 | |
| 
 | |
|     def test_invalid_variable_name_in_temporal_rule(self) -> None:
 | |
|         grammar = """
 | |
|         start: a b
 | |
|         a: (_x='a' | 'b') | 'c'
 | |
|         b: 'b'
 | |
|         """
 | |
|         with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
 | |
|             parser_class = make_parser(grammar)
 | |
| 
 | |
|     def test_soft_keyword(self) -> None:
 | |
|         grammar = """
 | |
|         start:
 | |
|             | "number" n=NUMBER { eval(n.string) }
 | |
|             | "string" n=STRING { n.string }
 | |
|             | SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { l.string + " = " + n.string }
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         self.assertEqual(parse_string("number 1", parser_class), 1)
 | |
|         self.assertEqual(parse_string("string 'b'", parser_class), "'b'")
 | |
|         self.assertEqual(
 | |
|             parse_string("number test 1", parser_class), "test = 1"
 | |
|         )
 | |
|         assert (
 | |
|             parse_string("string test 'b'", parser_class) == "test = 'b'"
 | |
|         )
 | |
|         with self.assertRaises(SyntaxError):
 | |
|             parse_string("test 1", parser_class)
 | |
| 
 | |
|     def test_forced(self) -> None:
 | |
|         grammar = """
 | |
|         start: NAME &&':' | NAME
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         self.assertTrue(parse_string("number :", parser_class))
 | |
|         with self.assertRaises(SyntaxError) as e:
 | |
|             parse_string("a", parser_class)
 | |
| 
 | |
|         self.assertIn("expected ':'", str(e.exception))
 | |
| 
 | |
|     def test_forced_with_group(self) -> None:
 | |
|         grammar = """
 | |
|         start: NAME &&(':' | ';') | NAME
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         self.assertTrue(parse_string("number :", parser_class))
 | |
|         self.assertTrue(parse_string("number ;", parser_class))
 | |
|         with self.assertRaises(SyntaxError) as e:
 | |
|             parse_string("a", parser_class)
 | |
|         self.assertIn("expected (':' | ';')", e.exception.args[0])
 | |
| 
 | |
|     def test_unreachable_explicit(self) -> None:
 | |
|         source = """
 | |
|         start: NAME { UNREACHABLE }
 | |
|         """
 | |
|         grammar = parse_string(source, GrammarParser)
 | |
|         out = io.StringIO()
 | |
|         genr = PythonParserGenerator(
 | |
|             grammar, out, unreachable_formatting="This is a test"
 | |
|         )
 | |
|         genr.generate("<string>")
 | |
|         self.assertIn("This is a test", out.getvalue())
 | |
| 
 | |
|     def test_unreachable_implicit1(self) -> None:
 | |
|         source = """
 | |
|         start: NAME | invalid_input
 | |
|         invalid_input: NUMBER { None }
 | |
|         """
 | |
|         grammar = parse_string(source, GrammarParser)
 | |
|         out = io.StringIO()
 | |
|         genr = PythonParserGenerator(
 | |
|             grammar, out, unreachable_formatting="This is a test"
 | |
|         )
 | |
|         genr.generate("<string>")
 | |
|         self.assertIn("This is a test", out.getvalue())
 | |
| 
 | |
|     def test_unreachable_implicit2(self) -> None:
 | |
|         source = """
 | |
|         start: NAME | '(' invalid_input ')'
 | |
|         invalid_input: NUMBER { None }
 | |
|         """
 | |
|         grammar = parse_string(source, GrammarParser)
 | |
|         out = io.StringIO()
 | |
|         genr = PythonParserGenerator(
 | |
|             grammar, out, unreachable_formatting="This is a test"
 | |
|         )
 | |
|         genr.generate("<string>")
 | |
|         self.assertIn("This is a test", out.getvalue())
 | |
| 
 | |
|     def test_unreachable_implicit3(self) -> None:
 | |
|         source = """
 | |
|         start: NAME | invalid_input { None }
 | |
|         invalid_input: NUMBER
 | |
|         """
 | |
|         grammar = parse_string(source, GrammarParser)
 | |
|         out = io.StringIO()
 | |
|         genr = PythonParserGenerator(
 | |
|             grammar, out, unreachable_formatting="This is a test"
 | |
|         )
 | |
|         genr.generate("<string>")
 | |
|         self.assertNotIn("This is a test", out.getvalue())
 | |
| 
 | |
|     def test_locations_in_alt_action_and_group(self) -> None:
 | |
|         grammar = """
 | |
|         start: t=term NEWLINE? $ { ast.Expression(t) }
 | |
|         term:
 | |
|             | l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) }
 | |
|             | l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) }
 | |
|             | factor
 | |
|         factor:
 | |
|             | (
 | |
|                 n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } |
 | |
|                 n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) }
 | |
|             )
 | |
|         """
 | |
|         parser_class = make_parser(grammar)
 | |
|         source = "2*3\n"
 | |
|         o = ast.dump(parse_string(source, parser_class).body, include_attributes=True)
 | |
|         p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace(
 | |
|             " kind=None,", ""
 | |
|         )
 | |
|         diff = "\n".join(
 | |
|             difflib.unified_diff(
 | |
|                 o.split("\n"), p.split("\n"), "cpython", "python-pegen"
 | |
|             )
 | |
|         )
 | |
|         self.assertFalse(diff)
 | |
| 
 | |
| 
 | |
class TestGrammarVisitor(unittest.TestCase):
    # Must derive from unittest.TestCase: a plain class is not collected by
    # the unittest loader and has no assertEqual() method.

    class Visitor(GrammarVisitor):
        # Counts every node passed to visit() while delegating the actual
        # traversal to GrammarVisitor.visit().
        def __init__(self) -> None:
            self.n_nodes = 0

        def visit(self, node: Any, *args: Any, **kwargs: Any) -> None:
            self.n_nodes += 1
            super().visit(node, *args, **kwargs)

    def test_parse_trivial_grammar(self) -> None:
        grammar = """
        start: 'a'
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        # Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf -> 6
        self.assertEqual(visitor.n_nodes, 6)

    def test_parse_or_grammar(self) -> None:
        grammar = """
        start: rule
        rule: 'a' | 'b'
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        # Grammar/Rule/Rhs/Alt/NamedItem/NameLeaf   -> 6
        #         Rule/Rhs/                         -> 2
        #                  Alt/NamedItem/StringLeaf -> 3
        #                  Alt/NamedItem/StringLeaf -> 3

        self.assertEqual(visitor.n_nodes, 14)

    def test_parse_repeat1_grammar(self) -> None:
        grammar = """
        start: 'a'+
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        # Grammar/Rule/Rhs/Alt/NamedItem/Repeat1/StringLeaf -> 7
        self.assertEqual(visitor.n_nodes, 7)

    def test_parse_repeat0_grammar(self) -> None:
        grammar = """
        start: 'a'*
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        # Grammar/Rule/Rhs/Alt/NamedItem/Repeat0/StringLeaf -> 7

        self.assertEqual(visitor.n_nodes, 7)

    def test_parse_optional_grammar(self) -> None:
        grammar = """
        start: 'a' ['b']
        """
        rules = parse_string(grammar, GrammarParser)
        visitor = self.Visitor()

        visitor.visit(rules)

        # Grammar/Rule/Rhs/Alt/NamedItem/StringLeaf                       -> 6
        #                      NamedItem/Opt/Rhs/Alt/NamedItem/StringLeaf -> 6

        self.assertEqual(visitor.n_nodes, 12)
 | |
| 
 | |
| 
 | |
class TestGrammarVisualizer(unittest.TestCase):
    # Each test parses a small grammar, prints its AST with ASTGrammarPrinter
    # and compares the text against a hand-written tree drawing.

    def _render_grammar_ast(self, grammar: str) -> str:
        # Parse *grammar* and return everything ASTGrammarPrinter emits,
        # with the individually printed pieces joined by newlines.
        rules = parse_string(grammar, GrammarParser)

        printer = ASTGrammarPrinter()
        # Builtin generic used on purpose: this file imports only Dict and
        # Any from typing, so the name ``List`` would be undefined.
        lines: list[str] = []
        printer.print_grammar_ast(rules, printer=lines.append)

        return "\n".join(lines)

    def test_simple_rule(self) -> None:
        grammar = """
        start: 'a' 'b'
        """
        output = self._render_grammar_ast(grammar)
        expected_output = textwrap.dedent(
            """\
        └──Rule
           └──Rhs
              └──Alt
                 ├──NamedItem
                 │  └──StringLeaf("'a'")
                 └──NamedItem
                    └──StringLeaf("'b'")
        """
        )

        self.assertEqual(output, expected_output)

    def test_multiple_rules(self) -> None:
        grammar = """
        start: a b
        a: 'a'
        b: 'b'
        """
        output = self._render_grammar_ast(grammar)
        expected_output = textwrap.dedent(
            """\
        └──Rule
           └──Rhs
              └──Alt
                 ├──NamedItem
                 │  └──NameLeaf('a')
                 └──NamedItem
                    └──NameLeaf('b')

        └──Rule
           └──Rhs
              └──Alt
                 └──NamedItem
                    └──StringLeaf("'a'")

        └──Rule
           └──Rhs
              └──Alt
                 └──NamedItem
                    └──StringLeaf("'b'")
                        """
        )

        self.assertEqual(output, expected_output)

    def test_deep_nested_rule(self) -> None:
        grammar = """
        start: 'a' ['b'['c'['d']]]
        """
        output = self._render_grammar_ast(grammar)
        expected_output = textwrap.dedent(
            """\
        └──Rule
           └──Rhs
              └──Alt
                 ├──NamedItem
                 │  └──StringLeaf("'a'")
                 └──NamedItem
                    └──Opt
                       └──Rhs
                          └──Alt
                             ├──NamedItem
                             │  └──StringLeaf("'b'")
                             └──NamedItem
                                └──Opt
                                   └──Rhs
                                      └──Alt
                                         ├──NamedItem
                                         │  └──StringLeaf("'c'")
                                         └──NamedItem
                                            └──Opt
                                               └──Rhs
                                                  └──Alt
                                                     └──NamedItem
                                                        └──StringLeaf("'d'")
                                """
        )

        self.assertEqual(output, expected_output)
 | 
