[3.10] Allow the parser to avoid nested processing of invalid rules (GH-31252). (GH-31257)

(cherry picked from commit 390459de6d)

Co-authored-by: Pablo Galindo Salgado <Pablogsal@gmail.com>
This commit is contained in:
Pablo Galindo Salgado 2022-02-10 14:38:31 +00:00 committed by GitHub
parent 9f5145403b
commit 14284b0e71
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 1721 additions and 1721 deletions

View file

@ -846,6 +846,7 @@ invalid_kwarg:
RAISE_SYNTAX_ERROR_KNOWN_RANGE( RAISE_SYNTAX_ERROR_KNOWN_RANGE(
a, b, "expression cannot contain assignment, perhaps you meant \"==\"?") } a, b, "expression cannot contain assignment, perhaps you meant \"==\"?") }
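# IMPORTANT: The "_without_invalid" suffix is significant: for a rule whose name ends with it, the generated parser disables invalid_* rules while that rule (and anything parsed beneath it) runs, and restores the previous setting when the rule returns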
expression_without_invalid[expr_ty]: expression_without_invalid[expr_ty]:
| a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) } | a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) }
| disjunction | disjunction
@ -863,16 +864,14 @@ invalid_expression:
RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") } RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") }
| a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") } | a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") }
invalid_left_assignment_prefixes(memo): list|tuple|genexp|'True'|'None'|'False' invalid_named_expression(memo):
invalid_named_expression:
| a=expression ':=' expression { | a=expression ':=' expression {
RAISE_SYNTAX_ERROR_KNOWN_LOCATION( RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) } a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) }
| a=NAME '=' b=bitwise_or !('='|':=') { | a=NAME '=' b=bitwise_or !('='|':=') {
p->in_raw_rule ? NULL : RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") } RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") }
| !invalid_left_assignment_prefixes a=bitwise_or b='=' bitwise_or !('='|':=') { | !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':=') {
p->in_raw_rule ? NULL : RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?", RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?",
_PyPegen_get_expr_name(a)) } _PyPegen_get_expr_name(a)) }
invalid_assignment: invalid_assignment:

View file

@ -231,7 +231,7 @@ def testSyntaxErrorOffset(self):
check('a = « hello » « world »', 1, 5) check('a = « hello » « world »', 1, 5)
check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5) check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5)
check('[file for\n str(file) in []]', 2, 2) check('[file for\n str(file) in []]', 2, 2)
check("ages = {'Alice'=22, 'Bob'=23}", 1, 16) check("ages = {'Alice'=22, 'Bob'=23}", 1, 9)
check('match ...:\n case {**rest, "key": value}:\n ...', 2, 19) check('match ...:\n case {**rest, "key": value}:\n ...', 2, 19)
check("[a b c d e f]", 1, 2) check("[a b c d e f]", 1, 2)
check("for x yfff:", 1, 7) check("for x yfff:", 1, 7)

File diff suppressed because it is too large Load diff

View file

@ -1259,7 +1259,6 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
p->known_err_token = NULL; p->known_err_token = NULL;
p->level = 0; p->level = 0;
p->call_invalid_rules = 0; p->call_invalid_rules = 0;
p->in_raw_rule = 0;
return p; return p;
} }

View file

@ -76,7 +76,6 @@ typedef struct {
Token *known_err_token; Token *known_err_token;
int level; int level;
int call_invalid_rules; int call_invalid_rules;
int in_raw_rule;
} Parser; } Parser;
typedef struct { typedef struct {

View file

@ -115,6 +115,7 @@ def __init__(
self.cache: Dict[Any, FunctionCall] = {} self.cache: Dict[Any, FunctionCall] = {}
self.keyword_cache: Dict[str, int] = {} self.keyword_cache: Dict[str, int] = {}
self.soft_keywords: Set[str] = set() self.soft_keywords: Set[str] = set()
self.cleanup_statements: List[str] = []
def keyword_helper(self, keyword: str) -> FunctionCall: def keyword_helper(self, keyword: str) -> FunctionCall:
if keyword not in self.keyword_cache: if keyword not in self.keyword_cache:
@ -357,6 +358,7 @@ def __init__(
self._varname_counter = 0 self._varname_counter = 0
self.debug = debug self.debug = debug
self.skip_actions = skip_actions self.skip_actions = skip_actions
self.cleanup_statements: List[str] = []
def add_level(self) -> None: def add_level(self) -> None:
self.print("if (p->level++ == MAXSTACK) {") self.print("if (p->level++ == MAXSTACK) {")
@ -369,6 +371,8 @@ def remove_level(self) -> None:
self.print("p->level--;") self.print("p->level--;")
def add_return(self, ret_val: str) -> None: def add_return(self, ret_val: str) -> None:
for stmt in self.cleanup_statements:
self.print(stmt)
self.remove_level() self.remove_level()
self.print(f"return {ret_val};") self.print(f"return {ret_val};")
@ -539,9 +543,7 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res" f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res"
) )
self.print("p->mark = _mark;") self.print("p->mark = _mark;")
self.print("p->in_raw_rule++;")
self.print(f"void *_raw = {node.name}_raw(p);") self.print(f"void *_raw = {node.name}_raw(p);")
self.print("p->in_raw_rule--;")
self.print("if (p->error_indicator) {") self.print("if (p->error_indicator) {")
with self.indent(): with self.indent():
self.add_return("NULL") self.add_return("NULL")
@ -649,10 +651,21 @@ def visit_Rule(self, node: Rule) -> None:
self._set_up_rule_memoization(node, result_type) self._set_up_rule_memoization(node, result_type)
self.print("{") self.print("{")
if node.name.endswith("without_invalid"):
with self.indent():
self.print("int _prev_call_invalid = p->call_invalid_rules;")
self.print("p->call_invalid_rules = 0;")
self.cleanup_statements.append("p->call_invalid_rules = _prev_call_invalid;")
if is_loop: if is_loop:
self._handle_loop_rule_body(node, rhs) self._handle_loop_rule_body(node, rhs)
else: else:
self._handle_default_rule_body(node, rhs, result_type) self._handle_default_rule_body(node, rhs, result_type)
if node.name.endswith("without_invalid"):
self.cleanup_statements.pop()
self.print("}") self.print("}")
def visit_NamedItem(self, node: NamedItem) -> None: def visit_NamedItem(self, node: NamedItem) -> None: