mirror of
				https://github.com/python/cpython.git
				synced 2025-10-24 18:33:49 +00:00 
			
		
		
		
	Allow the parser to avoid nested processing of invalid rules (GH-31252)
This commit is contained in:
		
							parent
							
								
									2cea8c29cf
								
							
						
					
					
						commit
						390459de6d
					
				
					 6 changed files with 1998 additions and 1997 deletions
				
			
		|  | @ -1078,6 +1078,7 @@ invalid_kwarg: | ||||||
|         RAISE_SYNTAX_ERROR_KNOWN_RANGE( |         RAISE_SYNTAX_ERROR_KNOWN_RANGE( | ||||||
|             a, b, "expression cannot contain assignment, perhaps you meant \"==\"?") } |             a, b, "expression cannot contain assignment, perhaps you meant \"==\"?") } | ||||||
| 
 | 
 | ||||||
|  | # IMPORTANT: Note that the "_without_invalid" suffix causes the rule to not call invalid rules under it | ||||||
| expression_without_invalid[expr_ty]: | expression_without_invalid[expr_ty]: | ||||||
|     | a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) } |     | a=disjunction 'if' b=disjunction 'else' c=expression { _PyAST_IfExp(b, a, c, EXTRA) } | ||||||
|     | disjunction |     | disjunction | ||||||
|  | @ -1095,16 +1096,14 @@ invalid_expression: | ||||||
|         RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") } |         RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Perhaps you forgot a comma?") } | ||||||
|    | a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") } |    | a=disjunction 'if' b=disjunction !('else'|':') { RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "expected 'else' after 'if' expression") } | ||||||
| 
 | 
 | ||||||
| invalid_left_assignment_prefixes(memo): list|tuple|genexp|'True'|'None'|'False' | invalid_named_expression(memo): | ||||||
| 
 |  | ||||||
| invalid_named_expression: |  | ||||||
|     | a=expression ':=' expression { |     | a=expression ':=' expression { | ||||||
|         RAISE_SYNTAX_ERROR_KNOWN_LOCATION( |         RAISE_SYNTAX_ERROR_KNOWN_LOCATION( | ||||||
|             a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) } |             a, "cannot use assignment expressions with %s", _PyPegen_get_expr_name(a)) } | ||||||
|     | a=NAME '=' b=bitwise_or !('='|':=') { |     | a=NAME '=' b=bitwise_or !('='|':=') { | ||||||
|         p->in_raw_rule ? NULL : RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") } |         RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, "invalid syntax. Maybe you meant '==' or ':=' instead of '='?") } | ||||||
|     | !invalid_left_assignment_prefixes a=bitwise_or b='=' bitwise_or !('='|':=') { |     | !(list|tuple|genexp|'True'|'None'|'False') a=bitwise_or b='=' bitwise_or !('='|':=') { | ||||||
|         p->in_raw_rule ? NULL : RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?", |         RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, "cannot assign to %s here. Maybe you meant '==' instead of '='?", | ||||||
|                                           _PyPegen_get_expr_name(a)) } |                                           _PyPegen_get_expr_name(a)) } | ||||||
| 
 | 
 | ||||||
| invalid_assignment: | invalid_assignment: | ||||||
|  |  | ||||||
|  | @ -231,7 +231,7 @@ def testSyntaxErrorOffset(self): | ||||||
|         check('a = « hello » « world »', 1, 5) |         check('a = « hello » « world »', 1, 5) | ||||||
|         check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5) |         check('[\nfile\nfor str(file)\nin\n[]\n]', 3, 5) | ||||||
|         check('[file for\n str(file) in []]', 2, 2) |         check('[file for\n str(file) in []]', 2, 2) | ||||||
|         check("ages = {'Alice'=22, 'Bob'=23}", 1, 16) |         check("ages = {'Alice'=22, 'Bob'=23}", 1, 9) | ||||||
|         check('match ...:\n    case {**rest, "key": value}:\n        ...', 2, 19) |         check('match ...:\n    case {**rest, "key": value}:\n        ...', 2, 19) | ||||||
|         check("[a b c d e f]", 1, 2) |         check("[a b c d e f]", 1, 2) | ||||||
|         check("for x yfff:", 1, 7) |         check("for x yfff:", 1, 7) | ||||||
|  |  | ||||||
							
								
								
									
										3956
									
								
								Parser/parser.c
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										3956
									
								
								Parser/parser.c
									
										
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -381,6 +381,7 @@ _PyPegen_expect_token(Parser *p, int type) | ||||||
|     } |     } | ||||||
|     Token *t = p->tokens[p->mark]; |     Token *t = p->tokens[p->mark]; | ||||||
|     if (t->type != type) { |     if (t->type != type) { | ||||||
|  |         if (Py_DebugFlag) fprintf(stderr, "Token = %s\n", PyBytes_AsString(t->bytes)); | ||||||
|         return NULL; |         return NULL; | ||||||
|     } |     } | ||||||
|     p->mark += 1; |     p->mark += 1; | ||||||
|  | @ -785,7 +786,6 @@ _PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags, | ||||||
|     p->known_err_token = NULL; |     p->known_err_token = NULL; | ||||||
|     p->level = 0; |     p->level = 0; | ||||||
|     p->call_invalid_rules = 0; |     p->call_invalid_rules = 0; | ||||||
|     p->in_raw_rule = 0; |  | ||||||
|     return p; |     return p; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -78,7 +78,6 @@ typedef struct { | ||||||
|     Token *known_err_token; |     Token *known_err_token; | ||||||
|     int level; |     int level; | ||||||
|     int call_invalid_rules; |     int call_invalid_rules; | ||||||
|     int in_raw_rule; |  | ||||||
| } Parser; | } Parser; | ||||||
| 
 | 
 | ||||||
| typedef struct { | typedef struct { | ||||||
|  |  | ||||||
|  | @ -122,6 +122,7 @@ def __init__( | ||||||
|         self.exact_tokens = exact_tokens |         self.exact_tokens = exact_tokens | ||||||
|         self.non_exact_tokens = non_exact_tokens |         self.non_exact_tokens = non_exact_tokens | ||||||
|         self.cache: Dict[Any, FunctionCall] = {} |         self.cache: Dict[Any, FunctionCall] = {} | ||||||
|  |         self.cleanup_statements: List[str] = [] | ||||||
| 
 | 
 | ||||||
|     def keyword_helper(self, keyword: str) -> FunctionCall: |     def keyword_helper(self, keyword: str) -> FunctionCall: | ||||||
|         return FunctionCall( |         return FunctionCall( | ||||||
|  | @ -364,6 +365,7 @@ def __init__( | ||||||
|         self._varname_counter = 0 |         self._varname_counter = 0 | ||||||
|         self.debug = debug |         self.debug = debug | ||||||
|         self.skip_actions = skip_actions |         self.skip_actions = skip_actions | ||||||
|  |         self.cleanup_statements: List[str] = [] | ||||||
| 
 | 
 | ||||||
|     def add_level(self) -> None: |     def add_level(self) -> None: | ||||||
|         self.print("if (p->level++ == MAXSTACK) {") |         self.print("if (p->level++ == MAXSTACK) {") | ||||||
|  | @ -376,6 +378,8 @@ def remove_level(self) -> None: | ||||||
|         self.print("p->level--;") |         self.print("p->level--;") | ||||||
| 
 | 
 | ||||||
|     def add_return(self, ret_val: str) -> None: |     def add_return(self, ret_val: str) -> None: | ||||||
|  |         for stmt in self.cleanup_statements: | ||||||
|  |             self.print(stmt) | ||||||
|         self.remove_level() |         self.remove_level() | ||||||
|         self.print(f"return {ret_val};") |         self.print(f"return {ret_val};") | ||||||
| 
 | 
 | ||||||
|  | @ -547,9 +551,7 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None: | ||||||
|                     f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res" |                     f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res" | ||||||
|                 ) |                 ) | ||||||
|                 self.print("p->mark = _mark;") |                 self.print("p->mark = _mark;") | ||||||
|                 self.print("p->in_raw_rule++;") |  | ||||||
|                 self.print(f"void *_raw = {node.name}_raw(p);") |                 self.print(f"void *_raw = {node.name}_raw(p);") | ||||||
|                 self.print("p->in_raw_rule--;") |  | ||||||
|                 self.print("if (p->error_indicator) {") |                 self.print("if (p->error_indicator) {") | ||||||
|                 with self.indent(): |                 with self.indent(): | ||||||
|                     self.add_return("NULL") |                     self.add_return("NULL") | ||||||
|  | @ -663,10 +665,21 @@ def visit_Rule(self, node: Rule) -> None: | ||||||
|             self._set_up_rule_memoization(node, result_type) |             self._set_up_rule_memoization(node, result_type) | ||||||
| 
 | 
 | ||||||
|         self.print("{") |         self.print("{") | ||||||
|  | 
 | ||||||
|  |         if node.name.endswith("without_invalid"): | ||||||
|  |             with self.indent(): | ||||||
|  |                 self.print("int _prev_call_invalid = p->call_invalid_rules;") | ||||||
|  |                 self.print("p->call_invalid_rules = 0;") | ||||||
|  |                 self.cleanup_statements.append("p->call_invalid_rules = _prev_call_invalid;") | ||||||
|  | 
 | ||||||
|         if is_loop: |         if is_loop: | ||||||
|             self._handle_loop_rule_body(node, rhs) |             self._handle_loop_rule_body(node, rhs) | ||||||
|         else: |         else: | ||||||
|             self._handle_default_rule_body(node, rhs, result_type) |             self._handle_default_rule_body(node, rhs, result_type) | ||||||
|  | 
 | ||||||
|  |         if node.name.endswith("without_invalid"): | ||||||
|  |             self.cleanup_statements.pop() | ||||||
|  | 
 | ||||||
|         self.print("}") |         self.print("}") | ||||||
| 
 | 
 | ||||||
|     def visit_NamedItem(self, node: NamedItem) -> None: |     def visit_NamedItem(self, node: NamedItem) -> None: | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Pablo Galindo Salgado
						Pablo Galindo Salgado