mirror of
				https://github.com/python/cpython.git
				synced 2025-11-03 23:21:29 +00:00 
			
		
		
		
	Refactor parser compilation units into specific components (GH-29676)
This commit is contained in:
		
							parent
							
								
									f7638dd0f9
								
							
						
					
					
						commit
						c9c4444d9f
					
				
					 10 changed files with 1910 additions and 1870 deletions
				
			
		| 
						 | 
					@ -331,6 +331,8 @@ LIBFFI_INCLUDEDIR=	@LIBFFI_INCLUDEDIR@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
PEGEN_OBJS=		\
 | 
					PEGEN_OBJS=		\
 | 
				
			||||||
		Parser/pegen.o \
 | 
							Parser/pegen.o \
 | 
				
			||||||
 | 
							Parser/pegen_errors.o \
 | 
				
			||||||
 | 
							Parser/action_helpers.o \
 | 
				
			||||||
		Parser/parser.o \
 | 
							Parser/parser.o \
 | 
				
			||||||
		Parser/string_parser.o \
 | 
							Parser/string_parser.o \
 | 
				
			||||||
		Parser/peg_api.o
 | 
							Parser/peg_api.o
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -163,6 +163,8 @@
 | 
				
			||||||
    <ClCompile Include="..\Parser\parser.c" />
 | 
					    <ClCompile Include="..\Parser\parser.c" />
 | 
				
			||||||
    <ClCompile Include="..\Parser\peg_api.c" />
 | 
					    <ClCompile Include="..\Parser\peg_api.c" />
 | 
				
			||||||
    <ClCompile Include="..\Parser\pegen.c" />
 | 
					    <ClCompile Include="..\Parser\pegen.c" />
 | 
				
			||||||
 | 
					    <ClCompile Include="..\Parser\pegen_errors.c" />
 | 
				
			||||||
 | 
					    <ClCompile Include="..\Parser\action_helpers.c" />
 | 
				
			||||||
    <ClCompile Include="..\Parser\string_parser.c" />
 | 
					    <ClCompile Include="..\Parser\string_parser.c" />
 | 
				
			||||||
    <ClCompile Include="..\Parser\token.c" />
 | 
					    <ClCompile Include="..\Parser\token.c" />
 | 
				
			||||||
    <ClCompile Include="..\Parser\tokenizer.c" />
 | 
					    <ClCompile Include="..\Parser\tokenizer.c" />
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -433,6 +433,8 @@
 | 
				
			||||||
    <ClCompile Include="..\Parser\tokenizer.c" />
 | 
					    <ClCompile Include="..\Parser\tokenizer.c" />
 | 
				
			||||||
    <ClCompile Include="..\Parser\token.c" />
 | 
					    <ClCompile Include="..\Parser\token.c" />
 | 
				
			||||||
    <ClCompile Include="..\Parser\pegen.c" />
 | 
					    <ClCompile Include="..\Parser\pegen.c" />
 | 
				
			||||||
 | 
					    <ClCompile Include="..\Parser\pegen_errors.c" />
 | 
				
			||||||
 | 
					    <ClCompile Include="..\Parser\action_helpers.c" />
 | 
				
			||||||
    <ClCompile Include="..\Parser\parser.c" />
 | 
					    <ClCompile Include="..\Parser\parser.c" />
 | 
				
			||||||
    <ClCompile Include="..\Parser\string_parser.c" />
 | 
					    <ClCompile Include="..\Parser\string_parser.c" />
 | 
				
			||||||
    <ClCompile Include="..\Parser\peg_api.c" />
 | 
					    <ClCompile Include="..\Parser\peg_api.c" />
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1205,6 +1205,12 @@
 | 
				
			||||||
    <ClCompile Include="..\Parser\pegen.c">
 | 
					    <ClCompile Include="..\Parser\pegen.c">
 | 
				
			||||||
      <Filter>Parser</Filter>
 | 
					      <Filter>Parser</Filter>
 | 
				
			||||||
    </ClCompile>
 | 
					    </ClCompile>
 | 
				
			||||||
 | 
					    <ClCompile Include="..\Parser\pegen_errors.c">
 | 
				
			||||||
 | 
					      <Filter>Parser</Filter>
 | 
				
			||||||
 | 
					    </ClCompile>
 | 
				
			||||||
 | 
					    <ClCompile Include="..\Parser\action_helpers.c">
 | 
				
			||||||
 | 
					      <Filter>Parser</Filter>
 | 
				
			||||||
 | 
					    </ClCompile>
 | 
				
			||||||
    <ClCompile Include="..\Parser\peg_api.c">
 | 
					    <ClCompile Include="..\Parser\peg_api.c">
 | 
				
			||||||
      <Filter>Parser</Filter>
 | 
					      <Filter>Parser</Filter>
 | 
				
			||||||
    </ClCompile>
 | 
					    </ClCompile>
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										1289
									
								
								Parser/action_helpers.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										1289
									
								
								Parser/action_helpers.c
									
										
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										1936
									
								
								Parser/pegen.c
									
										
									
									
									
								
							
							
						
						
									
										1936
									
								
								Parser/pegen.c
									
										
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										112
									
								
								Parser/pegen.h
									
										
									
									
									
								
							
							
						
						
									
										112
									
								
								Parser/pegen.h
									
										
									
									
									
								
							| 
						 | 
					@ -23,6 +23,8 @@
 | 
				
			||||||
#define PyPARSE_TYPE_COMMENTS 0x0040
 | 
					#define PyPARSE_TYPE_COMMENTS 0x0040
 | 
				
			||||||
#define PyPARSE_ASYNC_HACKS   0x0080
 | 
					#define PyPARSE_ASYNC_HACKS   0x0080
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define CURRENT_POS (-5)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
typedef struct _memo {
 | 
					typedef struct _memo {
 | 
				
			||||||
    int type;
 | 
					    int type;
 | 
				
			||||||
    void *node;
 | 
					    void *node;
 | 
				
			||||||
| 
						 | 
					@ -114,6 +116,7 @@ typedef struct {
 | 
				
			||||||
    int is_keyword;
 | 
					    int is_keyword;
 | 
				
			||||||
} KeywordOrStarred;
 | 
					} KeywordOrStarred;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Internal parser functions
 | 
				
			||||||
#if defined(Py_DEBUG)
 | 
					#if defined(Py_DEBUG)
 | 
				
			||||||
void _PyPegen_clear_memo_statistics(void);
 | 
					void _PyPegen_clear_memo_statistics(void);
 | 
				
			||||||
PyObject *_PyPegen_get_memo_statistics(void);
 | 
					PyObject *_PyPegen_get_memo_statistics(void);
 | 
				
			||||||
| 
						 | 
					@ -123,7 +126,6 @@ int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
 | 
				
			||||||
int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
 | 
					int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
 | 
				
			||||||
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
 | 
					int _PyPegen_is_memoized(Parser *p, int type, void *pres);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
 | 
					int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
 | 
				
			||||||
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
 | 
					int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
 | 
				
			||||||
int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
 | 
					int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
 | 
				
			||||||
| 
						 | 
					@ -139,23 +141,24 @@ int _PyPegen_fill_token(Parser *p);
 | 
				
			||||||
expr_ty _PyPegen_name_token(Parser *p);
 | 
					expr_ty _PyPegen_name_token(Parser *p);
 | 
				
			||||||
expr_ty _PyPegen_number_token(Parser *p);
 | 
					expr_ty _PyPegen_number_token(Parser *p);
 | 
				
			||||||
void *_PyPegen_string_token(Parser *p);
 | 
					void *_PyPegen_string_token(Parser *p);
 | 
				
			||||||
const char *_PyPegen_get_expr_name(expr_ty);
 | 
					 | 
				
			||||||
Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
 | 
					Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Error handling functions and APIs
 | 
				
			||||||
 | 
					typedef enum {
 | 
				
			||||||
 | 
					    STAR_TARGETS,
 | 
				
			||||||
 | 
					    DEL_TARGETS,
 | 
				
			||||||
 | 
					    FOR_TARGETS
 | 
				
			||||||
 | 
					} TARGETS_TYPE;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int _Pypegen_raise_decode_error(Parser *p);
 | 
				
			||||||
 | 
					void _PyPegen_raise_tokenizer_init_error(PyObject *filename);
 | 
				
			||||||
 | 
					int _Pypegen_tokenizer_error(Parser *p);
 | 
				
			||||||
void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
 | 
					void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
 | 
				
			||||||
void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
 | 
					void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
 | 
				
			||||||
                                          Py_ssize_t lineno, Py_ssize_t col_offset,
 | 
					                                          Py_ssize_t lineno, Py_ssize_t col_offset,
 | 
				
			||||||
                                          Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
 | 
					                                          Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
 | 
				
			||||||
                                          const char *errmsg, va_list va);
 | 
					                                          const char *errmsg, va_list va);
 | 
				
			||||||
void *_PyPegen_dummy_name(Parser *p, ...);
 | 
					void _Pypegen_set_syntax_error(Parser* p, Token* last_token);
 | 
				
			||||||
 | 
					 | 
				
			||||||
void * _PyPegen_seq_last_item(asdl_seq *seq);
 | 
					 | 
				
			||||||
#define PyPegen_last_item(seq, type) ((type)_PyPegen_seq_last_item((asdl_seq*)seq))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void * _PyPegen_seq_first_item(asdl_seq *seq);
 | 
					 | 
				
			||||||
#define PyPegen_first_item(seq, type) ((type)_PyPegen_seq_first_item((asdl_seq*)seq))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#define CURRENT_POS (-5)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Py_LOCAL_INLINE(void *)
 | 
					Py_LOCAL_INLINE(void *)
 | 
				
			||||||
RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
 | 
					RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
 | 
				
			||||||
                           Py_ssize_t lineno, Py_ssize_t col_offset,
 | 
					                           Py_ssize_t lineno, Py_ssize_t col_offset,
 | 
				
			||||||
| 
						 | 
					@ -170,10 +173,6 @@ RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
 | 
				
			||||||
    va_end(va);
 | 
					    va_end(va);
 | 
				
			||||||
    return NULL;
 | 
					    return NULL;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					 | 
				
			||||||
#define UNUSED(expr) do { (void)(expr); } while (0)
 | 
					 | 
				
			||||||
#define EXTRA_EXPR(head, tail) head->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
 | 
					 | 
				
			||||||
#define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena
 | 
					 | 
				
			||||||
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
 | 
					#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
 | 
				
			||||||
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
 | 
					#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
 | 
				
			||||||
#define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
 | 
					#define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
 | 
				
			||||||
| 
						 | 
					@ -182,6 +181,7 @@ RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
 | 
				
			||||||
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (a)->end_lineno, (a)->end_col_offset, msg, ##__VA_ARGS__)
 | 
					    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (a)->end_lineno, (a)->end_col_offset, msg, ##__VA_ARGS__)
 | 
				
			||||||
#define RAISE_SYNTAX_ERROR_STARTING_FROM(a, msg, ...) \
 | 
					#define RAISE_SYNTAX_ERROR_STARTING_FROM(a, msg, ...) \
 | 
				
			||||||
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, CURRENT_POS, CURRENT_POS, msg, ##__VA_ARGS__)
 | 
					    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, CURRENT_POS, CURRENT_POS, msg, ##__VA_ARGS__)
 | 
				
			||||||
 | 
					#define RAISE_SYNTAX_ERROR_INVALID_TARGET(type, e) _RAISE_SYNTAX_ERROR_INVALID_TARGET(p, type, e)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Py_LOCAL_INLINE(void *)
 | 
					Py_LOCAL_INLINE(void *)
 | 
				
			||||||
CHECK_CALL(Parser *p, void *result)
 | 
					CHECK_CALL(Parser *p, void *result)
 | 
				
			||||||
| 
						 | 
					@ -207,6 +207,39 @@ CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
 | 
				
			||||||
#define CHECK(type, result) ((type) CHECK_CALL(p, result))
 | 
					#define CHECK(type, result) ((type) CHECK_CALL(p, result))
 | 
				
			||||||
#define CHECK_NULL_ALLOWED(type, result) ((type) CHECK_CALL_NULL_ALLOWED(p, result))
 | 
					#define CHECK_NULL_ALLOWED(type, result) ((type) CHECK_CALL_NULL_ALLOWED(p, result))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type);
 | 
				
			||||||
 | 
					const char *_PyPegen_get_expr_name(expr_ty);
 | 
				
			||||||
 | 
					Py_LOCAL_INLINE(void *)
 | 
				
			||||||
 | 
					_RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser *p, TARGETS_TYPE type, void *e)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    expr_ty invalid_target = CHECK_NULL_ALLOWED(expr_ty, _PyPegen_get_invalid_target(e, type));
 | 
				
			||||||
 | 
					    if (invalid_target != NULL) {
 | 
				
			||||||
 | 
					        const char *msg;
 | 
				
			||||||
 | 
					        if (type == STAR_TARGETS || type == FOR_TARGETS) {
 | 
				
			||||||
 | 
					            msg = "cannot assign to %s";
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        else {
 | 
				
			||||||
 | 
					            msg = "cannot delete %s";
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
 | 
				
			||||||
 | 
					            invalid_target,
 | 
				
			||||||
 | 
					            msg,
 | 
				
			||||||
 | 
					            _PyPegen_get_expr_name(invalid_target)
 | 
				
			||||||
 | 
					        );
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return RAISE_SYNTAX_ERROR("invalid syntax");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Action utility functions
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void *_PyPegen_dummy_name(Parser *p, ...);
 | 
				
			||||||
 | 
					void * _PyPegen_seq_last_item(asdl_seq *seq);
 | 
				
			||||||
 | 
					#define PyPegen_last_item(seq, type) ((type)_PyPegen_seq_last_item((asdl_seq*)seq))
 | 
				
			||||||
 | 
					void * _PyPegen_seq_first_item(asdl_seq *seq);
 | 
				
			||||||
 | 
					#define PyPegen_first_item(seq, type) ((type)_PyPegen_seq_first_item((asdl_seq*)seq))
 | 
				
			||||||
 | 
					#define UNUSED(expr) do { (void)(expr); } while (0)
 | 
				
			||||||
 | 
					#define EXTRA_EXPR(head, tail) head->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
 | 
				
			||||||
 | 
					#define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena
 | 
				
			||||||
PyObject *_PyPegen_new_type_comment(Parser *, const char *);
 | 
					PyObject *_PyPegen_new_type_comment(Parser *, const char *);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Py_LOCAL_INLINE(PyObject *)
 | 
					Py_LOCAL_INLINE(PyObject *)
 | 
				
			||||||
| 
						 | 
					@ -248,13 +281,6 @@ INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *);
 | 
					arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *);
 | 
				
			||||||
PyObject *_PyPegen_new_identifier(Parser *, const char *);
 | 
					PyObject *_PyPegen_new_identifier(Parser *, const char *);
 | 
				
			||||||
Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
 | 
					 | 
				
			||||||
void _PyPegen_Parser_Free(Parser *);
 | 
					 | 
				
			||||||
mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
 | 
					 | 
				
			||||||
                                    const char *, const char *, PyCompilerFlags *, int *, PyArena *);
 | 
					 | 
				
			||||||
void *_PyPegen_run_parser(Parser *);
 | 
					 | 
				
			||||||
mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
 | 
					 | 
				
			||||||
asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);
 | 
					 | 
				
			||||||
asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
 | 
					asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
 | 
				
			||||||
asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
 | 
					asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
 | 
				
			||||||
asdl_seq *_PyPegen_seq_append_to_end(Parser *, asdl_seq *, void *);
 | 
					asdl_seq *_PyPegen_seq_append_to_end(Parser *, asdl_seq *, void *);
 | 
				
			||||||
| 
						 | 
					@ -295,40 +321,18 @@ asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
 | 
				
			||||||
int _PyPegen_check_barry_as_flufl(Parser *, Token *);
 | 
					int _PyPegen_check_barry_as_flufl(Parser *, Token *);
 | 
				
			||||||
int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
 | 
					int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
 | 
				
			||||||
mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
 | 
					mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
 | 
				
			||||||
 | 
					 | 
				
			||||||
// Error reporting helpers
 | 
					 | 
				
			||||||
typedef enum {
 | 
					 | 
				
			||||||
    STAR_TARGETS,
 | 
					 | 
				
			||||||
    DEL_TARGETS,
 | 
					 | 
				
			||||||
    FOR_TARGETS
 | 
					 | 
				
			||||||
} TARGETS_TYPE;
 | 
					 | 
				
			||||||
expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type);
 | 
					 | 
				
			||||||
#define RAISE_SYNTAX_ERROR_INVALID_TARGET(type, e) _RAISE_SYNTAX_ERROR_INVALID_TARGET(p, type, e)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
Py_LOCAL_INLINE(void *)
 | 
					 | 
				
			||||||
_RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser *p, TARGETS_TYPE type, void *e)
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
    expr_ty invalid_target = CHECK_NULL_ALLOWED(expr_ty, _PyPegen_get_invalid_target(e, type));
 | 
					 | 
				
			||||||
    if (invalid_target != NULL) {
 | 
					 | 
				
			||||||
        const char *msg;
 | 
					 | 
				
			||||||
        if (type == STAR_TARGETS || type == FOR_TARGETS) {
 | 
					 | 
				
			||||||
            msg = "cannot assign to %s";
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        else {
 | 
					 | 
				
			||||||
            msg = "cannot delete %s";
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
 | 
					 | 
				
			||||||
            invalid_target,
 | 
					 | 
				
			||||||
            msg,
 | 
					 | 
				
			||||||
            _PyPegen_get_expr_name(invalid_target)
 | 
					 | 
				
			||||||
        );
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    return RAISE_SYNTAX_ERROR("invalid syntax");
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
 | 
					void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
 | 
				
			||||||
void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions);
 | 
					void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// Parser API
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
 | 
				
			||||||
 | 
					void _PyPegen_Parser_Free(Parser *);
 | 
				
			||||||
 | 
					mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
 | 
				
			||||||
 | 
					                                    const char *, const char *, PyCompilerFlags *, int *, PyArena *);
 | 
				
			||||||
 | 
					void *_PyPegen_run_parser(Parser *);
 | 
				
			||||||
 | 
					mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
 | 
				
			||||||
 | 
					asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Generated function in parse.c - function definition in python.gram
 | 
					// Generated function in parse.c - function definition in python.gram
 | 
				
			||||||
void *_PyPegen_parse(Parser *);
 | 
					void *_PyPegen_parse(Parser *);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										425
									
								
								Parser/pegen_errors.c
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										425
									
								
								Parser/pegen_errors.c
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,425 @@
 | 
				
			||||||
 | 
					#include <Python.h>
 | 
				
			||||||
 | 
					#include <errcode.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#include "tokenizer.h"
 | 
				
			||||||
 | 
					#include "pegen.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// TOKENIZER ERRORS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void
 | 
				
			||||||
 | 
					_PyPegen_raise_tokenizer_init_error(PyObject *filename)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    if (!(PyErr_ExceptionMatches(PyExc_LookupError)
 | 
				
			||||||
 | 
					          || PyErr_ExceptionMatches(PyExc_SyntaxError)
 | 
				
			||||||
 | 
					          || PyErr_ExceptionMatches(PyExc_ValueError)
 | 
				
			||||||
 | 
					          || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    PyObject *errstr = NULL;
 | 
				
			||||||
 | 
					    PyObject *tuple = NULL;
 | 
				
			||||||
 | 
					    PyObject *type;
 | 
				
			||||||
 | 
					    PyObject *value;
 | 
				
			||||||
 | 
					    PyObject *tback;
 | 
				
			||||||
 | 
					    PyErr_Fetch(&type, &value, &tback);
 | 
				
			||||||
 | 
					    errstr = PyObject_Str(value);
 | 
				
			||||||
 | 
					    if (!errstr) {
 | 
				
			||||||
 | 
					        goto error;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
 | 
				
			||||||
 | 
					    if (!tmp) {
 | 
				
			||||||
 | 
					        goto error;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    tuple = PyTuple_Pack(2, errstr, tmp);
 | 
				
			||||||
 | 
					    Py_DECREF(tmp);
 | 
				
			||||||
 | 
					    if (!value) {
 | 
				
			||||||
 | 
					        goto error;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    PyErr_SetObject(PyExc_SyntaxError, tuple);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					error:
 | 
				
			||||||
 | 
					    Py_XDECREF(type);
 | 
				
			||||||
 | 
					    Py_XDECREF(value);
 | 
				
			||||||
 | 
					    Py_XDECREF(tback);
 | 
				
			||||||
 | 
					    Py_XDECREF(errstr);
 | 
				
			||||||
 | 
					    Py_XDECREF(tuple);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static inline void
 | 
				
			||||||
 | 
					raise_unclosed_parentheses_error(Parser *p) {
 | 
				
			||||||
 | 
					       int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
 | 
				
			||||||
 | 
					       int error_col = p->tok->parencolstack[p->tok->level-1];
 | 
				
			||||||
 | 
					       RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError,
 | 
				
			||||||
 | 
					                                  error_lineno, error_col, error_lineno, -1,
 | 
				
			||||||
 | 
					                                  "'%c' was never closed",
 | 
				
			||||||
 | 
					                                  p->tok->parenstack[p->tok->level-1]);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int
 | 
				
			||||||
 | 
					_Pypegen_tokenizer_error(Parser *p)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    if (PyErr_Occurred()) {
 | 
				
			||||||
 | 
					        return -1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    const char *msg = NULL;
 | 
				
			||||||
 | 
					    PyObject* errtype = PyExc_SyntaxError;
 | 
				
			||||||
 | 
					    Py_ssize_t col_offset = -1;
 | 
				
			||||||
 | 
					    switch (p->tok->done) {
 | 
				
			||||||
 | 
					        case E_TOKEN:
 | 
				
			||||||
 | 
					            msg = "invalid token";
 | 
				
			||||||
 | 
					            break;
 | 
				
			||||||
 | 
					        case E_EOF:
 | 
				
			||||||
 | 
					            if (p->tok->level) {
 | 
				
			||||||
 | 
					                raise_unclosed_parentheses_error(p);
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            return -1;
 | 
				
			||||||
 | 
					        case E_DEDENT:
 | 
				
			||||||
 | 
					            RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level");
 | 
				
			||||||
 | 
					            return -1;
 | 
				
			||||||
 | 
					        case E_INTR:
 | 
				
			||||||
 | 
					            if (!PyErr_Occurred()) {
 | 
				
			||||||
 | 
					                PyErr_SetNone(PyExc_KeyboardInterrupt);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            return -1;
 | 
				
			||||||
 | 
					        case E_NOMEM:
 | 
				
			||||||
 | 
					            PyErr_NoMemory();
 | 
				
			||||||
 | 
					            return -1;
 | 
				
			||||||
 | 
					        case E_TABSPACE:
 | 
				
			||||||
 | 
					            errtype = PyExc_TabError;
 | 
				
			||||||
 | 
					            msg = "inconsistent use of tabs and spaces in indentation";
 | 
				
			||||||
 | 
					            break;
 | 
				
			||||||
 | 
					        case E_TOODEEP:
 | 
				
			||||||
 | 
					            errtype = PyExc_IndentationError;
 | 
				
			||||||
 | 
					            msg = "too many levels of indentation";
 | 
				
			||||||
 | 
					            break;
 | 
				
			||||||
 | 
					        case E_LINECONT: {
 | 
				
			||||||
 | 
					            col_offset = p->tok->cur - p->tok->buf - 1;
 | 
				
			||||||
 | 
					            msg = "unexpected character after line continuation character";
 | 
				
			||||||
 | 
					            break;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        default:
 | 
				
			||||||
 | 
					            msg = "unknown parsing error";
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
 | 
				
			||||||
 | 
					                               col_offset >= 0 ? col_offset : 0,
 | 
				
			||||||
 | 
					                               p->tok->lineno, -1, msg);
 | 
				
			||||||
 | 
					    return -1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					int
 | 
				
			||||||
 | 
					_Pypegen_raise_decode_error(Parser *p)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    assert(PyErr_Occurred());
 | 
				
			||||||
 | 
					    const char *errtype = NULL;
 | 
				
			||||||
 | 
					    if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
 | 
				
			||||||
 | 
					        errtype = "unicode error";
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
 | 
				
			||||||
 | 
					        errtype = "value error";
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (errtype) {
 | 
				
			||||||
 | 
					        PyObject *type;
 | 
				
			||||||
 | 
					        PyObject *value;
 | 
				
			||||||
 | 
					        PyObject *tback;
 | 
				
			||||||
 | 
					        PyObject *errstr;
 | 
				
			||||||
 | 
					        PyErr_Fetch(&type, &value, &tback);
 | 
				
			||||||
 | 
					        errstr = PyObject_Str(value);
 | 
				
			||||||
 | 
					        if (errstr) {
 | 
				
			||||||
 | 
					            RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
 | 
				
			||||||
 | 
					            Py_DECREF(errstr);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        else {
 | 
				
			||||||
 | 
					            PyErr_Clear();
 | 
				
			||||||
 | 
					            RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        Py_XDECREF(type);
 | 
				
			||||||
 | 
					        Py_XDECREF(value);
 | 
				
			||||||
 | 
					        Py_XDECREF(tback);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return -1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int
 | 
				
			||||||
 | 
					_PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) {
 | 
				
			||||||
 | 
					    // Tokenize the whole input to see if there are any tokenization
 | 
				
			||||||
 | 
					    // errors such as mistmatching parentheses. These will get priority
 | 
				
			||||||
 | 
					    // over generic syntax errors only if the line number of the error is
 | 
				
			||||||
 | 
					    // before the one that we had for the generic error.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // We don't want to tokenize to the end for interactive input
 | 
				
			||||||
 | 
					    if (p->tok->prompt != NULL) {
 | 
				
			||||||
 | 
					        return 0;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    PyObject *type, *value, *traceback;
 | 
				
			||||||
 | 
					    PyErr_Fetch(&type, &value, &traceback);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
 | 
				
			||||||
 | 
					    Py_ssize_t current_err_line = current_token->lineno;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    int ret = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (;;) {
 | 
				
			||||||
 | 
					        const char *start;
 | 
				
			||||||
 | 
					        const char *end;
 | 
				
			||||||
 | 
					        switch (_PyTokenizer_Get(p->tok, &start, &end)) {
 | 
				
			||||||
 | 
					            case ERRORTOKEN:
 | 
				
			||||||
 | 
					                if (p->tok->level != 0) {
 | 
				
			||||||
 | 
					                    int error_lineno = p->tok->parenlinenostack[p->tok->level-1];
 | 
				
			||||||
 | 
					                    if (current_err_line > error_lineno) {
 | 
				
			||||||
 | 
					                        raise_unclosed_parentheses_error(p);
 | 
				
			||||||
 | 
					                        ret = -1;
 | 
				
			||||||
 | 
					                        goto exit;
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            case ENDMARKER:
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            default:
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        break;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					exit:
 | 
				
			||||||
 | 
					    if (PyErr_Occurred()) {
 | 
				
			||||||
 | 
					        Py_XDECREF(value);
 | 
				
			||||||
 | 
					        Py_XDECREF(type);
 | 
				
			||||||
 | 
					        Py_XDECREF(traceback);
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        PyErr_Restore(type, value, traceback);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return ret;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// PARSER ERRORS
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void *
 | 
				
			||||||
 | 
					_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    if (p->fill == 0) {
 | 
				
			||||||
 | 
					        va_list va;
 | 
				
			||||||
 | 
					        va_start(va, errmsg);
 | 
				
			||||||
 | 
					        _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va);
 | 
				
			||||||
 | 
					        va_end(va);
 | 
				
			||||||
 | 
					        return NULL;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Token *t = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1];
 | 
				
			||||||
 | 
					    Py_ssize_t col_offset;
 | 
				
			||||||
 | 
					    Py_ssize_t end_col_offset = -1;
 | 
				
			||||||
 | 
					    if (t->col_offset == -1) {
 | 
				
			||||||
 | 
					        if (p->tok->cur == p->tok->buf) {
 | 
				
			||||||
 | 
					            col_offset = 0;
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            const char* start = p->tok->buf  ? p->tok->line_start : p->tok->buf;
 | 
				
			||||||
 | 
					            col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    } else {
 | 
				
			||||||
 | 
					        col_offset = t->col_offset + 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (t->end_col_offset != -1) {
 | 
				
			||||||
 | 
					        end_col_offset = t->end_col_offset + 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    va_list va;
 | 
				
			||||||
 | 
					    va_start(va, errmsg);
 | 
				
			||||||
 | 
					    _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va);
 | 
				
			||||||
 | 
					    va_end(va);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    return NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static PyObject *
 | 
				
			||||||
 | 
					get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    /* If the file descriptor is interactive, the source lines of the current
 | 
				
			||||||
 | 
					     * (multi-line) statement are stored in p->tok->interactive_src_start.
 | 
				
			||||||
 | 
					     * If not, we're parsing from a string, which means that the whole source
 | 
				
			||||||
 | 
					     * is stored in p->tok->str. */
 | 
				
			||||||
 | 
					    assert(p->tok->fp == NULL || p->tok->fp == stdin);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str;
 | 
				
			||||||
 | 
					    assert(cur_line != NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (int i = 0; i < lineno - 1; i++) {
 | 
				
			||||||
 | 
					        cur_line = strchr(cur_line, '\n') + 1;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    char *next_newline;
 | 
				
			||||||
 | 
					    if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
 | 
				
			||||||
 | 
					        next_newline = cur_line + strlen(cur_line);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace");
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void *
 | 
				
			||||||
 | 
					_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
 | 
				
			||||||
 | 
					                                    Py_ssize_t lineno, Py_ssize_t col_offset,
 | 
				
			||||||
 | 
					                                    Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
 | 
				
			||||||
 | 
					                                    const char *errmsg, va_list va)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    PyObject *value = NULL;
 | 
				
			||||||
 | 
					    PyObject *errstr = NULL;
 | 
				
			||||||
 | 
					    PyObject *error_line = NULL;
 | 
				
			||||||
 | 
					    PyObject *tmp = NULL;
 | 
				
			||||||
 | 
					    p->error_indicator = 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (end_lineno == CURRENT_POS) {
 | 
				
			||||||
 | 
					        end_lineno = p->tok->lineno;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if (end_col_offset == CURRENT_POS) {
 | 
				
			||||||
 | 
					        end_col_offset = p->tok->cur - p->tok->line_start;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (p->start_rule == Py_fstring_input) {
 | 
				
			||||||
 | 
					        const char *fstring_msg = "f-string: ";
 | 
				
			||||||
 | 
					        Py_ssize_t len = strlen(fstring_msg) + strlen(errmsg);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        char *new_errmsg = PyMem_Malloc(len + 1); // Lengths of both strings plus NULL character
 | 
				
			||||||
 | 
					        if (!new_errmsg) {
 | 
				
			||||||
 | 
					            return (void *) PyErr_NoMemory();
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Copy both strings into new buffer
 | 
				
			||||||
 | 
					        memcpy(new_errmsg, fstring_msg, strlen(fstring_msg));
 | 
				
			||||||
 | 
					        memcpy(new_errmsg + strlen(fstring_msg), errmsg, strlen(errmsg));
 | 
				
			||||||
 | 
					        new_errmsg[len] = 0;
 | 
				
			||||||
 | 
					        errmsg = new_errmsg;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    errstr = PyUnicode_FromFormatV(errmsg, va);
 | 
				
			||||||
 | 
					    if (!errstr) {
 | 
				
			||||||
 | 
					        goto error;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (p->tok->fp_interactive) {
 | 
				
			||||||
 | 
					        error_line = get_error_line_from_tokenizer_buffers(p, lineno);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    else if (p->start_rule == Py_file_input) {
 | 
				
			||||||
 | 
					        error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename,
 | 
				
			||||||
 | 
					                                                     (int) lineno, p->tok->encoding);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (!error_line) {
 | 
				
			||||||
 | 
					        /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
 | 
				
			||||||
 | 
					           then we need to find the error line from some other source, because
 | 
				
			||||||
 | 
					           p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly
 | 
				
			||||||
 | 
					           failed or we're parsing from a string or the REPL. There's a third edge case where
 | 
				
			||||||
 | 
					           we're actually parsing from a file, which has an E_EOF SyntaxError and in that case
 | 
				
			||||||
 | 
					           `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which
 | 
				
			||||||
 | 
					           does not physically exist */
 | 
				
			||||||
 | 
					        assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
 | 
				
			||||||
 | 
					            Py_ssize_t size = p->tok->inp - p->tok->buf;
 | 
				
			||||||
 | 
					            error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        else if (p->tok->fp == NULL || p->tok->fp == stdin) {
 | 
				
			||||||
 | 
					            error_line = get_error_line_from_tokenizer_buffers(p, lineno);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        else {
 | 
				
			||||||
 | 
					            error_line = PyUnicode_FromStringAndSize("", 0);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if (!error_line) {
 | 
				
			||||||
 | 
					            goto error;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (p->start_rule == Py_fstring_input) {
 | 
				
			||||||
 | 
					        col_offset -= p->starting_col_offset;
 | 
				
			||||||
 | 
					        end_col_offset -= p->starting_col_offset;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Py_ssize_t col_number = col_offset;
 | 
				
			||||||
 | 
					    Py_ssize_t end_col_number = end_col_offset;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if (p->tok->encoding != NULL) {
 | 
				
			||||||
 | 
					        col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset);
 | 
				
			||||||
 | 
					        if (col_number < 0) {
 | 
				
			||||||
 | 
					            goto error;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        if (end_col_number > 0) {
 | 
				
			||||||
 | 
					            Py_ssize_t end_col_offset = _PyPegen_byte_offset_to_character_offset(error_line, end_col_number);
 | 
				
			||||||
 | 
					            if (end_col_offset < 0) {
 | 
				
			||||||
 | 
					                goto error;
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
 | 
					                end_col_number = end_col_offset;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    tmp = Py_BuildValue("(OiiNii)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number);
 | 
				
			||||||
 | 
					    if (!tmp) {
 | 
				
			||||||
 | 
					        goto error;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    value = PyTuple_Pack(2, errstr, tmp);
 | 
				
			||||||
 | 
					    Py_DECREF(tmp);
 | 
				
			||||||
 | 
					    if (!value) {
 | 
				
			||||||
 | 
					        goto error;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    PyErr_SetObject(errtype, value);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    Py_DECREF(errstr);
 | 
				
			||||||
 | 
					    Py_DECREF(value);
 | 
				
			||||||
 | 
					    if (p->start_rule == Py_fstring_input) {
 | 
				
			||||||
 | 
					        PyMem_Free((void *)errmsg);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					error:
 | 
				
			||||||
 | 
					    Py_XDECREF(errstr);
 | 
				
			||||||
 | 
					    Py_XDECREF(error_line);
 | 
				
			||||||
 | 
					    if (p->start_rule == Py_fstring_input) {
 | 
				
			||||||
 | 
					        PyMem_Free((void *)errmsg);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return NULL;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void
 | 
				
			||||||
 | 
					_Pypegen_set_syntax_error(Parser* p, Token* last_token) {
 | 
				
			||||||
 | 
					    // Existing sintax error
 | 
				
			||||||
 | 
					    if (PyErr_Occurred()) {
 | 
				
			||||||
 | 
					        // Prioritize tokenizer errors to custom syntax errors raised
 | 
				
			||||||
 | 
					        // on the second phase only if the errors come from the parser.
 | 
				
			||||||
 | 
					        if (p->tok->done == E_DONE && PyErr_ExceptionMatches(PyExc_SyntaxError)) {
 | 
				
			||||||
 | 
					            _PyPegen_tokenize_full_source_to_check_for_errors(p);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        // Propagate the existing syntax error.
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // Initialization error
 | 
				
			||||||
 | 
					    if (p->fill == 0) {
 | 
				
			||||||
 | 
					        RAISE_SYNTAX_ERROR("error at start before reading any input");
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // Parser encountered EOF (End of File) unexpectedtly
 | 
				
			||||||
 | 
					    if (p->tok->done == E_EOF) {
 | 
				
			||||||
 | 
					        if (p->tok->level) {
 | 
				
			||||||
 | 
					            raise_unclosed_parentheses_error(p);
 | 
				
			||||||
 | 
					        } else {
 | 
				
			||||||
 | 
					            RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // Indentation error in the tokenizer
 | 
				
			||||||
 | 
					    if (last_token->type == INDENT || last_token->type == DEDENT) {
 | 
				
			||||||
 | 
					        RAISE_INDENTATION_ERROR(last_token->type == INDENT ? "unexpected indent" : "unexpected unindent");
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    // Unknown error (generic case)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Use the last token we found on the first pass to avoid reporting
 | 
				
			||||||
 | 
					    // incorrect locations for generic syntax errors just because we reached
 | 
				
			||||||
 | 
					    // further away when trying to find specific syntax errors in the second
 | 
				
			||||||
 | 
					    // pass.
 | 
				
			||||||
 | 
					    RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax");
 | 
				
			||||||
 | 
					    // _PyPegen_tokenize_full_source_to_check_for_errors will override the existing
 | 
				
			||||||
 | 
					    // generic SyntaxError we just raised if errors are found.
 | 
				
			||||||
 | 
					    _PyPegen_tokenize_full_source_to_check_for_errors(p);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -22,7 +22,7 @@ data/xxl.py:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
build: peg_extension/parse.c
 | 
					build: peg_extension/parse.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen.c ../../Parser/string_parser.c ../../Parser/*.h pegen/grammar_parser.py
 | 
					peg_extension/parse.c: $(GRAMMAR) $(TOKENS) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen.c ../../Parser/pegen_errors.c ../../Parser/string_parser.c ../../Parser/action_helpers.c ../../Parser/*.h pegen/grammar_parser.py
 | 
				
			||||||
	$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension
 | 
						$(PYTHON) -m pegen -q c $(GRAMMAR) $(TOKENS) -o peg_extension/parse.c --compile-extension
 | 
				
			||||||
 | 
					
 | 
				
			||||||
clean:
 | 
					clean:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -69,6 +69,8 @@ def compile_c_extension(
 | 
				
			||||||
                str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
 | 
					                str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
 | 
				
			||||||
                str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
 | 
					                str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
 | 
				
			||||||
                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
 | 
					                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
 | 
				
			||||||
 | 
					                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
 | 
				
			||||||
 | 
					                str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
 | 
				
			||||||
                str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
 | 
					                str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
 | 
				
			||||||
                str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
 | 
					                str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
 | 
				
			||||||
                generated_source_path,
 | 
					                generated_source_path,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue