| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  | #include <Python.h>
 | 
					
						
							|  |  |  | #include <errcode.h>
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-07-03 12:48:50 +02:00
										 |  |  | #include "pycore_pyerrors.h"      // _PyErr_ProgramDecodedTextObject()
 | 
					
						
							| 
									
										
										
										
											2023-10-11 17:14:44 +02:00
										 |  |  | #include "lexer/state.h"
 | 
					
						
							|  |  |  | #include "lexer/lexer.h"
 | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  | #include "pegen.h"
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // TOKENIZER ERRORS
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | _PyPegen_raise_tokenizer_init_error(PyObject *filename) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (!(PyErr_ExceptionMatches(PyExc_LookupError) | 
					
						
							|  |  |  |           || PyErr_ExceptionMatches(PyExc_SyntaxError) | 
					
						
							|  |  |  |           || PyErr_ExceptionMatches(PyExc_ValueError) | 
					
						
							|  |  |  |           || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) { | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     PyObject *errstr = NULL; | 
					
						
							|  |  |  |     PyObject *tuple = NULL; | 
					
						
							|  |  |  |     PyObject *type; | 
					
						
							|  |  |  |     PyObject *value; | 
					
						
							|  |  |  |     PyObject *tback; | 
					
						
							|  |  |  |     PyErr_Fetch(&type, &value, &tback); | 
					
						
							|  |  |  |     errstr = PyObject_Str(value); | 
					
						
							|  |  |  |     if (!errstr) { | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None); | 
					
						
							|  |  |  |     if (!tmp) { | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     tuple = PyTuple_Pack(2, errstr, tmp); | 
					
						
							|  |  |  |     Py_DECREF(tmp); | 
					
						
							|  |  |  |     if (!value) { | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     PyErr_SetObject(PyExc_SyntaxError, tuple); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | error: | 
					
						
							|  |  |  |     Py_XDECREF(type); | 
					
						
							|  |  |  |     Py_XDECREF(value); | 
					
						
							|  |  |  |     Py_XDECREF(tback); | 
					
						
							|  |  |  |     Py_XDECREF(errstr); | 
					
						
							|  |  |  |     Py_XDECREF(tuple); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static inline void | 
					
						
							|  |  |  | raise_unclosed_parentheses_error(Parser *p) { | 
					
						
							|  |  |  |        int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; | 
					
						
							|  |  |  |        int error_col = p->tok->parencolstack[p->tok->level-1]; | 
					
						
							|  |  |  |        RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, | 
					
						
							|  |  |  |                                   error_lineno, error_col, error_lineno, -1, | 
					
						
							|  |  |  |                                   "'%c' was never closed", | 
					
						
							|  |  |  |                                   p->tok->parenstack[p->tok->level-1]); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | _Pypegen_tokenizer_error(Parser *p) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     if (PyErr_Occurred()) { | 
					
						
							|  |  |  |         return -1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     const char *msg = NULL; | 
					
						
							|  |  |  |     PyObject* errtype = PyExc_SyntaxError; | 
					
						
							|  |  |  |     Py_ssize_t col_offset = -1; | 
					
						
							| 
									
										
										
										
											2023-10-16 16:42:49 +02:00
										 |  |  |     p->error_indicator = 1; | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |     switch (p->tok->done) { | 
					
						
							|  |  |  |         case E_TOKEN: | 
					
						
							|  |  |  |             msg = "invalid token"; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case E_EOF: | 
					
						
							|  |  |  |             if (p->tok->level) { | 
					
						
							|  |  |  |                 raise_unclosed_parentheses_error(p); | 
					
						
							|  |  |  |             } else { | 
					
						
							|  |  |  |                 RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         case E_DEDENT: | 
					
						
							|  |  |  |             RAISE_INDENTATION_ERROR("unindent does not match any outer indentation level"); | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         case E_INTR: | 
					
						
							|  |  |  |             if (!PyErr_Occurred()) { | 
					
						
							|  |  |  |                 PyErr_SetNone(PyExc_KeyboardInterrupt); | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         case E_NOMEM: | 
					
						
							|  |  |  |             PyErr_NoMemory(); | 
					
						
							|  |  |  |             return -1; | 
					
						
							|  |  |  |         case E_TABSPACE: | 
					
						
							|  |  |  |             errtype = PyExc_TabError; | 
					
						
							|  |  |  |             msg = "inconsistent use of tabs and spaces in indentation"; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case E_TOODEEP: | 
					
						
							|  |  |  |             errtype = PyExc_IndentationError; | 
					
						
							|  |  |  |             msg = "too many levels of indentation"; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         case E_LINECONT: { | 
					
						
							|  |  |  |             col_offset = p->tok->cur - p->tok->buf - 1; | 
					
						
							|  |  |  |             msg = "unexpected character after line continuation character"; | 
					
						
							|  |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-10-16 16:42:49 +02:00
										 |  |  |         case E_COLUMNOVERFLOW: | 
					
						
							|  |  |  |             PyErr_SetString(PyExc_OverflowError, | 
					
						
							|  |  |  |                     "Parser column offset overflow - source line is too big"); | 
					
						
							|  |  |  |             return -1; | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |         default: | 
					
						
							|  |  |  |             msg = "unknown parsing error"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, | 
					
						
							|  |  |  |                                col_offset >= 0 ? col_offset : 0, | 
					
						
							|  |  |  |                                p->tok->lineno, -1, msg); | 
					
						
							|  |  |  |     return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | int | 
					
						
							|  |  |  | _Pypegen_raise_decode_error(Parser *p) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     assert(PyErr_Occurred()); | 
					
						
							|  |  |  |     const char *errtype = NULL; | 
					
						
							|  |  |  |     if (PyErr_ExceptionMatches(PyExc_UnicodeError)) { | 
					
						
							|  |  |  |         errtype = "unicode error"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else if (PyErr_ExceptionMatches(PyExc_ValueError)) { | 
					
						
							|  |  |  |         errtype = "value error"; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (errtype) { | 
					
						
							|  |  |  |         PyObject *type; | 
					
						
							|  |  |  |         PyObject *value; | 
					
						
							|  |  |  |         PyObject *tback; | 
					
						
							|  |  |  |         PyObject *errstr; | 
					
						
							|  |  |  |         PyErr_Fetch(&type, &value, &tback); | 
					
						
							|  |  |  |         errstr = PyObject_Str(value); | 
					
						
							|  |  |  |         if (errstr) { | 
					
						
							|  |  |  |             RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr); | 
					
						
							|  |  |  |             Py_DECREF(errstr); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							|  |  |  |             PyErr_Clear(); | 
					
						
							|  |  |  |             RAISE_SYNTAX_ERROR("(%s) unknown error", errtype); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         Py_XDECREF(type); | 
					
						
							|  |  |  |         Py_XDECREF(value); | 
					
						
							|  |  |  |         Py_XDECREF(tback); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return -1; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static int | 
					
						
							|  |  |  | _PyPegen_tokenize_full_source_to_check_for_errors(Parser *p) { | 
					
						
							|  |  |  |     // Tokenize the whole input to see if there are any tokenization
 | 
					
						
							| 
									
										
										
										
											2024-07-23 17:04:14 +08:00
										 |  |  |     // errors such as mismatching parentheses. These will get priority
 | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |     // over generic syntax errors only if the line number of the error is
 | 
					
						
							|  |  |  |     // before the one that we had for the generic error.
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // We don't want to tokenize to the end for interactive input
 | 
					
						
							|  |  |  |     if (p->tok->prompt != NULL) { | 
					
						
							|  |  |  |         return 0; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     PyObject *type, *value, *traceback; | 
					
						
							|  |  |  |     PyErr_Fetch(&type, &value, &traceback); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Token *current_token = p->known_err_token != NULL ? p->known_err_token : p->tokens[p->fill - 1]; | 
					
						
							|  |  |  |     Py_ssize_t current_err_line = current_token->lineno; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     int ret = 0; | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |     struct token new_token; | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |     _PyToken_Init(&new_token); | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     for (;;) { | 
					
						
							| 
									
										
										
										
											2022-10-06 16:07:17 -07:00
										 |  |  |         switch (_PyTokenizer_Get(p->tok, &new_token)) { | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |             case ERRORTOKEN: | 
					
						
							| 
									
										
										
										
											2022-12-06 23:09:56 +00:00
										 |  |  |                 if (PyErr_Occurred()) { | 
					
						
							|  |  |  |                     ret = -1; | 
					
						
							|  |  |  |                     goto exit; | 
					
						
							|  |  |  |                 } | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |                 if (p->tok->level != 0) { | 
					
						
							|  |  |  |                     int error_lineno = p->tok->parenlinenostack[p->tok->level-1]; | 
					
						
							|  |  |  |                     if (current_err_line > error_lineno) { | 
					
						
							|  |  |  |                         raise_unclosed_parentheses_error(p); | 
					
						
							|  |  |  |                         ret = -1; | 
					
						
							|  |  |  |                         goto exit; | 
					
						
							|  |  |  |                     } | 
					
						
							|  |  |  |                 } | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             case ENDMARKER: | 
					
						
							|  |  |  |                 break; | 
					
						
							|  |  |  |             default: | 
					
						
							|  |  |  |                 continue; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         break; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | exit: | 
					
						
							| 
									
										
										
										
											2023-05-21 02:03:02 +02:00
										 |  |  |     _PyToken_Free(&new_token); | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |     // If we're in an f-string, we want the syntax error in the expression part
 | 
					
						
							|  |  |  |     // to propagate, so that tokenizer errors (like expecting '}') that happen afterwards
 | 
					
						
							|  |  |  |     // do not swallow it.
 | 
					
						
							|  |  |  |     if (PyErr_Occurred() && p->tok->tok_mode_stack_index <= 0) { | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |         Py_XDECREF(value); | 
					
						
							|  |  |  |         Py_XDECREF(type); | 
					
						
							|  |  |  |         Py_XDECREF(traceback); | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         PyErr_Restore(type, value, traceback); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return ret; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | // PARSER ERRORS
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void * | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  | _PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...) | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  | { | 
					
						
							| 
									
										
										
										
											2023-11-27 18:36:11 +00:00
										 |  |  |     // Bail out if we already have an error set.
 | 
					
						
							|  |  |  |     if (p->error_indicator && PyErr_Occurred()) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |     if (p->fill == 0) { | 
					
						
							|  |  |  |         va_list va; | 
					
						
							|  |  |  |         va_start(va, errmsg); | 
					
						
							|  |  |  |         _PyPegen_raise_error_known_location(p, errtype, 0, 0, 0, -1, errmsg, va); | 
					
						
							|  |  |  |         va_end(va); | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2023-04-19 17:18:16 +01:00
										 |  |  |     if (use_mark && p->mark == p->fill && _PyPegen_fill_token(p) < 0) { | 
					
						
							|  |  |  |         p->error_indicator = 1; | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     Token *t = p->known_err_token != NULL | 
					
						
							|  |  |  |                    ? p->known_err_token | 
					
						
							|  |  |  |                    : p->tokens[use_mark ? p->mark : p->fill - 1]; | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |     Py_ssize_t col_offset; | 
					
						
							|  |  |  |     Py_ssize_t end_col_offset = -1; | 
					
						
							|  |  |  |     if (t->col_offset == -1) { | 
					
						
							|  |  |  |         if (p->tok->cur == p->tok->buf) { | 
					
						
							|  |  |  |             col_offset = 0; | 
					
						
							|  |  |  |         } else { | 
					
						
							|  |  |  |             const char* start = p->tok->buf  ? p->tok->line_start : p->tok->buf; | 
					
						
							|  |  |  |             col_offset = Py_SAFE_DOWNCAST(p->tok->cur - start, intptr_t, int); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } else { | 
					
						
							|  |  |  |         col_offset = t->col_offset + 1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (t->end_col_offset != -1) { | 
					
						
							|  |  |  |         end_col_offset = t->end_col_offset + 1; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     va_list va; | 
					
						
							|  |  |  |     va_start(va, errmsg); | 
					
						
							|  |  |  |     _PyPegen_raise_error_known_location(p, errtype, t->lineno, col_offset, t->end_lineno, end_col_offset, errmsg, va); | 
					
						
							|  |  |  |     va_end(va); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | static PyObject * | 
					
						
							|  |  |  | get_error_line_from_tokenizer_buffers(Parser *p, Py_ssize_t lineno) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     /* If the file descriptor is interactive, the source lines of the current
 | 
					
						
							|  |  |  |      * (multi-line) statement are stored in p->tok->interactive_src_start. | 
					
						
							|  |  |  |      * If not, we're parsing from a string, which means that the whole source | 
					
						
							|  |  |  |      * is stored in p->tok->str. */ | 
					
						
							| 
									
										
										
										
											2022-07-16 10:35:19 +02:00
										 |  |  |     assert((p->tok->fp == NULL && p->tok->str != NULL) || p->tok->fp != NULL); | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     char *cur_line = p->tok->fp_interactive ? p->tok->interactive_src_start : p->tok->str; | 
					
						
							| 
									
										
										
										
											2022-03-26 16:29:02 +00:00
										 |  |  |     if (cur_line == NULL) { | 
					
						
							|  |  |  |         assert(p->tok->fp_interactive); | 
					
						
							|  |  |  |         // We can reach this point if the tokenizer buffers for interactive source have not been
 | 
					
						
							|  |  |  |         // initialized because we failed to decode the original source with the given locale.
 | 
					
						
							| 
									
										
										
										
											2024-10-09 17:15:23 +02:00
										 |  |  |         return Py_GetConstant(Py_CONSTANT_EMPTY_STR); | 
					
						
							| 
									
										
										
										
											2022-03-26 16:29:02 +00:00
										 |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-11 16:30:39 +00:00
										 |  |  |     Py_ssize_t relative_lineno = p->starting_lineno ? lineno - p->starting_lineno + 1 : lineno; | 
					
						
							| 
									
										
										
										
											2022-01-20 15:34:13 +00:00
										 |  |  |     const char* buf_end = p->tok->fp_interactive ? p->tok->interactive_src_end : p->tok->inp; | 
					
						
							| 
									
										
										
										
											2022-01-11 16:30:39 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-11-27 18:37:48 +00:00
										 |  |  |     if (buf_end < cur_line) { | 
					
						
							|  |  |  |         buf_end = cur_line + strlen(cur_line); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-11 16:30:39 +00:00
										 |  |  |     for (int i = 0; i < relative_lineno - 1; i++) { | 
					
						
							| 
									
										
										
										
											2022-07-05 17:39:21 +01:00
										 |  |  |         char *new_line = strchr(cur_line, '\n'); | 
					
						
							| 
									
										
										
										
											2022-01-18 11:13:00 +00:00
										 |  |  |         // The assert is here for debug builds but the conditional that
 | 
					
						
							|  |  |  |         // follows is there so in release builds we do not crash at the cost
 | 
					
						
							|  |  |  |         // to report a potentially wrong line.
 | 
					
						
							| 
									
										
										
										
											2022-07-05 17:39:21 +01:00
										 |  |  |         assert(new_line != NULL && new_line + 1 < buf_end); | 
					
						
							|  |  |  |         if (new_line == NULL || new_line + 1 > buf_end) { | 
					
						
							| 
									
										
										
										
											2022-01-11 16:30:39 +00:00
										 |  |  |             break; | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2022-07-05 17:39:21 +01:00
										 |  |  |         cur_line = new_line + 1; | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     char *next_newline; | 
					
						
							|  |  |  |     if ((next_newline = strchr(cur_line, '\n')) == NULL) { // This is the last line
 | 
					
						
							|  |  |  |         next_newline = cur_line + strlen(cur_line); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     return PyUnicode_DecodeUTF8(cur_line, next_newline - cur_line, "replace"); | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void * | 
					
						
							|  |  |  | _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, | 
					
						
							|  |  |  |                                     Py_ssize_t lineno, Py_ssize_t col_offset, | 
					
						
							|  |  |  |                                     Py_ssize_t end_lineno, Py_ssize_t end_col_offset, | 
					
						
							|  |  |  |                                     const char *errmsg, va_list va) | 
					
						
							|  |  |  | { | 
					
						
							| 
									
										
										
										
											2024-01-02 13:00:52 +00:00
										 |  |  |     // Bail out if we already have an error set.
 | 
					
						
							|  |  |  |     if (p->error_indicator && PyErr_Occurred()) { | 
					
						
							|  |  |  |         return NULL; | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |     PyObject *value = NULL; | 
					
						
							|  |  |  |     PyObject *errstr = NULL; | 
					
						
							|  |  |  |     PyObject *error_line = NULL; | 
					
						
							|  |  |  |     PyObject *tmp = NULL; | 
					
						
							|  |  |  |     p->error_indicator = 1; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (end_lineno == CURRENT_POS) { | 
					
						
							|  |  |  |         end_lineno = p->tok->lineno; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     if (end_col_offset == CURRENT_POS) { | 
					
						
							|  |  |  |         end_col_offset = p->tok->cur - p->tok->line_start; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     errstr = PyUnicode_FromFormatV(errmsg, va); | 
					
						
							|  |  |  |     if (!errstr) { | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-03-26 16:29:02 +00:00
										 |  |  |     if (p->tok->fp_interactive && p->tok->interactive_src_start != NULL) { | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |         error_line = get_error_line_from_tokenizer_buffers(p, lineno); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     else if (p->start_rule == Py_file_input) { | 
					
						
							|  |  |  |         error_line = _PyErr_ProgramDecodedTextObject(p->tok->filename, | 
					
						
							|  |  |  |                                                      (int) lineno, p->tok->encoding); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (!error_line) { | 
					
						
							|  |  |  |         /* PyErr_ProgramTextObject was not called or returned NULL. If it was not called,
 | 
					
						
							|  |  |  |            then we need to find the error line from some other source, because | 
					
						
							|  |  |  |            p->start_rule != Py_file_input. If it returned NULL, then it either unexpectedly | 
					
						
							|  |  |  |            failed or we're parsing from a string or the REPL. There's a third edge case where | 
					
						
							|  |  |  |            we're actually parsing from a file, which has an E_EOF SyntaxError and in that case | 
					
						
							|  |  |  |            `PyErr_ProgramTextObject` fails because lineno points to last_file_line + 1, which | 
					
						
							|  |  |  |            does not physically exist */ | 
					
						
							|  |  |  |         assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) { | 
					
						
							| 
									
										
										
										
											2025-02-13 01:07:37 +00:00
										 |  |  |             Py_ssize_t size = p->tok->inp - p->tok->line_start; | 
					
						
							|  |  |  |             error_line = PyUnicode_DecodeUTF8(p->tok->line_start, size, "replace"); | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |         } | 
					
						
							|  |  |  |         else if (p->tok->fp == NULL || p->tok->fp == stdin) { | 
					
						
							|  |  |  |             error_line = get_error_line_from_tokenizer_buffers(p, lineno); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         else { | 
					
						
							| 
									
										
										
										
											2024-10-09 17:15:23 +02:00
										 |  |  |             error_line = Py_GetConstant(Py_CONSTANT_EMPTY_STR); | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |         } | 
					
						
							|  |  |  |         if (!error_line) { | 
					
						
							|  |  |  |             goto error; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Py_ssize_t col_number = col_offset; | 
					
						
							|  |  |  |     Py_ssize_t end_col_number = end_col_offset; | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-26 13:57:09 +01:00
										 |  |  |     col_number = _PyPegen_byte_offset_to_character_offset(error_line, col_offset); | 
					
						
							|  |  |  |     if (col_number < 0) { | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if (end_col_offset > 0) { | 
					
						
							|  |  |  |         end_col_number = _PyPegen_byte_offset_to_character_offset(error_line, end_col_offset); | 
					
						
							|  |  |  |         if (end_col_number < 0) { | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |             goto error; | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |     } | 
					
						
							| 
									
										
										
										
											2024-02-26 13:57:09 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-08-11 10:55:57 +02:00
										 |  |  |     tmp = Py_BuildValue("(OnnNnn)", p->tok->filename, lineno, col_number, error_line, end_lineno, end_col_number); | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |     if (!tmp) { | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     value = PyTuple_Pack(2, errstr, tmp); | 
					
						
							|  |  |  |     Py_DECREF(tmp); | 
					
						
							|  |  |  |     if (!value) { | 
					
						
							|  |  |  |         goto error; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     PyErr_SetObject(errtype, value); | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Py_DECREF(errstr); | 
					
						
							|  |  |  |     Py_DECREF(value); | 
					
						
							|  |  |  |     return NULL; | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | error: | 
					
						
							|  |  |  |     Py_XDECREF(errstr); | 
					
						
							|  |  |  |     Py_XDECREF(error_line); | 
					
						
							|  |  |  |     return NULL; | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | _Pypegen_set_syntax_error(Parser* p, Token* last_token) { | 
					
						
							| 
									
										
										
										
											2023-12-01 17:37:40 +08:00
										 |  |  |     // Existing syntax error
 | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |     if (PyErr_Occurred()) { | 
					
						
							|  |  |  |         // Prioritize tokenizer errors to custom syntax errors raised
 | 
					
						
							|  |  |  |         // on the second phase only if the errors come from the parser.
 | 
					
						
							| 
									
										
										
										
											2022-01-04 10:41:22 +00:00
										 |  |  |         int is_tok_ok = (p->tok->done == E_DONE || p->tok->done == E_OK); | 
					
						
							|  |  |  |         if (is_tok_ok && PyErr_ExceptionMatches(PyExc_SyntaxError)) { | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |             _PyPegen_tokenize_full_source_to_check_for_errors(p); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         // Propagate the existing syntax error.
 | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     // Initialization error
 | 
					
						
							|  |  |  |     if (p->fill == 0) { | 
					
						
							|  |  |  |         RAISE_SYNTAX_ERROR("error at start before reading any input"); | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     // Parser encountered EOF (End of File) unexpectedtly
 | 
					
						
							| 
									
										
										
										
											2021-11-24 22:21:23 +00:00
										 |  |  |     if (last_token->type == ERRORTOKEN && p->tok->done == E_EOF) { | 
					
						
							| 
									
										
										
										
											2021-11-21 01:08:50 +00:00
										 |  |  |         if (p->tok->level) { | 
					
						
							|  |  |  |             raise_unclosed_parentheses_error(p); | 
					
						
							|  |  |  |         } else { | 
					
						
							|  |  |  |             RAISE_SYNTAX_ERROR("unexpected EOF while parsing"); | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     // Indentation error in the tokenizer
 | 
					
						
							|  |  |  |     if (last_token->type == INDENT || last_token->type == DEDENT) { | 
					
						
							|  |  |  |         RAISE_INDENTATION_ERROR(last_token->type == INDENT ? "unexpected indent" : "unexpected unindent"); | 
					
						
							|  |  |  |         return; | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  |     // Unknown error (generic case)
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     // Use the last token we found on the first pass to avoid reporting
 | 
					
						
							|  |  |  |     // incorrect locations for generic syntax errors just because we reached
 | 
					
						
							|  |  |  |     // further away when trying to find specific syntax errors in the second
 | 
					
						
							|  |  |  |     // pass.
 | 
					
						
							|  |  |  |     RAISE_SYNTAX_ERROR_KNOWN_LOCATION(last_token, "invalid syntax"); | 
					
						
							|  |  |  |     // _PyPegen_tokenize_full_source_to_check_for_errors will override the existing
 | 
					
						
							|  |  |  |     // generic SyntaxError we just raised if errors are found.
 | 
					
						
							|  |  |  |     _PyPegen_tokenize_full_source_to_check_for_errors(p); | 
					
						
							| 
									
										
										
										
											2021-11-24 22:21:23 +00:00
										 |  |  | } | 
					
						
							| 
									
										
										
										
											2023-08-22 03:41:50 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | void | 
					
						
							|  |  |  | _Pypegen_stack_overflow(Parser *p) | 
					
						
							|  |  |  | { | 
					
						
							|  |  |  |     p->error_indicator = 1; | 
					
						
							|  |  |  |     PyErr_SetString(PyExc_MemoryError, | 
					
						
							|  |  |  |         "Parser stack overflowed - Python source too complex to parse"); | 
					
						
							|  |  |  | } |