mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	[3.13] gh-116042: Fix location for SyntaxErrors of invalid escapes in the tokenizer (GH-116049) (#130066)
(cherry picked from commit 56eda25633) (cherry picked from commit 369704b428)
This commit is contained in:
		
							parent
							
								
									4c2a59b7b8
								
							
						
					
					
						commit
						8d1d36b742
					
				
					 5 changed files with 82 additions and 18 deletions
				
			
		|  | @ -660,7 +660,7 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self): | |||
|             self.assertEqual( | ||||
|                 stderr.splitlines()[-3:], | ||||
|                 [   b'    foo = """\\q"""', | ||||
|                     b'          ^^^^^^^^', | ||||
|                     b'             ^^', | ||||
|                     b'SyntaxError: invalid escape sequence \'\\q\'' | ||||
|                 ], | ||||
|             ) | ||||
|  |  | |||
|  | @ -118,7 +118,7 @@ def test_eval_str_invalid_escape(self): | |||
|         self.assertEqual(len(w), 1) | ||||
|         self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") | ||||
|         self.assertEqual(w[0].filename, '<string>') | ||||
|         self.assertEqual(w[0].lineno, 1) | ||||
|         self.assertEqual(w[0].lineno, 2) | ||||
| 
 | ||||
|         with warnings.catch_warnings(record=True) as w: | ||||
|             warnings.simplefilter('error', category=SyntaxWarning) | ||||
|  | @ -128,7 +128,7 @@ def test_eval_str_invalid_escape(self): | |||
|         self.assertEqual(w, []) | ||||
|         self.assertEqual(exc.msg, r"invalid escape sequence '\z'") | ||||
|         self.assertEqual(exc.filename, '<string>') | ||||
|         self.assertEqual(exc.lineno, 1) | ||||
|         self.assertEqual(exc.lineno, 2) | ||||
|         self.assertEqual(exc.offset, 1) | ||||
| 
 | ||||
|         # Check that the warning is raised only once if there are syntax errors | ||||
|  | @ -155,7 +155,7 @@ def test_eval_str_invalid_octal_escape(self): | |||
|         self.assertEqual(str(w[0].message), | ||||
|                          r"invalid octal escape sequence '\407'") | ||||
|         self.assertEqual(w[0].filename, '<string>') | ||||
|         self.assertEqual(w[0].lineno, 1) | ||||
|         self.assertEqual(w[0].lineno, 2) | ||||
| 
 | ||||
|         with warnings.catch_warnings(record=True) as w: | ||||
|             warnings.simplefilter('error', category=SyntaxWarning) | ||||
|  | @ -165,9 +165,32 @@ def test_eval_str_invalid_octal_escape(self): | |||
|         self.assertEqual(w, []) | ||||
|         self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") | ||||
|         self.assertEqual(exc.filename, '<string>') | ||||
|         self.assertEqual(exc.lineno, 1) | ||||
|         self.assertEqual(exc.lineno, 2) | ||||
|         self.assertEqual(exc.offset, 1) | ||||
| 
 | ||||
|     def test_invalid_escape_locations_with_offset(self): | ||||
|         with warnings.catch_warnings(record=True) as w: | ||||
|             warnings.simplefilter('error', category=SyntaxWarning) | ||||
|             with self.assertRaises(SyntaxError) as cm: | ||||
|                 eval("\"'''''''''''''''''''''invalid\ Escape\"") | ||||
|             exc = cm.exception | ||||
|         self.assertEqual(w, []) | ||||
|         self.assertEqual(exc.msg, r"invalid escape sequence '\ '") | ||||
|         self.assertEqual(exc.filename, '<string>') | ||||
|         self.assertEqual(exc.lineno, 1) | ||||
|         self.assertEqual(exc.offset, 30) | ||||
| 
 | ||||
|         with warnings.catch_warnings(record=True) as w: | ||||
|             warnings.simplefilter('error', category=SyntaxWarning) | ||||
|             with self.assertRaises(SyntaxError) as cm: | ||||
|                 eval("\"''Incorrect \ logic?\"") | ||||
|             exc = cm.exception | ||||
|         self.assertEqual(w, []) | ||||
|         self.assertEqual(exc.msg, r"invalid escape sequence '\ '") | ||||
|         self.assertEqual(exc.filename, '<string>') | ||||
|         self.assertEqual(exc.lineno, 1) | ||||
|         self.assertEqual(exc.offset, 14) | ||||
| 
 | ||||
|     def test_eval_str_raw(self): | ||||
|         self.assertEqual(eval(""" r'x' """), 'x') | ||||
|         self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') | ||||
|  | @ -207,7 +230,7 @@ def test_eval_bytes_invalid_escape(self): | |||
|         self.assertEqual(len(w), 1) | ||||
|         self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") | ||||
|         self.assertEqual(w[0].filename, '<string>') | ||||
|         self.assertEqual(w[0].lineno, 1) | ||||
|         self.assertEqual(w[0].lineno, 2) | ||||
| 
 | ||||
|         with warnings.catch_warnings(record=True) as w: | ||||
|             warnings.simplefilter('error', category=SyntaxWarning) | ||||
|  | @ -217,7 +240,7 @@ def test_eval_bytes_invalid_escape(self): | |||
|         self.assertEqual(w, []) | ||||
|         self.assertEqual(exc.msg, r"invalid escape sequence '\z'") | ||||
|         self.assertEqual(exc.filename, '<string>') | ||||
|         self.assertEqual(exc.lineno, 1) | ||||
|         self.assertEqual(exc.lineno, 2) | ||||
| 
 | ||||
|     def test_eval_bytes_invalid_octal_escape(self): | ||||
|         for i in range(0o400, 0o1000): | ||||
|  | @ -231,7 +254,7 @@ def test_eval_bytes_invalid_octal_escape(self): | |||
|         self.assertEqual(str(w[0].message), | ||||
|                          r"invalid octal escape sequence '\407'") | ||||
|         self.assertEqual(w[0].filename, '<string>') | ||||
|         self.assertEqual(w[0].lineno, 1) | ||||
|         self.assertEqual(w[0].lineno, 2) | ||||
| 
 | ||||
|         with warnings.catch_warnings(record=True) as w: | ||||
|             warnings.simplefilter('error', category=SyntaxWarning) | ||||
|  | @ -241,7 +264,7 @@ def test_eval_bytes_invalid_octal_escape(self): | |||
|         self.assertEqual(w, []) | ||||
|         self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") | ||||
|         self.assertEqual(exc.filename, '<string>') | ||||
|         self.assertEqual(exc.lineno, 1) | ||||
|         self.assertEqual(exc.lineno, 2) | ||||
| 
 | ||||
|     def test_eval_bytes_raw(self): | ||||
|         self.assertEqual(eval(""" br'x' """), b'x') | ||||
|  |  | |||
|  | @ -0,0 +1,2 @@ | |||
| Fix location for SyntaxErrors of invalid escapes in the tokenizer. Patch by | ||||
| Pablo Galindo. | ||||
|  | @ -352,8 +352,8 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, | |||
|         assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF); | ||||
| 
 | ||||
|         if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) { | ||||
|             Py_ssize_t size = p->tok->inp - p->tok->buf; | ||||
|             error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace"); | ||||
|             Py_ssize_t size = p->tok->inp - p->tok->line_start; | ||||
|             error_line = PyUnicode_DecodeUTF8(p->tok->line_start, size, "replace"); | ||||
|         } | ||||
|         else if (p->tok->fp == NULL || p->tok->fp == stdin) { | ||||
|             error_line = get_error_line_from_tokenizer_buffers(p, lineno); | ||||
|  |  | |||
|  | @ -11,7 +11,7 @@ | |||
| //// STRING HANDLING FUNCTIONS ////
 | ||||
| 
 | ||||
| static int | ||||
| warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t) | ||||
| warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t) | ||||
| { | ||||
|     if (p->call_invalid_rules) { | ||||
|         // Do not report warnings if we are in the second pass of the parser
 | ||||
|  | @ -41,8 +41,46 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token | |||
|     else { | ||||
|         category = PyExc_DeprecationWarning; | ||||
|     } | ||||
| 
 | ||||
|     // Calculate the lineno and the col_offset of the invalid escape sequence
 | ||||
|     const char *start = buffer; | ||||
|     const char *end = first_invalid_escape; | ||||
|     int lineno = t->lineno; | ||||
|     int col_offset = t->col_offset; | ||||
|     while (start < end) { | ||||
|         if (*start == '\n') { | ||||
|             lineno++; | ||||
|             col_offset = 0; | ||||
|         } | ||||
|         else { | ||||
|             col_offset++; | ||||
|         } | ||||
|         start++; | ||||
|     } | ||||
| 
 | ||||
|     // Count the number of quotes in the token
 | ||||
|     char first_quote = 0; | ||||
|     if (lineno == t->lineno) { | ||||
|         int quote_count = 0; | ||||
|         char* tok = PyBytes_AsString(t->bytes); | ||||
|         for (int i = 0; i < PyBytes_Size(t->bytes); i++) { | ||||
|             if (tok[i] == '\'' || tok[i] == '\"') { | ||||
|                 if (quote_count == 0) { | ||||
|                     first_quote = tok[i]; | ||||
|                 } | ||||
|                 if (tok[i] == first_quote) { | ||||
|                     quote_count++; | ||||
|                 } | ||||
|             } else { | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         col_offset += quote_count; | ||||
|     } | ||||
| 
 | ||||
|     if (PyErr_WarnExplicitObject(category, msg, p->tok->filename, | ||||
|                                  t->lineno, NULL, NULL) < 0) { | ||||
|                                  lineno, NULL, NULL) < 0) { | ||||
|         if (PyErr_ExceptionMatches(category)) { | ||||
|             /* Replace the Syntax/DeprecationWarning exception with a SyntaxError
 | ||||
|                to get a more accurate error report */ | ||||
|  | @ -53,11 +91,12 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token | |||
|                error location, if p->known_err_token is not set. */ | ||||
|             p->known_err_token = t; | ||||
|             if (octal) { | ||||
|                 RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'", | ||||
|                                    first_invalid_escape); | ||||
|                 RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1, | ||||
|                 "invalid octal escape sequence '\\%.3s'", first_invalid_escape); | ||||
|             } | ||||
|             else { | ||||
|                 RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c); | ||||
|                 RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1, | ||||
|                 "invalid escape sequence '\\%c'", c); | ||||
|             } | ||||
|         } | ||||
|         Py_DECREF(msg); | ||||
|  | @ -151,7 +190,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) | |||
|     // HACK: later we can simply pass the line no, since we don't preserve the tokens
 | ||||
|     // when we are decoding the string but we preserve the line numbers.
 | ||||
|     if (v != NULL && first_invalid_escape != NULL && t != NULL) { | ||||
|         if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) { | ||||
|         if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) { | ||||
|             /* We have not decref u before because first_invalid_escape points
 | ||||
|                inside u. */ | ||||
|             Py_XDECREF(u); | ||||
|  | @ -173,7 +212,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) | |||
|     } | ||||
| 
 | ||||
|     if (first_invalid_escape != NULL) { | ||||
|         if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) { | ||||
|         if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) { | ||||
|             Py_DECREF(result); | ||||
|             return NULL; | ||||
|         } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Pablo Galindo Salgado
						Pablo Galindo Salgado