mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	[3.13] gh-116042: Fix location for SyntaxErrors of invalid escapes in the tokenizer (GH-116049) (#130066)
(cherry picked from commit 56eda25633) (cherry picked from commit 369704b428)
This commit is contained in:
		
							parent
							
								
									4c2a59b7b8
								
							
						
					
					
						commit
						8d1d36b742
					
				
					 5 changed files with 82 additions and 18 deletions
				
			
		|  | @ -660,7 +660,7 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self): | ||||||
|             self.assertEqual( |             self.assertEqual( | ||||||
|                 stderr.splitlines()[-3:], |                 stderr.splitlines()[-3:], | ||||||
|                 [   b'    foo = """\\q"""', |                 [   b'    foo = """\\q"""', | ||||||
|                     b'          ^^^^^^^^', |                     b'             ^^', | ||||||
|                     b'SyntaxError: invalid escape sequence \'\\q\'' |                     b'SyntaxError: invalid escape sequence \'\\q\'' | ||||||
|                 ], |                 ], | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|  | @ -118,7 +118,7 @@ def test_eval_str_invalid_escape(self): | ||||||
|         self.assertEqual(len(w), 1) |         self.assertEqual(len(w), 1) | ||||||
|         self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") |         self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") | ||||||
|         self.assertEqual(w[0].filename, '<string>') |         self.assertEqual(w[0].filename, '<string>') | ||||||
|         self.assertEqual(w[0].lineno, 1) |         self.assertEqual(w[0].lineno, 2) | ||||||
| 
 | 
 | ||||||
|         with warnings.catch_warnings(record=True) as w: |         with warnings.catch_warnings(record=True) as w: | ||||||
|             warnings.simplefilter('error', category=SyntaxWarning) |             warnings.simplefilter('error', category=SyntaxWarning) | ||||||
|  | @ -128,7 +128,7 @@ def test_eval_str_invalid_escape(self): | ||||||
|         self.assertEqual(w, []) |         self.assertEqual(w, []) | ||||||
|         self.assertEqual(exc.msg, r"invalid escape sequence '\z'") |         self.assertEqual(exc.msg, r"invalid escape sequence '\z'") | ||||||
|         self.assertEqual(exc.filename, '<string>') |         self.assertEqual(exc.filename, '<string>') | ||||||
|         self.assertEqual(exc.lineno, 1) |         self.assertEqual(exc.lineno, 2) | ||||||
|         self.assertEqual(exc.offset, 1) |         self.assertEqual(exc.offset, 1) | ||||||
| 
 | 
 | ||||||
|         # Check that the warning is raised only once if there are syntax errors |         # Check that the warning is raised only once if there are syntax errors | ||||||
|  | @ -155,7 +155,7 @@ def test_eval_str_invalid_octal_escape(self): | ||||||
|         self.assertEqual(str(w[0].message), |         self.assertEqual(str(w[0].message), | ||||||
|                          r"invalid octal escape sequence '\407'") |                          r"invalid octal escape sequence '\407'") | ||||||
|         self.assertEqual(w[0].filename, '<string>') |         self.assertEqual(w[0].filename, '<string>') | ||||||
|         self.assertEqual(w[0].lineno, 1) |         self.assertEqual(w[0].lineno, 2) | ||||||
| 
 | 
 | ||||||
|         with warnings.catch_warnings(record=True) as w: |         with warnings.catch_warnings(record=True) as w: | ||||||
|             warnings.simplefilter('error', category=SyntaxWarning) |             warnings.simplefilter('error', category=SyntaxWarning) | ||||||
|  | @ -165,9 +165,32 @@ def test_eval_str_invalid_octal_escape(self): | ||||||
|         self.assertEqual(w, []) |         self.assertEqual(w, []) | ||||||
|         self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") |         self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") | ||||||
|         self.assertEqual(exc.filename, '<string>') |         self.assertEqual(exc.filename, '<string>') | ||||||
|         self.assertEqual(exc.lineno, 1) |         self.assertEqual(exc.lineno, 2) | ||||||
|         self.assertEqual(exc.offset, 1) |         self.assertEqual(exc.offset, 1) | ||||||
| 
 | 
 | ||||||
|  |     def test_invalid_escape_locations_with_offset(self): | ||||||
|  |         with warnings.catch_warnings(record=True) as w: | ||||||
|  |             warnings.simplefilter('error', category=SyntaxWarning) | ||||||
|  |             with self.assertRaises(SyntaxError) as cm: | ||||||
|  |                 eval("\"'''''''''''''''''''''invalid\ Escape\"") | ||||||
|  |             exc = cm.exception | ||||||
|  |         self.assertEqual(w, []) | ||||||
|  |         self.assertEqual(exc.msg, r"invalid escape sequence '\ '") | ||||||
|  |         self.assertEqual(exc.filename, '<string>') | ||||||
|  |         self.assertEqual(exc.lineno, 1) | ||||||
|  |         self.assertEqual(exc.offset, 30) | ||||||
|  | 
 | ||||||
|  |         with warnings.catch_warnings(record=True) as w: | ||||||
|  |             warnings.simplefilter('error', category=SyntaxWarning) | ||||||
|  |             with self.assertRaises(SyntaxError) as cm: | ||||||
|  |                 eval("\"''Incorrect \ logic?\"") | ||||||
|  |             exc = cm.exception | ||||||
|  |         self.assertEqual(w, []) | ||||||
|  |         self.assertEqual(exc.msg, r"invalid escape sequence '\ '") | ||||||
|  |         self.assertEqual(exc.filename, '<string>') | ||||||
|  |         self.assertEqual(exc.lineno, 1) | ||||||
|  |         self.assertEqual(exc.offset, 14) | ||||||
|  | 
 | ||||||
|     def test_eval_str_raw(self): |     def test_eval_str_raw(self): | ||||||
|         self.assertEqual(eval(""" r'x' """), 'x') |         self.assertEqual(eval(""" r'x' """), 'x') | ||||||
|         self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') |         self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01') | ||||||
|  | @ -207,7 +230,7 @@ def test_eval_bytes_invalid_escape(self): | ||||||
|         self.assertEqual(len(w), 1) |         self.assertEqual(len(w), 1) | ||||||
|         self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") |         self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'") | ||||||
|         self.assertEqual(w[0].filename, '<string>') |         self.assertEqual(w[0].filename, '<string>') | ||||||
|         self.assertEqual(w[0].lineno, 1) |         self.assertEqual(w[0].lineno, 2) | ||||||
| 
 | 
 | ||||||
|         with warnings.catch_warnings(record=True) as w: |         with warnings.catch_warnings(record=True) as w: | ||||||
|             warnings.simplefilter('error', category=SyntaxWarning) |             warnings.simplefilter('error', category=SyntaxWarning) | ||||||
|  | @ -217,7 +240,7 @@ def test_eval_bytes_invalid_escape(self): | ||||||
|         self.assertEqual(w, []) |         self.assertEqual(w, []) | ||||||
|         self.assertEqual(exc.msg, r"invalid escape sequence '\z'") |         self.assertEqual(exc.msg, r"invalid escape sequence '\z'") | ||||||
|         self.assertEqual(exc.filename, '<string>') |         self.assertEqual(exc.filename, '<string>') | ||||||
|         self.assertEqual(exc.lineno, 1) |         self.assertEqual(exc.lineno, 2) | ||||||
| 
 | 
 | ||||||
|     def test_eval_bytes_invalid_octal_escape(self): |     def test_eval_bytes_invalid_octal_escape(self): | ||||||
|         for i in range(0o400, 0o1000): |         for i in range(0o400, 0o1000): | ||||||
|  | @ -231,7 +254,7 @@ def test_eval_bytes_invalid_octal_escape(self): | ||||||
|         self.assertEqual(str(w[0].message), |         self.assertEqual(str(w[0].message), | ||||||
|                          r"invalid octal escape sequence '\407'") |                          r"invalid octal escape sequence '\407'") | ||||||
|         self.assertEqual(w[0].filename, '<string>') |         self.assertEqual(w[0].filename, '<string>') | ||||||
|         self.assertEqual(w[0].lineno, 1) |         self.assertEqual(w[0].lineno, 2) | ||||||
| 
 | 
 | ||||||
|         with warnings.catch_warnings(record=True) as w: |         with warnings.catch_warnings(record=True) as w: | ||||||
|             warnings.simplefilter('error', category=SyntaxWarning) |             warnings.simplefilter('error', category=SyntaxWarning) | ||||||
|  | @ -241,7 +264,7 @@ def test_eval_bytes_invalid_octal_escape(self): | ||||||
|         self.assertEqual(w, []) |         self.assertEqual(w, []) | ||||||
|         self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") |         self.assertEqual(exc.msg, r"invalid octal escape sequence '\407'") | ||||||
|         self.assertEqual(exc.filename, '<string>') |         self.assertEqual(exc.filename, '<string>') | ||||||
|         self.assertEqual(exc.lineno, 1) |         self.assertEqual(exc.lineno, 2) | ||||||
| 
 | 
 | ||||||
|     def test_eval_bytes_raw(self): |     def test_eval_bytes_raw(self): | ||||||
|         self.assertEqual(eval(""" br'x' """), b'x') |         self.assertEqual(eval(""" br'x' """), b'x') | ||||||
|  |  | ||||||
|  | @ -0,0 +1,2 @@ | ||||||
|  | Fix location for SyntaxErrors of invalid escapes in the tokenizer. Patch by | ||||||
|  | Pablo Galindo | ||||||
|  | @ -352,8 +352,8 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, | ||||||
|         assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF); |         assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF); | ||||||
| 
 | 
 | ||||||
|         if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) { |         if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) { | ||||||
|             Py_ssize_t size = p->tok->inp - p->tok->buf; |             Py_ssize_t size = p->tok->inp - p->tok->line_start; | ||||||
|             error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace"); |             error_line = PyUnicode_DecodeUTF8(p->tok->line_start, size, "replace"); | ||||||
|         } |         } | ||||||
|         else if (p->tok->fp == NULL || p->tok->fp == stdin) { |         else if (p->tok->fp == NULL || p->tok->fp == stdin) { | ||||||
|             error_line = get_error_line_from_tokenizer_buffers(p, lineno); |             error_line = get_error_line_from_tokenizer_buffers(p, lineno); | ||||||
|  |  | ||||||
|  | @ -11,7 +11,7 @@ | ||||||
| //// STRING HANDLING FUNCTIONS ////
 | //// STRING HANDLING FUNCTIONS ////
 | ||||||
| 
 | 
 | ||||||
| static int | static int | ||||||
| warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token *t) | warn_invalid_escape_sequence(Parser *p, const char* buffer, const char *first_invalid_escape, Token *t) | ||||||
| { | { | ||||||
|     if (p->call_invalid_rules) { |     if (p->call_invalid_rules) { | ||||||
|         // Do not report warnings if we are in the second pass of the parser
 |         // Do not report warnings if we are in the second pass of the parser
 | ||||||
|  | @ -41,8 +41,46 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token | ||||||
|     else { |     else { | ||||||
|         category = PyExc_DeprecationWarning; |         category = PyExc_DeprecationWarning; | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     // Calculate the lineno and the col_offset of the invalid escape sequence
 | ||||||
|  |     const char *start = buffer; | ||||||
|  |     const char *end = first_invalid_escape; | ||||||
|  |     int lineno = t->lineno; | ||||||
|  |     int col_offset = t->col_offset; | ||||||
|  |     while (start < end) { | ||||||
|  |         if (*start == '\n') { | ||||||
|  |             lineno++; | ||||||
|  |             col_offset = 0; | ||||||
|  |         } | ||||||
|  |         else { | ||||||
|  |             col_offset++; | ||||||
|  |         } | ||||||
|  |         start++; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     // Count the number of quotes in the token
 | ||||||
|  |     char first_quote = 0; | ||||||
|  |     if (lineno == t->lineno) { | ||||||
|  |         int quote_count = 0; | ||||||
|  |         char* tok = PyBytes_AsString(t->bytes); | ||||||
|  |         for (int i = 0; i < PyBytes_Size(t->bytes); i++) { | ||||||
|  |             if (tok[i] == '\'' || tok[i] == '\"') { | ||||||
|  |                 if (quote_count == 0) { | ||||||
|  |                     first_quote = tok[i]; | ||||||
|  |                 } | ||||||
|  |                 if (tok[i] == first_quote) { | ||||||
|  |                     quote_count++; | ||||||
|  |                 } | ||||||
|  |             } else { | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         col_offset += quote_count; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     if (PyErr_WarnExplicitObject(category, msg, p->tok->filename, |     if (PyErr_WarnExplicitObject(category, msg, p->tok->filename, | ||||||
|                                  t->lineno, NULL, NULL) < 0) { |                                  lineno, NULL, NULL) < 0) { | ||||||
|         if (PyErr_ExceptionMatches(category)) { |         if (PyErr_ExceptionMatches(category)) { | ||||||
|             /* Replace the Syntax/DeprecationWarning exception with a SyntaxError
 |             /* Replace the Syntax/DeprecationWarning exception with a SyntaxError
 | ||||||
|                to get a more accurate error report */ |                to get a more accurate error report */ | ||||||
|  | @ -53,11 +91,12 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token | ||||||
|                error location, if p->known_err_token is not set. */ |                error location, if p->known_err_token is not set. */ | ||||||
|             p->known_err_token = t; |             p->known_err_token = t; | ||||||
|             if (octal) { |             if (octal) { | ||||||
|                 RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'", |                 RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1, | ||||||
|                                    first_invalid_escape); |                 "invalid octal escape sequence '\\%.3s'", first_invalid_escape); | ||||||
|             } |             } | ||||||
|             else { |             else { | ||||||
|                 RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c); |                 RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, lineno, col_offset-1, lineno, col_offset+1, | ||||||
|  |                 "invalid escape sequence '\\%c'", c); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|         Py_DECREF(msg); |         Py_DECREF(msg); | ||||||
|  | @ -151,7 +190,7 @@ decode_unicode_with_escapes(Parser *parser, const char *s, size_t len, Token *t) | ||||||
|     // HACK: later we can simply pass the line no, since we don't preserve the tokens
 |     // HACK: later we can simply pass the line no, since we don't preserve the tokens
 | ||||||
|     // when we are decoding the string but we preserve the line numbers.
 |     // when we are decoding the string but we preserve the line numbers.
 | ||||||
|     if (v != NULL && first_invalid_escape != NULL && t != NULL) { |     if (v != NULL && first_invalid_escape != NULL && t != NULL) { | ||||||
|         if (warn_invalid_escape_sequence(parser, first_invalid_escape, t) < 0) { |         if (warn_invalid_escape_sequence(parser, s, first_invalid_escape, t) < 0) { | ||||||
|             /* We have not decref u before because first_invalid_escape points
 |             /* We have not decref u before because first_invalid_escape points
 | ||||||
|                inside u. */ |                inside u. */ | ||||||
|             Py_XDECREF(u); |             Py_XDECREF(u); | ||||||
|  | @ -173,7 +212,7 @@ decode_bytes_with_escapes(Parser *p, const char *s, Py_ssize_t len, Token *t) | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (first_invalid_escape != NULL) { |     if (first_invalid_escape != NULL) { | ||||||
|         if (warn_invalid_escape_sequence(p, first_invalid_escape, t) < 0) { |         if (warn_invalid_escape_sequence(p, s, first_invalid_escape, t) < 0) { | ||||||
|             Py_DECREF(result); |             Py_DECREF(result); | ||||||
|             return NULL; |             return NULL; | ||||||
|         } |         } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Pablo Galindo Salgado
						Pablo Galindo Salgado