mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	bpo-40176: Improve error messages for unclosed string literals (GH-19346)
Automerge-Triggered-By: GH:isidentical
This commit is contained in:
		
							parent
							
								
									c3f167d7b2
								
							
						
					
					
						commit
						a698d52c39
					
				
					 7 changed files with 34 additions and 32 deletions
				
			
		|  | @ -26,8 +26,6 @@ extern "C" { | ||||||
| #define E_TOODEEP       20      /* Too many indentation levels */ | #define E_TOODEEP       20      /* Too many indentation levels */ | ||||||
| #define E_DEDENT        21      /* No matching outer block for dedent */ | #define E_DEDENT        21      /* No matching outer block for dedent */ | ||||||
| #define E_DECODE        22      /* Error in decoding into Unicode */ | #define E_DECODE        22      /* Error in decoding into Unicode */ | ||||||
| #define E_EOFS          23      /* EOF in triple-quoted string */ |  | ||||||
| #define E_EOLS          24      /* EOL in single-quoted string */ |  | ||||||
| #define E_LINECONT      25      /* Unexpected characters after a line continuation */ | #define E_LINECONT      25      /* Unexpected characters after a line continuation */ | ||||||
| #define E_BADSINGLE     27      /* Ill-formed single statement input */ | #define E_BADSINGLE     27      /* Ill-formed single statement input */ | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -7,23 +7,25 @@ | ||||||
| import unittest | import unittest | ||||||
| 
 | 
 | ||||||
| class EOFTestCase(unittest.TestCase): | class EOFTestCase(unittest.TestCase): | ||||||
|     def test_EOFC(self): |     def test_EOF_single_quote(self): | ||||||
|         expect = "EOL while scanning string literal (<string>, line 1)" |         expect = "unterminated string literal (detected at line 1) (<string>, line 1)" | ||||||
|  |         for quote in ("'", "\""): | ||||||
|             try: |             try: | ||||||
|             eval("""'this is a test\ |                 eval(f"""{quote}this is a test\ | ||||||
|                 """) |                 """) | ||||||
|             except SyntaxError as msg: |             except SyntaxError as msg: | ||||||
|                 self.assertEqual(str(msg), expect) |                 self.assertEqual(str(msg), expect) | ||||||
|  |                 self.assertEqual(msg.offset, 1) | ||||||
|             else: |             else: | ||||||
|                 raise support.TestFailed |                 raise support.TestFailed | ||||||
| 
 | 
 | ||||||
|     def test_EOFS(self): |     def test_EOFS(self): | ||||||
|         expect = ("EOF while scanning triple-quoted string literal " |         expect = ("unterminated triple-quoted string literal (detected at line 1) (<string>, line 1)") | ||||||
|                   "(<string>, line 1)") |  | ||||||
|         try: |         try: | ||||||
|             eval("""'''this is a test""") |             eval("""'''this is a test""") | ||||||
|         except SyntaxError as msg: |         except SyntaxError as msg: | ||||||
|             self.assertEqual(str(msg), expect) |             self.assertEqual(str(msg), expect) | ||||||
|  |             self.assertEqual(msg.offset, 1) | ||||||
|         else: |         else: | ||||||
|             raise support.TestFailed |             raise support.TestFailed | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -206,7 +206,7 @@ def testSyntaxErrorOffset(self): | ||||||
|         check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +', |         check(b'# -*- coding: cp1251 -*-\nPython = "\xcf\xb3\xf2\xee\xed" +', | ||||||
|               2, 19, encoding='cp1251') |               2, 19, encoding='cp1251') | ||||||
|         check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18) |         check(b'Python = "\xcf\xb3\xf2\xee\xed" +', 1, 18) | ||||||
|         check('x = "a', 1, 7) |         check('x = "a', 1, 5) | ||||||
|         check('lambda x: x = 2', 1, 1) |         check('lambda x: x = 2', 1, 1) | ||||||
|         check('f{a + b + c}', 1, 2) |         check('f{a + b + c}', 1, 2) | ||||||
|         check('[file for str(file) in []\n])', 1, 11) |         check('[file for str(file) in []\n])', 1, 11) | ||||||
|  | @ -238,7 +238,7 @@ def bar(): | ||||||
| 
 | 
 | ||||||
|             def baz(): |             def baz(): | ||||||
|                 '''quux''' |                 '''quux''' | ||||||
|             """, 9, 20) |             """, 9, 24) | ||||||
|         check("pass\npass\npass\n(1+)\npass\npass\npass", 4, 4) |         check("pass\npass\npass\n(1+)\npass\npass\npass", 4, 4) | ||||||
|         check("(1+)", 1, 4) |         check("(1+)", 1, 4) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -661,7 +661,7 @@ def test_parens_in_expressions(self): | ||||||
|                             ["f'{3)+(4}'", |                             ["f'{3)+(4}'", | ||||||
|                              ]) |                              ]) | ||||||
| 
 | 
 | ||||||
|         self.assertAllRaise(SyntaxError, 'EOL while scanning string literal', |         self.assertAllRaise(SyntaxError, 'unterminated string literal', | ||||||
|                             ["f'{\n}'", |                             ["f'{\n}'", | ||||||
|                              ]) |                              ]) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -0,0 +1,2 @@ | ||||||
|  | Syntax errors for unterminated string literals now point to the start | ||||||
|  | of the string instead of reporting EOF/EOL. | ||||||
|  | @ -327,12 +327,6 @@ tokenizer_error(Parser *p) | ||||||
|         case E_TOKEN: |         case E_TOKEN: | ||||||
|             msg = "invalid token"; |             msg = "invalid token"; | ||||||
|             break; |             break; | ||||||
|         case E_EOFS: |  | ||||||
|             RAISE_SYNTAX_ERROR("EOF while scanning triple-quoted string literal"); |  | ||||||
|             return -1; |  | ||||||
|         case E_EOLS: |  | ||||||
|             RAISE_SYNTAX_ERROR("EOL while scanning string literal"); |  | ||||||
|             return -1; |  | ||||||
|         case E_EOF: |         case E_EOF: | ||||||
|             if (p->tok->level) { |             if (p->tok->level) { | ||||||
|                 raise_unclosed_parentheses_error(p); |                 raise_unclosed_parentheses_error(p); | ||||||
|  |  | ||||||
|  | @ -1739,20 +1739,26 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end) | ||||||
|         /* Get rest of string */ |         /* Get rest of string */ | ||||||
|         while (end_quote_size != quote_size) { |         while (end_quote_size != quote_size) { | ||||||
|             c = tok_nextc(tok); |             c = tok_nextc(tok); | ||||||
|             if (c == EOF) { |             if (c == EOF || (quote_size == 1 && c == '\n')) { | ||||||
|  |                 // shift the tok_state's location into
 | ||||||
|  |                 // the start of string, and report the error
 | ||||||
|  |                 // from the initial quote character
 | ||||||
|  |                 tok->cur = (char *)tok->start; | ||||||
|  |                 tok->cur++; | ||||||
|  |                 tok->line_start = tok->multi_line_start; | ||||||
|  |                 int start = tok->lineno; | ||||||
|  |                 tok->lineno = tok->first_lineno; | ||||||
|  | 
 | ||||||
|                 if (quote_size == 3) { |                 if (quote_size == 3) { | ||||||
|                     tok->done = E_EOFS; |                     return syntaxerror(tok, | ||||||
|  |                                        "unterminated triple-quoted string literal" | ||||||
|  |                                        " (detected at line %d)", start); | ||||||
|                 } |                 } | ||||||
|                 else { |                 else { | ||||||
|                     tok->done = E_EOLS; |                     return syntaxerror(tok, | ||||||
|  |                                        "unterminated string literal (detected at" | ||||||
|  |                                        " line %d)", start); | ||||||
|                 } |                 } | ||||||
|                 tok->cur = tok->inp; |  | ||||||
|                 return ERRORTOKEN; |  | ||||||
|             } |  | ||||||
|             if (quote_size == 1 && c == '\n') { |  | ||||||
|                 tok->done = E_EOLS; |  | ||||||
|                 tok->cur = tok->inp; |  | ||||||
|                 return ERRORTOKEN; |  | ||||||
|             } |             } | ||||||
|             if (c == quote) { |             if (c == quote) { | ||||||
|                 end_quote_size += 1; |                 end_quote_size += 1; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Batuhan Taskaya
						Batuhan Taskaya