mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	gh-96670: Raise SyntaxError when parsing NULL bytes (#97594)
This commit is contained in:
		
							parent
							
								
									dd53b79de0
								
							
						
					
					
						commit
						aab01e3524
					
				
					 10 changed files with 65 additions and 21 deletions
				
			
		|  | @ -86,6 +86,12 @@ Other Language Changes | |||
| * :class:`memoryview` now supports the half-float type (the "e" format code). | ||||
|   (Contributed by Dong-hee Na and Antoine Pitrou in :gh:`90751`.) | ||||
| 
 | ||||
| * The parser now raises :exc:`SyntaxError` when parsing source code containing | ||||
|   null bytes. (Contributed by Pablo Galindo in :gh:`96670`.) | ||||
| 
 | ||||
| * :func:`ast.parse` now raises :exc:`SyntaxError` instead of :exc:`ValueError` | ||||
|   when parsing source code containing null bytes. (Contributed by Pablo Galindo | ||||
|   in :gh:`96670`.) | ||||
| 
 | ||||
| New Modules | ||||
| =========== | ||||
|  |  | |||
|  | @ -3,6 +3,7 @@ | |||
| #endif | ||||
| 
 | ||||
| PyAPI_FUNC(char *) Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *); | ||||
| PyAPI_FUNC(char *) _Py_UniversalNewlineFgetsWithSize(char *, int, FILE*, PyObject *, size_t*); | ||||
| 
 | ||||
| /* The std printer acts as a preliminary sys.stderr until the new io
 | ||||
|    infrastructure is in place. */ | ||||
|  |  | |||
|  | @ -844,6 +844,10 @@ def check_limit(prefix, repeated): | |||
|         check_limit("a", "[0]") | ||||
|         check_limit("a", "*a") | ||||
| 
 | ||||
|     def test_null_bytes(self): | ||||
|         with self.assertRaises(SyntaxError, | ||||
|             msg="source code string cannot contain null bytes"): | ||||
|             ast.parse("a\0b") | ||||
| 
 | ||||
| class ASTHelpers_Test(unittest.TestCase): | ||||
|     maxDiff = None | ||||
|  |  | |||
|  | @ -334,11 +334,10 @@ def test_compile(self): | |||
|         self.assertRaises(TypeError, compile) | ||||
|         self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'badmode') | ||||
|         self.assertRaises(ValueError, compile, 'print(42)\n', '<string>', 'single', 0xff) | ||||
|         self.assertRaises(ValueError, compile, chr(0), 'f', 'exec') | ||||
|         self.assertRaises(TypeError, compile, 'pass', '?', 'exec', | ||||
|                           mode='eval', source='0', filename='tmp') | ||||
|         compile('print("\xe5")\n', '', 'exec') | ||||
|         self.assertRaises(ValueError, compile, chr(0), 'f', 'exec') | ||||
|         self.assertRaises(SyntaxError, compile, chr(0), 'f', 'exec') | ||||
|         self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad') | ||||
| 
 | ||||
|         # test the optimize argument | ||||
|  |  | |||
|  | @ -657,6 +657,18 @@ def test_syntaxerror_invalid_escape_sequence_multi_line(self): | |||
|                 ], | ||||
|             ) | ||||
| 
 | ||||
|     def test_syntaxerror_null_bytes(self): | ||||
|         script = "x = '\0' nothing to see here\n';import os;os.system('echo pwnd')\n" | ||||
|         with os_helper.temp_dir() as script_dir: | ||||
|             script_name = _make_test_script(script_dir, 'script', script) | ||||
|             exitcode, stdout, stderr = assert_python_failure(script_name) | ||||
|             self.assertEqual( | ||||
|                 stderr.splitlines()[-2:], | ||||
|                 [   b"    x = '", | ||||
|                     b'SyntaxError: source code cannot contain null bytes' | ||||
|                 ], | ||||
|             ) | ||||
| 
 | ||||
|     def test_consistent_sys_path_for_direct_execution(self): | ||||
|         # This test case ensures that the following all give the same | ||||
|         # sys.path configuration: | ||||
|  |  | |||
|  | @ -544,7 +544,7 @@ def test_particularly_evil_undecodable(self): | |||
|             with open(fn, "wb") as fp: | ||||
|                 fp.write(src) | ||||
|             res = script_helper.run_python_until_end(fn)[0] | ||||
|         self.assertIn(b"Non-UTF-8", res.err) | ||||
|         self.assertIn(b"source code cannot contain null bytes", res.err) | ||||
| 
 | ||||
|     def test_yet_more_evil_still_undecodable(self): | ||||
|         # Issue #25388 | ||||
|  | @ -554,7 +554,7 @@ def test_yet_more_evil_still_undecodable(self): | |||
|             with open(fn, "wb") as fp: | ||||
|                 fp.write(src) | ||||
|             res = script_helper.run_python_until_end(fn)[0] | ||||
|         self.assertIn(b"Non-UTF-8", res.err) | ||||
|         self.assertIn(b"source code cannot contain null bytes", res.err) | ||||
| 
 | ||||
|     @support.cpython_only | ||||
|     @unittest.skipIf(support.is_wasi, "exhausts limited stack on WASI") | ||||
|  | @ -591,9 +591,9 @@ def check_limit(prefix, repeated, mode="single"): | |||
|     def test_null_terminated(self): | ||||
|         # The source code is null-terminated internally, but bytes-like | ||||
|         # objects are accepted, which could be not terminated. | ||||
|         with self.assertRaisesRegex(ValueError, "cannot contain null"): | ||||
|         with self.assertRaisesRegex(SyntaxError, "cannot contain null"): | ||||
|             compile("123\x00", "<dummy>", "eval") | ||||
|         with self.assertRaisesRegex(ValueError, "cannot contain null"): | ||||
|         with self.assertRaisesRegex(SyntaxError, "cannot contain null"): | ||||
|             compile(memoryview(b"123\x00"), "<dummy>", "eval") | ||||
|         code = compile(memoryview(b"123\x00")[1:-1], "<dummy>", "eval") | ||||
|         self.assertEqual(eval(code), 23) | ||||
|  |  | |||
|  | @ -0,0 +1,2 @@ | |||
| The parser now raises :exc:`SyntaxError` when parsing source code containing | ||||
| null bytes. Patch by Pablo Galindo. | ||||
|  | @ -230,16 +230,8 @@ _PyLong_FileDescriptor_Converter(PyObject *o, void *ptr) | |||
|     return 1; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| ** Py_UniversalNewlineFgets is an fgets variation that understands | ||||
| ** all of \r, \n and \r\n conventions. | ||||
| ** The stream should be opened in binary mode. | ||||
| ** The fobj parameter exists solely for legacy reasons and must be NULL. | ||||
| ** Note that we need no error handling: fgets() treats error and eof | ||||
| ** identically. | ||||
| */ | ||||
| char * | ||||
| Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) | ||||
| _Py_UniversalNewlineFgetsWithSize(char *buf, int n, FILE *stream, PyObject *fobj, size_t* size) | ||||
| { | ||||
|     char *p = buf; | ||||
|     int c; | ||||
|  | @ -265,11 +257,28 @@ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) | |||
|     } | ||||
|     FUNLOCKFILE(stream); | ||||
|     *p = '\0'; | ||||
|     if (p == buf) | ||||
|     if (p == buf) { | ||||
|         return NULL; | ||||
|     } | ||||
|     *size = p - buf; | ||||
|     return buf; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
| ** Py_UniversalNewlineFgets is an fgets variation that understands | ||||
| ** all of \r, \n and \r\n conventions. | ||||
| ** The stream should be opened in binary mode. | ||||
| ** The fobj parameter exists solely for legacy reasons and must be NULL. | ||||
| ** Note that we need no error handling: fgets() treats error and eof | ||||
| ** identically. | ||||
| */ | ||||
| 
 | ||||
| char * | ||||
| Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) { | ||||
|     size_t size; | ||||
|     return _Py_UniversalNewlineFgetsWithSize(buf, n, stream, fobj, &size); | ||||
| } | ||||
| 
 | ||||
| /* **************************** std printer ****************************
 | ||||
|  * The stdprinter is used during the boot strapping phase as a preliminary | ||||
|  * file like object for sys.stderr. | ||||
|  |  | |||
|  | @ -378,6 +378,11 @@ tok_reserve_buf(struct tok_state *tok, Py_ssize_t size) | |||
|     return 1; | ||||
| } | ||||
| 
 | ||||
| static inline int | ||||
| contains_null_bytes(const char* str, size_t size) { | ||||
|     return memchr(str, 0, size) != NULL; | ||||
| } | ||||
| 
 | ||||
| static int | ||||
| tok_readline_recode(struct tok_state *tok) { | ||||
|     PyObject *line; | ||||
|  | @ -829,9 +834,9 @@ tok_readline_raw(struct tok_state *tok) | |||
|         if (!tok_reserve_buf(tok, BUFSIZ)) { | ||||
|             return 0; | ||||
|         } | ||||
|         char *line = Py_UniversalNewlineFgets(tok->inp, | ||||
|                                               (int)(tok->end - tok->inp), | ||||
|                                               tok->fp, NULL); | ||||
|         int n_chars = (int)(tok->end - tok->inp); | ||||
|         size_t line_size = 0; | ||||
|         char *line = _Py_UniversalNewlineFgetsWithSize(tok->inp, n_chars, tok->fp, NULL, &line_size); | ||||
|         if (line == NULL) { | ||||
|             return 1; | ||||
|         } | ||||
|  | @ -839,7 +844,7 @@ tok_readline_raw(struct tok_state *tok) | |||
|             tok_concatenate_interactive_new_line(tok, line) == -1) { | ||||
|             return 0; | ||||
|         } | ||||
|         tok->inp = strchr(tok->inp, '\0'); | ||||
|         tok->inp += line_size; | ||||
|         if (tok->inp == tok->buf) { | ||||
|             return 0; | ||||
|         } | ||||
|  | @ -1075,6 +1080,12 @@ tok_nextc(struct tok_state *tok) | |||
|             return EOF; | ||||
|         } | ||||
|         tok->line_start = tok->cur; | ||||
| 
 | ||||
|         if (contains_null_bytes(tok->line_start, tok->inp - tok->line_start)) { | ||||
|             syntaxerror(tok, "source code cannot contain null bytes"); | ||||
|             tok->cur = tok->inp; | ||||
|             return EOF; | ||||
|         } | ||||
|     } | ||||
|     Py_UNREACHABLE(); | ||||
| } | ||||
|  |  | |||
|  | @ -1858,7 +1858,7 @@ _Py_SourceAsString(PyObject *cmd, const char *funcname, const char *what, PyComp | |||
|     } | ||||
| 
 | ||||
|     if (strlen(str) != (size_t)size) { | ||||
|         PyErr_SetString(PyExc_ValueError, | ||||
|         PyErr_SetString(PyExc_SyntaxError, | ||||
|             "source code string cannot contain null bytes"); | ||||
|         Py_CLEAR(*cmd_copy); | ||||
|         return NULL; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Pablo Galindo Salgado
						Pablo Galindo Salgado