mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 21:51:50 +00:00 
			
		
		
		
	bpo-42739: Don't use sentinels to mark end of line table. (GH-25657)
* Add length parameter to PyLineTable_InitAddressRange and don't use sentinel values at the end of the table. Makes the line number table more robust. * Update PyCodeAddressRange to match PEP 626.
This commit is contained in:
		
							parent
							
								
									53dd6c99b3
								
							
						
					
					
						commit
						c76da79b37
					
				
					 13 changed files with 4968 additions and 4986 deletions
				
			
		|  | @ -135,12 +135,17 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno); | ||||||
| PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int); | PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int); | ||||||
| 
 | 
 | ||||||
| /* for internal use only */ | /* for internal use only */ | ||||||
|  | struct _opaque { | ||||||
|  |     int computed_line; | ||||||
|  |     char *lo_next; | ||||||
|  |     char *limit; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| typedef struct _line_offsets { | typedef struct _line_offsets { | ||||||
|     int ar_start; |     int ar_start; | ||||||
|     int ar_end; |     int ar_end; | ||||||
|     int ar_line; |     int ar_line; | ||||||
|     int ar_computed_line; |     struct _opaque opaque; | ||||||
|     char *lo_next; |  | ||||||
| } PyCodeAddressRange; | } PyCodeAddressRange; | ||||||
| 
 | 
 | ||||||
| /* Update *bounds to describe the first and one-past-the-last instructions in the
 | /* Update *bounds to describe the first and one-past-the-last instructions in the
 | ||||||
|  | @ -170,7 +175,7 @@ PyAPI_FUNC(int) _PyCode_SetExtra(PyObject *code, Py_ssize_t index, | ||||||
| int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds); | int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds); | ||||||
| 
 | 
 | ||||||
| /** Out of process API for initializing the line number table. */ | /** Out of process API for initializing the line number table. */ | ||||||
| void PyLineTable_InitAddressRange(char *linetable, int firstlineno, PyCodeAddressRange *range); | void PyLineTable_InitAddressRange(char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range); | ||||||
| 
 | 
 | ||||||
| /** API for traversing the line number table. */ | /** API for traversing the line number table. */ | ||||||
| int PyLineTable_NextAddressRange(PyCodeAddressRange *range); | int PyLineTable_NextAddressRange(PyCodeAddressRange *range); | ||||||
|  |  | ||||||
|  | @ -80,9 +80,9 @@ class struct_frozen(Structure): | ||||||
|                 continue |                 continue | ||||||
|             items.append((entry.name.decode("ascii"), entry.size)) |             items.append((entry.name.decode("ascii"), entry.size)) | ||||||
| 
 | 
 | ||||||
|         expected = [("__hello__", 139), |         expected = [("__hello__", 137), | ||||||
|                     ("__phello__", -139), |                     ("__phello__", -137), | ||||||
|                     ("__phello__.spam", 139), |                     ("__phello__.spam", 137), | ||||||
|                     ] |                     ] | ||||||
|         self.assertEqual(items, expected, "PyImport_FrozenModules example " |         self.assertEqual(items, expected, "PyImport_FrozenModules example " | ||||||
|             "in Doc/library/ctypes.rst may be out of date") |             "in Doc/library/ctypes.rst may be out of date") | ||||||
|  |  | ||||||
|  | @ -350,6 +350,7 @@ def _write_atomic(path, data, mode=0o666): | ||||||
| #     Python 3.10a7 3435 Use instruction offsets (as opposed to byte offsets). | #     Python 3.10a7 3435 Use instruction offsets (as opposed to byte offsets). | ||||||
| #     Python 3.10b1 3436 (Add GEN_START bytecode #43683) | #     Python 3.10b1 3436 (Add GEN_START bytecode #43683) | ||||||
| #     Python 3.10b1 3437 (Undo making 'annotations' future by default - We like to dance among core devs!) | #     Python 3.10b1 3437 (Undo making 'annotations' future by default - We like to dance among core devs!) | ||||||
|  | #     Python 3.10b1 3438 Safer line number table handling. | ||||||
| 
 | 
 | ||||||
| # | # | ||||||
| # MAGIC must change whenever the bytecode emitted by the compiler may no | # MAGIC must change whenever the bytecode emitted by the compiler may no | ||||||
|  | @ -359,7 +360,7 @@ def _write_atomic(path, data, mode=0o666): | ||||||
| # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array | # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array | ||||||
| # in PC/launcher.c must also be updated. | # in PC/launcher.c must also be updated. | ||||||
| 
 | 
 | ||||||
| MAGIC_NUMBER = (3437).to_bytes(2, 'little') + b'\r\n' | MAGIC_NUMBER = (3438).to_bytes(2, 'little') + b'\r\n' | ||||||
| _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little')  # For import.c | _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little')  # For import.c | ||||||
| 
 | 
 | ||||||
| _PYCACHE = '__pycache__' | _PYCACHE = '__pycache__' | ||||||
|  |  | ||||||
|  | @ -264,6 +264,12 @@ def func2(): | ||||||
|                 new_code = code.replace(**{attr: value}) |                 new_code = code.replace(**{attr: value}) | ||||||
|                 self.assertEqual(getattr(new_code, attr), value) |                 self.assertEqual(getattr(new_code, attr), value) | ||||||
| 
 | 
 | ||||||
|  |     def test_empty_linetable(self): | ||||||
|  |         def func(): | ||||||
|  |             pass | ||||||
|  |         new_code = code = func.__code__.replace(co_linetable=b'') | ||||||
|  |         self.assertEqual(list(new_code.co_lines()), []) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| def isinterned(s): | def isinterned(s): | ||||||
|     return s is sys.intern(('_' + s + '_')[1:-1]) |     return s is sys.intern(('_' + s + '_')[1:-1]) | ||||||
|  |  | ||||||
|  | @ -172,7 +172,7 @@ def bug42562(): | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| # Set line number for 'pass' to None | # Set line number for 'pass' to None | ||||||
| bug42562.__code__ = bug42562.__code__.replace(co_linetable=b'\x04\x80\xff\x80') | bug42562.__code__ = bug42562.__code__.replace(co_linetable=b'\x04\x80') | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| dis_bug42562 = """\ | dis_bug42562 = """\ | ||||||
|  |  | ||||||
|  | @ -0,0 +1,4 @@ | ||||||
|  | The internal representation of line number tables is changed to not use | ||||||
|  | sentinels, and an explicit length parameter is added to the out of process | ||||||
|  | API function ``PyLineTable_InitAddressRange``. This makes the handling of | ||||||
|  | line number tables more robust in some circumstances. | ||||||
|  | @ -456,15 +456,15 @@ code_getlnotab(PyCodeObject *code, void *closure) | ||||||
|     } |     } | ||||||
|     _PyCode_InitAddressRange(code, &bounds); |     _PyCode_InitAddressRange(code, &bounds); | ||||||
|     while (PyLineTable_NextAddressRange(&bounds)) { |     while (PyLineTable_NextAddressRange(&bounds)) { | ||||||
|         if (bounds.ar_computed_line != line) { |         if (bounds.opaque.computed_line != line) { | ||||||
|             int bdelta = bounds.ar_start - code_offset; |             int bdelta = bounds.ar_start - code_offset; | ||||||
|             int ldelta = bounds.ar_computed_line - line; |             int ldelta = bounds.opaque.computed_line - line; | ||||||
|             if (!emit_delta(&bytes, bdelta, ldelta, &table_offset)) { |             if (!emit_delta(&bytes, bdelta, ldelta, &table_offset)) { | ||||||
|                 Py_DECREF(bytes); |                 Py_DECREF(bytes); | ||||||
|                 return NULL; |                 return NULL; | ||||||
|             } |             } | ||||||
|             code_offset = bounds.ar_start; |             code_offset = bounds.ar_start; | ||||||
|             line = bounds.ar_computed_line; |             line = bounds.opaque.computed_line; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|     _PyBytes_Resize(&bytes, table_offset); |     _PyBytes_Resize(&bytes, table_offset); | ||||||
|  | @ -1120,20 +1120,20 @@ code_linesiterator(PyCodeObject *code, PyObject *Py_UNUSED(args)) | ||||||
| static void | static void | ||||||
| retreat(PyCodeAddressRange *bounds) | retreat(PyCodeAddressRange *bounds) | ||||||
| { | { | ||||||
|     int ldelta = ((signed char *)bounds->lo_next)[-1]; |     int ldelta = ((signed char *)bounds->opaque.lo_next)[-1]; | ||||||
|     if (ldelta == -128) { |     if (ldelta == -128) { | ||||||
|         ldelta = 0; |         ldelta = 0; | ||||||
|     } |     } | ||||||
|     bounds->ar_computed_line -= ldelta; |     bounds->opaque.computed_line -= ldelta; | ||||||
|     bounds->lo_next -= 2; |     bounds->opaque.lo_next -= 2; | ||||||
|     bounds->ar_end = bounds->ar_start; |     bounds->ar_end = bounds->ar_start; | ||||||
|     bounds->ar_start -= ((unsigned char *)bounds->lo_next)[-2]; |     bounds->ar_start -= ((unsigned char *)bounds->opaque.lo_next)[-2]; | ||||||
|     ldelta = ((signed char *)bounds->lo_next)[-1]; |     ldelta = ((signed char *)bounds->opaque.lo_next)[-1]; | ||||||
|     if (ldelta == -128) { |     if (ldelta == -128) { | ||||||
|         bounds->ar_line = -1; |         bounds->ar_line = -1; | ||||||
|     } |     } | ||||||
|     else { |     else { | ||||||
|         bounds->ar_line = bounds->ar_computed_line; |         bounds->ar_line = bounds->opaque.computed_line; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1141,23 +1141,22 @@ static void | ||||||
| advance(PyCodeAddressRange *bounds) | advance(PyCodeAddressRange *bounds) | ||||||
| { | { | ||||||
|     bounds->ar_start = bounds->ar_end; |     bounds->ar_start = bounds->ar_end; | ||||||
|     int delta = ((unsigned char *)bounds->lo_next)[0]; |     int delta = ((unsigned char *)bounds->opaque.lo_next)[0]; | ||||||
|     assert (delta < 255); |  | ||||||
|     bounds->ar_end += delta; |     bounds->ar_end += delta; | ||||||
|     int ldelta = ((signed char *)bounds->lo_next)[1]; |     int ldelta = ((signed char *)bounds->opaque.lo_next)[1]; | ||||||
|     bounds->lo_next += 2; |     bounds->opaque.lo_next += 2; | ||||||
|     if (ldelta == -128) { |     if (ldelta == -128) { | ||||||
|         bounds->ar_line = -1; |         bounds->ar_line = -1; | ||||||
|     } |     } | ||||||
|     else { |     else { | ||||||
|         bounds->ar_computed_line += ldelta; |         bounds->opaque.computed_line += ldelta; | ||||||
|         bounds->ar_line = bounds->ar_computed_line; |         bounds->ar_line = bounds->opaque.computed_line; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static inline int | static inline int | ||||||
| at_end(PyCodeAddressRange *bounds) { | at_end(PyCodeAddressRange *bounds) { | ||||||
|     return ((unsigned char *)bounds->lo_next)[0] == 255; |     return bounds->opaque.lo_next >= bounds->opaque.limit; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| int | int | ||||||
|  | @ -1256,12 +1255,13 @@ PyCode_Addr2Line(PyCodeObject *co, int addrq) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void | void | ||||||
| PyLineTable_InitAddressRange(char *linetable, int firstlineno, PyCodeAddressRange *range) | PyLineTable_InitAddressRange(char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range) | ||||||
| { | { | ||||||
|     range->lo_next = linetable; |     range->opaque.lo_next = linetable; | ||||||
|  |     range->opaque.limit = range->opaque.lo_next + length; | ||||||
|     range->ar_start = -1; |     range->ar_start = -1; | ||||||
|     range->ar_end = 0; |     range->ar_end = 0; | ||||||
|     range->ar_computed_line = firstlineno; |     range->opaque.computed_line = firstlineno; | ||||||
|     range->ar_line = -1; |     range->ar_line = -1; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -1269,7 +1269,8 @@ int | ||||||
| _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds) | _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds) | ||||||
| { | { | ||||||
|     char *linetable = PyBytes_AS_STRING(co->co_linetable); |     char *linetable = PyBytes_AS_STRING(co->co_linetable); | ||||||
|     PyLineTable_InitAddressRange(linetable, co->co_firstlineno, bounds); |     Py_ssize_t length = PyBytes_GET_SIZE(co->co_linetable); | ||||||
|  |     PyLineTable_InitAddressRange(linetable, length, co->co_firstlineno, bounds); | ||||||
|     return bounds->ar_line; |     return bounds->ar_line; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -39,7 +39,6 @@ Note that the end - start value is always positive. | ||||||
| Finally, in order to fit into a single byte we need to convert start deltas to the range 0 <= delta <= 254, | Finally, in order to fit into a single byte we need to convert start deltas to the range 0 <= delta <= 254, | ||||||
| and line deltas to the range -127  <= delta <= 127. | and line deltas to the range -127  <= delta <= 127. | ||||||
| A line delta of -128 is used to indicate no line number. | A line delta of -128 is used to indicate no line number. | ||||||
| A start delta of 255 is used as a sentinel to mark the end of the table. |  | ||||||
| Also note that a delta of zero indicates that there are no bytecodes in the given range, | Also note that a delta of zero indicates that there are no bytecodes in the given range, | ||||||
| which means we can use an invalid line number for that range. | which means we can use an invalid line number for that range. | ||||||
| 
 | 
 | ||||||
|  | @ -54,7 +53,6 @@ Final form: | ||||||
|     16              +1 |     16              +1 | ||||||
|     0               +127 (line 135, but the range is empty as no bytecodes are at line 135) |     0               +127 (line 135, but the range is empty as no bytecodes are at line 135) | ||||||
|     4               +73 |     4               +73 | ||||||
|     255 (end mark)  --- |  | ||||||
| 
 | 
 | ||||||
| Iterating over the table. | Iterating over the table. | ||||||
| ------------------------- | ------------------------- | ||||||
|  | @ -68,8 +66,6 @@ def co_lines(code): | ||||||
|     end = 0 |     end = 0 | ||||||
|     table_iter = iter(code.internal_line_table): |     table_iter = iter(code.internal_line_table): | ||||||
|     for sdelta, ldelta in table_iter: |     for sdelta, ldelta in table_iter: | ||||||
|         if sdelta == 255: |  | ||||||
|             break |  | ||||||
|         if ldelta == 0: # No change to line number, just accumulate changes to end |         if ldelta == 0: # No change to line number, just accumulate changes to end | ||||||
|             end += odelta |             end += odelta | ||||||
|             continue |             continue | ||||||
|  |  | ||||||
|  | @ -6959,10 +6959,6 @@ assemble(struct compiler *c, int addNone) | ||||||
|     if (!assemble_line_range(&a)) { |     if (!assemble_line_range(&a)) { | ||||||
|         return 0; |         return 0; | ||||||
|     } |     } | ||||||
|     /* Emit sentinel at end of line number table */ |  | ||||||
|     if (!assemble_emit_linetable_pair(&a, 255, -128)) { |  | ||||||
|         goto error; |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     if (_PyBytes_Resize(&a.a_lnotab, a.a_lnotab_off) < 0) { |     if (_PyBytes_Resize(&a.a_lnotab, a.a_lnotab_off) < 0) { | ||||||
|         goto error; |         goto error; | ||||||
|  |  | ||||||
|  | @ -8,5 +8,5 @@ const unsigned char _Py_M__hello[] = { | ||||||
|     5,112,114,105,110,116,169,0,114,1,0,0,0,114,1,0, |     5,112,114,105,110,116,169,0,114,1,0,0,0,114,1,0, | ||||||
|     0,0,122,14,60,102,114,111,122,101,110,32,104,101,108,108, |     0,0,122,14,60,102,114,111,122,101,110,32,104,101,108,108, | ||||||
|     111,62,218,8,60,109,111,100,117,108,101,62,1,0,0,0, |     111,62,218,8,60,109,111,100,117,108,101,62,1,0,0,0, | ||||||
|     115,6,0,0,0,4,0,12,1,255,128, |     115,4,0,0,0,4,0,12,1, | ||||||
| }; | }; | ||||||
|  |  | ||||||
							
								
								
									
										3203
									
								
								Python/importlib.h
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										3203
									
								
								Python/importlib.h
									
										
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
							
								
								
									
										4800
									
								
								Python/importlib_external.h
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										4800
									
								
								Python/importlib_external.h
									
										
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Mark Shannon
						Mark Shannon