mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	gh-100227: Move the Dict of Interned Strings to PyInterpreterState (gh-102339)
We can revisit the options for keeping the interned-strings dict global later, if desired. For now, keeping it global seems quite complex, so we've gone with the simpler solution of isolating it per interpreter in the meantime. https://github.com/python/cpython/issues/100227
This commit is contained in:
		
							parent
							
								
									7703def37e
								
							
						
					
					
						commit
						ba65a065cf
					
				
					 6 changed files with 727 additions and 718 deletions
				
			
		|  | @ -23,13 +23,6 @@ extern "C" { | |||
| // Only immutable objects should be considered runtime-global.
 | ||||
| // All others must be per-interpreter.
 | ||||
| 
 | ||||
| #define _Py_CACHED_OBJECT(NAME) \ | ||||
|     _PyRuntime.cached_objects.NAME | ||||
| 
 | ||||
| struct _Py_cached_objects { | ||||
|     PyObject *interned_strings; | ||||
| }; | ||||
| 
 | ||||
| #define _Py_GLOBAL_OBJECT(NAME) \ | ||||
|     _PyRuntime.static_objects.NAME | ||||
| #define _Py_SINGLETON(NAME) \ | ||||
|  | @ -65,6 +58,8 @@ struct _Py_static_objects { | |||
|     (interp)->cached_objects.NAME | ||||
| 
 | ||||
| struct _Py_interp_cached_objects { | ||||
|     PyObject *interned_strings; | ||||
| 
 | ||||
|     /* AST */ | ||||
|     PyObject *str_replace_inf; | ||||
| 
 | ||||
|  |  | |||
|  | @ -163,7 +163,6 @@ typedef struct pyruntimestate { | |||
|     } types; | ||||
| 
 | ||||
|     /* All the objects that are shared by the runtime's interpreters. */ | ||||
|     struct _Py_cached_objects cached_objects; | ||||
|     struct _Py_static_objects static_objects; | ||||
| 
 | ||||
|     /* The following fields are here to avoid allocation during init.
 | ||||
|  |  | |||
|  | @ -59,6 +59,7 @@ struct _Py_unicode_state { | |||
|     struct _Py_unicode_ids ids; | ||||
| }; | ||||
| 
 | ||||
| extern void _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p); | ||||
| extern void _PyUnicode_ClearInterned(PyInterpreterState *interp); | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										1332
									
								
								Include/internal/pycore_unicodeobject_generated.h
									
										
									
										generated
									
									
									
								
							
							
						
						
									
										1332
									
								
								Include/internal/pycore_unicodeobject_generated.h
									
										
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -231,14 +231,32 @@ static inline PyObject* unicode_new_empty(void) | |||
|    Another way to look at this is that to say that the actual reference | ||||
|    count of a string is:  s->ob_refcnt + (s->state ? 2 : 0) | ||||
| */ | ||||
| /* Return the interned-strings dict cached on `interp` (the | ||||
|    `interned_strings` slot of its cached objects).  The slot is NULL | ||||
|    until init_interned_dict() fills it and again after | ||||
|    clear_interned_dict() resets it.  No new reference is created here. | ||||
|    NOTE(review): the `void` signature and the `_Py_CACHED_OBJECT` return | ||||
|    below appear to be the pre-change lines shown by the diff view. */ | ||||
| static inline PyObject *get_interned_dict(void) | ||||
| static inline PyObject *get_interned_dict(PyInterpreterState *interp) | ||||
| { | ||||
|     return _Py_CACHED_OBJECT(interned_strings); | ||||
|     return _Py_INTERP_CACHED_OBJECT(interp, interned_strings); | ||||
| } | ||||
| 
 | ||||
| static inline void set_interned_dict(PyObject *dict) | ||||
| /* Create the interned-strings dict for `interp` and store it in the | ||||
|    interpreter's cached objects.  The slot must still be NULL on entry. | ||||
|    Returns 0 on success, or -1 if PyDict_New() fails (the slot is left | ||||
|    untouched in that case). */ | ||||
| static int | ||||
| init_interned_dict(PyInterpreterState *interp) | ||||
| { | ||||
|     assert(get_interned_dict(interp) == NULL); | ||||
|     PyObject *interned = PyDict_New(); | ||||
|     if (interned == NULL) { | ||||
|         return -1; | ||||
|     } | ||||
|     /* The interpreter state takes over the new reference; it is | ||||
|        released again in clear_interned_dict(). */ | ||||
|     _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = interned; | ||||
|     return 0; | ||||
| } | ||||
| 
 | ||||
| /* Drop the interned-strings dict held by `interp`, if there is one: | ||||
|    empty it, release the reference, then reset the slot to NULL. | ||||
|    Does nothing when the slot is already NULL. */ | ||||
| static void | ||||
| clear_interned_dict(PyInterpreterState *interp) | ||||
| { | ||||
|     PyObject *dict = get_interned_dict(interp); | ||||
|     if (dict == NULL) { | ||||
|         return; | ||||
|     } | ||||
|     PyDict_Clear(dict); | ||||
|     Py_DECREF(dict); | ||||
|     _Py_INTERP_CACHED_OBJECT(interp, interned_strings) = NULL; | ||||
| } | ||||
| 
 | ||||
| #define _Py_RETURN_UNICODE_EMPTY()   \ | ||||
|  | @ -1520,12 +1538,12 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end, | |||
| static void | ||||
| unicode_dealloc(PyObject *unicode) | ||||
| { | ||||
|     PyInterpreterState *interp = _PyInterpreterState_GET(); | ||||
| #ifdef Py_DEBUG | ||||
|     if (!unicode_is_finalizing() && unicode_is_singleton(unicode)) { | ||||
|         _Py_FatalRefcountError("deallocating an Unicode singleton"); | ||||
|     } | ||||
| #endif | ||||
|     PyObject *interned = get_interned_dict(); | ||||
|     if (PyUnicode_CHECK_INTERNED(unicode)) { | ||||
|         /* Revive the dead object temporarily. PyDict_DelItem() removes two
 | ||||
|            references (key and value) which were ignored by | ||||
|  | @ -1534,6 +1552,8 @@ unicode_dealloc(PyObject *unicode) | |||
|            PyDict_DelItem(). */ | ||||
|         assert(Py_REFCNT(unicode) == 0); | ||||
|         Py_SET_REFCNT(unicode, 3); | ||||
|         PyObject *interned = get_interned_dict(interp); | ||||
|         assert(interned != NULL); | ||||
|         if (PyDict_DelItem(interned, unicode) != 0) { | ||||
|             _PyErr_WriteUnraisableMsg("deletion of interned string failed", | ||||
|                                       NULL); | ||||
|  | @ -14529,26 +14549,20 @@ _PyUnicode_InitState(PyInterpreterState *interp) | |||
| PyStatus | ||||
| _PyUnicode_InitGlobalObjects(PyInterpreterState *interp) | ||||
| { | ||||
|     if (!_Py_IsMainInterpreter(interp)) { | ||||
|         return _PyStatus_OK(); | ||||
|     } | ||||
| 
 | ||||
|     // Initialize the global interned dict
 | ||||
|     PyObject *interned = PyDict_New(); | ||||
|     if (interned == NULL) { | ||||
|     if (init_interned_dict(interp)) { | ||||
|         PyErr_Clear(); | ||||
|         return _PyStatus_ERR("failed to create interned dict"); | ||||
|     } | ||||
| 
 | ||||
|     set_interned_dict(interned); | ||||
| 
 | ||||
|     if (_Py_IsMainInterpreter(interp)) { | ||||
|         /* Intern statically allocated string identifiers and deepfreeze strings.
 | ||||
|          * This must be done before any module initialization so that statically | ||||
|          * allocated string identifiers are used instead of heap allocated strings. | ||||
|          * Deepfreeze uses the interned identifiers if present to save space | ||||
|          * else generates them and they are interned to speed up dict lookups. | ||||
|         */ | ||||
|     _PyUnicode_InitStaticStrings(); | ||||
|         _PyUnicode_InitStaticStrings(interp); | ||||
| 
 | ||||
| #ifdef Py_DEBUG | ||||
|         assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1)); | ||||
|  | @ -14557,6 +14571,7 @@ _PyUnicode_InitGlobalObjects(PyInterpreterState *interp) | |||
|             assert(_PyUnicode_CheckConsistency(LATIN1(i), 1)); | ||||
|         } | ||||
| #endif | ||||
|     } | ||||
| 
 | ||||
|     return _PyStatus_OK(); | ||||
| } | ||||
|  | @ -14586,7 +14601,7 @@ _PyUnicode_InitTypes(PyInterpreterState *interp) | |||
| 
 | ||||
| 
 | ||||
| void | ||||
| PyUnicode_InternInPlace(PyObject **p) | ||||
| _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p) | ||||
| { | ||||
|     PyObject *s = *p; | ||||
| #ifdef Py_DEBUG | ||||
|  | @ -14608,7 +14623,7 @@ PyUnicode_InternInPlace(PyObject **p) | |||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     PyObject *interned = get_interned_dict(); | ||||
|     PyObject *interned = get_interned_dict(interp); | ||||
|     assert(interned != NULL); | ||||
| 
 | ||||
|     PyObject *t = PyDict_SetDefault(interned, s, s); | ||||
|  | @ -14629,6 +14644,13 @@ PyUnicode_InternInPlace(PyObject **p) | |||
|     _PyUnicode_STATE(s).interned = 1; | ||||
| } | ||||
| 
 | ||||
| /* Intern *p for the current interpreter by forwarding to | ||||
|    _PyUnicode_InternInPlace() with the state returned by | ||||
|    _PyInterpreterState_GET(). */ | ||||
| void | ||||
| PyUnicode_InternInPlace(PyObject **p) | ||||
| { | ||||
|     _PyUnicode_InternInPlace(_PyInterpreterState_GET(), p); | ||||
| } | ||||
| 
 | ||||
| // Function kept for the stable ABI.
 | ||||
| PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); | ||||
| void | ||||
|  | @ -14653,12 +14675,7 @@ PyUnicode_InternFromString(const char *cp) | |||
| void | ||||
| _PyUnicode_ClearInterned(PyInterpreterState *interp) | ||||
| { | ||||
|     if (!_Py_IsMainInterpreter(interp)) { | ||||
|         // interned dict is shared by all interpreters
 | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     PyObject *interned = get_interned_dict(); | ||||
|     PyObject *interned = get_interned_dict(interp); | ||||
|     if (interned == NULL) { | ||||
|         return; | ||||
|     } | ||||
|  | @ -14693,9 +14710,7 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp) | |||
|             total_length); | ||||
| #endif | ||||
| 
 | ||||
|     PyDict_Clear(interned); | ||||
|     Py_DECREF(interned); | ||||
|     set_interned_dict(NULL); | ||||
|     clear_interned_dict(interp); | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
|  | @ -15108,7 +15123,7 @@ _PyUnicode_EnableLegacyWindowsFSEncoding(void) | |||
| /* Return non-zero when the main interpreter's interned-strings dict is | ||||
|    NULL, i.e. it has not been created yet or has already been cleared. | ||||
|    unicode_dealloc() consults this in Py_DEBUG builds before reporting | ||||
|    the deallocation of a singleton as fatal. | ||||
|    NOTE(review): the argument-less get_interned_dict() return below | ||||
|    appears to be the pre-change line shown by the diff view. */ | ||||
| static inline int | ||||
| unicode_is_finalizing(void) | ||||
| { | ||||
|     return (get_interned_dict() == NULL); | ||||
|     return (get_interned_dict(_PyInterpreterState_Main()) == NULL); | ||||
| } | ||||
| #endif | ||||
| 
 | ||||
|  | @ -15131,14 +15146,13 @@ _PyUnicode_Fini(PyInterpreterState *interp) | |||
| { | ||||
|     struct _Py_unicode_state *state = &interp->unicode; | ||||
| 
 | ||||
|     if (_Py_IsMainInterpreter(interp)) { | ||||
|     // _PyUnicode_ClearInterned() must be called before _PyUnicode_Fini()
 | ||||
|         assert(get_interned_dict() == NULL); | ||||
|         // bpo-47182: force a unicodedata CAPI capsule re-import on
 | ||||
|         // subsequent initialization of main interpreter.
 | ||||
|     } | ||||
|     assert(get_interned_dict(interp) == NULL); | ||||
| 
 | ||||
|     _PyUnicode_FiniEncodings(&state->fs_codec); | ||||
| 
 | ||||
|     // bpo-47182: force a unicodedata CAPI capsule re-import on
 | ||||
|     // subsequent initialization of interpreter.
 | ||||
|     interp->unicode.ucnhash_capi = NULL; | ||||
| 
 | ||||
|     unicode_clear_identifiers(state); | ||||
|  |  | |||
|  | @ -354,14 +354,14 @@ def generate_static_strings_initializer(identifiers, strings): | |||
|         printer.write(before) | ||||
|         printer.write(START) | ||||
|         printer.write("static inline void") | ||||
|         with printer.block("_PyUnicode_InitStaticStrings(void)"): | ||||
|         with printer.block("_PyUnicode_InitStaticStrings(PyInterpreterState *interp)"): | ||||
|             printer.write(f'PyObject *string;') | ||||
|             for i in sorted(identifiers): | ||||
|                 # This use of _Py_ID() is ignored by iter_global_strings() | ||||
|                 # since iter_files() ignores .h files. | ||||
|                 printer.write(f'string = &_Py_ID({i});') | ||||
|                 printer.write(f'assert(_PyUnicode_CheckConsistency(string, 1));') | ||||
|                 printer.write(f'PyUnicode_InternInPlace(&string);') | ||||
|                 printer.write(f'_PyUnicode_InternInPlace(interp, &string);') | ||||
|             # XXX What about "strings"? | ||||
|         printer.write(END) | ||||
|         printer.write(after) | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Eric Snow
						Eric Snow