mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	Patch #923098: Share interned strings in marshal.
This commit is contained in:
		
							parent
							
								
									8d97e33bb7
								
							
						
					
					
						commit
						ef82d2fdfe
					
				
					 6 changed files with 122 additions and 31 deletions
				
			
		|  | @ -283,20 +283,31 @@ data must be opened in binary mode. | |||
| 
 | ||||
| Numeric values are stored with the least significant byte first. | ||||
| 
 | ||||
| \begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file} | ||||
| The module supports two versions of the data format: version 0 is the | ||||
| historical version, version 1 (new in Python 2.4) shares interned | ||||
| strings in the file, and upon unmarshalling. \var{Py_MARSHAL_VERSION} | ||||
| indicates the current file format (currently 1). | ||||
| 
 | ||||
| \begin{cfuncdesc}{void}{PyMarshal_WriteLongToFile}{long value, FILE *file, int version} | ||||
|   Marshal a \ctype{long} integer, \var{value}, to \var{file}.  This | ||||
|   will only write the least-significant 32 bits of \var{value}, | ||||
|   regardless of the size of the native \ctype{long} type. | ||||
| 
 | ||||
|   \versionchanged[\var{version} indicates the file format]{2.4} | ||||
| \end{cfuncdesc} | ||||
| 
 | ||||
| \begin{cfuncdesc}{void}{PyMarshal_WriteObjectToFile}{PyObject *value, | ||||
|                                                      FILE *file} | ||||
|                                                      FILE *file, int version} | ||||
|   Marshal a Python object, \var{value}, to \var{file}. | ||||
| 
 | ||||
|   \versionchanged[\var{version} indicates the file format]{2.4} | ||||
| \end{cfuncdesc} | ||||
| 
 | ||||
| \begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value} | ||||
| \begin{cfuncdesc}{PyObject*}{PyMarshal_WriteObjectToString}{PyObject *value, int version} | ||||
|   Return a string object containing the marshalled representation of | ||||
|   \var{value}. | ||||
| 
 | ||||
|   \versionchanged[\var{version} indicates the file format]{2.4} | ||||
| \end{cfuncdesc} | ||||
| 
 | ||||
| The following functions allow marshalled values to be read back in. | ||||
|  |  | |||
|  | @ -73,6 +73,9 @@ The module defines these functions: | |||
|   a \exception{ValueError} exception is raised --- but garbage data | ||||
|   will also be written to the file.  The object will not be properly | ||||
|   read back by \function{load()}. | ||||
| 
 | ||||
|   \versionadded[The \var{version} argument indicates the data | ||||
|   format that \code{dump} should use.]{2.4} | ||||
| \end{funcdesc} | ||||
| 
 | ||||
| \begin{funcdesc}{load}{file} | ||||
|  | @ -86,11 +89,14 @@ The module defines these functions: | |||
|   \code{None} for the unmarshallable type.} | ||||
| \end{funcdesc} | ||||
| 
 | ||||
| \begin{funcdesc}{dumps}{value} | ||||
| \begin{funcdesc}{dumps}{value\optional{, version}} | ||||
|   Return the string that would be written to a file by | ||||
|   \code{dump(\var{value}, \var{file})}.  The value must be a supported | ||||
|   type.  Raise a \exception{ValueError} exception if value has (or | ||||
|   contains an object that has) an unsupported type. | ||||
| 
 | ||||
|   \versionadded[The \var{version} argument indicates the data | ||||
|   format that \code{dumps} should use.]{2.4} | ||||
| \end{funcdesc} | ||||
| 
 | ||||
| \begin{funcdesc}{loads}{string} | ||||
|  | @ -98,3 +104,13 @@ The module defines these functions: | |||
|   \exception{EOFError}, \exception{ValueError} or | ||||
|   \exception{TypeError}.  Extra characters in the string are ignored. | ||||
| \end{funcdesc} | ||||
| 
 | ||||
| In addition, the following constants are defined: | ||||
| 
 | ||||
| \begin{datadesc}{version} | ||||
|   Indicates the format that the module uses. Version 0 is the | ||||
|   historical format, version 1 (added in Python 2.4) shares | ||||
|   interned strings. The current version is 1. | ||||
| 
 | ||||
|   \versionadded{2.4} | ||||
| \end{datadesc} | ||||
|  | @ -7,9 +7,11 @@ | |||
| extern "C" { | ||||
| #endif | ||||
| 
 | ||||
| PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *); | ||||
| PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *); | ||||
| PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *); | ||||
| #define Py_MARSHAL_VERSION 1 | ||||
| 
 | ||||
| PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *, int); | ||||
| PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *, int); | ||||
| PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *, int); | ||||
| 
 | ||||
| PyAPI_FUNC(long) PyMarshal_ReadLongFromFile(FILE *); | ||||
| PyAPI_FUNC(int) PyMarshal_ReadShortFromFile(FILE *); | ||||
|  |  | |||
|  | @ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1? | |||
| Core and builtins | ||||
| ----------------- | ||||
| 
 | ||||
| - marshal now shares interned strings. This change introduces | ||||
|   a new .pyc magic. | ||||
| 
 | ||||
| - Bug #966623. classes created with type() in an exec(, {}) don't | ||||
|   have a __module__, but code in typeobject assumed it would always | ||||
|   be there. | ||||
|  |  | |||
|  | @ -26,9 +26,9 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *); | |||
|    a .pyc file in text mode the magic number will be wrong; also, the | ||||
|    Apple MPW compiler swaps their values, botching string constants. | ||||
| 
 | ||||
|    Apparently, there was a distinction made between even and odd | ||||
|    bytecodes that is related to Unicode.  The details aren't clear, | ||||
|    but the magic number has been odd for a long time. | ||||
|    The magic numbers must be spaced at least 2 values apart, as the | ||||
|    -U interpreter flag causes MAGIC+1 to be used.  They have been | ||||
|    odd numbers for some time now. | ||||
| 
 | ||||
|    There were a variety of old schemes for setting the magic number. | ||||
|    The current working scheme is to increment the previous value by | ||||
|  | @ -47,9 +47,9 @@ extern time_t PyOS_GetLastModificationTime(char *, FILE *); | |||
|        Python 2.3a0: 62011 | ||||
|        Python 2.3a0: 62021 | ||||
|        Python 2.3a0: 62011 (!) | ||||
|        Python 2.4a0: 62031 | ||||
|        Python 2.4a0: 62041 | ||||
| */ | ||||
| #define MAGIC (62031 | ((long)'\r'<<16) | ((long)'\n'<<24)) | ||||
| #define MAGIC (62041 | ((long)'\r'<<16) | ((long)'\n'<<24)) | ||||
| 
 | ||||
| /* Magic word as global; note that _PyImport_Init() can change the
 | ||||
|    value of this global to accommodate for alterations of how the | ||||
|  | @ -797,10 +797,10 @@ write_compiled_module(PyCodeObject *co, char *cpathname, long mtime) | |||
| 				"# can't create %s\n", cpathname); | ||||
| 		return; | ||||
| 	} | ||||
| 	PyMarshal_WriteLongToFile(pyc_magic, fp); | ||||
| 	PyMarshal_WriteLongToFile(pyc_magic, fp, Py_MARSHAL_VERSION); | ||||
| 	/* First write a 0 for mtime */ | ||||
| 	PyMarshal_WriteLongToFile(0L, fp); | ||||
| 	PyMarshal_WriteObjectToFile((PyObject *)co, fp); | ||||
| 	PyMarshal_WriteLongToFile(0L, fp, Py_MARSHAL_VERSION); | ||||
| 	PyMarshal_WriteObjectToFile((PyObject *)co, fp, Py_MARSHAL_VERSION); | ||||
| 	if (fflush(fp) != 0 || ferror(fp)) { | ||||
| 		if (Py_VerboseFlag) | ||||
| 			PySys_WriteStderr("# can't write %s\n", cpathname); | ||||
|  | @ -811,7 +811,7 @@ write_compiled_module(PyCodeObject *co, char *cpathname, long mtime) | |||
| 	} | ||||
| 	/* Now write the true mtime */ | ||||
| 	fseek(fp, 4L, 0); | ||||
| 	PyMarshal_WriteLongToFile(mtime, fp); | ||||
| 	PyMarshal_WriteLongToFile(mtime, fp, Py_MARSHAL_VERSION); | ||||
| 	fflush(fp); | ||||
| 	fclose(fp); | ||||
| 	if (Py_VerboseFlag) | ||||
|  |  | |||
|  | @ -27,6 +27,8 @@ | |||
| #define TYPE_COMPLEX	'x' | ||||
| #define TYPE_LONG	'l' | ||||
| #define TYPE_STRING	's' | ||||
| #define TYPE_INTERNED	't' | ||||
| #define TYPE_STRINGREF	'R' | ||||
| #define TYPE_TUPLE	'(' | ||||
| #define TYPE_LIST	'[' | ||||
| #define TYPE_DICT	'{' | ||||
|  | @ -42,6 +44,7 @@ typedef struct { | |||
| 	PyObject *str; | ||||
| 	char *ptr; | ||||
| 	char *end; | ||||
| 	PyObject *strings; /* dict on marshal, list on unmarshal */ | ||||
| } WFILE; | ||||
| 
 | ||||
| #define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \ | ||||
|  | @ -189,7 +192,24 @@ w_object(PyObject *v, WFILE *p) | |||
| 	} | ||||
| #endif | ||||
| 	else if (PyString_Check(v)) { | ||||
| 		w_byte(TYPE_STRING, p); | ||||
| 		if (p->strings && PyString_CHECK_INTERNED(v)) { | ||||
| 			PyObject *o = PyDict_GetItem(p->strings, v); | ||||
| 			if (o) { | ||||
| 				long w = PyInt_AsLong(o); | ||||
| 				w_byte(TYPE_STRINGREF, p); | ||||
| 				w_long(w, p); | ||||
| 				goto exit; | ||||
| 			} | ||||
| 			else { | ||||
| 				o = PyInt_FromLong(PyDict_Size(p->strings)); | ||||
| 				PyDict_SetItem(p->strings, v, o); | ||||
| 				Py_DECREF(o); | ||||
| 				w_byte(TYPE_INTERNED, p); | ||||
| 			} | ||||
| 		} | ||||
| 		else { | ||||
| 			w_byte(TYPE_STRING, p); | ||||
| 		} | ||||
| 		n = PyString_GET_SIZE(v); | ||||
| 		w_long((long)n, p); | ||||
| 		w_string(PyString_AS_STRING(v), n, p); | ||||
|  | @ -269,28 +289,32 @@ w_object(PyObject *v, WFILE *p) | |||
| 		w_byte(TYPE_UNKNOWN, p); | ||||
| 		p->error = 1; | ||||
| 	} | ||||
| 
 | ||||
|    exit: | ||||
| 	p->depth--; | ||||
| } | ||||
| 
 | ||||
| /* Write the long x to the stdio stream fp via w_long(). | ||||
|    version currently has no effect for writing longs: wf.strings is | ||||
|    left NULL, so no interned-string table is allocated here. */ | ||||
| void | ||||
| PyMarshal_WriteLongToFile(long x, FILE *fp) | ||||
| PyMarshal_WriteLongToFile(long x, FILE *fp, int version) | ||||
| { | ||||
| 	WFILE wf; | ||||
| 	wf.fp = fp; | ||||
| 	wf.error = 0; | ||||
| 	wf.depth = 0; | ||||
| 	wf.strings = NULL; | ||||
| 	w_long(x, &wf); | ||||
| } | ||||
| 
 | ||||
| /* Marshal the object x to the stdio stream fp via w_object(). | ||||
|    For version > 0 a dict is allocated in wf.strings; w_object() uses it | ||||
|    to map each interned string to its index so that repeats are written | ||||
|    as TYPE_STRINGREF.  For version <= 0 wf.strings stays NULL and | ||||
|    strings are written as plain TYPE_STRING. | ||||
|    NOTE(review): if PyDict_New() fails, wf.strings is NULL and the | ||||
|    write proceeds without sharing -- confirm that is intended. */ | ||||
| void | ||||
| PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp) | ||||
| PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version) | ||||
| { | ||||
| 	WFILE wf; | ||||
| 	wf.fp = fp; | ||||
| 	wf.error = 0; | ||||
| 	wf.depth = 0; | ||||
| 	wf.strings = (version > 0) ? PyDict_New() : NULL; | ||||
| 	w_object(x, &wf); | ||||
| 	Py_XDECREF(wf.strings); | ||||
| } | ||||
| 
 | ||||
| typedef WFILE RFILE; /* Same struct with different invariants */ | ||||
|  | @ -491,6 +515,7 @@ r_object(RFILE *p) | |||
| 		} | ||||
| #endif | ||||
| 
 | ||||
| 	case TYPE_INTERNED: | ||||
| 	case TYPE_STRING: | ||||
| 		n = r_long(p); | ||||
| 		if (n < 0) { | ||||
|  | @ -506,6 +531,16 @@ r_object(RFILE *p) | |||
| 					"EOF read where object expected"); | ||||
| 			} | ||||
| 		} | ||||
| 		if (type == TYPE_INTERNED) { | ||||
| 			PyString_InternInPlace(&v); | ||||
| 			PyList_Append(p->strings, v); | ||||
| 		} | ||||
| 		return v; | ||||
| 
 | ||||
| 	case TYPE_STRINGREF: | ||||
| 		n = r_long(p); | ||||
| 		v = PyList_GET_ITEM(p->strings, n); | ||||
| 		Py_INCREF(v); | ||||
| 		return v; | ||||
| 
 | ||||
| #ifdef Py_USING_UNICODE | ||||
|  | @ -673,6 +708,7 @@ PyMarshal_ReadShortFromFile(FILE *fp) | |||
| { | ||||
| 	RFILE rf; | ||||
| 	rf.fp = fp; | ||||
| 	rf.strings = NULL; | ||||
| 	return r_short(&rf); | ||||
| } | ||||
| 
 | ||||
|  | @ -681,6 +717,7 @@ PyMarshal_ReadLongFromFile(FILE *fp) | |||
| { | ||||
| 	RFILE rf; | ||||
| 	rf.fp = fp; | ||||
| 	rf.strings = NULL; | ||||
| 	return r_long(&rf); | ||||
| } | ||||
| 
 | ||||
|  | @ -747,22 +784,30 @@ PyObject * | |||
| PyMarshal_ReadObjectFromFile(FILE *fp) | ||||
| { | ||||
| 	/* Unmarshal one object from fp.  rf.strings is the list that | ||||
| 	   r_object() appends each TYPE_INTERNED string to and indexes | ||||
| 	   for TYPE_STRINGREF; it lives only for this call. */ | ||||
| 	RFILE rf; | ||||
| 	PyObject *result; | ||||
| 	rf.fp = fp; | ||||
| 	return read_object(&rf); | ||||
| 	rf.strings = PyList_New(0); | ||||
| 	/* Py_DECREF() below must never see NULL; give up if the list | ||||
| 	   could not be allocated (PyList_New() has set the exception). */ | ||||
| 	if (rf.strings == NULL) | ||||
| 		return NULL; | ||||
| 	result = r_object(&rf); | ||||
| 	Py_DECREF(rf.strings); | ||||
| 	return result; | ||||
| } | ||||
| 
 | ||||
| PyObject * | ||||
| PyMarshal_ReadObjectFromString(char *str, int len) | ||||
| { | ||||
| 	/* Unmarshal one object from the len bytes starting at str. | ||||
| 	   rf.strings is the list that r_object() appends each | ||||
| 	   TYPE_INTERNED string to and indexes for TYPE_STRINGREF. */ | ||||
| 	RFILE rf; | ||||
| 	PyObject *result; | ||||
| 	rf.fp = NULL; | ||||
| 	rf.ptr = str; | ||||
| 	rf.end = str + len; | ||||
| 	return read_object(&rf); | ||||
| 	rf.strings = PyList_New(0); | ||||
| 	/* Py_DECREF() below must never see NULL; give up if the list | ||||
| 	   could not be allocated (PyList_New() has set the exception). */ | ||||
| 	if (rf.strings == NULL) | ||||
| 		return NULL; | ||||
| 	result = r_object(&rf); | ||||
| 	Py_DECREF(rf.strings); | ||||
| 	return result; | ||||
| } | ||||
| 
 | ||||
| PyObject * | ||||
| PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */ | ||||
| PyMarshal_WriteObjectToString(PyObject *x, int version) | ||||
| { | ||||
| 	WFILE wf; | ||||
| 	wf.fp = NULL; | ||||
|  | @ -773,7 +818,9 @@ PyMarshal_WriteObjectToString(PyObject *x) /* wrs_object() */ | |||
| 	wf.end = wf.ptr + PyString_Size(wf.str); | ||||
| 	wf.error = 0; | ||||
| 	wf.depth = 0; | ||||
| 	wf.strings = (version > 0) ? PyDict_New() : NULL; | ||||
| 	w_object(x, &wf); | ||||
| 	Py_XDECREF(wf.strings); | ||||
| 	if (wf.str != NULL) | ||||
| 		_PyString_Resize(&wf.str, | ||||
| 		    (int) (wf.ptr - | ||||
|  | @ -796,7 +843,8 @@ marshal_dump(PyObject *self, PyObject *args) | |||
| 	WFILE wf; | ||||
| 	PyObject *x; | ||||
| 	PyObject *f; | ||||
| 	if (!PyArg_ParseTuple(args, "OO:dump", &x, &f)) | ||||
| 	int version = Py_MARSHAL_VERSION; | ||||
| 	if (!PyArg_ParseTuple(args, "OO|i:dump", &x, &f, &version)) | ||||
| 		return NULL; | ||||
| 	if (!PyFile_Check(f)) { | ||||
| 		PyErr_SetString(PyExc_TypeError, | ||||
|  | @ -808,7 +856,9 @@ marshal_dump(PyObject *self, PyObject *args) | |||
| 	wf.ptr = wf.end = NULL; | ||||
| 	wf.error = 0; | ||||
| 	wf.depth = 0; | ||||
| 	wf.strings = (version > 0) ? PyDict_New() : 0; | ||||
| 	w_object(x, &wf); | ||||
| 	Py_XDECREF(wf.strings); | ||||
| 	if (wf.error) { | ||||
| 		PyErr_SetString(PyExc_ValueError, | ||||
| 				(wf.error==1)?"unmarshallable object" | ||||
|  | @ -823,7 +873,7 @@ static PyObject * | |||
| marshal_load(PyObject *self, PyObject *args) | ||||
| { | ||||
| 	/* marshal.load(file): unmarshal one object from an open file | ||||
| 	   object.  rf.strings collects interned strings for the duration | ||||
| 	   of the read. */ | ||||
| 	RFILE rf; | ||||
| 	PyObject *f; | ||||
| 	PyObject *f, *result; | ||||
| 	if (!PyArg_ParseTuple(args, "O:load", &f)) | ||||
| 		return NULL; | ||||
| 	if (!PyFile_Check(f)) { | ||||
|  | @ -832,16 +882,20 @@ marshal_load(PyObject *self, PyObject *args) | |||
| 		return NULL; | ||||
| 	} | ||||
| 	rf.fp = PyFile_AsFile(f); | ||||
| 	return read_object(&rf); | ||||
| 	rf.strings = PyList_New(0); | ||||
| 	/* Py_DECREF() below must never see NULL; give up if the list | ||||
| 	   could not be allocated (PyList_New() has set the exception). */ | ||||
| 	if (rf.strings == NULL) | ||||
| 		return NULL; | ||||
| 	result = read_object(&rf); | ||||
| 	Py_DECREF(rf.strings); | ||||
| 	return result; | ||||
| } | ||||
| 
 | ||||
| /* marshal.dumps(value[, version]): return the marshalled form of value | ||||
|    as a string.  version defaults to Py_MARSHAL_VERSION. */ | ||||
| static PyObject * | ||||
| marshal_dumps(PyObject *self, PyObject *args) | ||||
| { | ||||
| 	PyObject *x; | ||||
| 	if (!PyArg_ParseTuple(args, "O:dumps", &x)) | ||||
| 	int version = Py_MARSHAL_VERSION; | ||||
| 	/* The "i" unit stores through an int *, so pass the address of | ||||
| 	   version, not its value. */ | ||||
| 	if (!PyArg_ParseTuple(args, "O|i:dumps", &x, &version)) | ||||
| 		return NULL; | ||||
| 	return PyMarshal_WriteObjectToString(x); | ||||
| 	return PyMarshal_WriteObjectToString(x, version); | ||||
| } | ||||
| 
 | ||||
| static PyObject * | ||||
|  | @ -850,12 +904,16 @@ marshal_loads(PyObject *self, PyObject *args) | |||
| 	RFILE rf; | ||||
| 	char *s; | ||||
| 	int n; | ||||
| 	if (!PyArg_ParseTuple(args, "s#:loads", &s, &n)) | ||||
| 	PyObject* result; | ||||
| 	/* loads() takes only the string: an optional "|i" unit with no | ||||
| 	   matching int * argument would make PyArg_ParseTuple store | ||||
| 	   through an unspecified pointer. */ | ||||
| 	if (!PyArg_ParseTuple(args, "s#:loads", &s, &n)) | ||||
| 		return NULL; | ||||
| 	rf.fp = NULL; | ||||
| 	rf.ptr = s; | ||||
| 	rf.end = s + n; | ||||
| 	return read_object(&rf); | ||||
| 	rf.strings = PyList_New(0); | ||||
| 	/* Py_DECREF() below must never see NULL; give up if the list | ||||
| 	   could not be allocated (PyList_New() has set the exception). */ | ||||
| 	if (rf.strings == NULL) | ||||
| 		return NULL; | ||||
| 	result = read_object(&rf); | ||||
| 	Py_DECREF(rf.strings); | ||||
| 	return result; | ||||
| } | ||||
| 
 | ||||
| static PyMethodDef marshal_methods[] = { | ||||
|  | @ -869,5 +927,6 @@ static PyMethodDef marshal_methods[] = { | |||
| PyMODINIT_FUNC | ||||
| PyMarshal_Init(void) | ||||
| { | ||||
| 	/* Create the "marshal" module and publish the current data format | ||||
| 	   number as marshal.version. */ | ||||
| 	(void) Py_InitModule("marshal", marshal_methods); | ||||
| 	PyObject *mod = Py_InitModule("marshal", marshal_methods); | ||||
| 	/* Module creation can fail; do not hand NULL to | ||||
| 	   PyModule_AddIntConstant(). */ | ||||
| 	if (mod == NULL) | ||||
| 		return; | ||||
| 	PyModule_AddIntConstant(mod, "version", Py_MARSHAL_VERSION); | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Martin v. Löwis
						Martin v. Löwis