gh-113626: Add allow_code parameter in marshal functions (GH-113648)

Passing allow_code=False prevents serialization and de-serialization of
code objects, which are incompatible between Python versions.
This commit is contained in:
Serhiy Storchaka 2024-01-16 18:05:15 +02:00 committed by GitHub
parent a482bc67ee
commit d2d8332f71
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 357 additions and 54 deletions

View file

@ -2,10 +2,14 @@
preserve
[clinic start generated code]*/
#include "pycore_modsupport.h" // _PyArg_CheckPositional()
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
# include "pycore_gc.h" // PyGC_Head
# include "pycore_runtime.h" // _Py_ID()
#endif
#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
PyDoc_STRVAR(marshal_dump__doc__,
"dump($module, value, file, version=version, /)\n"
"dump($module, value, file, version=version, /, *, allow_code=True)\n"
"--\n"
"\n"
"Write the value on the open file.\n"
@ -16,53 +20,95 @@ PyDoc_STRVAR(marshal_dump__doc__,
" Must be a writeable binary file.\n"
" version\n"
" Indicates the data format that dump should use.\n"
" allow_code\n"
" Allow to write code objects.\n"
"\n"
"If the value has (or contains an object that has) an unsupported type, a\n"
"ValueError exception is raised - but garbage data will also be written\n"
"to the file. The object will not be properly read back by load().");
#define MARSHAL_DUMP_METHODDEF \
{"dump", _PyCFunction_CAST(marshal_dump), METH_FASTCALL, marshal_dump__doc__},
{"dump", _PyCFunction_CAST(marshal_dump), METH_FASTCALL|METH_KEYWORDS, marshal_dump__doc__},
static PyObject *
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
int version);
int version, int allow_code);
static PyObject *
marshal_dump(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
marshal_dump(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#define NUM_KEYWORDS 1
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
PyObject *ob_item[NUM_KEYWORDS];
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_item = { &_Py_ID(allow_code), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
#else // !Py_BUILD_CORE
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"", "", "", "allow_code", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "dump",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
PyObject *argsbuf[4];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 2;
PyObject *value;
PyObject *file;
int version = Py_MARSHAL_VERSION;
int allow_code = 1;
if (!_PyArg_CheckPositional("dump", nargs, 2, 3)) {
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 2, 3, 0, argsbuf);
if (!args) {
goto exit;
}
value = args[0];
file = args[1];
if (nargs < 3) {
goto skip_optional;
goto skip_optional_posonly;
}
noptargs--;
version = PyLong_AsInt(args[2]);
if (version == -1 && PyErr_Occurred()) {
goto exit;
}
skip_optional:
return_value = marshal_dump_impl(module, value, file, version);
skip_optional_posonly:
if (!noptargs) {
goto skip_optional_kwonly;
}
allow_code = PyObject_IsTrue(args[3]);
if (allow_code < 0) {
goto exit;
}
skip_optional_kwonly:
return_value = marshal_dump_impl(module, value, file, version, allow_code);
exit:
return return_value;
}
PyDoc_STRVAR(marshal_load__doc__,
"load($module, file, /)\n"
"load($module, file, /, *, allow_code=True)\n"
"--\n"
"\n"
"Read one value from the open file and return it.\n"
"\n"
" file\n"
" Must be readable binary file.\n"
" allow_code\n"
" Allow to load code objects.\n"
"\n"
"If no valid value is read (e.g. because the data has a different Python\n"
"version\'s incompatible marshal format), raise EOFError, ValueError or\n"
@ -72,10 +118,66 @@ PyDoc_STRVAR(marshal_load__doc__,
"dump(), load() will substitute None for the unmarshallable type.");
#define MARSHAL_LOAD_METHODDEF \
{"load", (PyCFunction)marshal_load, METH_O, marshal_load__doc__},
{"load", _PyCFunction_CAST(marshal_load), METH_FASTCALL|METH_KEYWORDS, marshal_load__doc__},
static PyObject *
marshal_load_impl(PyObject *module, PyObject *file, int allow_code);
static PyObject *
marshal_load(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#define NUM_KEYWORDS 1
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
PyObject *ob_item[NUM_KEYWORDS];
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_item = { &_Py_ID(allow_code), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
#else // !Py_BUILD_CORE
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"", "allow_code", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "load",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
PyObject *argsbuf[2];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
PyObject *file;
int allow_code = 1;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
if (!args) {
goto exit;
}
file = args[0];
if (!noptargs) {
goto skip_optional_kwonly;
}
allow_code = PyObject_IsTrue(args[1]);
if (allow_code < 0) {
goto exit;
}
skip_optional_kwonly:
return_value = marshal_load_impl(module, file, allow_code);
exit:
return return_value;
}
PyDoc_STRVAR(marshal_dumps__doc__,
"dumps($module, value, version=version, /)\n"
"dumps($module, value, version=version, /, *, allow_code=True)\n"
"--\n"
"\n"
"Return the bytes object that would be written to a file by dump(value, file).\n"
@ -84,66 +186,150 @@ PyDoc_STRVAR(marshal_dumps__doc__,
" Must be a supported type.\n"
" version\n"
" Indicates the data format that dumps should use.\n"
" allow_code\n"
" Allow to write code objects.\n"
"\n"
"Raise a ValueError exception if value has (or contains an object that has) an\n"
"unsupported type.");
#define MARSHAL_DUMPS_METHODDEF \
{"dumps", _PyCFunction_CAST(marshal_dumps), METH_FASTCALL, marshal_dumps__doc__},
{"dumps", _PyCFunction_CAST(marshal_dumps), METH_FASTCALL|METH_KEYWORDS, marshal_dumps__doc__},
static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version);
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
int allow_code);
static PyObject *
marshal_dumps(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
marshal_dumps(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#define NUM_KEYWORDS 1
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
PyObject *ob_item[NUM_KEYWORDS];
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_item = { &_Py_ID(allow_code), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
#else // !Py_BUILD_CORE
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"", "", "allow_code", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "dumps",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
PyObject *argsbuf[3];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
PyObject *value;
int version = Py_MARSHAL_VERSION;
int allow_code = 1;
if (!_PyArg_CheckPositional("dumps", nargs, 1, 2)) {
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf);
if (!args) {
goto exit;
}
value = args[0];
if (nargs < 2) {
goto skip_optional;
goto skip_optional_posonly;
}
noptargs--;
version = PyLong_AsInt(args[1]);
if (version == -1 && PyErr_Occurred()) {
goto exit;
}
skip_optional:
return_value = marshal_dumps_impl(module, value, version);
skip_optional_posonly:
if (!noptargs) {
goto skip_optional_kwonly;
}
allow_code = PyObject_IsTrue(args[2]);
if (allow_code < 0) {
goto exit;
}
skip_optional_kwonly:
return_value = marshal_dumps_impl(module, value, version, allow_code);
exit:
return return_value;
}
PyDoc_STRVAR(marshal_loads__doc__,
"loads($module, bytes, /)\n"
"loads($module, bytes, /, *, allow_code=True)\n"
"--\n"
"\n"
"Convert the bytes-like object to a value.\n"
"\n"
" allow_code\n"
" Allow to load code objects.\n"
"\n"
"If no valid value is found, raise EOFError, ValueError or TypeError. Extra\n"
"bytes in the input are ignored.");
#define MARSHAL_LOADS_METHODDEF \
{"loads", (PyCFunction)marshal_loads, METH_O, marshal_loads__doc__},
{"loads", _PyCFunction_CAST(marshal_loads), METH_FASTCALL|METH_KEYWORDS, marshal_loads__doc__},
static PyObject *
marshal_loads_impl(PyObject *module, Py_buffer *bytes);
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code);
static PyObject *
marshal_loads(PyObject *module, PyObject *arg)
marshal_loads(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
Py_buffer bytes = {NULL, NULL};
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
if (PyObject_GetBuffer(arg, &bytes, PyBUF_SIMPLE) != 0) {
#define NUM_KEYWORDS 1
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
PyObject *ob_item[NUM_KEYWORDS];
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_item = { &_Py_ID(allow_code), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
#else // !Py_BUILD_CORE
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"", "allow_code", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "loads",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
PyObject *argsbuf[2];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1;
Py_buffer bytes = {NULL, NULL};
int allow_code = 1;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf);
if (!args) {
goto exit;
}
return_value = marshal_loads_impl(module, &bytes);
if (PyObject_GetBuffer(args[0], &bytes, PyBUF_SIMPLE) != 0) {
goto exit;
}
if (!noptargs) {
goto skip_optional_kwonly;
}
allow_code = PyObject_IsTrue(args[1]);
if (allow_code < 0) {
goto exit;
}
skip_optional_kwonly:
return_value = marshal_loads_impl(module, &bytes, allow_code);
exit:
/* Cleanup for bytes */
@ -153,4 +339,4 @@ exit:
return return_value;
}
/*[clinic end generated code: output=92d2d47aac9128ee input=a9049054013a1b77]*/
/*[clinic end generated code: output=1575b9a3ae48ad3d input=a9049054013a1b77]*/

View file

@ -78,6 +78,7 @@ module marshal
#define WFERR_UNMARSHALLABLE 1
#define WFERR_NESTEDTOODEEP 2
#define WFERR_NOMEMORY 3
#define WFERR_CODE_NOT_ALLOWED 4
typedef struct {
FILE *fp;
@ -89,6 +90,7 @@ typedef struct {
char *buf;
_Py_hashtable_t *hashtable;
int version;
int allow_code;
} WFILE;
#define w_byte(c, p) do { \
@ -225,6 +227,9 @@ w_short_pstring(const void *s, Py_ssize_t n, WFILE *p)
w_byte((t) | flag, (p)); \
} while(0)
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code);
static void
w_PyLong(const PyLongObject *ob, char flag, WFILE *p)
{
@ -520,7 +525,8 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
}
Py_ssize_t i = 0;
while (_PySet_NextEntry(v, &pos, &value, &hash)) {
PyObject *dump = PyMarshal_WriteObjectToString(value, p->version);
PyObject *dump = _PyMarshal_WriteObjectToString(value,
p->version, p->allow_code);
if (dump == NULL) {
p->error = WFERR_UNMARSHALLABLE;
Py_DECREF(pairs);
@ -549,6 +555,10 @@ w_complex_object(PyObject *v, char flag, WFILE *p)
Py_DECREF(pairs);
}
else if (PyCode_Check(v)) {
if (!p->allow_code) {
p->error = WFERR_CODE_NOT_ALLOWED;
return;
}
PyCodeObject *co = (PyCodeObject *)v;
PyObject *co_code = _PyCode_GetCode(co);
if (co_code == NULL) {
@ -657,6 +667,7 @@ PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
wf.end = wf.ptr + sizeof(buf);
wf.error = WFERR_OK;
wf.version = version;
wf.allow_code = 1;
if (w_init_refs(&wf, version)) {
return; /* caller must check PyErr_Occurred() */
}
@ -674,6 +685,7 @@ typedef struct {
char *buf;
Py_ssize_t buf_size;
PyObject *refs; /* a list */
int allow_code;
} RFILE;
static const char *
@ -1364,6 +1376,11 @@ r_object(RFILE *p)
PyObject* linetable = NULL;
PyObject *exceptiontable = NULL;
if (!p->allow_code) {
PyErr_SetString(PyExc_ValueError,
"unmarshalling code objects is disallowed");
break;
}
idx = r_ref_reserve(flag, p);
if (idx < 0)
break;
@ -1609,6 +1626,7 @@ PyMarshal_ReadObjectFromFile(FILE *fp)
{
RFILE rf;
PyObject *result;
rf.allow_code = 1;
rf.fp = fp;
rf.readable = NULL;
rf.depth = 0;
@ -1629,6 +1647,7 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
{
RFILE rf;
PyObject *result;
rf.allow_code = 1;
rf.fp = NULL;
rf.readable = NULL;
rf.ptr = str;
@ -1645,8 +1664,8 @@ PyMarshal_ReadObjectFromString(const char *str, Py_ssize_t len)
return result;
}
PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
static PyObject *
_PyMarshal_WriteObjectToString(PyObject *x, int version, int allow_code)
{
WFILE wf;
@ -1661,6 +1680,7 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
wf.end = wf.ptr + PyBytes_GET_SIZE(wf.str);
wf.error = WFERR_OK;
wf.version = version;
wf.allow_code = allow_code;
if (w_init_refs(&wf, version)) {
Py_DECREF(wf.str);
return NULL;
@ -1674,17 +1694,35 @@ PyMarshal_WriteObjectToString(PyObject *x, int version)
}
if (wf.error != WFERR_OK) {
Py_XDECREF(wf.str);
if (wf.error == WFERR_NOMEMORY)
switch (wf.error) {
case WFERR_NOMEMORY:
PyErr_NoMemory();
else
break;
case WFERR_NESTEDTOODEEP:
PyErr_SetString(PyExc_ValueError,
(wf.error==WFERR_UNMARSHALLABLE)?"unmarshallable object"
:"object too deeply nested to marshal");
"object too deeply nested to marshal");
break;
case WFERR_CODE_NOT_ALLOWED:
PyErr_SetString(PyExc_ValueError,
"marshalling code objects is disallowed");
break;
default:
case WFERR_UNMARSHALLABLE:
PyErr_SetString(PyExc_ValueError,
"unmarshallable object");
break;
}
return NULL;
}
return wf.str;
}
/* Public entry point: marshal `x` using data format `version`.

   Delegates to _PyMarshal_WriteObjectToString() with code objects always
   permitted, so callers of this function are not affected by the
   allow_code option.  The helper's result is returned unchanged: the
   marshalled bytes object, or NULL with an exception set on failure. */
PyObject *
PyMarshal_WriteObjectToString(PyObject *x, int version)
{
    const int allow_code = 1;  /* this entry point never rejects code objects */
    PyObject *marshalled = _PyMarshal_WriteObjectToString(x, version, allow_code);
    return marshalled;
}
/* And an interface for Python programs... */
/*[clinic input]
marshal.dump
@ -1696,6 +1734,9 @@ marshal.dump
version: int(c_default="Py_MARSHAL_VERSION") = version
Indicates the data format that dump should use.
/
*
allow_code: bool = True
Allow to write code objects.
Write the value on the open file.
@ -1706,14 +1747,14 @@ to the file. The object will not be properly read back by load().
static PyObject *
marshal_dump_impl(PyObject *module, PyObject *value, PyObject *file,
int version)
/*[clinic end generated code: output=aaee62c7028a7cb2 input=6c7a3c23c6fef556]*/
int version, int allow_code)
/*[clinic end generated code: output=429e5fd61c2196b9 input=041f7f6669b0aafb]*/
{
/* XXX Quick hack -- need to do this differently */
PyObject *s;
PyObject *res;
s = PyMarshal_WriteObjectToString(value, version);
s = _PyMarshal_WriteObjectToString(value, version, allow_code);
if (s == NULL)
return NULL;
res = PyObject_CallMethodOneArg(file, &_Py_ID(write), s);
@ -1727,6 +1768,9 @@ marshal.load
file: object
Must be readable binary file.
/
*
allow_code: bool = True
Allow to load code objects.
Read one value from the open file and return it.
@ -1739,8 +1783,8 @@ dump(), load() will substitute None for the unmarshallable type.
[clinic start generated code]*/
static PyObject *
marshal_load(PyObject *module, PyObject *file)
/*[clinic end generated code: output=f8e5c33233566344 input=c85c2b594cd8124a]*/
marshal_load_impl(PyObject *module, PyObject *file, int allow_code)
/*[clinic end generated code: output=0c1aaf3546ae3ed3 input=2dca7b570653b82f]*/
{
PyObject *data, *result;
RFILE rf;
@ -1762,6 +1806,7 @@ marshal_load(PyObject *module, PyObject *file)
result = NULL;
}
else {
rf.allow_code = allow_code;
rf.depth = 0;
rf.fp = NULL;
rf.readable = file;
@ -1787,6 +1832,9 @@ marshal.dumps
version: int(c_default="Py_MARSHAL_VERSION") = version
Indicates the data format that dumps should use.
/
*
allow_code: bool = True
Allow to write code objects.
Return the bytes object that would be written to a file by dump(value, file).
@ -1795,10 +1843,11 @@ unsupported type.
[clinic start generated code]*/
static PyObject *
marshal_dumps_impl(PyObject *module, PyObject *value, int version)
/*[clinic end generated code: output=9c200f98d7256cad input=a2139ea8608e9b27]*/
marshal_dumps_impl(PyObject *module, PyObject *value, int version,
int allow_code)
/*[clinic end generated code: output=115f90da518d1d49 input=167eaecceb63f0a8]*/
{
return PyMarshal_WriteObjectToString(value, version);
return _PyMarshal_WriteObjectToString(value, version, allow_code);
}
/*[clinic input]
@ -1806,6 +1855,9 @@ marshal.loads
bytes: Py_buffer
/
*
allow_code: bool = True
Allow to load code objects.
Convert the bytes-like object to a value.
@ -1814,13 +1866,14 @@ bytes in the input are ignored.
[clinic start generated code]*/
static PyObject *
marshal_loads_impl(PyObject *module, Py_buffer *bytes)
/*[clinic end generated code: output=9fc65985c93d1bb1 input=6f426518459c8495]*/
marshal_loads_impl(PyObject *module, Py_buffer *bytes, int allow_code)
/*[clinic end generated code: output=62c0c538d3edc31f input=14de68965b45aaa7]*/
{
RFILE rf;
char *s = bytes->buf;
Py_ssize_t n = bytes->len;
PyObject* result;
rf.allow_code = allow_code;
rf.fp = NULL;
rf.readable = NULL;
rf.ptr = s;