cython: better exception handling (#606)

- use `except -1` instead of manual error handling
- use `PyUnicode_AsUTF8AndSize()`
- use `_pack()` and `_pack_inner()` instead of `while True:`
This commit is contained in:
Inada Naoki 2024-05-06 02:13:12 +09:00 committed by GitHub
parent 3da5818a3a
commit e1068087e0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 134 additions and 195 deletions

View file

@ -24,21 +24,20 @@ cdef extern from "pack.h":
size_t buf_size size_t buf_size
bint use_bin_type bint use_bin_type
int msgpack_pack_nil(msgpack_packer* pk) int msgpack_pack_nil(msgpack_packer* pk) except -1
int msgpack_pack_true(msgpack_packer* pk) int msgpack_pack_true(msgpack_packer* pk) except -1
int msgpack_pack_false(msgpack_packer* pk) int msgpack_pack_false(msgpack_packer* pk) except -1
int msgpack_pack_long_long(msgpack_packer* pk, long long d) int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1
int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1
int msgpack_pack_float(msgpack_packer* pk, float d) int msgpack_pack_float(msgpack_packer* pk, float d) except -1
int msgpack_pack_double(msgpack_packer* pk, double d) int msgpack_pack_double(msgpack_packer* pk, double d) except -1
int msgpack_pack_array(msgpack_packer* pk, size_t l) int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1
int msgpack_pack_map(msgpack_packer* pk, size_t l) int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1
int msgpack_pack_raw(msgpack_packer* pk, size_t l) int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1
int msgpack_pack_bin(msgpack_packer* pk, size_t l) int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1
int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds); int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1
int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit)
cdef int DEFAULT_RECURSE_LIMIT=511 cdef int DEFAULT_RECURSE_LIMIT=511
@ -119,6 +118,10 @@ cdef class Packer:
self.pk.buf_size = buf_size self.pk.buf_size = buf_size
self.pk.length = 0 self.pk.length = 0
def __dealloc__(self):
PyMem_Free(self.pk.buf)
self.pk.buf = NULL
def __init__(self, *, default=None, def __init__(self, *, default=None,
bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True,
bint strict_types=False, bint datetime=False, unicode_errors=None, bint strict_types=False, bint datetime=False, unicode_errors=None,
@ -139,155 +142,127 @@ cdef class Packer:
else: else:
self.unicode_errors = self._berrors self.unicode_errors = self._berrors
def __dealloc__(self): # returns -2 when default should(o) be called
PyMem_Free(self.pk.buf) cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1:
self.pk.buf = NULL
cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
cdef long long llval cdef long long llval
cdef unsigned long long ullval cdef unsigned long long ullval
cdef unsigned long ulval cdef unsigned long ulval
cdef long longval cdef const char* rawval
cdef float fval
cdef double dval
cdef char* rawval
cdef int ret
cdef dict d
cdef Py_ssize_t L cdef Py_ssize_t L
cdef int default_used = 0 cdef bool strict_types = self.strict_types
cdef bint strict_types = self.strict_types
cdef Py_buffer view cdef Py_buffer view
if o is None:
msgpack_pack_nil(&self.pk)
elif o is True:
msgpack_pack_true(&self.pk)
elif o is False:
msgpack_pack_false(&self.pk)
elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o):
try:
if o > 0:
ullval = o
msgpack_pack_unsigned_long_long(&self.pk, ullval)
else:
llval = o
msgpack_pack_long_long(&self.pk, llval)
except OverflowError as oe:
if will_default:
return -2
else:
raise OverflowError("Integer value out of range")
elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o):
if self.use_float:
msgpack_pack_float(&self.pk, <float>o)
else:
msgpack_pack_double(&self.pk, <double>o)
elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o):
L = Py_SIZE(o)
if L > ITEM_LIMIT:
PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name)
rawval = o
msgpack_pack_bin(&self.pk, L)
msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
if self.unicode_errors == NULL:
rawval = PyUnicode_AsUTF8AndSize(o, &L)
if L >ITEM_LIMIT:
raise ValueError("unicode string is too large")
else:
o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors)
L = Py_SIZE(o)
if L > ITEM_LIMIT:
raise ValueError("unicode string is too large")
rawval = o
msgpack_pack_raw(&self.pk, L)
msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyDict_CheckExact(o) if strict_types else PyDict_Check(o):
L = len(o)
if L > ITEM_LIMIT:
raise ValueError("dict is too large")
msgpack_pack_map(&self.pk, L)
for k, v in o.items():
self._pack(k, nest_limit)
self._pack(v, nest_limit)
elif type(o) is ExtType if strict_types else isinstance(o, ExtType):
# This should be before Tuple because ExtType is namedtuple.
rawval = o.data
L = len(o.data)
if L > ITEM_LIMIT:
raise ValueError("EXT data is too large")
msgpack_pack_ext(&self.pk, <long>o.code, L)
msgpack_pack_raw_body(&self.pk, rawval, L)
elif type(o) is Timestamp:
llval = o.seconds
ulval = o.nanoseconds
msgpack_pack_timestamp(&self.pk, llval, ulval)
elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)):
L = Py_SIZE(o)
if L > ITEM_LIMIT:
raise ValueError("list is too large")
msgpack_pack_array(&self.pk, L)
for v in o:
self._pack(v, nest_limit)
elif PyMemoryView_Check(o):
PyObject_GetBuffer(o, &view, PyBUF_SIMPLE)
L = view.len
if L > ITEM_LIMIT:
PyBuffer_Release(&view);
raise ValueError("memoryview is too large")
try:
msgpack_pack_bin(&self.pk, L)
msgpack_pack_raw_body(&self.pk, <char*>view.buf, L)
finally:
PyBuffer_Release(&view);
elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None:
delta = o - epoch
if not PyDelta_CheckExact(delta):
raise ValueError("failed to calculate delta")
llval = timedelta_days(delta) * <long long>(24*60*60) + timedelta_seconds(delta)
ulval = timedelta_microseconds(delta) * 1000
msgpack_pack_timestamp(&self.pk, llval, ulval)
elif will_default:
return -2
elif self.datetime and PyDateTime_CheckExact(o):
# this should be later than will_default
PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name)
else:
PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name)
cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1:
cdef int ret
if nest_limit < 0: if nest_limit < 0:
raise ValueError("recursion limit exceeded.") raise ValueError("recursion limit exceeded.")
nest_limit -= 1
while True: if self._default is not None:
if o is None: ret = self._pack_inner(o, 1, nest_limit)
ret = msgpack_pack_nil(&self.pk) if ret == -2:
elif o is True:
ret = msgpack_pack_true(&self.pk)
elif o is False:
ret = msgpack_pack_false(&self.pk)
elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o):
# PyInt_Check(long) is True for Python 3.
# So we should test long before int.
try:
if o > 0:
ullval = o
ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
else:
llval = o
ret = msgpack_pack_long_long(&self.pk, llval)
except OverflowError as oe:
if not default_used and self._default is not None:
o = self._default(o)
default_used = True
continue
else:
raise OverflowError("Integer value out of range")
elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o):
if self.use_float:
fval = o
ret = msgpack_pack_float(&self.pk, fval)
else:
dval = o
ret = msgpack_pack_double(&self.pk, dval)
elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o):
L = Py_SIZE(o)
if L > ITEM_LIMIT:
PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name)
rawval = o
ret = msgpack_pack_bin(&self.pk, L)
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
if self.unicode_errors == NULL:
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
if ret == -2:
raise ValueError("unicode string is too large")
else:
o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors)
L = Py_SIZE(o)
if L > ITEM_LIMIT:
raise ValueError("unicode string is too large")
ret = msgpack_pack_raw(&self.pk, L)
if ret == 0:
rawval = o
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyDict_CheckExact(o):
d = <dict>o
L = len(d)
if L > ITEM_LIMIT:
raise ValueError("dict is too large")
ret = msgpack_pack_map(&self.pk, L)
if ret == 0:
for k, v in d.items():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif not strict_types and PyDict_Check(o):
L = len(o)
if L > ITEM_LIMIT:
raise ValueError("dict is too large")
ret = msgpack_pack_map(&self.pk, L)
if ret == 0:
for k, v in o.items():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif type(o) is ExtType if strict_types else isinstance(o, ExtType):
# This should be before Tuple because ExtType is namedtuple.
longval = o.code
rawval = o.data
L = len(o.data)
if L > ITEM_LIMIT:
raise ValueError("EXT data is too large")
ret = msgpack_pack_ext(&self.pk, longval, L)
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif type(o) is Timestamp:
llval = o.seconds
ulval = o.nanoseconds
ret = msgpack_pack_timestamp(&self.pk, llval, ulval)
elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)):
L = Py_SIZE(o)
if L > ITEM_LIMIT:
raise ValueError("list is too large")
ret = msgpack_pack_array(&self.pk, L)
if ret == 0:
for v in o:
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif PyMemoryView_Check(o):
if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0:
raise ValueError("could not get buffer for memoryview")
L = view.len
if L > ITEM_LIMIT:
PyBuffer_Release(&view);
raise ValueError("memoryview is too large")
ret = msgpack_pack_bin(&self.pk, L)
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, <char*>view.buf, L)
PyBuffer_Release(&view);
elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None:
delta = o - epoch
if not PyDelta_CheckExact(delta):
raise ValueError("failed to calculate delta")
llval = timedelta_days(delta) * <long long>(24*60*60) + timedelta_seconds(delta)
ulval = timedelta_microseconds(delta) * 1000
ret = msgpack_pack_timestamp(&self.pk, llval, ulval)
elif not default_used and self._default:
o = self._default(o) o = self._default(o)
default_used = 1
continue
elif self.datetime and PyDateTime_CheckExact(o):
PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name)
else: else:
PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) return ret
return ret return self._pack_inner(o, 0, nest_limit)
cpdef pack(self, object obj): def pack(self, object obj):
cdef int ret cdef int ret
try: try:
ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) ret = self._pack(obj, DEFAULT_RECURSE_LIMIT)
@ -308,11 +283,7 @@ cdef class Packer:
def pack_array_header(self, long long size): def pack_array_header(self, long long size):
if size > ITEM_LIMIT: if size > ITEM_LIMIT:
raise ValueError raise ValueError
cdef int ret = msgpack_pack_array(&self.pk, size) msgpack_pack_array(&self.pk, size)
if ret == -1:
raise MemoryError
elif ret: # should not happen
raise TypeError
if self.autoreset: if self.autoreset:
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
self.pk.length = 0 self.pk.length = 0
@ -321,11 +292,7 @@ cdef class Packer:
def pack_map_header(self, long long size): def pack_map_header(self, long long size):
if size > ITEM_LIMIT: if size > ITEM_LIMIT:
raise ValueError raise ValueError
cdef int ret = msgpack_pack_map(&self.pk, size) msgpack_pack_map(&self.pk, size)
if ret == -1:
raise MemoryError
elif ret: # should not happen
raise TypeError
if self.autoreset: if self.autoreset:
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
self.pk.length = 0 self.pk.length = 0
@ -338,17 +305,10 @@ cdef class Packer:
*pairs* should be a sequence of pairs. *pairs* should be a sequence of pairs.
(`len(pairs)` and `for k, v in pairs:` should be supported.) (`len(pairs)` and `for k, v in pairs:` should be supported.)
""" """
cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) msgpack_pack_map(&self.pk, len(pairs))
if ret == 0: for k, v in pairs:
for k, v in pairs: self._pack(k)
ret = self._pack(k) self._pack(v)
if ret != 0: break
ret = self._pack(v)
if ret != 0: break
if ret == -1:
raise MemoryError
elif ret: # should not happen
raise TypeError
if self.autoreset: if self.autoreset:
buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length)
self.pk.length = 0 self.pk.length = 0

View file

@ -64,27 +64,6 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_
#include "pack_template.h" #include "pack_template.h"
// return -2 when o is too long
static inline int
msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit)
{
assert(PyUnicode_Check(o));
Py_ssize_t len;
const char* buf = PyUnicode_AsUTF8AndSize(o, &len);
if (buf == NULL)
return -1;
if (len > limit) {
return -2;
}
int ret = msgpack_pack_raw(pk, len);
if (ret) return ret;
return msgpack_pack_raw_body(pk, buf, len);
}
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif