Use cython's cast for converting encoding and errors (#279)

It is a little faster on Python 3 because we can skip creating a temporary bytes object.
This commit is contained in:
INADA Naoki 2018-02-05 11:44:17 +09:00 committed by GitHub
parent 351023946f
commit 2644cbdcb7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 32 additions and 53 deletions

View file

@ -1,7 +1,8 @@
# coding: utf-8
#cython: embedsignature=True
#cython: embedsignature=True, c_string_encoding=ascii
from cpython cimport *
from cpython.version cimport PY_MAJOR_VERSION
from cpython.exc cimport PyErr_WarnEx
from msgpack.exceptions import PackValueError, PackOverflowError
@ -99,8 +100,8 @@ cdef class Packer(object):
cdef object _default
cdef object _bencoding
cdef object _berrors
cdef char *encoding
cdef char *unicode_errors
cdef const char *encoding
cdef const char *unicode_errors
cdef bint strict_types
cdef bool use_float
cdef bint autoreset
@ -126,26 +127,21 @@ cdef class Packer(object):
if not PyCallable_Check(default):
raise TypeError("default must be a callable.")
self._default = default
if encoding is None and unicode_errors is None:
self.encoding = NULL
self.unicode_errors = NULL
else:
if encoding is None:
self._bencoding = encoding
if encoding is None:
if PY_MAJOR_VERSION < 3:
self.encoding = 'utf-8'
else:
if isinstance(encoding, unicode):
self._bencoding = encoding.encode('ascii')
else:
self._bencoding = encoding
self.encoding = PyBytes_AsString(self._bencoding)
if unicode_errors is None:
self.unicode_errors = 'strict'
else:
if isinstance(unicode_errors, unicode):
self._berrors = unicode_errors.encode('ascii')
else:
self._berrors = unicode_errors
self.unicode_errors = PyBytes_AsString(self._berrors)
self.encoding = NULL
else:
self.encoding = self._bencoding
self._berrors = unicode_errors
if unicode_errors is None:
self.unicode_errors = NULL
else:
self.unicode_errors = self._berrors
def __dealloc__(self):
PyMem_Free(self.pk.buf)
@ -212,7 +208,7 @@ cdef class Packer(object):
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
if self.encoding == NULL:
if self.encoding == NULL and self.unicode_errors == NULL:
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
if ret == -2:
raise PackValueError("unicode string is too large")

View file

@ -1,6 +1,7 @@
# coding: utf-8
#cython: embedsignature=True
#cython: embedsignature=True, c_string_encoding=ascii
from cpython.version cimport PY_MAJOR_VERSION
from cpython.bytes cimport (
PyBytes_AsString,
PyBytes_FromStringAndSize,
@ -75,7 +76,7 @@ cdef inline init_ctx(unpack_context *ctx,
object object_hook, object object_pairs_hook,
object list_hook, object ext_hook,
bint use_list, bint raw,
char* encoding, char* unicode_errors,
const char* encoding, const char* unicode_errors,
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
Py_ssize_t max_ext_len):
@ -180,24 +181,16 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
cdef Py_buffer view
cdef char* buf = NULL
cdef Py_ssize_t buf_len
cdef char* cenc = NULL
cdef char* cerr = NULL
cdef const char* cenc = NULL
cdef const char* cerr = NULL
cdef int new_protocol = 0
if encoding is not None:
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1)
if isinstance(encoding, unicode):
encoding = encoding.encode('ascii')
elif not isinstance(encoding, bytes):
raise TypeError("encoding should be bytes or unicode")
cenc = PyBytes_AsString(encoding)
cenc = encoding
if unicode_errors is not None:
if isinstance(unicode_errors, unicode):
unicode_errors = unicode_errors.encode('ascii')
elif not isinstance(unicode_errors, bytes):
raise TypeError("unicode_errors should be bytes or unicode")
cerr = PyBytes_AsString(unicode_errors)
cerr = unicode_errors
get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
try:
@ -219,7 +212,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
def unpack(object stream, object object_hook=None, object list_hook=None,
bint use_list=1, encoding=None, unicode_errors="strict",
bint use_list=1, encoding=None, unicode_errors=None,
object_pairs_hook=None, ext_hook=ExtType,
Py_ssize_t max_str_len=2147483647, # 2**32-1
Py_ssize_t max_bin_len=2147483647,
@ -352,8 +345,8 @@ cdef class Unpacker(object):
Py_ssize_t max_array_len=2147483647,
Py_ssize_t max_map_len=2147483647,
Py_ssize_t max_ext_len=2147483647):
cdef char *cenc=NULL,
cdef char *cerr=NULL
cdef const char *cenc=NULL,
cdef const char *cerr=NULL
self.object_hook = object_hook
self.object_pairs_hook = object_pairs_hook
@ -383,22 +376,12 @@ cdef class Unpacker(object):
if encoding is not None:
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1)
if isinstance(encoding, unicode):
self.encoding = encoding.encode('ascii')
elif isinstance(encoding, bytes):
self.encoding = encoding
else:
raise TypeError("encoding should be bytes or unicode")
cenc = PyBytes_AsString(self.encoding)
self.encoding = encoding
cenc = encoding
if unicode_errors is not None:
if isinstance(unicode_errors, unicode):
self.unicode_errors = unicode_errors.encode('ascii')
elif isinstance(unicode_errors, bytes):
self.unicode_errors = unicode_errors
else:
raise TypeError("unicode_errors should be bytes or unicode")
cerr = PyBytes_AsString(self.unicode_errors)
self.unicode_errors = unicode_errors
cerr = unicode_errors
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
ext_hook, use_list, raw, cenc, cerr,