Use cython's cast for converting encoding and errors (#279)

It is a little faster on Python 3 because we can skip creating a temporary bytes object.
2026-02-06 17:59:52 +00:00 · 2018-02-05 11:44:17 +09:00 · 2018-02-05 11:44:17 +09:00 · 2644cbdcb7
commit 2644cbdcb7
parent 351023946f
2 changed files with 32 additions and 53 deletions
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@ -1,7 +1,8 @@
 # coding: utf-8
-#cython: embedsignature=True
+#cython: embedsignature=True, c_string_encoding=ascii

 from cpython cimport *
+from cpython.version cimport PY_MAJOR_VERSION
 from cpython.exc cimport PyErr_WarnEx

 from msgpack.exceptions import PackValueError, PackOverflowError
@ -99,8 +100,8 @@ cdef class Packer(object):
    cdef object _default
    cdef object _bencoding
    cdef object _berrors
-    cdef char *encoding
-    cdef char *unicode_errors
+    cdef const char *encoding
+    cdef const char *unicode_errors
    cdef bint strict_types
    cdef bool use_float
    cdef bint autoreset
@ -126,26 +127,21 @@ cdef class Packer(object):
            if not PyCallable_Check(default):
                raise TypeError("default must be a callable.")
        self._default = default
-        if encoding is None and unicode_errors is None:
-            self.encoding = NULL
-            self.unicode_errors = NULL
-        else:
-            if encoding is None:
+
+        self._bencoding = encoding
+        if encoding is None:
+            if PY_MAJOR_VERSION < 3:
                self.encoding = 'utf-8'
            else:
-                if isinstance(encoding, unicode):
-                    self._bencoding = encoding.encode('ascii')
-                else:
-                    self._bencoding = encoding
-                self.encoding = PyBytes_AsString(self._bencoding)
-            if unicode_errors is None:
-                self.unicode_errors = 'strict'
-            else:
-                if isinstance(unicode_errors, unicode):
-                    self._berrors = unicode_errors.encode('ascii')
-                else:
-                    self._berrors = unicode_errors
-                self.unicode_errors = PyBytes_AsString(self._berrors)
+                self.encoding = NULL
+        else:
+            self.encoding = self._bencoding
+
+        self._berrors = unicode_errors
+        if unicode_errors is None:
+            self.unicode_errors = NULL
+        else:
+            self.unicode_errors = self._berrors

    def __dealloc__(self):
        PyMem_Free(self.pk.buf)
@ -212,7 +208,7 @@ cdef class Packer(object):
                if ret == 0:
                    ret = msgpack_pack_raw_body(&self.pk, rawval, L)
            elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
-                if self.encoding == NULL:
+                if self.encoding == NULL and self.unicode_errors == NULL:
                    ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
                    if ret == -2:
                        raise PackValueError("unicode string is too large")
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@ -1,6 +1,7 @@
 # coding: utf-8
-#cython: embedsignature=True
+#cython: embedsignature=True, c_string_encoding=ascii

+from cpython.version cimport PY_MAJOR_VERSION
 from cpython.bytes cimport (
    PyBytes_AsString,
    PyBytes_FromStringAndSize,
@ -75,7 +76,7 @@ cdef inline init_ctx(unpack_context *ctx,
                     object object_hook, object object_pairs_hook,
                     object list_hook, object ext_hook,
                     bint use_list, bint raw,
-                     char* encoding, char* unicode_errors,
+                     const char* encoding, const char* unicode_errors,
                     Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
                     Py_ssize_t max_array_len, Py_ssize_t max_map_len,
                     Py_ssize_t max_ext_len):
@ -180,24 +181,16 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
    cdef Py_buffer view
    cdef char* buf = NULL
    cdef Py_ssize_t buf_len
-    cdef char* cenc = NULL
-    cdef char* cerr = NULL
+    cdef const char* cenc = NULL
+    cdef const char* cerr = NULL
    cdef int new_protocol = 0

    if encoding is not None:
        PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1)
-        if isinstance(encoding, unicode):
-            encoding = encoding.encode('ascii')
-        elif not isinstance(encoding, bytes):
-            raise TypeError("encoding should be bytes or unicode")
-        cenc = PyBytes_AsString(encoding)
+        cenc = encoding

    if unicode_errors is not None:
-        if isinstance(unicode_errors, unicode):
-            unicode_errors = unicode_errors.encode('ascii')
-        elif not isinstance(unicode_errors, bytes):
-            raise TypeError("unicode_errors should be bytes or unicode")
-        cerr = PyBytes_AsString(unicode_errors)
+        cerr = unicode_errors

    get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
    try:
@ -219,7 +212,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,


 def unpack(object stream, object object_hook=None, object list_hook=None,
-           bint use_list=1, encoding=None, unicode_errors="strict",
+           bint use_list=1, encoding=None, unicode_errors=None,
           object_pairs_hook=None, ext_hook=ExtType,
           Py_ssize_t max_str_len=2147483647, # 2**32-1
           Py_ssize_t max_bin_len=2147483647,
@ -352,8 +345,8 @@ cdef class Unpacker(object):
                 Py_ssize_t max_array_len=2147483647,
                 Py_ssize_t max_map_len=2147483647,
                 Py_ssize_t max_ext_len=2147483647):
-        cdef char *cenc=NULL,
-        cdef char *cerr=NULL
+        cdef const char *cenc=NULL,
+        cdef const char *cerr=NULL

        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
@ -383,22 +376,12 @@ cdef class Unpacker(object):

        if encoding is not None:
            PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw=False instead.", 1)
-            if isinstance(encoding, unicode):
-                self.encoding = encoding.encode('ascii')
-            elif isinstance(encoding, bytes):
-                self.encoding = encoding
-            else:
-                raise TypeError("encoding should be bytes or unicode")
-            cenc = PyBytes_AsString(self.encoding)
+            self.encoding = encoding
+            cenc = encoding

        if unicode_errors is not None:
-            if isinstance(unicode_errors, unicode):
-                self.unicode_errors = unicode_errors.encode('ascii')
-            elif isinstance(unicode_errors, bytes):
-                self.unicode_errors = unicode_errors
-            else:
-                raise TypeError("unicode_errors should be bytes or unicode")
-            cerr = PyBytes_AsString(self.unicode_errors)
+            self.unicode_errors = unicode_errors
+            cerr = unicode_errors

        init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
                 ext_hook, use_list, raw, cenc, cerr,