Add raw_as_bytes option to Unpacker. (#265)

2025-12-08 06:09:49 +00:00 · 2018-01-11 17:02:41 +09:00 · 2018-01-11 17:02:41 +09:00 · 5534d0c7af
commit 5534d0c7af
parent 50ea49c86f
11 changed files with 199 additions and 93 deletions
--- a/3
+++ b/3
@ -8,7 +8,8 @@ cython:
 .PHONY: test
 test:
-	py.test -v test
+	pytest -v test
 	MSGPACK_PUREPYTHON=1 pytest -v test
 .PHONY: serve-doc
 serve-doc: all
--- a/README.rst
+++ b/README.rst
@ -10,8 +10,21 @@ MessagePack for Python
   :target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest
   :alt: Documentation Status
-IMPORTANT: Upgrading from msgpack-0.4
+
--------------------------------------
+What's this
 -----------
 `MessagePack <https://msgpack.org/>`_ is an efficient binary serialization format.
 It lets you exchange data among multiple languages like JSON.
 But it's faster and smaller.
 This package provides CPython bindings for reading and writing MessagePack data.
 Very important notes for existing users
 ---------------------------------------
 PyPI package name
 ^^^^^^^^^^^^^^^^^
 TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`.
 Do `pip uninstall msgpack-python; pip install msgpack` instead.
@ -24,13 +37,37 @@ Sadly, this doesn't work for upgrade install.  After `pip install -U msgpack-pyt
 msgpack is removed and `import msgpack` fails.
-What's this
+Deprecating encoding option
-----------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 encoding and unicode_errors options are deprecated.
 In case of packer, use UTF-8 always.  Storing other than UTF-8 is not recommended.
 For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes``
 object into msgpack raw type.
 In case of unpacker, there is a new ``raw_as_bytes`` option.  It is ``True`` by default
 for backward compatibility, but it will be changed to ``False`` in the near future.
 You can use ``raw_as_bytes=False`` instead of ``encoding='utf-8'``.
 Planned backward incompatible changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 For msgpack 1.0, I am planning these breaking changes:
 * packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option.
 * packer: Change default of ``use_bin_type`` option from False to True.
 * unpacker: Change default of ``raw_as_bytes`` option from True to False.
 * unpacker: Reduce all ``max_xxx_len`` options for typical usage.
 * unpacker: Remove ``write_bytes`` option from all methods.
 To keep these breaking changes from breaking your application, please:
 * Don't use deprecated options.
 * Pass ``use_bin_type`` and ``raw_as_bytes`` options explicitly.
 * If your application handles large (>1MB) data, specify ``max_xxx_len`` options too.
 `MessagePack <https://msgpack.org/>`_ is an efficient binary serialization format.
 It lets you exchange data among multiple languages like JSON.
 But it's faster and smaller.
 This package provides CPython bindings for reading and writing MessagePack data.
 Install
 -------
@ -76,14 +113,14 @@ msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with
   >>> import msgpack
   >>> msgpack.packb([1, 2, 3], use_bin_type=True)
   '\x93\x01\x02\x03'
-   >>> msgpack.unpackb(_)
+   >>> msgpack.unpackb(_, raw_as_bytes=False)
   [1, 2, 3]
 ``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple:
 .. code-block:: pycon
-   >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False)
+   >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw_as_bytes=False)
   (1, 2, 3)
 You should always specify the ``use_list`` keyword argument for backward compatibility.
@ -109,7 +146,7 @@ stream (or from bytes provided through its ``feed`` method).
   buf.seek(0)
-   unpacker = msgpack.Unpacker(buf)
+   unpacker = msgpack.Unpacker(buf, raw_as_bytes=False)
   for unpacked in unpacker:
       print(unpacked)
@ -142,7 +179,7 @@ It is also possible to pack/unpack custom data types. Here is an example for
    packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True)
-    this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
+    this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw_as_bytes=False)
 ``Unpacker``'s ``object_hook`` callback receives a dict; the
 ``object_pairs_hook`` callback may instead be used to receive a list of
@ -172,7 +209,7 @@ It is also possible to pack/unpack custom data types using the **ext** type.
    ...
    >>> data = array.array('d', [1.2, 3.4])
    >>> packed = msgpack.packb(data, default=default, use_bin_type=True)
-    >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
+    >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw_as_bytes=False)
    >>> data == unpacked
    True
@ -217,14 +254,10 @@ Early versions of msgpack didn't distinguish string and binary types (like Pytho
 The type for representing both string and binary types was named **raw**.
 For backward compatibility reasons, msgpack-python will still default all
-strings to byte strings, unless you specify the `use_bin_type=True` option in
+strings to byte strings, unless you specify the ``use_bin_type=True`` option in
 the packer. If you do so, it will use a non-standard type called **bin** to
 serialize byte arrays, and **raw** comes to mean **str**. If you want to
-distinguish **bin** and **raw** in the unpacker, specify `encoding='utf-8'`.
+distinguish **bin** and **raw** in the unpacker, specify ``raw_as_bytes=False``.
 **In a future version, the default value of ``use_bin_type`` will be changed to ``True``.
 To keep this change from breaking your code, you must specify it explicitly
 even when you want to use old format.**
 Note that Python 2 defaults to byte-arrays over Unicode strings:
@ -234,7 +267,7 @@ Note that Python 2 defaults to byte-arrays over Unicode strings:
    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
    ['spam', 'eggs']
    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
-                        encoding='utf-8')
+                        raw_as_bytes=False)
    ['spam', u'eggs']
 This is the same code in Python 3 (same behaviour, but Python 3 has a
@ -246,7 +279,7 @@ different default):
    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
    [b'spam', b'eggs']
    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
-                        encoding='utf-8')
+                        raw_as_bytes=False)
    [b'spam', 'eggs']
@ -277,6 +310,7 @@ You can use ``gc.disable()`` when unpacking large message.
 use_list option
 ^^^^^^^^^^^^^^^
 List is the default sequence type of Python.
 But tuple is lighter than list.
 You can use ``use_list=False`` while unpacking when performance is important.
@ -295,7 +329,7 @@ Test
 MessagePack uses `pytest` for testing.
 Run the tests with the following command:
-    $ pytest -v test
+    $ make test
 ..
--- a/ci/runtests.bat
+++ b/ci/runtests.bat
@ -3,5 +3,7 @@
 %PYTHON%\python.exe setup.py install
 %PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))"
 %PYTHON%\python.exe -c "from msgpack import _packer, _unpacker"
 %PYTHON%\python.exe -m pytest -v test
 %PYTHON%\python.exe setup.py bdist_wheel
 %PYTHON%\python.exe -m pytest -v test
 SET EL=%ERRORLEVEL%
 exit /b %EL%
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@ -2,7 +2,7 @@
 #cython: embedsignature=True
 from cpython cimport *
-#from cpython.exc cimport PyErr_WarnEx
+from cpython.exc cimport PyErr_WarnEx
 from msgpack.exceptions import PackValueError, PackOverflowError
 from msgpack import ExtType
@ -39,7 +39,7 @@ cdef extern from "pack.h":
    int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
 cdef int DEFAULT_RECURSE_LIMIT=511
-cdef size_t ITEM_LIMIT = (2**32)-1
+cdef long long ITEM_LIMIT = (2**32)-1
 cdef inline int PyBytesLike_Check(object o):
@ -110,9 +110,13 @@ cdef class Packer(object):
        self.pk.buf_size = buf_size
        self.pk.length = 0
-    def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
+    def __init__(self, default=None, encoding=None, unicode_errors=None,
                 bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
                 bint strict_types=False):
        if encoding is not None:
            PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1)
        if unicode_errors is not None:
            PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated.", 1)
        self.use_float = use_single_float
        self.strict_types = strict_types
        self.autoreset = autoreset
@ -122,7 +126,7 @@ cdef class Packer(object):
                raise TypeError("default must be a callable.")
        self._default = default
        if encoding is None:
-            self.encoding = NULL
+            self.encoding = 'utf_8'
            self.unicode_errors = NULL
        else:
            if isinstance(encoding, unicode):
@ -134,7 +138,8 @@ cdef class Packer(object):
                self._berrors = unicode_errors.encode('ascii')
            else:
                self._berrors = unicode_errors
-            self.unicode_errors = PyBytes_AsString(self._berrors)
+            if self._berrors is not None:
                self.unicode_errors = PyBytes_AsString(self._berrors)
    def __dealloc__(self):
        PyMem_Free(self.pk.buf)
@ -149,7 +154,7 @@ cdef class Packer(object):
        cdef char* rawval
        cdef int ret
        cdef dict d
-        cdef size_t L
+        cdef Py_ssize_t L
        cdef int default_used = 0
        cdef bint strict_types = self.strict_types
        cdef Py_buffer view
@ -203,6 +208,7 @@ cdef class Packer(object):
            elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
                if not self.encoding:
                    raise TypeError("Can't encode unicode string: no encoding is specified")
                #TODO: Use faster API for UTF-8
                o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
                L = len(o)
                if L > ITEM_LIMIT:
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@ -43,8 +43,9 @@ from msgpack import ExtType
 cdef extern from "unpack.h":
    ctypedef struct msgpack_user:
        bint use_list
-        PyObject* object_hook
+        bint raw_as_bytes
        bint has_pairs_hook # call object_hook with k-v pairs
        PyObject* object_hook
        PyObject* list_hook
        PyObject* ext_hook
        char *encoding
@ -73,12 +74,14 @@ cdef extern from "unpack.h":
 cdef inline init_ctx(unpack_context *ctx,
                     object object_hook, object object_pairs_hook,
                     object list_hook, object ext_hook,
-                     bint use_list, char* encoding, char* unicode_errors,
+                     bint use_list, bint raw_as_bytes,
                     char* encoding, char* unicode_errors,
                     Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
                     Py_ssize_t max_array_len, Py_ssize_t max_map_len,
                     Py_ssize_t max_ext_len):
    unpack_init(ctx)
    ctx.user.use_list = use_list
    ctx.user.raw_as_bytes = raw_as_bytes
    ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL
    ctx.user.max_str_len = max_str_len
    ctx.user.max_bin_len = max_bin_len
@ -155,7 +158,8 @@ cdef inline int get_data_from_buffer(object obj,
        return 1
 def unpackb(object packed, object object_hook=None, object list_hook=None,
-            bint use_list=1, encoding=None, unicode_errors="strict",
+            bint use_list=True, bint raw_as_bytes=True,
            encoding=None, unicode_errors="strict",
            object_pairs_hook=None, ext_hook=ExtType,
            Py_ssize_t max_str_len=2147483647, # 2**31-1
            Py_ssize_t max_bin_len=2147483647,
@ -180,21 +184,26 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
    cdef char* cerr = NULL
    cdef int new_protocol = 0
    if encoding is not None:
        PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
        if isinstance(encoding, unicode):
            encoding = encoding.encode('ascii')
        elif not isinstance(encoding, bytes):
            raise TypeError("encoding should be bytes or unicode")
        cenc = PyBytes_AsString(encoding)
    if unicode_errors is not None:
        PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
        if isinstance(unicode_errors, unicode):
            unicode_errors = unicode_errors.encode('ascii')
        elif not isinstance(unicode_errors, bytes):
            raise TypeError("unicode_errors should be bytes or unicode")
        cerr = PyBytes_AsString(unicode_errors)
    get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
    try:
        if encoding is not None:
            if isinstance(encoding, unicode):
                encoding = encoding.encode('ascii')
            cenc = PyBytes_AsString(encoding)
        if unicode_errors is not None:
            if isinstance(unicode_errors, unicode):
                unicode_errors = unicode_errors.encode('ascii')
            cerr = PyBytes_AsString(unicode_errors)
        init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
-                 use_list, cenc, cerr,
+                 use_list, raw_as_bytes, cenc, cerr,
                 max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
        ret = unpack_construct(&ctx, buf, buf_len, &off)
    finally:
@ -252,6 +261,16 @@ cdef class Unpacker(object):
        If true, unpack msgpack array to Python list.
        Otherwise, unpack to Python tuple. (default: True)
    :param bool raw_as_bytes:
        If true, unpack msgpack raw to Python bytes (default).
        Otherwise, unpack to Python str (or unicode on Python 2) by decoding
        with UTF-8 encoding (recommended).
        Currently, the default is true, but it will be changed to false in
        near future.  So you must specify it explicitly for keeping backward
        compatibility.
        *encoding* option which is deprecated overrides this option.
    :param callable object_hook:
        When specified, it should be callable.
        Unpacker calls it with a dict argument after unpacking msgpack map.
@ -262,14 +281,6 @@ cdef class Unpacker(object):
        Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
        (See also simplejson)
    :param str encoding:
        Encoding used for decoding msgpack raw.
        If it is None (default), msgpack raw is deserialized to Python bytes.
    :param str unicode_errors:
        Used for decoding msgpack raw with *encoding*.
        (default: `'strict'`)
    :param int max_buffer_size:
        Limits size of data waiting unpacked.  0 means system's INT_MAX (default).
        Raises `BufferFull` exception when it is insufficient.
@ -287,16 +298,25 @@ cdef class Unpacker(object):
    :param int max_map_len:
        Limits max length of map. (default: 2**31-1)
    :param str encoding:
        Deprecated, use raw_as_bytes instead.
        Encoding used for decoding msgpack raw.
        If it is None (default), msgpack raw is deserialized to Python bytes.
-    example of streaming deserialize from file-like object::
+    :param str unicode_errors:
        Deprecated. Used for decoding msgpack raw with *encoding*.
        (default: `'strict'`)
-        unpacker = Unpacker(file_like)
+
    Example of streaming deserialize from file-like object::
        unpacker = Unpacker(file_like, raw_as_bytes=False)
        for o in unpacker:
            process(o)
-    example of streaming deserialize from socket::
+    Example of streaming deserialize from socket::
-        unpacker = Unpacker()
+        unpacker = Unpacker(raw_as_bytes=False)
        while True:
            buf = sock.recv(1024**2)
            if not buf:
@ -324,7 +344,8 @@ cdef class Unpacker(object):
        PyMem_Free(self.buf)
        self.buf = NULL
-    def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
+    def __init__(self, file_like=None, Py_ssize_t read_size=0,
                 bint use_list=True, bint raw_as_bytes=True,
                 object object_hook=None, object object_pairs_hook=None, object list_hook=None,
                 encoding=None, unicode_errors='strict', int max_buffer_size=0,
                 object ext_hook=ExtType,
@ -363,6 +384,7 @@ cdef class Unpacker(object):
        self.stream_offset = 0
        if encoding is not None:
            PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
            if isinstance(encoding, unicode):
                self.encoding = encoding.encode('ascii')
            elif isinstance(encoding, bytes):
@ -372,6 +394,7 @@ cdef class Unpacker(object):
            cenc = PyBytes_AsString(self.encoding)
        if unicode_errors is not None:
            PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
            if isinstance(unicode_errors, unicode):
                self.unicode_errors = unicode_errors.encode('ascii')
            elif isinstance(unicode_errors, bytes):
@ -381,7 +404,7 @@ cdef class Unpacker(object):
            cerr = PyBytes_AsString(self.unicode_errors)
        init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
-                 ext_hook, use_list, cenc, cerr,
+                 ext_hook, use_list, raw_as_bytes, cenc, cerr,
                 max_str_len, max_bin_len, max_array_len,
                 max_map_len, max_ext_len)
--- a/msgpack/fallback.py
+++ b/msgpack/fallback.py
@ -145,6 +145,16 @@ class Unpacker(object):
        If true, unpack msgpack array to Python list.
        Otherwise, unpack to Python tuple. (default: True)
    :param bool raw_as_bytes:
        If true, unpack msgpack raw to Python bytes (default).
        Otherwise, unpack to Python str (or unicode on Python 2) by decoding
        with UTF-8 encoding (recommended).
        Currently, the default is true, but it will be changed to false in
        near future.  So you must specify it explicitly for keeping backward
        compatibility.
        *encoding* option which is deprecated overrides this option.
    :param callable object_hook:
        When specified, it should be callable.
        Unpacker calls it with a dict argument after unpacking msgpack map.
@ -183,13 +193,13 @@ class Unpacker(object):
    example of streaming deserialize from file-like object::
-        unpacker = Unpacker(file_like)
+        unpacker = Unpacker(file_like, raw_as_bytes=False)
        for o in unpacker:
            process(o)
    example of streaming deserialize from socket::
-        unpacker = Unpacker()
+        unpacker = Unpacker(raw_as_bytes=False)
        while True:
            buf = sock.recv(1024**2)
            if not buf:
@ -199,15 +209,28 @@ class Unpacker(object):
                process(o)
    """
-    def __init__(self, file_like=None, read_size=0, use_list=True,
+    def __init__(self, file_like=None, read_size=0, use_list=True, raw_as_bytes=True,
                 object_hook=None, object_pairs_hook=None, list_hook=None,
-                 encoding=None, unicode_errors='strict', max_buffer_size=0,
+                 encoding=None, unicode_errors=None, max_buffer_size=0,
                 ext_hook=ExtType,
                 max_str_len=2147483647, # 2**31-1
                 max_bin_len=2147483647,
                 max_array_len=2147483647,
                 max_map_len=2147483647,
                 max_ext_len=2147483647):
        if encoding is not None:
            warnings.warn(
                "encoding is deprecated, Use raw_as_bytes=False instead.",
                PendingDeprecationWarning)
        if unicode_errors is not None:
            warnings.warn(
                "unicode_errors is deprecated.",
                PendingDeprecationWarning)
        else:
            unicode_errors = 'strict'
        if file_like is None:
            self._feeding = True
        else:
@ -234,6 +257,7 @@ class Unpacker(object):
        if read_size > self._max_buffer_size:
            raise ValueError("read_size must be smaller than max_buffer_size")
        self._read_size = read_size or min(self._max_buffer_size, 16*1024)
        self._raw_as_bytes = bool(raw_as_bytes)
        self._encoding = encoding
        self._unicode_errors = unicode_errors
        self._use_list = use_list
@ -582,8 +606,10 @@ class Unpacker(object):
        if typ == TYPE_RAW:
            if self._encoding is not None:
                obj = obj.decode(self._encoding, self._unicode_errors)
-            else:
+            elif self._raw_as_bytes:
                obj = bytes(obj)
            else:
                obj = obj.decode('utf_8')
            return obj
        if typ == TYPE_EXT:
            return self._ext_hook(n, bytes(obj))
@ -682,9 +708,23 @@ class Packer(object):
    :param str unicode_errors:
        (deprecated) Error handler for encoding unicode. (default: 'strict')
    """
-    def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
+    def __init__(self, default=None, encoding=None, unicode_errors=None,
                 use_single_float=False, autoreset=True, use_bin_type=False,
                 strict_types=False):
        if encoding is None:
            encoding = 'utf_8'
        else:
            warnings.warn(
                "encoding is deprecated, Use raw_as_bytes=False instead.",
                PendingDeprecationWarning)
        if unicode_errors is None:
            unicode_errors = 'strict'
        else:
            warnings.warn(
                "unicode_errors is deprecated.",
                PendingDeprecationWarning)
        self._strict_types = strict_types
        self._use_float = use_single_float
        self._autoreset = autoreset
--- a/msgpack/unpack.h
+++ b/msgpack/unpack.h
@ -20,9 +20,10 @@
 #include "unpack_define.h"
 typedef struct unpack_user {
-    int use_list;
+    bool use_list;
-    PyObject *object_hook;
+    bool raw_as_bytes;
    bool has_pairs_hook;
    PyObject *object_hook;
    PyObject *list_hook;
    PyObject *ext_hook;
    const char *encoding;
@ -225,10 +226,13 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
    }
    PyObject *py;
-    if(u->encoding) {
+
    if (u->encoding) {
        py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors);
-    } else {
+    } else if (u->raw_as_bytes) {
        py = PyBytes_FromStringAndSize(p, l);
    } else {
        py = PyUnicode_DecodeUTF8(p, l, NULL);
    }
    if (!py)
        return -1;
--- a/test/test_limits.py
+++ b/test/test_limits.py
@ -39,11 +39,11 @@ def test_max_str_len():
    d = 'x' * 3
    packed = packb(d)
-    unpacker = Unpacker(max_str_len=3, encoding='utf-8')
+    unpacker = Unpacker(max_str_len=3, raw_as_bytes=False)
    unpacker.feed(packed)
    assert unpacker.unpack() == d
-    unpacker = Unpacker(max_str_len=2, encoding='utf-8')
+    unpacker = Unpacker(max_str_len=2, raw_as_bytes=False)
    with pytest.raises(UnpackValueError):
        unpacker.feed(packed)
        unpacker.unpack()
--- a/test/test_pack.py
+++ b/test/test_pack.py
@ -31,14 +31,14 @@ def testPack():
 def testPackUnicode():
    test_data = ["", "abcd", ["defgh"], "Русский текст"]
    for td in test_data:
-        re = unpackb(packb(td, encoding='utf-8'), use_list=1, encoding='utf-8')
+        re = unpackb(packb(td), use_list=1, raw_as_bytes=False)
        assert re == td
-        packer = Packer(encoding='utf-8')
+        packer = Packer()
        data = packer.pack(td)
-        re = Unpacker(BytesIO(data), encoding=str('utf-8'), use_list=1).unpack()
+        re = Unpacker(BytesIO(data), raw_as_bytes=False, use_list=1).unpack()
        assert re == td
-def testPackUTF32():
+def testPackUTF32():  # deprecated
    try:
        test_data = [
            "",
@ -66,26 +66,22 @@ def testPackByteArrays():
    for td in test_data:
        check(td)
-def testIgnoreUnicodeErrors():
+def testIgnoreUnicodeErrors(): # deprecated
    re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1)
    assert re == "abcdef"
 def testStrictUnicodeUnpack():
    with raises(UnicodeDecodeError):
-        unpackb(packb(b'abc\xeddef'), encoding='utf-8', use_list=1)
+        unpackb(packb(b'abc\xeddef'), raw_as_bytes=False, use_list=1)
-def testStrictUnicodePack():
+def testStrictUnicodePack():  # deprecated
    with raises(UnicodeEncodeError):
        packb("abc\xeddef", encoding='ascii', unicode_errors='strict')
-def testIgnoreErrorsPack():
+def testIgnoreErrorsPack():  # deprecated
-    re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), encoding='utf-8', use_list=1)
+    re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw_as_bytes=False, use_list=1)
    assert re == "abcdef"
 def testNoEncoding():
    with raises(TypeError):
        packb("abc", encoding=None)
 def testDecodeBinary():
    re = unpackb(packb(b"abc"), encoding=None, use_list=1)
    assert re == b"abc"
--- a/test/test_stricttype.py
+++ b/test/test_stricttype.py
@ -11,7 +11,7 @@ def test_namedtuple():
            return dict(o._asdict())
        raise TypeError('Unsupported type %s' % (type(o),))
    packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default)
-    unpacked = unpackb(packed, encoding='utf-8')
+    unpacked = unpackb(packed, raw_as_bytes=False)
    assert unpacked == {'foo': 1, 'bar': 42}
@ -32,7 +32,7 @@ def test_tuple():
        return o
    data = packb(t, strict_types=True, use_bin_type=True, default=default)
-    expected = unpackb(data, encoding='utf-8', object_hook=convert)
+    expected = unpackb(data, raw_as_bytes=False, object_hook=convert)
    assert expected == t
@ -53,10 +53,10 @@ def test_tuple_ext():
    def convert(code, payload):
        if code == MSGPACK_EXT_TYPE_TUPLE:
            # Unpack and convert to tuple
-            return tuple(unpackb(payload, encoding='utf-8', ext_hook=convert))
+            return tuple(unpackb(payload, raw_as_bytes=False, ext_hook=convert))
        raise ValueError('Unknown Ext code {}'.format(code))
    data = packb(t, strict_types=True, use_bin_type=True, default=default)
-    expected = unpackb(data, encoding='utf-8', ext_hook=convert)
+    expected = unpackb(data, raw_as_bytes=False, ext_hook=convert)
    assert expected == t
--- a/test/test_unpack.py
+++ b/test/test_unpack.py
@ -47,8 +47,8 @@ def test_unpacker_ext_hook():
    class MyUnpacker(Unpacker):
        def __init__(self):
-            super(MyUnpacker, self).__init__(ext_hook=self._hook,
+            super(MyUnpacker, self).__init__(
-                                             encoding='utf-8')
+                ext_hook=self._hook, raw_as_bytes=False)
        def _hook(self, code, data):
            if code == 1:
@ -57,11 +57,11 @@ def test_unpacker_ext_hook():
                return ExtType(code, data)
    unpacker = MyUnpacker()
-    unpacker.feed(packb({'a': 1}, encoding='utf-8'))
+    unpacker.feed(packb({'a': 1}))
    assert unpacker.unpack() == {'a': 1}
-    unpacker.feed(packb({'a': ExtType(1, b'123')}, encoding='utf-8'))
+    unpacker.feed(packb({'a': ExtType(1, b'123')}))
    assert unpacker.unpack() == {'a': 123}
-    unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8'))
+    unpacker.feed(packb({'a': ExtType(2, b'321')}))
    assert unpacker.unpack() == {'a': ExtType(2, b'321')}