Add raw_as_bytes option to Unpacker. (#265)

2025-12-08 06:09:49 +00:00 · 2018-01-11 17:02:41 +09:00 · 2018-01-11 17:02:41 +09:00 · 5534d0c7af
commit 5534d0c7af
parent 50ea49c86f
11 changed files with 199 additions and 93 deletions
--- a/3
+++ b/3
@ -8,7 +8,8 @@ cython:

 .PHONY: test
 test:
-	py.test -v test
+	pytest -v test
+	MSGPACK_PUREPYTHON=1 pytest -v test

 .PHONY: serve-doc
 serve-doc: all
--- a/README.rst
+++ b/README.rst
@ -10,8 +10,21 @@ MessagePack for Python
   :target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest
   :alt: Documentation Status

-IMPORTANT: Upgrading from msgpack-0.4
--------------------------------------
+
+What's this
+-----------
+
+`MessagePack <https://msgpack.org/>`_ is an efficient binary serialization format.
+It lets you exchange data among multiple languages like JSON.
+But it's faster and smaller.
+This package provides CPython bindings for reading and writing MessagePack data.
+
+
+Very important notes for existing users
+---------------------------------------
+
+PyPI package name
+^^^^^^^^^^^^^^^^^

 TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`.
 Do `pip uninstall msgpack-python; pip install msgpack` instead.
@ -24,13 +37,37 @@ Sadly, this doesn't work for upgrade install.  After `pip install -U msgpack-pyt
 msgpack is removed and `import msgpack` fail.


-What's this
-----------
+Deprecating encoding option
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+encoding and unicode_errors options are deprecated.
+
+For the packer, always use UTF-8.  Storing strings in any other encoding is not recommended.
+
+For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes``
+object into msgpack raw type.
+
+For the unpacker, there is a new ``raw_as_bytes`` option.  It is ``True`` by default
+for backward compatibility, but it will be changed to ``False`` in the near future.
+You can use ``raw_as_bytes=False`` instead of ``encoding='utf-8'``.
+
+Planned backward incompatible changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For msgpack 1.0, I am planning these breaking changes:
+
+* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option.
+* packer: Change default of ``use_bin_type`` option from False to True.
+* unpacker: Change default of ``raw_as_bytes`` option from True to False.
+* unpacker: Reduce all ``max_xxx_len`` options for typical usage.
+* unpacker: Remove ``write_bytes`` option from all methods.
+
+To keep these breaking changes from breaking your application, please:
+
+* Don't use deprecated options.
+* Pass ``use_bin_type`` and ``raw_as_bytes`` options explicitly.
+* If your application handles large (>1MB) data, specify ``max_xxx_len`` options too.

-`MessagePack <https://msgpack.org/>`_ is an efficient binary serialization format.
-It lets you exchange data among multiple languages like JSON.
-But it's faster and smaller.
-This package provides CPython bindings for reading and writing MessagePack data.

 Install
 -------
@ -76,14 +113,14 @@ msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with
   >>> import msgpack
   >>> msgpack.packb([1, 2, 3], use_bin_type=True)
   '\x93\x01\x02\x03'
-   >>> msgpack.unpackb(_)
+   >>> msgpack.unpackb(_, raw_as_bytes=False)
   [1, 2, 3]

 ``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple:

 .. code-block:: pycon

-   >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False)
+   >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw_as_bytes=False)
   (1, 2, 3)

 You should always specify the ``use_list`` keyword argument for backward compatibility.
@ -109,7 +146,7 @@ stream (or from bytes provided through its ``feed`` method).

   buf.seek(0)

-   unpacker = msgpack.Unpacker(buf)
+   unpacker = msgpack.Unpacker(buf, raw_as_bytes=False)
   for unpacked in unpacker:
       print(unpacked)

@ -142,7 +179,7 @@ It is also possible to pack/unpack custom data types. Here is an example for


    packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True)
-    this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
+    this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw_as_bytes=False)

 ``Unpacker``'s ``object_hook`` callback receives a dict; the
 ``object_pairs_hook`` callback may instead be used to receive a list of
@ -172,7 +209,7 @@ It is also possible to pack/unpack custom data types using the **ext** type.
    ...
    >>> data = array.array('d', [1.2, 3.4])
    >>> packed = msgpack.packb(data, default=default, use_bin_type=True)
-    >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
+    >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw_as_bytes=False)
    >>> data == unpacked
    True

@ -217,14 +254,10 @@ Early versions of msgpack didn't distinguish string and binary types (like Pytho
 The type for representing both string and binary types was named **raw**.

 For backward compatibility reasons, msgpack-python will still default all
-strings to byte strings, unless you specify the `use_bin_type=True` option in
+strings to byte strings, unless you specify the ``use_bin_type=True`` option in
 the packer. If you do so, it will use a non-standard type called **bin** to
 serialize byte arrays, and **raw** becomes to mean **str**. If you want to
-distinguish **bin** and **raw** in the unpacker, specify `encoding='utf-8'`.
-
-**In future version, default value of ``use_bin_type`` will be changed to ``True``.
-To avoid this change will break your code, you must specify it explicitly
-even when you want to use old format.**
+distinguish **bin** and **raw** in the unpacker, specify ``raw_as_bytes=False``.

 Note that Python 2 defaults to byte-arrays over Unicode strings:

@ -234,7 +267,7 @@ Note that Python 2 defaults to byte-arrays over Unicode strings:
    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
    ['spam', 'eggs']
    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
-                        encoding='utf-8')
+                        raw_as_bytes=False)
    ['spam', u'eggs']

 This is the same code in Python 3 (same behaviour, but Python 3 has a
@ -246,7 +279,7 @@ different default):
    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
    [b'spam', b'eggs']
    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
-                        encoding='utf-8')
+                        raw_as_bytes=False)
    [b'spam', 'eggs']


@ -277,6 +310,7 @@ You can use ``gc.disable()`` when unpacking large message.

 use_list option
 ^^^^^^^^^^^^^^^
+
 List is the default sequence type of Python.
 But tuple is lighter than list.
 You can use ``use_list=False`` while unpacking when performance is important.
@ -295,7 +329,7 @@ Test
 MessagePack uses `pytest` for testing.
 Run test with following command:

-    $ pytest -v test
+    $ make test


 ..
--- a/ci/runtests.bat
+++ b/ci/runtests.bat
@ -3,5 +3,7 @@
 %PYTHON%\python.exe setup.py install
 %PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))"
 %PYTHON%\python.exe -c "from msgpack import _packer, _unpacker"
-%PYTHON%\python.exe -m pytest -v test
 %PYTHON%\python.exe setup.py bdist_wheel
+%PYTHON%\python.exe -m pytest -v test
+SET EL=%ERRORLEVEL%
+exit /b %EL%
--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@ -2,7 +2,7 @@
 #cython: embedsignature=True

 from cpython cimport *
-#from cpython.exc cimport PyErr_WarnEx
+from cpython.exc cimport PyErr_WarnEx

 from msgpack.exceptions import PackValueError, PackOverflowError
 from msgpack import ExtType
@ -39,7 +39,7 @@ cdef extern from "pack.h":
    int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)

 cdef int DEFAULT_RECURSE_LIMIT=511
-cdef size_t ITEM_LIMIT = (2**32)-1
+cdef long long ITEM_LIMIT = (2**32)-1


 cdef inline int PyBytesLike_Check(object o):
@ -110,9 +110,13 @@ cdef class Packer(object):
        self.pk.buf_size = buf_size
        self.pk.length = 0

-    def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
+    def __init__(self, default=None, encoding=None, unicode_errors=None,
                 bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
                 bint strict_types=False):
+        if encoding is not None:
+            PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1)
+        if unicode_errors is not None:
+            PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated.", 1)
        self.use_float = use_single_float
        self.strict_types = strict_types
        self.autoreset = autoreset
@ -122,7 +126,7 @@ cdef class Packer(object):
                raise TypeError("default must be a callable.")
        self._default = default
        if encoding is None:
-            self.encoding = NULL
+            self.encoding = 'utf_8'
            self.unicode_errors = NULL
        else:
            if isinstance(encoding, unicode):
@ -134,7 +138,8 @@ cdef class Packer(object):
                self._berrors = unicode_errors.encode('ascii')
            else:
                self._berrors = unicode_errors
-            self.unicode_errors = PyBytes_AsString(self._berrors)
+            if self._berrors is not None:
+                self.unicode_errors = PyBytes_AsString(self._berrors)

    def __dealloc__(self):
        PyMem_Free(self.pk.buf)
@ -149,7 +154,7 @@ cdef class Packer(object):
        cdef char* rawval
        cdef int ret
        cdef dict d
-        cdef size_t L
+        cdef Py_ssize_t L
        cdef int default_used = 0
        cdef bint strict_types = self.strict_types
        cdef Py_buffer view
@ -203,6 +208,7 @@ cdef class Packer(object):
            elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
                if not self.encoding:
                    raise TypeError("Can't encode unicode string: no encoding is specified")
+                #TODO: Use faster API for UTF-8
                o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
                L = len(o)
                if L > ITEM_LIMIT:
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@ -43,8 +43,9 @@ from msgpack import ExtType
 cdef extern from "unpack.h":
    ctypedef struct msgpack_user:
        bint use_list
-        PyObject* object_hook
+        bint raw_as_bytes
        bint has_pairs_hook # call object_hook with k-v pairs
+        PyObject* object_hook
        PyObject* list_hook
        PyObject* ext_hook
        char *encoding
@ -73,12 +74,14 @@ cdef extern from "unpack.h":
 cdef inline init_ctx(unpack_context *ctx,
                     object object_hook, object object_pairs_hook,
                     object list_hook, object ext_hook,
-                     bint use_list, char* encoding, char* unicode_errors,
+                     bint use_list, bint raw_as_bytes,
+                     char* encoding, char* unicode_errors,
                     Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
                     Py_ssize_t max_array_len, Py_ssize_t max_map_len,
                     Py_ssize_t max_ext_len):
    unpack_init(ctx)
    ctx.user.use_list = use_list
+    ctx.user.raw_as_bytes = raw_as_bytes
    ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL
    ctx.user.max_str_len = max_str_len
    ctx.user.max_bin_len = max_bin_len
@ -155,7 +158,8 @@ cdef inline int get_data_from_buffer(object obj,
        return 1

 def unpackb(object packed, object object_hook=None, object list_hook=None,
-            bint use_list=1, encoding=None, unicode_errors="strict",
+            bint use_list=True, bint raw_as_bytes=True,
+            encoding=None, unicode_errors="strict",
            object_pairs_hook=None, ext_hook=ExtType,
            Py_ssize_t max_str_len=2147483647, # 2**32-1
            Py_ssize_t max_bin_len=2147483647,
@ -180,21 +184,26 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
    cdef char* cerr = NULL
    cdef int new_protocol = 0

+    if encoding is not None:
+        PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
+        if isinstance(encoding, unicode):
+            encoding = encoding.encode('ascii')
+        elif not isinstance(encoding, bytes):
+            raise TypeError("encoding should be bytes or unicode")
+        cenc = PyBytes_AsString(encoding)
+
+    if unicode_errors is not None:
+        PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
+        if isinstance(unicode_errors, unicode):
+            unicode_errors = unicode_errors.encode('ascii')
+        elif not isinstance(unicode_errors, bytes):
+            raise TypeError("unicode_errors should be bytes or unicode")
+        cerr = PyBytes_AsString(unicode_errors)
+
    get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
-
    try:
-        if encoding is not None:
-            if isinstance(encoding, unicode):
-                encoding = encoding.encode('ascii')
-            cenc = PyBytes_AsString(encoding)
-
-        if unicode_errors is not None:
-            if isinstance(unicode_errors, unicode):
-                unicode_errors = unicode_errors.encode('ascii')
-            cerr = PyBytes_AsString(unicode_errors)
-
        init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
-                 use_list, cenc, cerr,
+                 use_list, raw_as_bytes, cenc, cerr,
                 max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
        ret = unpack_construct(&ctx, buf, buf_len, &off)
    finally:
@ -252,6 +261,16 @@ cdef class Unpacker(object):
        If true, unpack msgpack array to Python list.
        Otherwise, unpack to Python tuple. (default: True)

+    :param bool raw_as_bytes:
+        If true, unpack msgpack raw to Python bytes (default).
+        Otherwise, unpack to Python str (or unicode on Python 2) by decoding
+        with UTF-8 encoding (recommended).
+        Currently, the default is true, but it will be changed to false in
+        near future.  So you must specify it explicitly for keeping backward
+        compatibility.
+
+        *encoding* option which is deprecated overrides this option.
+
    :param callable object_hook:
        When specified, it should be callable.
        Unpacker calls it with a dict argument after unpacking msgpack map.
@ -262,14 +281,6 @@ cdef class Unpacker(object):
        Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
        (See also simplejson)

-    :param str encoding:
-        Encoding used for decoding msgpack raw.
-        If it is None (default), msgpack raw is deserialized to Python bytes.
-
-    :param str unicode_errors:
-        Used for decoding msgpack raw with *encoding*.
-        (default: `'strict'`)
-
    :param int max_buffer_size:
        Limits size of data waiting unpacked.  0 means system's INT_MAX (default).
        Raises `BufferFull` exception when it is insufficient.
@ -287,16 +298,25 @@ cdef class Unpacker(object):
    :param int max_map_len:
        Limits max length of map. (default: 2**31-1)

+    :param str encoding:
+        Deprecated, use raw_as_bytes instead.
+        Encoding used for decoding msgpack raw.
+        If it is None (default), msgpack raw is deserialized to Python bytes.

-    example of streaming deserialize from file-like object::
+    :param str unicode_errors:
+        Deprecated. Used for decoding msgpack raw with *encoding*.
+        (default: `'strict'`)

-        unpacker = Unpacker(file_like)
+
+    Example of streaming deserialize from file-like object::
+
+        unpacker = Unpacker(file_like, raw_as_bytes=False)
        for o in unpacker:
            process(o)

-    example of streaming deserialize from socket::
+    Example of streaming deserialize from socket::

-        unpacker = Unpacker()
+        unpacker = Unpacker(raw_as_bytes=False)
        while True:
            buf = sock.recv(1024**2)
            if not buf:
@ -324,7 +344,8 @@ cdef class Unpacker(object):
        PyMem_Free(self.buf)
        self.buf = NULL

-    def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
+    def __init__(self, file_like=None, Py_ssize_t read_size=0,
+                 bint use_list=True, bint raw_as_bytes=True,
                 object object_hook=None, object object_pairs_hook=None, object list_hook=None,
                 encoding=None, unicode_errors='strict', int max_buffer_size=0,
                 object ext_hook=ExtType,
@ -363,6 +384,7 @@ cdef class Unpacker(object):
        self.stream_offset = 0

        if encoding is not None:
+            PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
            if isinstance(encoding, unicode):
                self.encoding = encoding.encode('ascii')
            elif isinstance(encoding, bytes):
@ -372,6 +394,7 @@ cdef class Unpacker(object):
            cenc = PyBytes_AsString(self.encoding)

        if unicode_errors is not None:
+            PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
            if isinstance(unicode_errors, unicode):
                self.unicode_errors = unicode_errors.encode('ascii')
            elif isinstance(unicode_errors, bytes):
@ -381,7 +404,7 @@ cdef class Unpacker(object):
            cerr = PyBytes_AsString(self.unicode_errors)

        init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
-                 ext_hook, use_list, cenc, cerr,
+                 ext_hook, use_list, raw_as_bytes, cenc, cerr,
                 max_str_len, max_bin_len, max_array_len,
                 max_map_len, max_ext_len)

--- a/msgpack/fallback.py
+++ b/msgpack/fallback.py
@ -145,6 +145,16 @@ class Unpacker(object):
        If true, unpack msgpack array to Python list.
        Otherwise, unpack to Python tuple. (default: True)

+    :param bool raw_as_bytes:
+        If true, unpack msgpack raw to Python bytes (default).
+        Otherwise, unpack to Python str (or unicode on Python 2) by decoding
+        with UTF-8 encoding (recommended).
+        Currently, the default is true, but it will be changed to false in
+        near future.  So you must specify it explicitly for keeping backward
+        compatibility.
+
+        *encoding* option which is deprecated overrides this option.
+
    :param callable object_hook:
        When specified, it should be callable.
        Unpacker calls it with a dict argument after unpacking msgpack map.
@ -183,13 +193,13 @@ class Unpacker(object):

    example of streaming deserialize from file-like object::

-        unpacker = Unpacker(file_like)
+        unpacker = Unpacker(file_like, raw_as_bytes=False)
        for o in unpacker:
            process(o)

    example of streaming deserialize from socket::

-        unpacker = Unpacker()
+        unpacker = Unpacker(raw_as_bytes=False)
        while True:
            buf = sock.recv(1024**2)
            if not buf:
@ -199,15 +209,28 @@ class Unpacker(object):
                process(o)
    """

-    def __init__(self, file_like=None, read_size=0, use_list=True,
+    def __init__(self, file_like=None, read_size=0, use_list=True, raw_as_bytes=True,
                 object_hook=None, object_pairs_hook=None, list_hook=None,
-                 encoding=None, unicode_errors='strict', max_buffer_size=0,
+                 encoding=None, unicode_errors=None, max_buffer_size=0,
                 ext_hook=ExtType,
                 max_str_len=2147483647, # 2**32-1
                 max_bin_len=2147483647,
                 max_array_len=2147483647,
                 max_map_len=2147483647,
                 max_ext_len=2147483647):
+
+        if encoding is not None:
+            warnings.warn(
+                "encoding is deprecated, Use raw_as_bytes=False instead.",
+                PendingDeprecationWarning)
+
+        if unicode_errors is not None:
+            warnings.warn(
+                "unicode_errors is deprecated.",
+                PendingDeprecationWarning)
+        else:
+            unicode_errors = 'strict'
+
        if file_like is None:
            self._feeding = True
        else:
@ -234,6 +257,7 @@ class Unpacker(object):
        if read_size > self._max_buffer_size:
            raise ValueError("read_size must be smaller than max_buffer_size")
        self._read_size = read_size or min(self._max_buffer_size, 16*1024)
+        self._raw_as_bytes = bool(raw_as_bytes)
        self._encoding = encoding
        self._unicode_errors = unicode_errors
        self._use_list = use_list
@ -582,8 +606,10 @@ class Unpacker(object):
        if typ == TYPE_RAW:
            if self._encoding is not None:
                obj = obj.decode(self._encoding, self._unicode_errors)
-            else:
+            elif self._raw_as_bytes:
                obj = bytes(obj)
+            else:
+                obj = obj.decode('utf_8')
            return obj
        if typ == TYPE_EXT:
            return self._ext_hook(n, bytes(obj))
@ -682,9 +708,23 @@ class Packer(object):
    :param str unicode_errors:
        (deprecated) Error handler for encoding unicode. (default: 'strict')
    """
-    def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
+    def __init__(self, default=None, encoding=None, unicode_errors=None,
                 use_single_float=False, autoreset=True, use_bin_type=False,
                 strict_types=False):
+        if encoding is None:
+            encoding = 'utf_8'
+        else:
+            warnings.warn(
+                "encoding is deprecated, Use raw_as_bytes=False instead.",
+                PendingDeprecationWarning)
+
+        if unicode_errors is None:
+            unicode_errors = 'strict'
+        else:
+            warnings.warn(
+                "unicode_errors is deprecated.",
+                PendingDeprecationWarning)
+
        self._strict_types = strict_types
        self._use_float = use_single_float
        self._autoreset = autoreset
--- a/msgpack/unpack.h
+++ b/msgpack/unpack.h
@ -20,9 +20,10 @@
 #include "unpack_define.h"

 typedef struct unpack_user {
-    int use_list;
-    PyObject *object_hook;
+    bool use_list;
+    bool raw_as_bytes;
    bool has_pairs_hook;
+    PyObject *object_hook;
    PyObject *list_hook;
    PyObject *ext_hook;
    const char *encoding;
@ -225,10 +226,13 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
    }

    PyObject *py;
-    if(u->encoding) {
+
+    if (u->encoding) {
        py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors);
-    } else {
+    } else if (u->raw_as_bytes) {
        py = PyBytes_FromStringAndSize(p, l);
+    } else {
+        py = PyUnicode_DecodeUTF8(p, l, NULL);
    }
    if (!py)
        return -1;
--- a/test/test_limits.py
+++ b/test/test_limits.py
@ -39,11 +39,11 @@ def test_max_str_len():
    d = 'x' * 3
    packed = packb(d)

-    unpacker = Unpacker(max_str_len=3, encoding='utf-8')
+    unpacker = Unpacker(max_str_len=3, raw_as_bytes=False)
    unpacker.feed(packed)
    assert unpacker.unpack() == d

-    unpacker = Unpacker(max_str_len=2, encoding='utf-8')
+    unpacker = Unpacker(max_str_len=2, raw_as_bytes=False)
    with pytest.raises(UnpackValueError):
        unpacker.feed(packed)
        unpacker.unpack()
--- a/test/test_pack.py
+++ b/test/test_pack.py
@ -31,14 +31,14 @@ def testPack():
 def testPackUnicode():
    test_data = ["", "abcd", ["defgh"], "Русский текст"]
    for td in test_data:
-        re = unpackb(packb(td, encoding='utf-8'), use_list=1, encoding='utf-8')
+        re = unpackb(packb(td), use_list=1, raw_as_bytes=False)
        assert re == td
-        packer = Packer(encoding='utf-8')
+        packer = Packer()
        data = packer.pack(td)
-        re = Unpacker(BytesIO(data), encoding=str('utf-8'), use_list=1).unpack()
+        re = Unpacker(BytesIO(data), raw_as_bytes=False, use_list=1).unpack()
        assert re == td

-def testPackUTF32():
+def testPackUTF32():  # deprecated
    try:
        test_data = [
            "",
@ -66,26 +66,22 @@ def testPackByteArrays():
    for td in test_data:
        check(td)

-def testIgnoreUnicodeErrors():
+def testIgnoreUnicodeErrors(): # deprecated
    re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1)
    assert re == "abcdef"

 def testStrictUnicodeUnpack():
    with raises(UnicodeDecodeError):
-        unpackb(packb(b'abc\xeddef'), encoding='utf-8', use_list=1)
+        unpackb(packb(b'abc\xeddef'), raw_as_bytes=False, use_list=1)

-def testStrictUnicodePack():
+def testStrictUnicodePack():  # deprecated
    with raises(UnicodeEncodeError):
        packb("abc\xeddef", encoding='ascii', unicode_errors='strict')

-def testIgnoreErrorsPack():
-    re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), encoding='utf-8', use_list=1)
+def testIgnoreErrorsPack():  # deprecated
+    re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw_as_bytes=False, use_list=1)
    assert re == "abcdef"

-def testNoEncoding():
-    with raises(TypeError):
-        packb("abc", encoding=None)
-
 def testDecodeBinary():
    re = unpackb(packb(b"abc"), encoding=None, use_list=1)
    assert re == b"abc"
--- a/test/test_stricttype.py
+++ b/test/test_stricttype.py
@ -11,7 +11,7 @@ def test_namedtuple():
            return dict(o._asdict())
        raise TypeError('Unsupported type %s' % (type(o),))
    packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default)
-    unpacked = unpackb(packed, encoding='utf-8')
+    unpacked = unpackb(packed, raw_as_bytes=False)
    assert unpacked == {'foo': 1, 'bar': 42}


@ -32,7 +32,7 @@ def test_tuple():
        return o

    data = packb(t, strict_types=True, use_bin_type=True, default=default)
-    expected = unpackb(data, encoding='utf-8', object_hook=convert)
+    expected = unpackb(data, raw_as_bytes=False, object_hook=convert)

    assert expected == t

@ -53,10 +53,10 @@ def test_tuple_ext():
    def convert(code, payload):
        if code == MSGPACK_EXT_TYPE_TUPLE:
            # Unpack and convert to tuple
-            return tuple(unpackb(payload, encoding='utf-8', ext_hook=convert))
+            return tuple(unpackb(payload, raw_as_bytes=False, ext_hook=convert))
        raise ValueError('Unknown Ext code {}'.format(code))

    data = packb(t, strict_types=True, use_bin_type=True, default=default)
-    expected = unpackb(data, encoding='utf-8', ext_hook=convert)
+    expected = unpackb(data, raw_as_bytes=False, ext_hook=convert)

    assert expected == t
--- a/test/test_unpack.py
+++ b/test/test_unpack.py
@ -47,8 +47,8 @@ def test_unpacker_ext_hook():
    class MyUnpacker(Unpacker):

        def __init__(self):
-            super(MyUnpacker, self).__init__(ext_hook=self._hook,
-                                             encoding='utf-8')
+            super(MyUnpacker, self).__init__(
+                ext_hook=self._hook, raw_as_bytes=False)

        def _hook(self, code, data):
            if code == 1:
@ -57,11 +57,11 @@ def test_unpacker_ext_hook():
                return ExtType(code, data)

    unpacker = MyUnpacker()
-    unpacker.feed(packb({'a': 1}, encoding='utf-8'))
+    unpacker.feed(packb({'a': 1}))
    assert unpacker.unpack() == {'a': 1}
-    unpacker.feed(packb({'a': ExtType(1, b'123')}, encoding='utf-8'))
+    unpacker.feed(packb({'a': ExtType(1, b'123')}))
    assert unpacker.unpack() == {'a': 123}
-    unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8'))
+    unpacker.feed(packb({'a': ExtType(2, b'321')}))
    assert unpacker.unpack() == {'a': ExtType(2, b'321')}