Add raw_as_bytes option to Unpacker. (#265)

This commit is contained in:
INADA Naoki 2018-01-11 17:02:41 +09:00 committed by GitHub
parent 50ea49c86f
commit 5534d0c7af
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 199 additions and 93 deletions

View file

@ -8,7 +8,8 @@ cython:
.PHONY: test
test:
py.test -v test
pytest -v test
MSGPACK_PUREPYTHON=1 pytest -v test
.PHONY: serve-doc
serve-doc: all

View file

@ -10,8 +10,21 @@ MessagePack for Python
:target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest
:alt: Documentation Status
IMPORTANT: Upgrading from msgpack-0.4
--------------------------------------
What's this
-----------
`MessagePack <https://msgpack.org/>`_ is an efficient binary serialization format.
It lets you exchange data among multiple languages like JSON.
But it's faster and smaller.
This package provides CPython bindings for reading and writing MessagePack data.
Very important notes for existing users
---------------------------------------
PyPI package name
^^^^^^^^^^^^^^^^^
TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`.
Do `pip uninstall msgpack-python; pip install msgpack` instead.
@ -24,13 +37,37 @@ Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-pyt
msgpack is removed and `import msgpack` fails.
What's this
-----------
Deprecating encoding option
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
encoding and unicode_errors options are deprecated.
For the packer, always use UTF-8. Storing text in any encoding other than UTF-8 is not recommended.
For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes``
objects into the msgpack raw type.
For the unpacker, there is a new ``raw_as_bytes`` option. It is ``True`` by default
for backward compatibility, but it will be changed to ``False`` in the near future.
You can use ``raw_as_bytes=False`` instead of ``encoding='utf-8'``.
Planned backward incompatible changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For msgpack 1.0, I am planning these breaking changes:
* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option.
* packer: Change default of ``use_bin_type`` option from False to True.
* unpacker: Change default of ``raw_as_bytes`` option from True to False.
* unpacker: Reduce all ``max_xxx_len`` options for typical usage.
* unpacker: Remove ``write_bytes`` option from all methods.
To keep these breaking changes from breaking your application, please:
* Don't use deprecated options.
* Pass ``use_bin_type`` and ``raw_as_bytes`` options explicitly.
* If your application handles large (>1MB) data, specify ``max_xxx_len`` options too.
`MessagePack <https://msgpack.org/>`_ is an efficient binary serialization format.
It lets you exchange data among multiple languages like JSON.
But it's faster and smaller.
This package provides CPython bindings for reading and writing MessagePack data.
Install
-------
@ -76,14 +113,14 @@ msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with
>>> import msgpack
>>> msgpack.packb([1, 2, 3], use_bin_type=True)
'\x93\x01\x02\x03'
>>> msgpack.unpackb(_)
>>> msgpack.unpackb(_, raw_as_bytes=False)
[1, 2, 3]
``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple:
.. code-block:: pycon
>>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False)
>>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw_as_bytes=False)
(1, 2, 3)
You should always specify the ``use_list`` keyword argument for backward compatibility.
@ -109,7 +146,7 @@ stream (or from bytes provided through its ``feed`` method).
buf.seek(0)
unpacker = msgpack.Unpacker(buf)
unpacker = msgpack.Unpacker(buf, raw_as_bytes=False)
for unpacked in unpacker:
print(unpacked)
@ -142,7 +179,7 @@ It is also possible to pack/unpack custom data types. Here is an example for
packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True)
this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw_as_bytes=False)
``Unpacker``'s ``object_hook`` callback receives a dict; the
``object_pairs_hook`` callback may instead be used to receive a list of
@ -172,7 +209,7 @@ It is also possible to pack/unpack custom data types using the **ext** type.
...
>>> data = array.array('d', [1.2, 3.4])
>>> packed = msgpack.packb(data, default=default, use_bin_type=True)
>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw_as_bytes=False)
>>> data == unpacked
True
@ -217,14 +254,10 @@ Early versions of msgpack didn't distinguish string and binary types (like Pytho
The type for representing both string and binary types was named **raw**.
For backward compatibility reasons, msgpack-python will still default all
strings to byte strings, unless you specify the `use_bin_type=True` option in
strings to byte strings, unless you specify the ``use_bin_type=True`` option in
the packer. If you do so, it will use a non-standard type called **bin** to
serialize byte arrays, and **raw** comes to mean **str**. If you want to
distinguish **bin** and **raw** in the unpacker, specify `encoding='utf-8'`.
**In future version, default value of ``use_bin_type`` will be changed to ``True``.
To avoid this change will break your code, you must specify it explicitly
even when you want to use old format.**
distinguish **bin** and **raw** in the unpacker, specify ``raw_as_bytes=False``.
Note that Python 2 defaults to byte-arrays over Unicode strings:
@ -234,7 +267,7 @@ Note that Python 2 defaults to byte-arrays over Unicode strings:
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
['spam', 'eggs']
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
encoding='utf-8')
raw_as_bytes=False)
['spam', u'eggs']
This is the same code in Python 3 (same behaviour, but Python 3 has a
@ -246,7 +279,7 @@ different default):
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
[b'spam', b'eggs']
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
encoding='utf-8')
raw_as_bytes=False)
[b'spam', 'eggs']
@ -277,6 +310,7 @@ You can use ``gc.disable()`` when unpacking large message.
use_list option
^^^^^^^^^^^^^^^
List is the default sequence type of Python.
But tuple is lighter than list.
You can use ``use_list=False`` while unpacking when performance is important.
@ -295,7 +329,7 @@ Test
MessagePack uses `pytest` for testing.
Run the tests with the following command:
$ pytest -v test
$ make test
..

View file

@ -3,5 +3,7 @@
%PYTHON%\python.exe setup.py install
%PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))"
%PYTHON%\python.exe -c "from msgpack import _packer, _unpacker"
%PYTHON%\python.exe -m pytest -v test
%PYTHON%\python.exe setup.py bdist_wheel
%PYTHON%\python.exe -m pytest -v test
SET EL=%ERRORLEVEL%
exit /b %EL%

View file

@ -2,7 +2,7 @@
#cython: embedsignature=True
from cpython cimport *
#from cpython.exc cimport PyErr_WarnEx
from cpython.exc cimport PyErr_WarnEx
from msgpack.exceptions import PackValueError, PackOverflowError
from msgpack import ExtType
@ -39,7 +39,7 @@ cdef extern from "pack.h":
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
cdef int DEFAULT_RECURSE_LIMIT=511
cdef size_t ITEM_LIMIT = (2**32)-1
cdef long long ITEM_LIMIT = (2**32)-1
cdef inline int PyBytesLike_Check(object o):
@ -110,9 +110,13 @@ cdef class Packer(object):
self.pk.buf_size = buf_size
self.pk.length = 0
def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
def __init__(self, default=None, encoding=None, unicode_errors=None,
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
bint strict_types=False):
if encoding is not None:
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1)
if unicode_errors is not None:
PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated.", 1)
self.use_float = use_single_float
self.strict_types = strict_types
self.autoreset = autoreset
@ -122,7 +126,7 @@ cdef class Packer(object):
raise TypeError("default must be a callable.")
self._default = default
if encoding is None:
self.encoding = NULL
self.encoding = 'utf_8'
self.unicode_errors = NULL
else:
if isinstance(encoding, unicode):
@ -134,7 +138,8 @@ cdef class Packer(object):
self._berrors = unicode_errors.encode('ascii')
else:
self._berrors = unicode_errors
self.unicode_errors = PyBytes_AsString(self._berrors)
if self._berrors is not None:
self.unicode_errors = PyBytes_AsString(self._berrors)
def __dealloc__(self):
PyMem_Free(self.pk.buf)
@ -149,7 +154,7 @@ cdef class Packer(object):
cdef char* rawval
cdef int ret
cdef dict d
cdef size_t L
cdef Py_ssize_t L
cdef int default_used = 0
cdef bint strict_types = self.strict_types
cdef Py_buffer view
@ -203,6 +208,7 @@ cdef class Packer(object):
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
if not self.encoding:
raise TypeError("Can't encode unicode string: no encoding is specified")
#TODO: Use faster API for UTF-8
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
L = len(o)
if L > ITEM_LIMIT:

View file

@ -43,8 +43,9 @@ from msgpack import ExtType
cdef extern from "unpack.h":
ctypedef struct msgpack_user:
bint use_list
PyObject* object_hook
bint raw_as_bytes
bint has_pairs_hook # call object_hook with k-v pairs
PyObject* object_hook
PyObject* list_hook
PyObject* ext_hook
char *encoding
@ -73,12 +74,14 @@ cdef extern from "unpack.h":
cdef inline init_ctx(unpack_context *ctx,
object object_hook, object object_pairs_hook,
object list_hook, object ext_hook,
bint use_list, char* encoding, char* unicode_errors,
bint use_list, bint raw_as_bytes,
char* encoding, char* unicode_errors,
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
Py_ssize_t max_ext_len):
unpack_init(ctx)
ctx.user.use_list = use_list
ctx.user.raw_as_bytes = raw_as_bytes
ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL
ctx.user.max_str_len = max_str_len
ctx.user.max_bin_len = max_bin_len
@ -155,7 +158,8 @@ cdef inline int get_data_from_buffer(object obj,
return 1
def unpackb(object packed, object object_hook=None, object list_hook=None,
bint use_list=1, encoding=None, unicode_errors="strict",
bint use_list=True, bint raw_as_bytes=True,
encoding=None, unicode_errors="strict",
object_pairs_hook=None, ext_hook=ExtType,
Py_ssize_t max_str_len=2147483647, # 2**31-1
Py_ssize_t max_bin_len=2147483647,
@ -180,21 +184,26 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
cdef char* cerr = NULL
cdef int new_protocol = 0
if encoding is not None:
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
if isinstance(encoding, unicode):
encoding = encoding.encode('ascii')
elif not isinstance(encoding, bytes):
raise TypeError("encoding should be bytes or unicode")
cenc = PyBytes_AsString(encoding)
if unicode_errors is not None:
PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
if isinstance(unicode_errors, unicode):
unicode_errors = unicode_errors.encode('ascii')
elif not isinstance(unicode_errors, bytes):
raise TypeError("unicode_errors should be bytes or unicode")
cerr = PyBytes_AsString(unicode_errors)
get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
try:
if encoding is not None:
if isinstance(encoding, unicode):
encoding = encoding.encode('ascii')
cenc = PyBytes_AsString(encoding)
if unicode_errors is not None:
if isinstance(unicode_errors, unicode):
unicode_errors = unicode_errors.encode('ascii')
cerr = PyBytes_AsString(unicode_errors)
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
use_list, cenc, cerr,
use_list, raw_as_bytes, cenc, cerr,
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
ret = unpack_construct(&ctx, buf, buf_len, &off)
finally:
@ -252,6 +261,16 @@ cdef class Unpacker(object):
If true, unpack msgpack array to Python list.
Otherwise, unpack to Python tuple. (default: True)
:param bool raw_as_bytes:
If true, unpack msgpack raw to Python bytes (default).
Otherwise, unpack to Python str (or unicode on Python 2) by decoding
with UTF-8 encoding (recommended).
Currently, the default is true, but it will be changed to false in the
near future. So you must specify it explicitly to keep backward
compatibility.
The deprecated *encoding* option overrides this option.
:param callable object_hook:
When specified, it should be callable.
Unpacker calls it with a dict argument after unpacking msgpack map.
@ -262,14 +281,6 @@ cdef class Unpacker(object):
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
(See also simplejson)
:param str encoding:
Encoding used for decoding msgpack raw.
If it is None (default), msgpack raw is deserialized to Python bytes.
:param str unicode_errors:
Used for decoding msgpack raw with *encoding*.
(default: `'strict'`)
:param int max_buffer_size:
Limits size of data waiting unpacked. 0 means system's INT_MAX (default).
Raises `BufferFull` exception when it is insufficient.
@ -287,16 +298,25 @@ cdef class Unpacker(object):
:param int max_map_len:
Limits max length of map. (default: 2**31-1)
:param str encoding:
Deprecated, use raw_as_bytes instead.
Encoding used for decoding msgpack raw.
If it is None (default), msgpack raw is deserialized to Python bytes.
example of streaming deserialize from file-like object::
:param str unicode_errors:
Deprecated. Used for decoding msgpack raw with *encoding*.
(default: `'strict'`)
unpacker = Unpacker(file_like)
Example of streaming deserialize from file-like object::
unpacker = Unpacker(file_like, raw_as_bytes=False)
for o in unpacker:
process(o)
example of streaming deserialize from socket::
Example of streaming deserialize from socket::
unpacker = Unpacker()
unpacker = Unpacker(raw_as_bytes=False)
while True:
buf = sock.recv(1024**2)
if not buf:
@ -324,7 +344,8 @@ cdef class Unpacker(object):
PyMem_Free(self.buf)
self.buf = NULL
def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
def __init__(self, file_like=None, Py_ssize_t read_size=0,
bint use_list=True, bint raw_as_bytes=True,
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
encoding=None, unicode_errors='strict', int max_buffer_size=0,
object ext_hook=ExtType,
@ -363,6 +384,7 @@ cdef class Unpacker(object):
self.stream_offset = 0
if encoding is not None:
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
if isinstance(encoding, unicode):
self.encoding = encoding.encode('ascii')
elif isinstance(encoding, bytes):
@ -372,6 +394,7 @@ cdef class Unpacker(object):
cenc = PyBytes_AsString(self.encoding)
if unicode_errors is not None:
PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
if isinstance(unicode_errors, unicode):
self.unicode_errors = unicode_errors.encode('ascii')
elif isinstance(unicode_errors, bytes):
@ -381,7 +404,7 @@ cdef class Unpacker(object):
cerr = PyBytes_AsString(self.unicode_errors)
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
ext_hook, use_list, cenc, cerr,
ext_hook, use_list, raw_as_bytes, cenc, cerr,
max_str_len, max_bin_len, max_array_len,
max_map_len, max_ext_len)

View file

@ -145,6 +145,16 @@ class Unpacker(object):
If true, unpack msgpack array to Python list.
Otherwise, unpack to Python tuple. (default: True)
:param bool raw_as_bytes:
If true, unpack msgpack raw to Python bytes (default).
Otherwise, unpack to Python str (or unicode on Python 2) by decoding
with UTF-8 encoding (recommended).
Currently, the default is true, but it will be changed to false in the
near future. So you must specify it explicitly to keep backward
compatibility.
The deprecated *encoding* option overrides this option.
:param callable object_hook:
When specified, it should be callable.
Unpacker calls it with a dict argument after unpacking msgpack map.
@ -183,13 +193,13 @@ class Unpacker(object):
example of streaming deserialize from file-like object::
unpacker = Unpacker(file_like)
unpacker = Unpacker(file_like, raw_as_bytes=False)
for o in unpacker:
process(o)
example of streaming deserialize from socket::
unpacker = Unpacker()
unpacker = Unpacker(raw_as_bytes=False)
while True:
buf = sock.recv(1024**2)
if not buf:
@ -199,15 +209,28 @@ class Unpacker(object):
process(o)
"""
def __init__(self, file_like=None, read_size=0, use_list=True,
def __init__(self, file_like=None, read_size=0, use_list=True, raw_as_bytes=True,
object_hook=None, object_pairs_hook=None, list_hook=None,
encoding=None, unicode_errors='strict', max_buffer_size=0,
encoding=None, unicode_errors=None, max_buffer_size=0,
ext_hook=ExtType,
max_str_len=2147483647, # 2**31-1
max_bin_len=2147483647,
max_array_len=2147483647,
max_map_len=2147483647,
max_ext_len=2147483647):
if encoding is not None:
warnings.warn(
"encoding is deprecated, Use raw_as_bytes=False instead.",
PendingDeprecationWarning)
if unicode_errors is not None:
warnings.warn(
"unicode_errors is deprecated.",
PendingDeprecationWarning)
else:
unicode_errors = 'strict'
if file_like is None:
self._feeding = True
else:
@ -234,6 +257,7 @@ class Unpacker(object):
if read_size > self._max_buffer_size:
raise ValueError("read_size must be smaller than max_buffer_size")
self._read_size = read_size or min(self._max_buffer_size, 16*1024)
self._raw_as_bytes = bool(raw_as_bytes)
self._encoding = encoding
self._unicode_errors = unicode_errors
self._use_list = use_list
@ -582,8 +606,10 @@ class Unpacker(object):
if typ == TYPE_RAW:
if self._encoding is not None:
obj = obj.decode(self._encoding, self._unicode_errors)
else:
elif self._raw_as_bytes:
obj = bytes(obj)
else:
obj = obj.decode('utf_8')
return obj
if typ == TYPE_EXT:
return self._ext_hook(n, bytes(obj))
@ -682,9 +708,23 @@ class Packer(object):
:param str unicode_errors:
(deprecated) Error handler for encoding unicode. (default: 'strict')
"""
def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
def __init__(self, default=None, encoding=None, unicode_errors=None,
use_single_float=False, autoreset=True, use_bin_type=False,
strict_types=False):
if encoding is None:
encoding = 'utf_8'
else:
warnings.warn(
"encoding is deprecated, Use raw_as_bytes=False instead.",
PendingDeprecationWarning)
if unicode_errors is None:
unicode_errors = 'strict'
else:
warnings.warn(
"unicode_errors is deprecated.",
PendingDeprecationWarning)
self._strict_types = strict_types
self._use_float = use_single_float
self._autoreset = autoreset

View file

@ -20,9 +20,10 @@
#include "unpack_define.h"
typedef struct unpack_user {
int use_list;
PyObject *object_hook;
bool use_list;
bool raw_as_bytes;
bool has_pairs_hook;
PyObject *object_hook;
PyObject *list_hook;
PyObject *ext_hook;
const char *encoding;
@ -225,10 +226,13 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
}
PyObject *py;
if(u->encoding) {
if (u->encoding) {
py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors);
} else {
} else if (u->raw_as_bytes) {
py = PyBytes_FromStringAndSize(p, l);
} else {
py = PyUnicode_DecodeUTF8(p, l, NULL);
}
if (!py)
return -1;

View file

@ -39,11 +39,11 @@ def test_max_str_len():
d = 'x' * 3
packed = packb(d)
unpacker = Unpacker(max_str_len=3, encoding='utf-8')
unpacker = Unpacker(max_str_len=3, raw_as_bytes=False)
unpacker.feed(packed)
assert unpacker.unpack() == d
unpacker = Unpacker(max_str_len=2, encoding='utf-8')
unpacker = Unpacker(max_str_len=2, raw_as_bytes=False)
with pytest.raises(UnpackValueError):
unpacker.feed(packed)
unpacker.unpack()

View file

@ -31,14 +31,14 @@ def testPack():
def testPackUnicode():
test_data = ["", "abcd", ["defgh"], "Русский текст"]
for td in test_data:
re = unpackb(packb(td, encoding='utf-8'), use_list=1, encoding='utf-8')
re = unpackb(packb(td), use_list=1, raw_as_bytes=False)
assert re == td
packer = Packer(encoding='utf-8')
packer = Packer()
data = packer.pack(td)
re = Unpacker(BytesIO(data), encoding=str('utf-8'), use_list=1).unpack()
re = Unpacker(BytesIO(data), raw_as_bytes=False, use_list=1).unpack()
assert re == td
def testPackUTF32():
def testPackUTF32(): # deprecated
try:
test_data = [
"",
@ -66,26 +66,22 @@ def testPackByteArrays():
for td in test_data:
check(td)
def testIgnoreUnicodeErrors():
def testIgnoreUnicodeErrors(): # deprecated
re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1)
assert re == "abcdef"
def testStrictUnicodeUnpack():
with raises(UnicodeDecodeError):
unpackb(packb(b'abc\xeddef'), encoding='utf-8', use_list=1)
unpackb(packb(b'abc\xeddef'), raw_as_bytes=False, use_list=1)
def testStrictUnicodePack():
def testStrictUnicodePack(): # deprecated
with raises(UnicodeEncodeError):
packb("abc\xeddef", encoding='ascii', unicode_errors='strict')
def testIgnoreErrorsPack():
re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), encoding='utf-8', use_list=1)
def testIgnoreErrorsPack(): # deprecated
re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw_as_bytes=False, use_list=1)
assert re == "abcdef"
def testNoEncoding():
with raises(TypeError):
packb("abc", encoding=None)
def testDecodeBinary():
re = unpackb(packb(b"abc"), encoding=None, use_list=1)
assert re == b"abc"

View file

@ -11,7 +11,7 @@ def test_namedtuple():
return dict(o._asdict())
raise TypeError('Unsupported type %s' % (type(o),))
packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default)
unpacked = unpackb(packed, encoding='utf-8')
unpacked = unpackb(packed, raw_as_bytes=False)
assert unpacked == {'foo': 1, 'bar': 42}
@ -32,7 +32,7 @@ def test_tuple():
return o
data = packb(t, strict_types=True, use_bin_type=True, default=default)
expected = unpackb(data, encoding='utf-8', object_hook=convert)
expected = unpackb(data, raw_as_bytes=False, object_hook=convert)
assert expected == t
@ -53,10 +53,10 @@ def test_tuple_ext():
def convert(code, payload):
if code == MSGPACK_EXT_TYPE_TUPLE:
# Unpack and convert to tuple
return tuple(unpackb(payload, encoding='utf-8', ext_hook=convert))
return tuple(unpackb(payload, raw_as_bytes=False, ext_hook=convert))
raise ValueError('Unknown Ext code {}'.format(code))
data = packb(t, strict_types=True, use_bin_type=True, default=default)
expected = unpackb(data, encoding='utf-8', ext_hook=convert)
expected = unpackb(data, raw_as_bytes=False, ext_hook=convert)
assert expected == t

View file

@ -47,8 +47,8 @@ def test_unpacker_ext_hook():
class MyUnpacker(Unpacker):
def __init__(self):
super(MyUnpacker, self).__init__(ext_hook=self._hook,
encoding='utf-8')
super(MyUnpacker, self).__init__(
ext_hook=self._hook, raw_as_bytes=False)
def _hook(self, code, data):
if code == 1:
@ -57,11 +57,11 @@ def test_unpacker_ext_hook():
return ExtType(code, data)
unpacker = MyUnpacker()
unpacker.feed(packb({'a': 1}, encoding='utf-8'))
unpacker.feed(packb({'a': 1}))
assert unpacker.unpack() == {'a': 1}
unpacker.feed(packb({'a': ExtType(1, b'123')}, encoding='utf-8'))
unpacker.feed(packb({'a': ExtType(1, b'123')}))
assert unpacker.unpack() == {'a': 123}
unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8'))
unpacker.feed(packb({'a': ExtType(2, b'321')}))
assert unpacker.unpack() == {'a': ExtType(2, b'321')}