mirror of
https://github.com/msgpack/msgpack-python.git
synced 2025-10-19 20:03:16 +00:00
Add raw_as_bytes option to Unpacker. (#265)
This commit is contained in:
parent
50ea49c86f
commit
5534d0c7af
11 changed files with 199 additions and 93 deletions
3
Makefile
3
Makefile
|
@ -8,7 +8,8 @@ cython:
|
||||||
|
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test:
|
||||||
py.test -v test
|
pytest -v test
|
||||||
|
MSGPACK_PUREPYTHON=1 pytest -v test
|
||||||
|
|
||||||
.PHONY: serve-doc
|
.PHONY: serve-doc
|
||||||
serve-doc: all
|
serve-doc: all
|
||||||
|
|
78
README.rst
78
README.rst
|
@ -10,8 +10,21 @@ MessagePack for Python
|
||||||
:target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest
|
:target: https://msgpack-python.readthedocs.io/en/latest/?badge=latest
|
||||||
:alt: Documentation Status
|
:alt: Documentation Status
|
||||||
|
|
||||||
IMPORTANT: Upgrading from msgpack-0.4
|
|
||||||
--------------------------------------
|
What's this
|
||||||
|
-----------
|
||||||
|
|
||||||
|
`MessagePack <https://msgpack.org/>`_ is an efficient binary serialization format.
|
||||||
|
It lets you exchange data among multiple languages like JSON.
|
||||||
|
But it's faster and smaller.
|
||||||
|
This package provides CPython bindings for reading and writing MessagePack data.
|
||||||
|
|
||||||
|
|
||||||
|
Very important notes for existing users
|
||||||
|
---------------------------------------
|
||||||
|
|
||||||
|
PyPI package name
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`.
|
TL;DR: When upgrading from msgpack-0.4 or earlier, don't do `pip install -U msgpack-python`.
|
||||||
Do `pip uninstall msgpack-python; pip install msgpack` instead.
|
Do `pip uninstall msgpack-python; pip install msgpack` instead.
|
||||||
|
@ -24,13 +37,37 @@ Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-pyt
|
||||||
msgpack is removed and `import msgpack` fail.
|
msgpack is removed and `import msgpack` fail.
|
||||||
|
|
||||||
|
|
||||||
What's this
|
Deprecating encoding option
|
||||||
-----------
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
encoding and unicode_errors options are deprecated.
|
||||||
|
|
||||||
|
For the packer, always use UTF-8. Storing text in any encoding other than UTF-8 is not recommended.
|
||||||
|
|
||||||
|
For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes``
|
||||||
|
object into msgpack raw type.
|
||||||
|
|
||||||
|
For the unpacker, there is a new ``raw_as_bytes`` option. It is ``True`` by default
|
||||||
|
for backward compatibility, but it will be changed to ``False`` in the near future.
|
||||||
|
You can use ``raw_as_bytes=False`` instead of ``encoding='utf-8'``.
|
||||||
|
|
||||||
|
Planned backward incompatible changes
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
For msgpack 1.0, I am planning these breaking changes:
|
||||||
|
|
||||||
|
* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` options.
|
||||||
|
* packer: Change default of ``use_bin_type`` option from False to True.
|
||||||
|
* unpacker: Change default of ``raw_as_bytes`` option from True to False.
|
||||||
|
* unpacker: Reduce all ``max_xxx_len`` options for typical usage.
|
||||||
|
* unpacker: Remove ``write_bytes`` option from all methods.
|
||||||
|
|
||||||
|
To keep these breaking changes from breaking your application, please:
|
||||||
|
|
||||||
|
* Don't use deprecated options.
|
||||||
|
* Pass ``use_bin_type`` and ``raw_as_bytes`` options explicitly.
|
||||||
|
* If your application handles large (>1MB) data, specify ``max_xxx_len`` options too.
|
||||||
|
|
||||||
`MessagePack <https://msgpack.org/>`_ is an efficient binary serialization format.
|
|
||||||
It lets you exchange data among multiple languages like JSON.
|
|
||||||
But it's faster and smaller.
|
|
||||||
This package provides CPython bindings for reading and writing MessagePack data.
|
|
||||||
|
|
||||||
Install
|
Install
|
||||||
-------
|
-------
|
||||||
|
@ -76,14 +113,14 @@ msgpack provides ``dumps`` and ``loads`` as an alias for compatibility with
|
||||||
>>> import msgpack
|
>>> import msgpack
|
||||||
>>> msgpack.packb([1, 2, 3], use_bin_type=True)
|
>>> msgpack.packb([1, 2, 3], use_bin_type=True)
|
||||||
'\x93\x01\x02\x03'
|
'\x93\x01\x02\x03'
|
||||||
>>> msgpack.unpackb(_)
|
>>> msgpack.unpackb(_, raw_as_bytes=False)
|
||||||
[1, 2, 3]
|
[1, 2, 3]
|
||||||
|
|
||||||
``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple:
|
``unpack`` unpacks msgpack's array to Python's list, but can also unpack to tuple:
|
||||||
|
|
||||||
.. code-block:: pycon
|
.. code-block:: pycon
|
||||||
|
|
||||||
>>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False)
|
>>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw_as_bytes=False)
|
||||||
(1, 2, 3)
|
(1, 2, 3)
|
||||||
|
|
||||||
You should always specify the ``use_list`` keyword argument for backward compatibility.
|
You should always specify the ``use_list`` keyword argument for backward compatibility.
|
||||||
|
@ -109,7 +146,7 @@ stream (or from bytes provided through its ``feed`` method).
|
||||||
|
|
||||||
buf.seek(0)
|
buf.seek(0)
|
||||||
|
|
||||||
unpacker = msgpack.Unpacker(buf)
|
unpacker = msgpack.Unpacker(buf, raw_as_bytes=False)
|
||||||
for unpacked in unpacker:
|
for unpacked in unpacker:
|
||||||
print(unpacked)
|
print(unpacked)
|
||||||
|
|
||||||
|
@ -142,7 +179,7 @@ It is also possible to pack/unpack custom data types. Here is an example for
|
||||||
|
|
||||||
|
|
||||||
packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True)
|
packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True)
|
||||||
this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime)
|
this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw_as_bytes=False)
|
||||||
|
|
||||||
``Unpacker``'s ``object_hook`` callback receives a dict; the
|
``Unpacker``'s ``object_hook`` callback receives a dict; the
|
||||||
``object_pairs_hook`` callback may instead be used to receive a list of
|
``object_pairs_hook`` callback may instead be used to receive a list of
|
||||||
|
@ -172,7 +209,7 @@ It is also possible to pack/unpack custom data types using the **ext** type.
|
||||||
...
|
...
|
||||||
>>> data = array.array('d', [1.2, 3.4])
|
>>> data = array.array('d', [1.2, 3.4])
|
||||||
>>> packed = msgpack.packb(data, default=default, use_bin_type=True)
|
>>> packed = msgpack.packb(data, default=default, use_bin_type=True)
|
||||||
>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
|
>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw_as_bytes=False)
|
||||||
>>> data == unpacked
|
>>> data == unpacked
|
||||||
True
|
True
|
||||||
|
|
||||||
|
@ -217,14 +254,10 @@ Early versions of msgpack didn't distinguish string and binary types (like Pytho
|
||||||
The type for representing both string and binary types was named **raw**.
|
The type for representing both string and binary types was named **raw**.
|
||||||
|
|
||||||
For backward compatibility reasons, msgpack-python will still default all
|
For backward compatibility reasons, msgpack-python will still default all
|
||||||
strings to byte strings, unless you specify the `use_bin_type=True` option in
|
strings to byte strings, unless you specify the ``use_bin_type=True`` option in
|
||||||
the packer. If you do so, it will use a non-standard type called **bin** to
|
the packer. If you do so, it will use a non-standard type called **bin** to
|
||||||
serialize byte arrays, and **raw** becomes to mean **str**. If you want to
|
serialize byte arrays, and **raw** comes to mean **str**. If you want to
|
||||||
distinguish **bin** and **raw** in the unpacker, specify `encoding='utf-8'`.
|
distinguish **bin** and **raw** in the unpacker, specify ``raw_as_bytes=False``.
|
||||||
|
|
||||||
**In future version, default value of ``use_bin_type`` will be changed to ``True``.
|
|
||||||
To avoid this change will break your code, you must specify it explicitly
|
|
||||||
even when you want to use old format.**
|
|
||||||
|
|
||||||
Note that Python 2 defaults to byte-arrays over Unicode strings:
|
Note that Python 2 defaults to byte-arrays over Unicode strings:
|
||||||
|
|
||||||
|
@ -234,7 +267,7 @@ Note that Python 2 defaults to byte-arrays over Unicode strings:
|
||||||
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
|
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
|
||||||
['spam', 'eggs']
|
['spam', 'eggs']
|
||||||
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
|
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
|
||||||
encoding='utf-8')
|
raw_as_bytes=False)
|
||||||
['spam', u'eggs']
|
['spam', u'eggs']
|
||||||
|
|
||||||
This is the same code in Python 3 (same behaviour, but Python 3 has a
|
This is the same code in Python 3 (same behaviour, but Python 3 has a
|
||||||
|
@ -246,7 +279,7 @@ different default):
|
||||||
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
|
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
|
||||||
[b'spam', b'eggs']
|
[b'spam', b'eggs']
|
||||||
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
|
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
|
||||||
encoding='utf-8')
|
raw_as_bytes=False)
|
||||||
[b'spam', 'eggs']
|
[b'spam', 'eggs']
|
||||||
|
|
||||||
|
|
||||||
|
@ -277,6 +310,7 @@ You can use ``gc.disable()`` when unpacking large message.
|
||||||
|
|
||||||
use_list option
|
use_list option
|
||||||
^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
List is the default sequence type of Python.
|
List is the default sequence type of Python.
|
||||||
But tuple is lighter than list.
|
But tuple is lighter than list.
|
||||||
You can use ``use_list=False`` while unpacking when performance is important.
|
You can use ``use_list=False`` while unpacking when performance is important.
|
||||||
|
@ -295,7 +329,7 @@ Test
|
||||||
MessagePack uses `pytest` for testing.
|
MessagePack uses `pytest` for testing.
|
||||||
Run test with following command:
|
Run the tests with the following command:
|
||||||
|
|
||||||
$ pytest -v test
|
$ make test
|
||||||
|
|
||||||
|
|
||||||
..
|
..
|
||||||
|
|
|
@ -3,5 +3,7 @@
|
||||||
%PYTHON%\python.exe setup.py install
|
%PYTHON%\python.exe setup.py install
|
||||||
%PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))"
|
%PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))"
|
||||||
%PYTHON%\python.exe -c "from msgpack import _packer, _unpacker"
|
%PYTHON%\python.exe -c "from msgpack import _packer, _unpacker"
|
||||||
%PYTHON%\python.exe -m pytest -v test
|
|
||||||
%PYTHON%\python.exe setup.py bdist_wheel
|
%PYTHON%\python.exe setup.py bdist_wheel
|
||||||
|
%PYTHON%\python.exe -m pytest -v test
|
||||||
|
SET EL=%ERRORLEVEL%
|
||||||
|
exit /b %EL%
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
#cython: embedsignature=True
|
#cython: embedsignature=True
|
||||||
|
|
||||||
from cpython cimport *
|
from cpython cimport *
|
||||||
#from cpython.exc cimport PyErr_WarnEx
|
from cpython.exc cimport PyErr_WarnEx
|
||||||
|
|
||||||
from msgpack.exceptions import PackValueError, PackOverflowError
|
from msgpack.exceptions import PackValueError, PackOverflowError
|
||||||
from msgpack import ExtType
|
from msgpack import ExtType
|
||||||
|
@ -39,7 +39,7 @@ cdef extern from "pack.h":
|
||||||
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
|
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
|
||||||
|
|
||||||
cdef int DEFAULT_RECURSE_LIMIT=511
|
cdef int DEFAULT_RECURSE_LIMIT=511
|
||||||
cdef size_t ITEM_LIMIT = (2**32)-1
|
cdef long long ITEM_LIMIT = (2**32)-1
|
||||||
|
|
||||||
|
|
||||||
cdef inline int PyBytesLike_Check(object o):
|
cdef inline int PyBytesLike_Check(object o):
|
||||||
|
@ -110,9 +110,13 @@ cdef class Packer(object):
|
||||||
self.pk.buf_size = buf_size
|
self.pk.buf_size = buf_size
|
||||||
self.pk.length = 0
|
self.pk.length = 0
|
||||||
|
|
||||||
def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
|
def __init__(self, default=None, encoding=None, unicode_errors=None,
|
||||||
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
|
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
|
||||||
bint strict_types=False):
|
bint strict_types=False):
|
||||||
|
if encoding is not None:
|
||||||
|
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1)
|
||||||
|
if unicode_errors is not None:
|
||||||
|
PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated.", 1)
|
||||||
self.use_float = use_single_float
|
self.use_float = use_single_float
|
||||||
self.strict_types = strict_types
|
self.strict_types = strict_types
|
||||||
self.autoreset = autoreset
|
self.autoreset = autoreset
|
||||||
|
@ -122,7 +126,7 @@ cdef class Packer(object):
|
||||||
raise TypeError("default must be a callable.")
|
raise TypeError("default must be a callable.")
|
||||||
self._default = default
|
self._default = default
|
||||||
if encoding is None:
|
if encoding is None:
|
||||||
self.encoding = NULL
|
self.encoding = 'utf_8'
|
||||||
self.unicode_errors = NULL
|
self.unicode_errors = NULL
|
||||||
else:
|
else:
|
||||||
if isinstance(encoding, unicode):
|
if isinstance(encoding, unicode):
|
||||||
|
@ -134,7 +138,8 @@ cdef class Packer(object):
|
||||||
self._berrors = unicode_errors.encode('ascii')
|
self._berrors = unicode_errors.encode('ascii')
|
||||||
else:
|
else:
|
||||||
self._berrors = unicode_errors
|
self._berrors = unicode_errors
|
||||||
self.unicode_errors = PyBytes_AsString(self._berrors)
|
if self._berrors is not None:
|
||||||
|
self.unicode_errors = PyBytes_AsString(self._berrors)
|
||||||
|
|
||||||
def __dealloc__(self):
|
def __dealloc__(self):
|
||||||
PyMem_Free(self.pk.buf)
|
PyMem_Free(self.pk.buf)
|
||||||
|
@ -149,7 +154,7 @@ cdef class Packer(object):
|
||||||
cdef char* rawval
|
cdef char* rawval
|
||||||
cdef int ret
|
cdef int ret
|
||||||
cdef dict d
|
cdef dict d
|
||||||
cdef size_t L
|
cdef Py_ssize_t L
|
||||||
cdef int default_used = 0
|
cdef int default_used = 0
|
||||||
cdef bint strict_types = self.strict_types
|
cdef bint strict_types = self.strict_types
|
||||||
cdef Py_buffer view
|
cdef Py_buffer view
|
||||||
|
@ -203,6 +208,7 @@ cdef class Packer(object):
|
||||||
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
|
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
|
||||||
if not self.encoding:
|
if not self.encoding:
|
||||||
raise TypeError("Can't encode unicode string: no encoding is specified")
|
raise TypeError("Can't encode unicode string: no encoding is specified")
|
||||||
|
#TODO: Use faster API for UTF-8
|
||||||
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
|
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
|
||||||
L = len(o)
|
L = len(o)
|
||||||
if L > ITEM_LIMIT:
|
if L > ITEM_LIMIT:
|
||||||
|
|
|
@ -43,8 +43,9 @@ from msgpack import ExtType
|
||||||
cdef extern from "unpack.h":
|
cdef extern from "unpack.h":
|
||||||
ctypedef struct msgpack_user:
|
ctypedef struct msgpack_user:
|
||||||
bint use_list
|
bint use_list
|
||||||
PyObject* object_hook
|
bint raw_as_bytes
|
||||||
bint has_pairs_hook # call object_hook with k-v pairs
|
bint has_pairs_hook # call object_hook with k-v pairs
|
||||||
|
PyObject* object_hook
|
||||||
PyObject* list_hook
|
PyObject* list_hook
|
||||||
PyObject* ext_hook
|
PyObject* ext_hook
|
||||||
char *encoding
|
char *encoding
|
||||||
|
@ -73,12 +74,14 @@ cdef extern from "unpack.h":
|
||||||
cdef inline init_ctx(unpack_context *ctx,
|
cdef inline init_ctx(unpack_context *ctx,
|
||||||
object object_hook, object object_pairs_hook,
|
object object_hook, object object_pairs_hook,
|
||||||
object list_hook, object ext_hook,
|
object list_hook, object ext_hook,
|
||||||
bint use_list, char* encoding, char* unicode_errors,
|
bint use_list, bint raw_as_bytes,
|
||||||
|
char* encoding, char* unicode_errors,
|
||||||
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
|
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
|
||||||
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
|
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
|
||||||
Py_ssize_t max_ext_len):
|
Py_ssize_t max_ext_len):
|
||||||
unpack_init(ctx)
|
unpack_init(ctx)
|
||||||
ctx.user.use_list = use_list
|
ctx.user.use_list = use_list
|
||||||
|
ctx.user.raw_as_bytes = raw_as_bytes
|
||||||
ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL
|
ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL
|
||||||
ctx.user.max_str_len = max_str_len
|
ctx.user.max_str_len = max_str_len
|
||||||
ctx.user.max_bin_len = max_bin_len
|
ctx.user.max_bin_len = max_bin_len
|
||||||
|
@ -155,7 +158,8 @@ cdef inline int get_data_from_buffer(object obj,
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
def unpackb(object packed, object object_hook=None, object list_hook=None,
|
def unpackb(object packed, object object_hook=None, object list_hook=None,
|
||||||
bint use_list=1, encoding=None, unicode_errors="strict",
|
bint use_list=True, bint raw_as_bytes=True,
|
||||||
|
encoding=None, unicode_errors="strict",
|
||||||
object_pairs_hook=None, ext_hook=ExtType,
|
object_pairs_hook=None, ext_hook=ExtType,
|
||||||
Py_ssize_t max_str_len=2147483647, # 2**32-1
|
Py_ssize_t max_str_len=2147483647, # 2**32-1
|
||||||
Py_ssize_t max_bin_len=2147483647,
|
Py_ssize_t max_bin_len=2147483647,
|
||||||
|
@ -180,21 +184,26 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
|
||||||
cdef char* cerr = NULL
|
cdef char* cerr = NULL
|
||||||
cdef int new_protocol = 0
|
cdef int new_protocol = 0
|
||||||
|
|
||||||
|
if encoding is not None:
|
||||||
|
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
|
||||||
|
if isinstance(encoding, unicode):
|
||||||
|
encoding = encoding.encode('ascii')
|
||||||
|
elif not isinstance(encoding, bytes):
|
||||||
|
raise TypeError("encoding should be bytes or unicode")
|
||||||
|
cenc = PyBytes_AsString(encoding)
|
||||||
|
|
||||||
|
if unicode_errors is not None:
|
||||||
|
PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
|
||||||
|
if isinstance(unicode_errors, unicode):
|
||||||
|
unicode_errors = unicode_errors.encode('ascii')
|
||||||
|
elif not isinstance(unicode_errors, bytes):
|
||||||
|
raise TypeError("unicode_errors should be bytes or unicode")
|
||||||
|
cerr = PyBytes_AsString(unicode_errors)
|
||||||
|
|
||||||
get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
|
get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if encoding is not None:
|
|
||||||
if isinstance(encoding, unicode):
|
|
||||||
encoding = encoding.encode('ascii')
|
|
||||||
cenc = PyBytes_AsString(encoding)
|
|
||||||
|
|
||||||
if unicode_errors is not None:
|
|
||||||
if isinstance(unicode_errors, unicode):
|
|
||||||
unicode_errors = unicode_errors.encode('ascii')
|
|
||||||
cerr = PyBytes_AsString(unicode_errors)
|
|
||||||
|
|
||||||
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
|
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
|
||||||
use_list, cenc, cerr,
|
use_list, raw_as_bytes, cenc, cerr,
|
||||||
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
|
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
|
||||||
ret = unpack_construct(&ctx, buf, buf_len, &off)
|
ret = unpack_construct(&ctx, buf, buf_len, &off)
|
||||||
finally:
|
finally:
|
||||||
|
@ -252,6 +261,16 @@ cdef class Unpacker(object):
|
||||||
If true, unpack msgpack array to Python list.
|
If true, unpack msgpack array to Python list.
|
||||||
Otherwise, unpack to Python tuple. (default: True)
|
Otherwise, unpack to Python tuple. (default: True)
|
||||||
|
|
||||||
|
:param bool raw_as_bytes:
|
||||||
|
If true, unpack msgpack raw to Python bytes (default).
|
||||||
|
Otherwise, unpack to Python str (or unicode on Python 2) by decoding
|
||||||
|
with UTF-8 encoding (recommended).
|
||||||
|
Currently, the default is true, but it will be changed to false in
|
||||||
|
near future. So you must specify it explicitly for keeping backward
|
||||||
|
compatibility.
|
||||||
|
|
||||||
|
The deprecated *encoding* option overrides this option.
|
||||||
|
|
||||||
:param callable object_hook:
|
:param callable object_hook:
|
||||||
When specified, it should be callable.
|
When specified, it should be callable.
|
||||||
Unpacker calls it with a dict argument after unpacking msgpack map.
|
Unpacker calls it with a dict argument after unpacking msgpack map.
|
||||||
|
@ -262,14 +281,6 @@ cdef class Unpacker(object):
|
||||||
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
|
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
|
||||||
(See also simplejson)
|
(See also simplejson)
|
||||||
|
|
||||||
:param str encoding:
|
|
||||||
Encoding used for decoding msgpack raw.
|
|
||||||
If it is None (default), msgpack raw is deserialized to Python bytes.
|
|
||||||
|
|
||||||
:param str unicode_errors:
|
|
||||||
Used for decoding msgpack raw with *encoding*.
|
|
||||||
(default: `'strict'`)
|
|
||||||
|
|
||||||
:param int max_buffer_size:
|
:param int max_buffer_size:
|
||||||
Limits size of data waiting unpacked. 0 means system's INT_MAX (default).
|
Limits size of data waiting unpacked. 0 means system's INT_MAX (default).
|
||||||
Raises `BufferFull` exception when it is insufficient.
|
Raises `BufferFull` exception when it is insufficient.
|
||||||
|
@ -287,16 +298,25 @@ cdef class Unpacker(object):
|
||||||
:param int max_map_len:
|
:param int max_map_len:
|
||||||
Limits max length of map. (default: 2**31-1)
|
Limits max length of map. (default: 2**31-1)
|
||||||
|
|
||||||
|
:param str encoding:
|
||||||
|
Deprecated, use raw_as_bytes instead.
|
||||||
|
Encoding used for decoding msgpack raw.
|
||||||
|
If it is None (default), msgpack raw is deserialized to Python bytes.
|
||||||
|
|
||||||
example of streaming deserialize from file-like object::
|
:param str unicode_errors:
|
||||||
|
Deprecated. Used for decoding msgpack raw with *encoding*.
|
||||||
|
(default: `'strict'`)
|
||||||
|
|
||||||
unpacker = Unpacker(file_like)
|
|
||||||
|
Example of streaming deserialize from file-like object::
|
||||||
|
|
||||||
|
unpacker = Unpacker(file_like, raw_as_bytes=False)
|
||||||
for o in unpacker:
|
for o in unpacker:
|
||||||
process(o)
|
process(o)
|
||||||
|
|
||||||
example of streaming deserialize from socket::
|
Example of streaming deserialize from socket::
|
||||||
|
|
||||||
unpacker = Unpacker()
|
unpacker = Unpacker(raw_as_bytes=False)
|
||||||
while True:
|
while True:
|
||||||
buf = sock.recv(1024**2)
|
buf = sock.recv(1024**2)
|
||||||
if not buf:
|
if not buf:
|
||||||
|
@ -324,7 +344,8 @@ cdef class Unpacker(object):
|
||||||
PyMem_Free(self.buf)
|
PyMem_Free(self.buf)
|
||||||
self.buf = NULL
|
self.buf = NULL
|
||||||
|
|
||||||
def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
|
def __init__(self, file_like=None, Py_ssize_t read_size=0,
|
||||||
|
bint use_list=True, bint raw_as_bytes=True,
|
||||||
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
|
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
|
||||||
encoding=None, unicode_errors='strict', int max_buffer_size=0,
|
encoding=None, unicode_errors='strict', int max_buffer_size=0,
|
||||||
object ext_hook=ExtType,
|
object ext_hook=ExtType,
|
||||||
|
@ -363,6 +384,7 @@ cdef class Unpacker(object):
|
||||||
self.stream_offset = 0
|
self.stream_offset = 0
|
||||||
|
|
||||||
if encoding is not None:
|
if encoding is not None:
|
||||||
|
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
|
||||||
if isinstance(encoding, unicode):
|
if isinstance(encoding, unicode):
|
||||||
self.encoding = encoding.encode('ascii')
|
self.encoding = encoding.encode('ascii')
|
||||||
elif isinstance(encoding, bytes):
|
elif isinstance(encoding, bytes):
|
||||||
|
@ -372,6 +394,7 @@ cdef class Unpacker(object):
|
||||||
cenc = PyBytes_AsString(self.encoding)
|
cenc = PyBytes_AsString(self.encoding)
|
||||||
|
|
||||||
if unicode_errors is not None:
|
if unicode_errors is not None:
|
||||||
|
PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
|
||||||
if isinstance(unicode_errors, unicode):
|
if isinstance(unicode_errors, unicode):
|
||||||
self.unicode_errors = unicode_errors.encode('ascii')
|
self.unicode_errors = unicode_errors.encode('ascii')
|
||||||
elif isinstance(unicode_errors, bytes):
|
elif isinstance(unicode_errors, bytes):
|
||||||
|
@ -381,7 +404,7 @@ cdef class Unpacker(object):
|
||||||
cerr = PyBytes_AsString(self.unicode_errors)
|
cerr = PyBytes_AsString(self.unicode_errors)
|
||||||
|
|
||||||
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
|
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
|
||||||
ext_hook, use_list, cenc, cerr,
|
ext_hook, use_list, raw_as_bytes, cenc, cerr,
|
||||||
max_str_len, max_bin_len, max_array_len,
|
max_str_len, max_bin_len, max_array_len,
|
||||||
max_map_len, max_ext_len)
|
max_map_len, max_ext_len)
|
||||||
|
|
||||||
|
|
|
@ -145,6 +145,16 @@ class Unpacker(object):
|
||||||
If true, unpack msgpack array to Python list.
|
If true, unpack msgpack array to Python list.
|
||||||
Otherwise, unpack to Python tuple. (default: True)
|
Otherwise, unpack to Python tuple. (default: True)
|
||||||
|
|
||||||
|
:param bool raw_as_bytes:
|
||||||
|
If true, unpack msgpack raw to Python bytes (default).
|
||||||
|
Otherwise, unpack to Python str (or unicode on Python 2) by decoding
|
||||||
|
with UTF-8 encoding (recommended).
|
||||||
|
Currently, the default is true, but it will be changed to false in
|
||||||
|
near future. So you must specify it explicitly for keeping backward
|
||||||
|
compatibility.
|
||||||
|
|
||||||
|
The deprecated *encoding* option overrides this option.
|
||||||
|
|
||||||
:param callable object_hook:
|
:param callable object_hook:
|
||||||
When specified, it should be callable.
|
When specified, it should be callable.
|
||||||
Unpacker calls it with a dict argument after unpacking msgpack map.
|
Unpacker calls it with a dict argument after unpacking msgpack map.
|
||||||
|
@ -183,13 +193,13 @@ class Unpacker(object):
|
||||||
|
|
||||||
example of streaming deserialize from file-like object::
|
example of streaming deserialize from file-like object::
|
||||||
|
|
||||||
unpacker = Unpacker(file_like)
|
unpacker = Unpacker(file_like, raw_as_bytes=False)
|
||||||
for o in unpacker:
|
for o in unpacker:
|
||||||
process(o)
|
process(o)
|
||||||
|
|
||||||
example of streaming deserialize from socket::
|
example of streaming deserialize from socket::
|
||||||
|
|
||||||
unpacker = Unpacker()
|
unpacker = Unpacker(raw_as_bytes=False)
|
||||||
while True:
|
while True:
|
||||||
buf = sock.recv(1024**2)
|
buf = sock.recv(1024**2)
|
||||||
if not buf:
|
if not buf:
|
||||||
|
@ -199,15 +209,28 @@ class Unpacker(object):
|
||||||
process(o)
|
process(o)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, file_like=None, read_size=0, use_list=True,
|
def __init__(self, file_like=None, read_size=0, use_list=True, raw_as_bytes=True,
|
||||||
object_hook=None, object_pairs_hook=None, list_hook=None,
|
object_hook=None, object_pairs_hook=None, list_hook=None,
|
||||||
encoding=None, unicode_errors='strict', max_buffer_size=0,
|
encoding=None, unicode_errors=None, max_buffer_size=0,
|
||||||
ext_hook=ExtType,
|
ext_hook=ExtType,
|
||||||
max_str_len=2147483647, # 2**32-1
|
max_str_len=2147483647, # 2**32-1
|
||||||
max_bin_len=2147483647,
|
max_bin_len=2147483647,
|
||||||
max_array_len=2147483647,
|
max_array_len=2147483647,
|
||||||
max_map_len=2147483647,
|
max_map_len=2147483647,
|
||||||
max_ext_len=2147483647):
|
max_ext_len=2147483647):
|
||||||
|
|
||||||
|
if encoding is not None:
|
||||||
|
warnings.warn(
|
||||||
|
"encoding is deprecated, Use raw_as_bytes=False instead.",
|
||||||
|
PendingDeprecationWarning)
|
||||||
|
|
||||||
|
if unicode_errors is not None:
|
||||||
|
warnings.warn(
|
||||||
|
"unicode_errors is deprecated.",
|
||||||
|
PendingDeprecationWarning)
|
||||||
|
else:
|
||||||
|
unicode_errors = 'strict'
|
||||||
|
|
||||||
if file_like is None:
|
if file_like is None:
|
||||||
self._feeding = True
|
self._feeding = True
|
||||||
else:
|
else:
|
||||||
|
@ -234,6 +257,7 @@ class Unpacker(object):
|
||||||
if read_size > self._max_buffer_size:
|
if read_size > self._max_buffer_size:
|
||||||
raise ValueError("read_size must be smaller than max_buffer_size")
|
raise ValueError("read_size must be smaller than max_buffer_size")
|
||||||
self._read_size = read_size or min(self._max_buffer_size, 16*1024)
|
self._read_size = read_size or min(self._max_buffer_size, 16*1024)
|
||||||
|
self._raw_as_bytes = bool(raw_as_bytes)
|
||||||
self._encoding = encoding
|
self._encoding = encoding
|
||||||
self._unicode_errors = unicode_errors
|
self._unicode_errors = unicode_errors
|
||||||
self._use_list = use_list
|
self._use_list = use_list
|
||||||
|
@ -582,8 +606,10 @@ class Unpacker(object):
|
||||||
if typ == TYPE_RAW:
|
if typ == TYPE_RAW:
|
||||||
if self._encoding is not None:
|
if self._encoding is not None:
|
||||||
obj = obj.decode(self._encoding, self._unicode_errors)
|
obj = obj.decode(self._encoding, self._unicode_errors)
|
||||||
else:
|
elif self._raw_as_bytes:
|
||||||
obj = bytes(obj)
|
obj = bytes(obj)
|
||||||
|
else:
|
||||||
|
obj = obj.decode('utf_8')
|
||||||
return obj
|
return obj
|
||||||
if typ == TYPE_EXT:
|
if typ == TYPE_EXT:
|
||||||
return self._ext_hook(n, bytes(obj))
|
return self._ext_hook(n, bytes(obj))
|
||||||
|
@ -682,9 +708,23 @@ class Packer(object):
|
||||||
:param str unicode_errors:
|
:param str unicode_errors:
|
||||||
(deprecated) Error handler for encoding unicode. (default: 'strict')
|
(deprecated) Error handler for encoding unicode. (default: 'strict')
|
||||||
"""
|
"""
|
||||||
def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
|
def __init__(self, default=None, encoding=None, unicode_errors=None,
|
||||||
use_single_float=False, autoreset=True, use_bin_type=False,
|
use_single_float=False, autoreset=True, use_bin_type=False,
|
||||||
strict_types=False):
|
strict_types=False):
|
||||||
|
if encoding is None:
|
||||||
|
encoding = 'utf_8'
|
||||||
|
else:
|
||||||
|
warnings.warn(
|
||||||
|
"encoding is deprecated, Use raw_as_bytes=False instead.",
|
||||||
|
PendingDeprecationWarning)
|
||||||
|
|
||||||
|
if unicode_errors is None:
|
||||||
|
unicode_errors = 'strict'
|
||||||
|
else:
|
||||||
|
warnings.warn(
|
||||||
|
"unicode_errors is deprecated.",
|
||||||
|
PendingDeprecationWarning)
|
||||||
|
|
||||||
self._strict_types = strict_types
|
self._strict_types = strict_types
|
||||||
self._use_float = use_single_float
|
self._use_float = use_single_float
|
||||||
self._autoreset = autoreset
|
self._autoreset = autoreset
|
||||||
|
|
|
@ -20,9 +20,10 @@
|
||||||
#include "unpack_define.h"
|
#include "unpack_define.h"
|
||||||
|
|
||||||
typedef struct unpack_user {
|
typedef struct unpack_user {
|
||||||
int use_list;
|
bool use_list;
|
||||||
PyObject *object_hook;
|
bool raw_as_bytes;
|
||||||
bool has_pairs_hook;
|
bool has_pairs_hook;
|
||||||
|
PyObject *object_hook;
|
||||||
PyObject *list_hook;
|
PyObject *list_hook;
|
||||||
PyObject *ext_hook;
|
PyObject *ext_hook;
|
||||||
const char *encoding;
|
const char *encoding;
|
||||||
|
@ -225,10 +226,13 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
|
||||||
}
|
}
|
||||||
|
|
||||||
PyObject *py;
|
PyObject *py;
|
||||||
if(u->encoding) {
|
|
||||||
|
if (u->encoding) {
|
||||||
py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors);
|
py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors);
|
||||||
} else {
|
} else if (u->raw_as_bytes) {
|
||||||
py = PyBytes_FromStringAndSize(p, l);
|
py = PyBytes_FromStringAndSize(p, l);
|
||||||
|
} else {
|
||||||
|
py = PyUnicode_DecodeUTF8(p, l, NULL);
|
||||||
}
|
}
|
||||||
if (!py)
|
if (!py)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
|
@ -39,11 +39,11 @@ def test_max_str_len():
|
||||||
d = 'x' * 3
|
d = 'x' * 3
|
||||||
packed = packb(d)
|
packed = packb(d)
|
||||||
|
|
||||||
unpacker = Unpacker(max_str_len=3, encoding='utf-8')
|
unpacker = Unpacker(max_str_len=3, raw_as_bytes=False)
|
||||||
unpacker.feed(packed)
|
unpacker.feed(packed)
|
||||||
assert unpacker.unpack() == d
|
assert unpacker.unpack() == d
|
||||||
|
|
||||||
unpacker = Unpacker(max_str_len=2, encoding='utf-8')
|
unpacker = Unpacker(max_str_len=2, raw_as_bytes=False)
|
||||||
with pytest.raises(UnpackValueError):
|
with pytest.raises(UnpackValueError):
|
||||||
unpacker.feed(packed)
|
unpacker.feed(packed)
|
||||||
unpacker.unpack()
|
unpacker.unpack()
|
||||||
|
|
|
@ -31,14 +31,14 @@ def testPack():
|
||||||
def testPackUnicode():
|
def testPackUnicode():
|
||||||
test_data = ["", "abcd", ["defgh"], "Русский текст"]
|
test_data = ["", "abcd", ["defgh"], "Русский текст"]
|
||||||
for td in test_data:
|
for td in test_data:
|
||||||
re = unpackb(packb(td, encoding='utf-8'), use_list=1, encoding='utf-8')
|
re = unpackb(packb(td), use_list=1, raw_as_bytes=False)
|
||||||
assert re == td
|
assert re == td
|
||||||
packer = Packer(encoding='utf-8')
|
packer = Packer()
|
||||||
data = packer.pack(td)
|
data = packer.pack(td)
|
||||||
re = Unpacker(BytesIO(data), encoding=str('utf-8'), use_list=1).unpack()
|
re = Unpacker(BytesIO(data), raw_as_bytes=False, use_list=1).unpack()
|
||||||
assert re == td
|
assert re == td
|
||||||
|
|
||||||
def testPackUTF32():
|
def testPackUTF32(): # deprecated
|
||||||
try:
|
try:
|
||||||
test_data = [
|
test_data = [
|
||||||
"",
|
"",
|
||||||
|
@ -66,26 +66,22 @@ def testPackByteArrays():
|
||||||
for td in test_data:
|
for td in test_data:
|
||||||
check(td)
|
check(td)
|
||||||
|
|
||||||
def testIgnoreUnicodeErrors():
|
def testIgnoreUnicodeErrors(): # deprecated
|
||||||
re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1)
|
re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1)
|
||||||
assert re == "abcdef"
|
assert re == "abcdef"
|
||||||
|
|
||||||
def testStrictUnicodeUnpack():
|
def testStrictUnicodeUnpack():
|
||||||
with raises(UnicodeDecodeError):
|
with raises(UnicodeDecodeError):
|
||||||
unpackb(packb(b'abc\xeddef'), encoding='utf-8', use_list=1)
|
unpackb(packb(b'abc\xeddef'), raw_as_bytes=False, use_list=1)
|
||||||
|
|
||||||
def testStrictUnicodePack():
|
def testStrictUnicodePack(): # deprecated
|
||||||
with raises(UnicodeEncodeError):
|
with raises(UnicodeEncodeError):
|
||||||
packb("abc\xeddef", encoding='ascii', unicode_errors='strict')
|
packb("abc\xeddef", encoding='ascii', unicode_errors='strict')
|
||||||
|
|
||||||
def testIgnoreErrorsPack():
|
def testIgnoreErrorsPack(): # deprecated
|
||||||
re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), encoding='utf-8', use_list=1)
|
re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), raw_as_bytes=False, use_list=1)
|
||||||
assert re == "abcdef"
|
assert re == "abcdef"
|
||||||
|
|
||||||
def testNoEncoding():
|
|
||||||
with raises(TypeError):
|
|
||||||
packb("abc", encoding=None)
|
|
||||||
|
|
||||||
def testDecodeBinary():
|
def testDecodeBinary():
|
||||||
re = unpackb(packb(b"abc"), encoding=None, use_list=1)
|
re = unpackb(packb(b"abc"), encoding=None, use_list=1)
|
||||||
assert re == b"abc"
|
assert re == b"abc"
|
||||||
|
|
|
@ -11,7 +11,7 @@ def test_namedtuple():
|
||||||
return dict(o._asdict())
|
return dict(o._asdict())
|
||||||
raise TypeError('Unsupported type %s' % (type(o),))
|
raise TypeError('Unsupported type %s' % (type(o),))
|
||||||
packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default)
|
packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default)
|
||||||
unpacked = unpackb(packed, encoding='utf-8')
|
unpacked = unpackb(packed, raw_as_bytes=False)
|
||||||
assert unpacked == {'foo': 1, 'bar': 42}
|
assert unpacked == {'foo': 1, 'bar': 42}
|
||||||
|
|
||||||
|
|
||||||
|
@ -32,7 +32,7 @@ def test_tuple():
|
||||||
return o
|
return o
|
||||||
|
|
||||||
data = packb(t, strict_types=True, use_bin_type=True, default=default)
|
data = packb(t, strict_types=True, use_bin_type=True, default=default)
|
||||||
expected = unpackb(data, encoding='utf-8', object_hook=convert)
|
expected = unpackb(data, raw_as_bytes=False, object_hook=convert)
|
||||||
|
|
||||||
assert expected == t
|
assert expected == t
|
||||||
|
|
||||||
|
@ -53,10 +53,10 @@ def test_tuple_ext():
|
||||||
def convert(code, payload):
|
def convert(code, payload):
|
||||||
if code == MSGPACK_EXT_TYPE_TUPLE:
|
if code == MSGPACK_EXT_TYPE_TUPLE:
|
||||||
# Unpack and convert to tuple
|
# Unpack and convert to tuple
|
||||||
return tuple(unpackb(payload, encoding='utf-8', ext_hook=convert))
|
return tuple(unpackb(payload, raw_as_bytes=False, ext_hook=convert))
|
||||||
raise ValueError('Unknown Ext code {}'.format(code))
|
raise ValueError('Unknown Ext code {}'.format(code))
|
||||||
|
|
||||||
data = packb(t, strict_types=True, use_bin_type=True, default=default)
|
data = packb(t, strict_types=True, use_bin_type=True, default=default)
|
||||||
expected = unpackb(data, encoding='utf-8', ext_hook=convert)
|
expected = unpackb(data, raw_as_bytes=False, ext_hook=convert)
|
||||||
|
|
||||||
assert expected == t
|
assert expected == t
|
||||||
|
|
|
@ -47,8 +47,8 @@ def test_unpacker_ext_hook():
|
||||||
class MyUnpacker(Unpacker):
|
class MyUnpacker(Unpacker):
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(MyUnpacker, self).__init__(ext_hook=self._hook,
|
super(MyUnpacker, self).__init__(
|
||||||
encoding='utf-8')
|
ext_hook=self._hook, raw_as_bytes=False)
|
||||||
|
|
||||||
def _hook(self, code, data):
|
def _hook(self, code, data):
|
||||||
if code == 1:
|
if code == 1:
|
||||||
|
@ -57,11 +57,11 @@ def test_unpacker_ext_hook():
|
||||||
return ExtType(code, data)
|
return ExtType(code, data)
|
||||||
|
|
||||||
unpacker = MyUnpacker()
|
unpacker = MyUnpacker()
|
||||||
unpacker.feed(packb({'a': 1}, encoding='utf-8'))
|
unpacker.feed(packb({'a': 1}))
|
||||||
assert unpacker.unpack() == {'a': 1}
|
assert unpacker.unpack() == {'a': 1}
|
||||||
unpacker.feed(packb({'a': ExtType(1, b'123')}, encoding='utf-8'))
|
unpacker.feed(packb({'a': ExtType(1, b'123')}))
|
||||||
assert unpacker.unpack() == {'a': 123}
|
assert unpacker.unpack() == {'a': 123}
|
||||||
unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8'))
|
unpacker.feed(packb({'a': ExtType(2, b'321')}))
|
||||||
assert unpacker.unpack() == {'a': ExtType(2, b'321')}
|
assert unpacker.unpack() == {'a': ExtType(2, b'321')}
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue