mirror of
https://github.com/msgpack/msgpack-python.git
synced 2025-10-29 16:44:14 +00:00
Use new msgpack spec by default. (#386)
This commit is contained in:
parent
de320488ae
commit
7e9905bdfa
11 changed files with 75 additions and 126 deletions
70
README.rst
70
README.rst
|
|
@ -37,36 +37,16 @@ Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-pyt
|
|||
msgpack is removed and `import msgpack` fails.
|
||||
|
||||
|
||||
Deprecating encoding option
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
Compatibility with old format
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
encoding and unicode_errors options are deprecated.
|
||||
You can use ``use_bin_type=False`` option to pack ``bytes``
|
||||
object into raw type in old msgpack spec, instead of bin type in new msgpack spec.
|
||||
|
||||
In case of packer, use UTF-8 always. Storing other than UTF-8 is not recommended.
|
||||
You can unpack the old msgpack format using the ``raw=True`` option.
|
||||
It unpacks str (raw) type in msgpack into Python bytes.
|
||||
|
||||
For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes``
|
||||
object into msgpack raw type.
|
||||
|
||||
In case of unpacker, there is new ``raw`` option. It is ``True`` by default
|
||||
for backward compatibility, but it will be changed to ``False`` in the near future.
|
||||
You can use ``raw=False`` instead of ``encoding='utf-8'``.
|
||||
|
||||
Planned backward incompatible changes
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
In msgpack 1.0, I am planning these breaking changes:
|
||||
|
||||
* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option.
|
||||
* packer: Change default of ``use_bin_type`` option from False to True.
|
||||
* unpacker: Change default of ``raw`` option from True to False.
|
||||
* unpacker: Reduce all ``max_xxx_len`` options for typical usage.
|
||||
* unpacker: Remove ``write_bytes`` option from all methods.
|
||||
|
||||
To keep these breaking changes from breaking your application, please:
|
||||
|
||||
* Don't use deprecated options.
|
||||
* Pass ``use_bin_type`` and ``raw`` options explicitly.
|
||||
* If your application handles large (>1MB) data, specify ``max_xxx_len`` options too.
|
||||
See the note below for details.
|
||||
|
||||
|
||||
Install
|
||||
|
|
@ -76,6 +56,7 @@ Install
|
|||
|
||||
$ pip install msgpack
|
||||
|
||||
|
||||
Pure Python implementation
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
|
@ -100,6 +81,13 @@ Without extension, using pure Python implementation on CPython runs slowly.
|
|||
How to use
|
||||
----------
|
||||
|
||||
.. note::
|
||||
|
||||
In examples below, I use ``raw=False`` and ``use_bin_type=True`` for users
|
||||
using msgpack < 1.0.
|
||||
These options are default from msgpack 1.0 so you can omit them.
|
||||
|
||||
|
||||
One-shot pack & unpack
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
|
|
@ -252,36 +240,18 @@ Notes
|
|||
string and binary type
|
||||
^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
Early versions of msgpack didn't distinguish string and binary types (like Python 1).
|
||||
Early versions of msgpack didn't distinguish string and binary types.
|
||||
The type for representing both string and binary types was named **raw**.
|
||||
|
||||
For backward compatibility reasons, msgpack-python will still default all
|
||||
strings to byte strings, unless you specify the ``use_bin_type=True`` option in
|
||||
the packer. If you do so, it will use a non-standard type called **bin** to
|
||||
serialize byte arrays, and **raw** comes to mean **str**. If you want to
|
||||
distinguish **bin** and **raw** in the unpacker, specify ``raw=False``.
|
||||
|
||||
Note that Python 2 defaults to byte-arrays over Unicode strings:
|
||||
You can pack into and unpack from this old spec using ``use_bin_type=False``
|
||||
and ``raw=True`` options.
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> import msgpack
|
||||
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
|
||||
['spam', 'eggs']
|
||||
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
|
||||
raw=False)
|
||||
['spam', u'eggs']
|
||||
|
||||
This is the same code in Python 3 (same behaviour, but Python 3 has a
|
||||
different default):
|
||||
|
||||
.. code-block:: pycon
|
||||
|
||||
>>> import msgpack
|
||||
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
|
||||
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True)
|
||||
[b'spam', b'eggs']
|
||||
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
|
||||
raw=False)
|
||||
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False)
|
||||
[b'spam', 'eggs']
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -80,9 +80,7 @@ cdef class Packer(object):
|
|||
|
||||
:param bool use_bin_type:
|
||||
Use bin type introduced in msgpack spec 2.0 for bytes.
|
||||
It also enables str8 type for unicode.
|
||||
Current default value is false, but it will be changed to true
|
||||
in future version. You should specify it explicitly.
|
||||
It also enables str8 type for unicode. (default: True)
|
||||
|
||||
:param bool strict_types:
|
||||
If set to true, types will be checked to be exact. Derived classes
|
||||
|
|
@ -113,7 +111,7 @@ cdef class Packer(object):
|
|||
self.pk.length = 0
|
||||
|
||||
def __init__(self, *, default=None, unicode_errors=None,
|
||||
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
|
||||
bint use_single_float=False, bint autoreset=True, bint use_bin_type=True,
|
||||
bint strict_types=False):
|
||||
self.use_float = use_single_float
|
||||
self.strict_types = strict_types
|
||||
|
|
|
|||
|
|
@ -131,7 +131,7 @@ cdef inline int get_data_from_buffer(object obj,
|
|||
|
||||
|
||||
def unpackb(object packed, *, object object_hook=None, object list_hook=None,
|
||||
bint use_list=True, bint raw=True, bint strict_map_key=False,
|
||||
bint use_list=True, bint raw=False, bint strict_map_key=False,
|
||||
unicode_errors=None,
|
||||
object_pairs_hook=None, ext_hook=ExtType,
|
||||
Py_ssize_t max_str_len=-1,
|
||||
|
|
@ -217,12 +217,8 @@ cdef class Unpacker(object):
|
|||
Otherwise, unpack to Python tuple. (default: True)
|
||||
|
||||
:param bool raw:
|
||||
If true, unpack msgpack raw to Python bytes (default).
|
||||
Otherwise, unpack to Python str (or unicode on Python 2) by decoding
|
||||
with UTF-8 encoding (recommended).
|
||||
Currently, the default is true, but it will be changed to false in
|
||||
near future. So you must specify it explicitly for keeping backward
|
||||
compatibility.
|
||||
If true, unpack msgpack raw to Python bytes.
|
||||
Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
|
||||
|
||||
:param bool strict_map_key:
|
||||
If true, only str or bytes are accepted for map (dict) keys.
|
||||
|
|
@ -268,13 +264,13 @@ cdef class Unpacker(object):
|
|||
|
||||
Example of streaming deserialize from file-like object::
|
||||
|
||||
unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024)
|
||||
unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024)
|
||||
for o in unpacker:
|
||||
process(o)
|
||||
|
||||
Example of streaming deserialize from socket::
|
||||
|
||||
unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024)
|
||||
unpacker = Unpacker(max_buffer_size=10*1024*1024)
|
||||
while True:
|
||||
buf = sock.recv(1024**2)
|
||||
if not buf:
|
||||
|
|
@ -309,7 +305,7 @@ cdef class Unpacker(object):
|
|||
self.buf = NULL
|
||||
|
||||
def __init__(self, file_like=None, *, Py_ssize_t read_size=0,
|
||||
bint use_list=True, bint raw=True, bint strict_map_key=False,
|
||||
bint use_list=True, bint raw=False, bint strict_map_key=False,
|
||||
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
|
||||
unicode_errors=None, Py_ssize_t max_buffer_size=0,
|
||||
object ext_hook=ExtType,
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ else:
|
|||
class Unpacker(object):
|
||||
"""Streaming unpacker.
|
||||
|
||||
arguments:
|
||||
Arguments:
|
||||
|
||||
:param file_like:
|
||||
File-like object having `.read(n)` method.
|
||||
|
|
@ -172,12 +172,8 @@ class Unpacker(object):
|
|||
Otherwise, unpack to Python tuple. (default: True)
|
||||
|
||||
:param bool raw:
|
||||
If true, unpack msgpack raw to Python bytes (default).
|
||||
Otherwise, unpack to Python str (or unicode on Python 2) by decoding
|
||||
with UTF-8 encoding (recommended).
|
||||
Currently, the default is true, but it will be changed to false in
|
||||
near future. So you must specify it explicitly for keeping backward
|
||||
compatibility.
|
||||
If true, unpack msgpack raw to Python bytes.
|
||||
Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
|
||||
|
||||
:param bool strict_map_key:
|
||||
If true, only str or bytes are accepted for map (dict) keys.
|
||||
|
|
@ -226,13 +222,13 @@ class Unpacker(object):
|
|||
|
||||
Example of streaming deserialize from file-like object::
|
||||
|
||||
unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024)
|
||||
unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024)
|
||||
for o in unpacker:
|
||||
process(o)
|
||||
|
||||
Example of streaming deserialize from socket::
|
||||
|
||||
unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024)
|
||||
unpacker = Unpacker(max_buffer_size=10*1024*1024)
|
||||
while True:
|
||||
buf = sock.recv(1024**2)
|
||||
if not buf:
|
||||
|
|
@ -253,7 +249,7 @@ class Unpacker(object):
|
|||
file_like=None,
|
||||
read_size=0,
|
||||
use_list=True,
|
||||
raw=True,
|
||||
raw=False,
|
||||
strict_map_key=False,
|
||||
object_hook=None,
|
||||
object_pairs_hook=None,
|
||||
|
|
@ -748,7 +744,7 @@ class Packer(object):
|
|||
|
||||
:param bool use_bin_type:
|
||||
Use bin type introduced in msgpack spec 2.0 for bytes.
|
||||
It also enables str8 type for unicode.
|
||||
It also enables str8 type for unicode. (default: True)
|
||||
|
||||
:param bool strict_types:
|
||||
If set to true, types will be checked to be exact. Derived classes
|
||||
|
|
@ -769,7 +765,7 @@ class Packer(object):
|
|||
unicode_errors=None,
|
||||
use_single_float=False,
|
||||
autoreset=True,
|
||||
use_bin_type=False,
|
||||
use_bin_type=True,
|
||||
strict_types=False,
|
||||
):
|
||||
self._strict_types = strict_types
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ def test_unpack_buffer():
|
|||
|
||||
|
||||
def test_unpack_bytearray():
|
||||
buf = bytearray(packb(("foo", "bar")))
|
||||
buf = bytearray(packb((b"foo", b"bar")))
|
||||
obj = unpackb(buf, use_list=1)
|
||||
assert [b"foo", b"bar"] == obj
|
||||
expected_type = bytes
|
||||
|
|
@ -25,7 +25,7 @@ def test_unpack_bytearray():
|
|||
|
||||
|
||||
def test_unpack_memoryview():
|
||||
buf = bytearray(packb(("foo", "bar")))
|
||||
buf = bytearray(packb((b"foo", b"bar")))
|
||||
view = memoryview(buf)
|
||||
obj = unpackb(view, use_list=1)
|
||||
assert [b"foo", b"bar"] == obj
|
||||
|
|
|
|||
|
|
@ -1,13 +1,12 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
from msgpack import packb, unpackb
|
||||
|
||||
|
||||
def check(length, obj):
|
||||
v = packb(obj)
|
||||
def check(length, obj, use_bin_type=True):
|
||||
v = packb(obj, use_bin_type=use_bin_type)
|
||||
assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v))
|
||||
assert unpackb(v, use_list=0) == obj
|
||||
assert unpackb(v, use_list=0, raw=not use_bin_type) == obj
|
||||
|
||||
|
||||
def test_1():
|
||||
|
|
@ -56,7 +55,7 @@ def test_9():
|
|||
|
||||
|
||||
def check_raw(overhead, num):
|
||||
check(num + overhead, b" " * num)
|
||||
check(num + overhead, b" " * num, use_bin_type=False)
|
||||
|
||||
|
||||
def test_fixraw():
|
||||
|
|
@ -135,4 +134,4 @@ def test_match():
|
|||
|
||||
|
||||
def test_unicode():
|
||||
assert unpackb(packb("foobar"), use_list=1) == b"foobar"
|
||||
assert unpackb(packb(u"foobar"), use_list=1) == u"foobar"
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@
|
|||
from msgpack import unpackb
|
||||
|
||||
|
||||
def check(src, should, use_list=0):
|
||||
assert unpackb(src, use_list=use_list) == should
|
||||
def check(src, should, use_list=0, raw=True):
|
||||
assert unpackb(src, use_list=use_list, raw=raw) == should
|
||||
|
||||
|
||||
def testSimpleValue():
|
||||
|
|
@ -59,6 +59,12 @@ def testRaw():
|
|||
b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
|
||||
(b"", b"a", b"ab", b"", b"a", b"ab"),
|
||||
)
|
||||
check(
|
||||
b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
|
||||
b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
|
||||
("", "a", "ab", "", "a", "ab"),
|
||||
raw=False,
|
||||
)
|
||||
|
||||
|
||||
def testArray():
|
||||
|
|
|
|||
|
|
@ -1,50 +1,33 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
import pytest
|
||||
from array import array
|
||||
from msgpack import packb, unpackb
|
||||
import sys
|
||||
|
||||
|
||||
# For Python < 3:
|
||||
# - array type only supports old buffer interface
|
||||
# - array.frombytes is not available, must use deprecated array.fromstring
|
||||
if sys.version_info[0] < 3:
|
||||
|
||||
def make_memoryview(obj):
|
||||
return memoryview(buffer(obj))
|
||||
|
||||
def make_array(f, data):
|
||||
a = array(f)
|
||||
a.fromstring(data)
|
||||
return a
|
||||
|
||||
def get_data(a):
|
||||
return a.tostring()
|
||||
pytestmark = pytest.mark.skipif(
|
||||
sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol"
|
||||
)
|
||||
|
||||
|
||||
else:
|
||||
make_memoryview = memoryview
|
||||
|
||||
def make_array(f, data):
|
||||
def make_array(f, data):
|
||||
a = array(f)
|
||||
a.frombytes(data)
|
||||
return a
|
||||
|
||||
def get_data(a):
|
||||
return a.tobytes()
|
||||
|
||||
|
||||
def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type):
|
||||
# create a new array
|
||||
original_array = array(format)
|
||||
original_array.fromlist([255] * (nbytes // original_array.itemsize))
|
||||
original_data = get_data(original_array)
|
||||
view = make_memoryview(original_array)
|
||||
original_data = original_array.tobytes()
|
||||
view = memoryview(original_array)
|
||||
|
||||
# pack, unpack, and reconstruct array
|
||||
packed = packb(view, use_bin_type=use_bin_type)
|
||||
unpacked = unpackb(packed)
|
||||
unpacked = unpackb(packed, raw=(not use_bin_type))
|
||||
reconstructed_array = make_array(format, unpacked)
|
||||
|
||||
# check that we got the right amount of data
|
||||
|
|
|
|||
|
|
@ -10,14 +10,16 @@ def test_str8():
|
|||
assert len(b) == len(data) + 2
|
||||
assert b[0:2] == header + b"\x20"
|
||||
assert b[2:] == data
|
||||
assert unpackb(b) == data
|
||||
assert unpackb(b, raw=True) == data
|
||||
assert unpackb(b, raw=False) == data.decode()
|
||||
|
||||
data = b"x" * 255
|
||||
b = packb(data.decode(), use_bin_type=True)
|
||||
assert len(b) == len(data) + 2
|
||||
assert b[0:2] == header + b"\xff"
|
||||
assert b[2:] == data
|
||||
assert unpackb(b) == data
|
||||
assert unpackb(b, raw=True) == data
|
||||
assert unpackb(b, raw=False) == data.decode()
|
||||
|
||||
|
||||
def test_bin8():
|
||||
|
|
|
|||
|
|
@ -8,9 +8,9 @@ def test_read_array_header():
|
|||
unpacker = Unpacker()
|
||||
unpacker.feed(packb(["a", "b", "c"]))
|
||||
assert unpacker.read_array_header() == 3
|
||||
assert unpacker.unpack() == b"a"
|
||||
assert unpacker.unpack() == b"b"
|
||||
assert unpacker.unpack() == b"c"
|
||||
assert unpacker.unpack() == "a"
|
||||
assert unpacker.unpack() == "b"
|
||||
assert unpacker.unpack() == "c"
|
||||
try:
|
||||
unpacker.unpack()
|
||||
assert 0, "should raise exception"
|
||||
|
|
@ -22,8 +22,8 @@ def test_read_map_header():
|
|||
unpacker = Unpacker()
|
||||
unpacker.feed(packb({"a": "A"}))
|
||||
assert unpacker.read_map_header() == 1
|
||||
assert unpacker.unpack() == b"a"
|
||||
assert unpacker.unpack() == b"A"
|
||||
assert unpacker.unpack() == "a"
|
||||
assert unpacker.unpack() == "A"
|
||||
try:
|
||||
unpacker.unpack()
|
||||
assert 0, "should raise exception"
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
import io
|
||||
from msgpack import Unpacker, BufferFull
|
||||
from msgpack import pack
|
||||
|
|
@ -26,7 +25,7 @@ def test_partialdata():
|
|||
with raises(StopIteration):
|
||||
next(iter(unpacker))
|
||||
unpacker.feed(b"o")
|
||||
assert next(iter(unpacker)) == b"hallo"
|
||||
assert next(iter(unpacker)) == "hallo"
|
||||
|
||||
|
||||
def test_foobar():
|
||||
|
|
@ -98,13 +97,13 @@ def test_readbytes():
|
|||
def test_issue124():
|
||||
unpacker = Unpacker()
|
||||
unpacker.feed(b"\xa1?\xa1!")
|
||||
assert tuple(unpacker) == (b"?", b"!")
|
||||
assert tuple(unpacker) == ("?", "!")
|
||||
assert tuple(unpacker) == ()
|
||||
unpacker.feed(b"\xa1?\xa1")
|
||||
assert tuple(unpacker) == (b"?",)
|
||||
assert tuple(unpacker) == ("?",)
|
||||
assert tuple(unpacker) == ()
|
||||
unpacker.feed(b"!")
|
||||
assert tuple(unpacker) == (b"!",)
|
||||
assert tuple(unpacker) == ("!",)
|
||||
assert tuple(unpacker) == ()
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue