Use new msgpack spec by default. (#386)

This commit is contained in:
Inada Naoki 2019-12-05 21:34:10 +09:00 committed by GitHub
parent de320488ae
commit 7e9905bdfa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 75 additions and 126 deletions

View file

@ -37,36 +37,16 @@ Sadly, this doesn't work for upgrade install. After `pip install -U msgpack-pyt
msgpack is removed and `import msgpack` fails.
Deprecating encoding option
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Compatibility with old format
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
encoding and unicode_errors options are deprecated.
You can use the ``use_bin_type=False`` option to pack ``bytes``
objects into the raw type of the old msgpack spec, instead of the bin type of the new msgpack spec.
In case of packer, use UTF-8 always. Storing other than UTF-8 is not recommended.
You can unpack the old msgpack format using the ``raw=True`` option.
It unpacks str (raw) type in msgpack into Python bytes.
For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes``
object into msgpack raw type.
In case of unpacker, there is new ``raw`` option. It is ``True`` by default
for backward compatibility, but it is changed to ``False`` in near future.
You can use ``raw=False`` instead of ``encoding='utf-8'``.
Planned backward incompatible changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
For msgpack 1.0, I am planning these breaking changes:
* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option.
* packer: Change default of ``use_bin_type`` option from False to True.
* unpacker: Change default of ``raw`` option from True to False.
* unpacker: Reduce all ``max_xxx_len`` options for typical usage.
* unpacker: Remove ``write_bytes`` option from all methods.
To keep these breaking changes from breaking your application, please:
* Don't use deprecated options.
* Pass ``use_bin_type`` and ``raw`` options explicitly.
* If your application handles large (>1MB) data, specify ``max_xxx_len`` options too.
See the note below for details.
Install
@ -76,6 +56,7 @@ Install
$ pip install msgpack
Pure Python implementation
^^^^^^^^^^^^^^^^^^^^^^^^^^
@ -100,6 +81,13 @@ Without extension, using pure Python implementation on CPython runs slowly.
How to use
----------
.. note::
In examples below, I use ``raw=False`` and ``use_bin_type=True`` for users
using msgpack < 1.0.
These options are default from msgpack 1.0 so you can omit them.
One-shot pack & unpack
^^^^^^^^^^^^^^^^^^^^^^
@ -252,36 +240,18 @@ Notes
string and binary type
^^^^^^^^^^^^^^^^^^^^^^
Early versions of msgpack didn't distinguish string and binary types (like Python 1).
Early versions of msgpack didn't distinguish string and binary types.
The type for representing both string and binary types was named **raw**.
For backward compatibility reasons, msgpack-python will still default all
strings to byte strings, unless you specify the ``use_bin_type=True`` option in
the packer. If you do so, it will use a non-standard type called **bin** to
serialize byte arrays, and **raw** comes to mean **str**. If you want to
distinguish **bin** and **raw** in the unpacker, specify ``raw=False``.
Note that Python 2 defaults to byte-arrays over Unicode strings:
You can pack into and unpack from this old spec using ``use_bin_type=False``
and ``raw=True`` options.
.. code-block:: pycon
>>> import msgpack
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
['spam', 'eggs']
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
raw=False)
['spam', u'eggs']
This is the same code in Python 3 (same behaviour, but Python 3 has a
different default):
.. code-block:: pycon
>>> import msgpack
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True)
[b'spam', b'eggs']
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
raw=False)
>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False)
[b'spam', 'eggs']

View file

@ -80,9 +80,7 @@ cdef class Packer(object):
:param bool use_bin_type:
Use bin type introduced in msgpack spec 2.0 for bytes.
It also enables str8 type for unicode.
Current default value is false, but it will be changed to true
in future version. You should specify it explicitly.
It also enables str8 type for unicode. (default: True)
:param bool strict_types:
If set to true, types will be checked to be exact. Derived classes
@ -113,7 +111,7 @@ cdef class Packer(object):
self.pk.length = 0
def __init__(self, *, default=None, unicode_errors=None,
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
bint use_single_float=False, bint autoreset=True, bint use_bin_type=True,
bint strict_types=False):
self.use_float = use_single_float
self.strict_types = strict_types

View file

@ -131,7 +131,7 @@ cdef inline int get_data_from_buffer(object obj,
def unpackb(object packed, *, object object_hook=None, object list_hook=None,
bint use_list=True, bint raw=True, bint strict_map_key=False,
bint use_list=True, bint raw=False, bint strict_map_key=False,
unicode_errors=None,
object_pairs_hook=None, ext_hook=ExtType,
Py_ssize_t max_str_len=-1,
@ -217,12 +217,8 @@ cdef class Unpacker(object):
Otherwise, unpack to Python tuple. (default: True)
:param bool raw:
If true, unpack msgpack raw to Python bytes (default).
Otherwise, unpack to Python str (or unicode on Python 2) by decoding
with UTF-8 encoding (recommended).
Currently, the default is true, but it will be changed to false in
near future. So you must specify it explicitly for keeping backward
compatibility.
If true, unpack msgpack raw to Python bytes.
Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
:param bool strict_map_key:
If true, only str or bytes are accepted for map (dict) keys.
@ -268,13 +264,13 @@ cdef class Unpacker(object):
Example of streaming deserialize from file-like object::
unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024)
unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024)
for o in unpacker:
process(o)
Example of streaming deserialize from socket::
unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024)
unpacker = Unpacker(max_buffer_size=10*1024*1024)
while True:
buf = sock.recv(1024**2)
if not buf:
@ -309,7 +305,7 @@ cdef class Unpacker(object):
self.buf = NULL
def __init__(self, file_like=None, *, Py_ssize_t read_size=0,
bint use_list=True, bint raw=True, bint strict_map_key=False,
bint use_list=True, bint raw=False, bint strict_map_key=False,
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
unicode_errors=None, Py_ssize_t max_buffer_size=0,
object ext_hook=ExtType,

View file

@ -158,7 +158,7 @@ else:
class Unpacker(object):
"""Streaming unpacker.
arguments:
Arguments:
:param file_like:
File-like object having `.read(n)` method.
@ -172,12 +172,8 @@ class Unpacker(object):
Otherwise, unpack to Python tuple. (default: True)
:param bool raw:
If true, unpack msgpack raw to Python bytes (default).
Otherwise, unpack to Python str (or unicode on Python 2) by decoding
with UTF-8 encoding (recommended).
Currently, the default is true, but it will be changed to false in
near future. So you must specify it explicitly for keeping backward
compatibility.
If true, unpack msgpack raw to Python bytes.
Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).
:param bool strict_map_key:
If true, only str or bytes are accepted for map (dict) keys.
@ -226,13 +222,13 @@ class Unpacker(object):
Example of streaming deserialize from file-like object::
unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024)
unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024)
for o in unpacker:
process(o)
Example of streaming deserialize from socket::
unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024)
unpacker = Unpacker(max_buffer_size=10*1024*1024)
while True:
buf = sock.recv(1024**2)
if not buf:
@ -253,7 +249,7 @@ class Unpacker(object):
file_like=None,
read_size=0,
use_list=True,
raw=True,
raw=False,
strict_map_key=False,
object_hook=None,
object_pairs_hook=None,
@ -748,7 +744,7 @@ class Packer(object):
:param bool use_bin_type:
Use bin type introduced in msgpack spec 2.0 for bytes.
It also enables str8 type for unicode.
It also enables str8 type for unicode. (default: True)
:param bool strict_types:
If set to true, types will be checked to be exact. Derived classes
@ -769,7 +765,7 @@ class Packer(object):
unicode_errors=None,
use_single_float=False,
autoreset=True,
use_bin_type=False,
use_bin_type=True,
strict_types=False,
):
self._strict_types = strict_types

View file

@ -17,7 +17,7 @@ def test_unpack_buffer():
def test_unpack_bytearray():
buf = bytearray(packb(("foo", "bar")))
buf = bytearray(packb((b"foo", b"bar")))
obj = unpackb(buf, use_list=1)
assert [b"foo", b"bar"] == obj
expected_type = bytes
@ -25,7 +25,7 @@ def test_unpack_bytearray():
def test_unpack_memoryview():
buf = bytearray(packb(("foo", "bar")))
buf = bytearray(packb((b"foo", b"bar")))
view = memoryview(buf)
obj = unpackb(view, use_list=1)
assert [b"foo", b"bar"] == obj

View file

@ -1,13 +1,12 @@
#!/usr/bin/env python
# coding: utf-8
from msgpack import packb, unpackb
def check(length, obj):
v = packb(obj)
def check(length, obj, use_bin_type=True):
v = packb(obj, use_bin_type=use_bin_type)
assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v))
assert unpackb(v, use_list=0) == obj
assert unpackb(v, use_list=0, raw=not use_bin_type) == obj
def test_1():
@ -56,7 +55,7 @@ def test_9():
def check_raw(overhead, num):
check(num + overhead, b" " * num)
check(num + overhead, b" " * num, use_bin_type=False)
def test_fixraw():
@ -135,4 +134,4 @@ def test_match():
def test_unicode():
assert unpackb(packb("foobar"), use_list=1) == b"foobar"
assert unpackb(packb(u"foobar"), use_list=1) == u"foobar"

View file

@ -4,8 +4,8 @@
from msgpack import unpackb
def check(src, should, use_list=0):
assert unpackb(src, use_list=use_list) == should
def check(src, should, use_list=0, raw=True):
assert unpackb(src, use_list=use_list, raw=raw) == should
def testSimpleValue():
@ -59,6 +59,12 @@ def testRaw():
b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
(b"", b"a", b"ab", b"", b"a", b"ab"),
)
check(
b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
("", "a", "ab", "", "a", "ab"),
raw=False,
)
def testArray():

View file

@ -1,50 +1,33 @@
#!/usr/bin/env python
# coding: utf-8
import pytest
from array import array
from msgpack import packb, unpackb
import sys
# For Python < 3:
# - array type only supports old buffer interface
# - array.frombytes is not available, must use deprecated array.fromstring
if sys.version_info[0] < 3:
def make_memoryview(obj):
return memoryview(buffer(obj))
def make_array(f, data):
a = array(f)
a.fromstring(data)
return a
def get_data(a):
return a.tostring()
pytestmark = pytest.mark.skipif(
sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol"
)
else:
make_memoryview = memoryview
def make_array(f, data):
def make_array(f, data):
a = array(f)
a.frombytes(data)
return a
def get_data(a):
return a.tobytes()
def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type):
# create a new array
original_array = array(format)
original_array.fromlist([255] * (nbytes // original_array.itemsize))
original_data = get_data(original_array)
view = make_memoryview(original_array)
original_data = original_array.tobytes()
view = memoryview(original_array)
# pack, unpack, and reconstruct array
packed = packb(view, use_bin_type=use_bin_type)
unpacked = unpackb(packed)
unpacked = unpackb(packed, raw=(not use_bin_type))
reconstructed_array = make_array(format, unpacked)
# check that we got the right amount of data

View file

@ -10,14 +10,16 @@ def test_str8():
assert len(b) == len(data) + 2
assert b[0:2] == header + b"\x20"
assert b[2:] == data
assert unpackb(b) == data
assert unpackb(b, raw=True) == data
assert unpackb(b, raw=False) == data.decode()
data = b"x" * 255
b = packb(data.decode(), use_bin_type=True)
assert len(b) == len(data) + 2
assert b[0:2] == header + b"\xff"
assert b[2:] == data
assert unpackb(b) == data
assert unpackb(b, raw=True) == data
assert unpackb(b, raw=False) == data.decode()
def test_bin8():

View file

@ -8,9 +8,9 @@ def test_read_array_header():
unpacker = Unpacker()
unpacker.feed(packb(["a", "b", "c"]))
assert unpacker.read_array_header() == 3
assert unpacker.unpack() == b"a"
assert unpacker.unpack() == b"b"
assert unpacker.unpack() == b"c"
assert unpacker.unpack() == "a"
assert unpacker.unpack() == "b"
assert unpacker.unpack() == "c"
try:
unpacker.unpack()
assert 0, "should raise exception"
@ -22,8 +22,8 @@ def test_read_map_header():
unpacker = Unpacker()
unpacker.feed(packb({"a": "A"}))
assert unpacker.read_map_header() == 1
assert unpacker.unpack() == b"a"
assert unpacker.unpack() == b"A"
assert unpacker.unpack() == "a"
assert unpacker.unpack() == "A"
try:
unpacker.unpack()
assert 0, "should raise exception"

View file

@ -1,6 +1,5 @@
#!/usr/bin/env python
# coding: utf-8
import io
from msgpack import Unpacker, BufferFull
from msgpack import pack
@ -26,7 +25,7 @@ def test_partialdata():
with raises(StopIteration):
next(iter(unpacker))
unpacker.feed(b"o")
assert next(iter(unpacker)) == b"hallo"
assert next(iter(unpacker)) == "hallo"
def test_foobar():
@ -98,13 +97,13 @@ def test_readbytes():
def test_issue124():
unpacker = Unpacker()
unpacker.feed(b"\xa1?\xa1!")
assert tuple(unpacker) == (b"?", b"!")
assert tuple(unpacker) == ("?", "!")
assert tuple(unpacker) == ()
unpacker.feed(b"\xa1?\xa1")
assert tuple(unpacker) == (b"?",)
assert tuple(unpacker) == ("?",)
assert tuple(unpacker) == ()
unpacker.feed(b"!")
assert tuple(unpacker) == (b"!",)
assert tuple(unpacker) == ("!",)
assert tuple(unpacker) == ()