Use new msgpack spec by default. (#386)

2025-10-29 16:44:14 +00:00 · 2019-12-05 21:34:10 +09:00 · 2019-12-05 21:34:10 +09:00 · 7e9905bdfa
commit 7e9905bdfa
parent de320488ae
11 changed files with 75 additions and 126 deletions
--- a/README.rst
+++ b/README.rst
@ -37,36 +37,16 @@ Sadly, this doesn't work for upgrade install.  After `pip install -U msgpack-pyt
 msgpack is removed and `import msgpack` fail.


-Deprecating encoding option
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Compatibility with old format
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-encoding and unicode_errors options are deprecated.
+You can use the ``use_bin_type=False`` option to pack ``bytes``
+objects into the raw type of the old msgpack spec, instead of the bin type of the new msgpack spec.

-In case of packer, use UTF-8 always.  Storing other than UTF-8 is not recommended.
+You can unpack the old msgpack format using the ``raw=True`` option.
+It unpacks the str (raw) type in msgpack into Python bytes.

-For backward compatibility, you can use ``use_bin_type=False`` and pack ``bytes``
-object into msgpack raw type.
-
-In case of unpacker, there is new ``raw`` option.  It is ``True`` by default
-for backward compatibility, but it is changed to ``False`` in near future.
-You can use ``raw=False`` instead of ``encoding='utf-8'``.
-
-Planned backward incompatible changes
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-When msgpack 1.0, I planning these breaking changes:
-
-* packer and unpacker: Remove ``encoding`` and ``unicode_errors`` option.
-* packer: Change default of ``use_bin_type`` option from False to True.
-* unpacker: Change default of ``raw`` option from True to False.
-* unpacker: Reduce all ``max_xxx_len`` options for typical usage.
-* unpacker: Remove ``write_bytes`` option from all methods.
-
-To avoid these breaking changes breaks your application, please:
-
-* Don't use deprecated options.
-* Pass ``use_bin_type`` and ``raw`` options explicitly.
-* If your application handle large (>1MB) data, specify ``max_xxx_len`` options too.
+See the note below for details.


 Install
@ -76,6 +56,7 @@ Install

   $ pip install msgpack

+
 Pure Python implementation
 ^^^^^^^^^^^^^^^^^^^^^^^^^^

@ -100,6 +81,13 @@ Without extension, using pure Python implementation on CPython runs slowly.
 How to use
 ----------

+.. note::
+
+   The examples below pass ``raw=False`` and ``use_bin_type=True`` explicitly for
+   users of msgpack < 1.0.
+   These are the defaults since msgpack 1.0, so you can omit them there.
+
+
 One-shot pack & unpack
 ^^^^^^^^^^^^^^^^^^^^^^

@ -252,36 +240,18 @@ Notes
 string and binary type
 ^^^^^^^^^^^^^^^^^^^^^^

-Early versions of msgpack didn't distinguish string and binary types (like Python 1).
+Early versions of msgpack didn't distinguish string and binary types.
 The type for representing both string and binary types was named **raw**.

-For backward compatibility reasons, msgpack-python will still default all
-strings to byte strings, unless you specify the ``use_bin_type=True`` option in
-the packer. If you do so, it will use a non-standard type called **bin** to
-serialize byte arrays, and **raw** becomes to mean **str**. If you want to
-distinguish **bin** and **raw** in the unpacker, specify ``raw=False``.
-
-Note that Python 2 defaults to byte-arrays over Unicode strings:
+You can pack into and unpack from this old spec using ``use_bin_type=False``
+and ``raw=True`` options.

 .. code-block:: pycon

    >>> import msgpack
-    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
-    ['spam', 'eggs']
-    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
-                        raw=False)
-    ['spam', u'eggs']
-
-This is the same code in Python 3 (same behaviour, but Python 3 has a
-different default):
-
-.. code-block:: pycon
-
-    >>> import msgpack
-    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs']))
+    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True)
    [b'spam', b'eggs']
-    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True),
-                        raw=False)
+    >>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False)
    [b'spam', 'eggs']


--- a/msgpack/_packer.pyx
+++ b/msgpack/_packer.pyx
@ -80,9 +80,7 @@ cdef class Packer(object):

    :param bool use_bin_type:
        Use bin type introduced in msgpack spec 2.0 for bytes.
-        It also enables str8 type for unicode.
-        Current default value is false, but it will be changed to true
-        in future version.  You should specify it explicitly.
+        It also enables str8 type for unicode. (default: True)

    :param bool strict_types:
        If set to true, types will be checked to be exact. Derived classes
@ -113,7 +111,7 @@ cdef class Packer(object):
        self.pk.length = 0

    def __init__(self, *, default=None, unicode_errors=None,
-                 bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
+                 bint use_single_float=False, bint autoreset=True, bint use_bin_type=True,
                 bint strict_types=False):
        self.use_float = use_single_float
        self.strict_types = strict_types
--- a/msgpack/_unpacker.pyx
+++ b/msgpack/_unpacker.pyx
@ -131,7 +131,7 @@ cdef inline int get_data_from_buffer(object obj,


 def unpackb(object packed, *, object object_hook=None, object list_hook=None,
-            bint use_list=True, bint raw=True, bint strict_map_key=False,
+            bint use_list=True, bint raw=False, bint strict_map_key=False,
            unicode_errors=None,
            object_pairs_hook=None, ext_hook=ExtType,
            Py_ssize_t max_str_len=-1,
@ -217,12 +217,8 @@ cdef class Unpacker(object):
        Otherwise, unpack to Python tuple. (default: True)

    :param bool raw:
-        If true, unpack msgpack raw to Python bytes (default).
-        Otherwise, unpack to Python str (or unicode on Python 2) by decoding
-        with UTF-8 encoding (recommended).
-        Currently, the default is true, but it will be changed to false in
-        near future.  So you must specify it explicitly for keeping backward
-        compatibility.
+        If true, unpack msgpack raw to Python bytes.
+        Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).

    :param bool strict_map_key:
        If true, only str or bytes are accepted for map (dict) keys.
@ -268,13 +264,13 @@ cdef class Unpacker(object):

    Example of streaming deserialize from file-like object::

-        unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024)
+        unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024)
        for o in unpacker:
            process(o)

    Example of streaming deserialize from socket::

-        unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024)
+        unpacker = Unpacker(max_buffer_size=10*1024*1024)
        while True:
            buf = sock.recv(1024**2)
            if not buf:
@ -309,7 +305,7 @@ cdef class Unpacker(object):
        self.buf = NULL

    def __init__(self, file_like=None, *, Py_ssize_t read_size=0,
-                 bint use_list=True, bint raw=True, bint strict_map_key=False,
+                 bint use_list=True, bint raw=False, bint strict_map_key=False,
                 object object_hook=None, object object_pairs_hook=None, object list_hook=None,
                 unicode_errors=None, Py_ssize_t max_buffer_size=0,
                 object ext_hook=ExtType,
--- a/msgpack/fallback.py
+++ b/msgpack/fallback.py
@ -158,7 +158,7 @@ else:
 class Unpacker(object):
    """Streaming unpacker.

-    arguments:
+    Arguments:

    :param file_like:
        File-like object having `.read(n)` method.
@ -172,12 +172,8 @@ class Unpacker(object):
        Otherwise, unpack to Python tuple. (default: True)

    :param bool raw:
-        If true, unpack msgpack raw to Python bytes (default).
-        Otherwise, unpack to Python str (or unicode on Python 2) by decoding
-        with UTF-8 encoding (recommended).
-        Currently, the default is true, but it will be changed to false in
-        near future.  So you must specify it explicitly for keeping backward
-        compatibility.
+        If true, unpack msgpack raw to Python bytes.
+        Otherwise, unpack to Python str by decoding with UTF-8 encoding (default).

    :param bool strict_map_key:
        If true, only str or bytes are accepted for map (dict) keys.
@ -226,13 +222,13 @@ class Unpacker(object):

    Example of streaming deserialize from file-like object::

-        unpacker = Unpacker(file_like, raw=False, max_buffer_size=10*1024*1024)
+        unpacker = Unpacker(file_like, max_buffer_size=10*1024*1024)
        for o in unpacker:
            process(o)

    Example of streaming deserialize from socket::

-        unpacker = Unpacker(raw=False, max_buffer_size=10*1024*1024)
+        unpacker = Unpacker(max_buffer_size=10*1024*1024)
        while True:
            buf = sock.recv(1024**2)
            if not buf:
@ -253,7 +249,7 @@ class Unpacker(object):
        file_like=None,
        read_size=0,
        use_list=True,
-        raw=True,
+        raw=False,
        strict_map_key=False,
        object_hook=None,
        object_pairs_hook=None,
@ -748,7 +744,7 @@ class Packer(object):

    :param bool use_bin_type:
        Use bin type introduced in msgpack spec 2.0 for bytes.
-        It also enables str8 type for unicode.
+        It also enables str8 type for unicode. (default: True)

    :param bool strict_types:
        If set to true, types will be checked to be exact. Derived classes
@ -769,7 +765,7 @@ class Packer(object):
        unicode_errors=None,
        use_single_float=False,
        autoreset=True,
-        use_bin_type=False,
+        use_bin_type=True,
        strict_types=False,
    ):
        self._strict_types = strict_types
--- a/test/test_buffer.py
+++ b/test/test_buffer.py
@ -17,7 +17,7 @@ def test_unpack_buffer():


 def test_unpack_bytearray():
-    buf = bytearray(packb(("foo", "bar")))
+    buf = bytearray(packb((b"foo", b"bar")))
    obj = unpackb(buf, use_list=1)
    assert [b"foo", b"bar"] == obj
    expected_type = bytes
@ -25,7 +25,7 @@ def test_unpack_bytearray():


 def test_unpack_memoryview():
-    buf = bytearray(packb(("foo", "bar")))
+    buf = bytearray(packb((b"foo", b"bar")))
    view = memoryview(buf)
    obj = unpackb(view, use_list=1)
    assert [b"foo", b"bar"] == obj
--- a/test/test_case.py
+++ b/test/test_case.py
@ -1,13 +1,12 @@
 #!/usr/bin/env python
 # coding: utf-8
-
 from msgpack import packb, unpackb


-def check(length, obj):
-    v = packb(obj)
+def check(length, obj, use_bin_type=True):
+    v = packb(obj, use_bin_type=use_bin_type)
    assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v))
-    assert unpackb(v, use_list=0) == obj
+    assert unpackb(v, use_list=0, raw=not use_bin_type) == obj


 def test_1():
@ -56,7 +55,7 @@ def test_9():


 def check_raw(overhead, num):
-    check(num + overhead, b" " * num)
+    check(num + overhead, b" " * num, use_bin_type=False)


 def test_fixraw():
@ -135,4 +134,4 @@ def test_match():


 def test_unicode():
-    assert unpackb(packb("foobar"), use_list=1) == b"foobar"
+    assert unpackb(packb(u"foobar"), use_list=1) == u"foobar"
--- a/test/test_format.py
+++ b/test/test_format.py
@ -4,8 +4,8 @@
 from msgpack import unpackb


-def check(src, should, use_list=0):
-    assert unpackb(src, use_list=use_list) == should
+def check(src, should, use_list=0, raw=True):
+    assert unpackb(src, use_list=use_list, raw=raw) == should


 def testSimpleValue():
@ -59,6 +59,12 @@ def testRaw():
        b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
        (b"", b"a", b"ab", b"", b"a", b"ab"),
    )
+    check(
+        b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00"
+        b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab",
+        ("", "a", "ab", "", "a", "ab"),
+        raw=False,
+    )


 def testArray():
--- a/test/test_memoryview.py
+++ b/test/test_memoryview.py
@ -1,50 +1,33 @@
 #!/usr/bin/env python
 # coding: utf-8

+import pytest
 from array import array
 from msgpack import packb, unpackb
 import sys


-# For Python < 3:
-#  - array type only supports old buffer interface
-#  - array.frombytes is not available, must use deprecated array.fromstring
-if sys.version_info[0] < 3:
-
-    def make_memoryview(obj):
-        return memoryview(buffer(obj))
-
-    def make_array(f, data):
-        a = array(f)
-        a.fromstring(data)
-        return a
-
-    def get_data(a):
-        return a.tostring()
+pytestmark = pytest.mark.skipif(
+    sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol"
+)


-else:
-    make_memoryview = memoryview
-
-    def make_array(f, data):
+def make_array(f, data):
    a = array(f)
    a.frombytes(data)
    return a

-    def get_data(a):
-        return a.tobytes()
-

 def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type):
    # create a new array
    original_array = array(format)
    original_array.fromlist([255] * (nbytes // original_array.itemsize))
-    original_data = get_data(original_array)
-    view = make_memoryview(original_array)
+    original_data = original_array.tobytes()
+    view = memoryview(original_array)

    # pack, unpack, and reconstruct array
    packed = packb(view, use_bin_type=use_bin_type)
-    unpacked = unpackb(packed)
+    unpacked = unpackb(packed, raw=(not use_bin_type))
    reconstructed_array = make_array(format, unpacked)

    # check that we got the right amount of data
--- a/test/test_newspec.py
+++ b/test/test_newspec.py
@ -10,14 +10,16 @@ def test_str8():
    assert len(b) == len(data) + 2
    assert b[0:2] == header + b"\x20"
    assert b[2:] == data
-    assert unpackb(b) == data
+    assert unpackb(b, raw=True) == data
+    assert unpackb(b, raw=False) == data.decode()

    data = b"x" * 255
    b = packb(data.decode(), use_bin_type=True)
    assert len(b) == len(data) + 2
    assert b[0:2] == header + b"\xff"
    assert b[2:] == data
-    assert unpackb(b) == data
+    assert unpackb(b, raw=True) == data
+    assert unpackb(b, raw=False) == data.decode()


 def test_bin8():
--- a/test/test_read_size.py
+++ b/test/test_read_size.py
@ -8,9 +8,9 @@ def test_read_array_header():
    unpacker = Unpacker()
    unpacker.feed(packb(["a", "b", "c"]))
    assert unpacker.read_array_header() == 3
-    assert unpacker.unpack() == b"a"
-    assert unpacker.unpack() == b"b"
-    assert unpacker.unpack() == b"c"
+    assert unpacker.unpack() == "a"
+    assert unpacker.unpack() == "b"
+    assert unpacker.unpack() == "c"
    try:
        unpacker.unpack()
        assert 0, "should raise exception"
@ -22,8 +22,8 @@ def test_read_map_header():
    unpacker = Unpacker()
    unpacker.feed(packb({"a": "A"}))
    assert unpacker.read_map_header() == 1
-    assert unpacker.unpack() == b"a"
-    assert unpacker.unpack() == b"A"
+    assert unpacker.unpack() == "a"
+    assert unpacker.unpack() == "A"
    try:
        unpacker.unpack()
        assert 0, "should raise exception"
--- a/test/test_sequnpack.py
+++ b/test/test_sequnpack.py
@ -1,6 +1,5 @@
 #!/usr/bin/env python
 # coding: utf-8
-
 import io
 from msgpack import Unpacker, BufferFull
 from msgpack import pack
@ -26,7 +25,7 @@ def test_partialdata():
    with raises(StopIteration):
        next(iter(unpacker))
    unpacker.feed(b"o")
-    assert next(iter(unpacker)) == b"hallo"
+    assert next(iter(unpacker)) == "hallo"


 def test_foobar():
@ -98,13 +97,13 @@ def test_readbytes():
 def test_issue124():
    unpacker = Unpacker()
    unpacker.feed(b"\xa1?\xa1!")
-    assert tuple(unpacker) == (b"?", b"!")
+    assert tuple(unpacker) == ("?", "!")
    assert tuple(unpacker) == ()
    unpacker.feed(b"\xa1?\xa1")
-    assert tuple(unpacker) == (b"?",)
+    assert tuple(unpacker) == ("?",)
    assert tuple(unpacker) == ()
    unpacker.feed(b"!")
-    assert tuple(unpacker) == (b"!",)
+    assert tuple(unpacker) == ("!",)
    assert tuple(unpacker) == ()