mirror of
https://github.com/msgpack/msgpack-python.git
synced 2025-10-19 12:03:15 +00:00
Resurrect unicode_errors of the Packer. (#379)
This commit is contained in:
parent
a0480c7602
commit
83ebb63c44
4 changed files with 50 additions and 13 deletions
|
@ -5,7 +5,7 @@ Release Date: TBD
|
||||||
|
|
||||||
* Remove Python 2 support from the ``msgpack/_cmsgpack``.
|
* Remove Python 2 support from the ``msgpack/_cmsgpack``.
|
||||||
``msgpack/fallback`` still supports Python 2.
|
``msgpack/fallback`` still supports Python 2.
|
||||||
* Remove encoding and unicode_errors options from the Packer.
|
* Remove ``encoding`` option from the Packer.
|
||||||
|
|
||||||
|
|
||||||
0.6.2
|
0.6.2
|
||||||
|
|
|
@ -89,9 +89,15 @@ cdef class Packer(object):
|
||||||
Additionally tuples will not be serialized as lists.
|
Additionally tuples will not be serialized as lists.
|
||||||
This is useful when trying to implement accurate serialization
|
This is useful when trying to implement accurate serialization
|
||||||
for python types.
|
for python types.
|
||||||
|
|
||||||
|
:param str unicode_errors:
|
||||||
|
The error handler for encoding unicode. (default: 'strict')
|
||||||
|
DO NOT USE THIS!! This option is kept for very specific usage.
|
||||||
"""
|
"""
|
||||||
cdef msgpack_packer pk
|
cdef msgpack_packer pk
|
||||||
cdef object _default
|
cdef object _default
|
||||||
|
cdef object _berrors
|
||||||
|
cdef const char *unicode_errors
|
||||||
cdef bint strict_types
|
cdef bint strict_types
|
||||||
cdef bool use_float
|
cdef bool use_float
|
||||||
cdef bint autoreset
|
cdef bint autoreset
|
||||||
|
@ -104,10 +110,8 @@ cdef class Packer(object):
|
||||||
self.pk.buf_size = buf_size
|
self.pk.buf_size = buf_size
|
||||||
self.pk.length = 0
|
self.pk.length = 0
|
||||||
|
|
||||||
def __init__(self, default=None,
|
def __init__(self, *, default=None, unicode_errors=None,
|
||||||
bint use_single_float=False,
|
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
|
||||||
bint autoreset=True,
|
|
||||||
bint use_bin_type=False,
|
|
||||||
bint strict_types=False):
|
bint strict_types=False):
|
||||||
self.use_float = use_single_float
|
self.use_float = use_single_float
|
||||||
self.strict_types = strict_types
|
self.strict_types = strict_types
|
||||||
|
@ -118,6 +122,12 @@ cdef class Packer(object):
|
||||||
raise TypeError("default must be a callable.")
|
raise TypeError("default must be a callable.")
|
||||||
self._default = default
|
self._default = default
|
||||||
|
|
||||||
|
self._berrors = unicode_errors
|
||||||
|
if unicode_errors is None:
|
||||||
|
self.unicode_errors = NULL
|
||||||
|
else:
|
||||||
|
self.unicode_errors = self._berrors
|
||||||
|
|
||||||
def __dealloc__(self):
|
def __dealloc__(self):
|
||||||
PyMem_Free(self.pk.buf)
|
PyMem_Free(self.pk.buf)
|
||||||
self.pk.buf = NULL
|
self.pk.buf = NULL
|
||||||
|
@ -183,9 +193,19 @@ cdef class Packer(object):
|
||||||
if ret == 0:
|
if ret == 0:
|
||||||
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
|
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
|
||||||
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
|
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
|
||||||
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
|
if self.unicode_errors == NULL:
|
||||||
if ret == -2:
|
ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT);
|
||||||
raise ValueError("unicode string is too large")
|
if ret == -2:
|
||||||
|
raise ValueError("unicode string is too large")
|
||||||
|
else:
|
||||||
|
o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors)
|
||||||
|
L = Py_SIZE(o)
|
||||||
|
if L > ITEM_LIMIT:
|
||||||
|
raise ValueError("unicode string is too large")
|
||||||
|
ret = msgpack_pack_raw(&self.pk, L)
|
||||||
|
if ret == 0:
|
||||||
|
rawval = o
|
||||||
|
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
|
||||||
elif PyDict_CheckExact(o):
|
elif PyDict_CheckExact(o):
|
||||||
d = <dict>o
|
d = <dict>o
|
||||||
L = len(d)
|
L = len(d)
|
||||||
|
|
|
@ -667,7 +667,7 @@ class Unpacker(object):
|
||||||
elif self._raw:
|
elif self._raw:
|
||||||
obj = bytes(obj)
|
obj = bytes(obj)
|
||||||
else:
|
else:
|
||||||
obj = obj.decode('utf_8')
|
obj = obj.decode('utf_8', self._unicode_errors)
|
||||||
return obj
|
return obj
|
||||||
if typ == TYPE_EXT:
|
if typ == TYPE_EXT:
|
||||||
return self._ext_hook(n, bytes(obj))
|
return self._ext_hook(n, bytes(obj))
|
||||||
|
@ -752,14 +752,19 @@ class Packer(object):
|
||||||
Additionally tuples will not be serialized as lists.
|
Additionally tuples will not be serialized as lists.
|
||||||
This is useful when trying to implement accurate serialization
|
This is useful when trying to implement accurate serialization
|
||||||
for python types.
|
for python types.
|
||||||
|
|
||||||
|
:param str unicode_errors:
|
||||||
|
The error handler for encoding unicode. (default: 'strict')
|
||||||
|
DO NOT USE THIS!! This option is kept for very specific usage.
|
||||||
"""
|
"""
|
||||||
def __init__(self, default=None,
|
def __init__(self, default=None, unicode_errors=None,
|
||||||
use_single_float=False, autoreset=True, use_bin_type=False,
|
use_single_float=False, autoreset=True, use_bin_type=False,
|
||||||
strict_types=False):
|
strict_types=False):
|
||||||
self._strict_types = strict_types
|
self._strict_types = strict_types
|
||||||
self._use_float = use_single_float
|
self._use_float = use_single_float
|
||||||
self._autoreset = autoreset
|
self._autoreset = autoreset
|
||||||
self._use_bin_type = use_bin_type
|
self._use_bin_type = use_bin_type
|
||||||
|
self._unicode_errors = unicode_errors or "strict"
|
||||||
self._buffer = StringIO()
|
self._buffer = StringIO()
|
||||||
if default is not None:
|
if default is not None:
|
||||||
if not callable(default):
|
if not callable(default):
|
||||||
|
@ -816,7 +821,7 @@ class Packer(object):
|
||||||
self._pack_bin_header(n)
|
self._pack_bin_header(n)
|
||||||
return self._buffer.write(obj)
|
return self._buffer.write(obj)
|
||||||
if check(obj, unicode):
|
if check(obj, unicode):
|
||||||
obj = obj.encode("utf-8")
|
obj = obj.encode("utf-8", self._unicode_errors)
|
||||||
n = len(obj)
|
n = len(obj)
|
||||||
if n >= 2**32:
|
if n >= 2**32:
|
||||||
raise ValueError("String is too large")
|
raise ValueError("String is too large")
|
||||||
|
|
|
@ -5,6 +5,7 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
||||||
from collections import OrderedDict
|
from collections import OrderedDict
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
import struct
|
import struct
|
||||||
|
import sys
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from pytest import raises, xfail
|
from pytest import raises, xfail
|
||||||
|
@ -54,13 +55,24 @@ def testPackByteArrays():
|
||||||
for td in test_data:
|
for td in test_data:
|
||||||
check(td)
|
check(td)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(sys.version_info < (3,0), reason="Python 2 passes invalid surrogates")
|
||||||
|
def testIgnoreUnicodeErrors():
|
||||||
|
re = unpackb(packb(b'abc\xeddef', use_bin_type=False),
|
||||||
|
raw=False, unicode_errors='ignore')
|
||||||
|
assert re == "abcdef"
|
||||||
|
|
||||||
def testStrictUnicodeUnpack():
|
def testStrictUnicodeUnpack():
|
||||||
packed = packb(b'abc\xeddef')
|
packed = packb(b'abc\xeddef', use_bin_type=False)
|
||||||
with pytest.raises(UnicodeDecodeError):
|
with pytest.raises(UnicodeDecodeError):
|
||||||
unpackb(packed, raw=False, use_list=1)
|
unpackb(packed, raw=False, use_list=1)
|
||||||
|
|
||||||
|
@pytest.mark.skipif(sys.version_info < (3,0), reason="Python 2 passes invalid surrogates")
|
||||||
|
def testIgnoreErrorsPack():
|
||||||
|
re = unpackb(packb(u"abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors='ignore'), raw=False, use_list=1)
|
||||||
|
assert re == "abcdef"
|
||||||
|
|
||||||
def testDecodeBinary():
|
def testDecodeBinary():
|
||||||
re = unpackb(packb(b"abc"), encoding=None, use_list=1)
|
re = unpackb(packb(b"abc"), use_list=1)
|
||||||
assert re == b"abc"
|
assert re == b"abc"
|
||||||
|
|
||||||
def testPackFloat():
|
def testPackFloat():
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue