mirror of
https://github.com/msgpack/msgpack-python.git
synced 2025-11-03 19:10:55 +00:00
Add raw_as_bytes option to Unpacker. (#265)
This commit is contained in:
parent
50ea49c86f
commit
5534d0c7af
11 changed files with 199 additions and 93 deletions
|
|
@ -2,7 +2,7 @@
|
|||
#cython: embedsignature=True
|
||||
|
||||
from cpython cimport *
|
||||
#from cpython.exc cimport PyErr_WarnEx
|
||||
from cpython.exc cimport PyErr_WarnEx
|
||||
|
||||
from msgpack.exceptions import PackValueError, PackOverflowError
|
||||
from msgpack import ExtType
|
||||
|
|
@ -39,7 +39,7 @@ cdef extern from "pack.h":
|
|||
int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l)
|
||||
|
||||
cdef int DEFAULT_RECURSE_LIMIT=511
|
||||
cdef size_t ITEM_LIMIT = (2**32)-1
|
||||
cdef long long ITEM_LIMIT = (2**32)-1
|
||||
|
||||
|
||||
cdef inline int PyBytesLike_Check(object o):
|
||||
|
|
@ -110,9 +110,13 @@ cdef class Packer(object):
|
|||
self.pk.buf_size = buf_size
|
||||
self.pk.length = 0
|
||||
|
||||
def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
|
||||
def __init__(self, default=None, encoding=None, unicode_errors=None,
|
||||
bint use_single_float=False, bint autoreset=True, bint use_bin_type=False,
|
||||
bint strict_types=False):
|
||||
if encoding is not None:
|
||||
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated.", 1)
|
||||
if unicode_errors is not None:
|
||||
PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated.", 1)
|
||||
self.use_float = use_single_float
|
||||
self.strict_types = strict_types
|
||||
self.autoreset = autoreset
|
||||
|
|
@ -122,7 +126,7 @@ cdef class Packer(object):
|
|||
raise TypeError("default must be a callable.")
|
||||
self._default = default
|
||||
if encoding is None:
|
||||
self.encoding = NULL
|
||||
self.encoding = 'utf_8'
|
||||
self.unicode_errors = NULL
|
||||
else:
|
||||
if isinstance(encoding, unicode):
|
||||
|
|
@ -134,7 +138,8 @@ cdef class Packer(object):
|
|||
self._berrors = unicode_errors.encode('ascii')
|
||||
else:
|
||||
self._berrors = unicode_errors
|
||||
self.unicode_errors = PyBytes_AsString(self._berrors)
|
||||
if self._berrors is not None:
|
||||
self.unicode_errors = PyBytes_AsString(self._berrors)
|
||||
|
||||
def __dealloc__(self):
|
||||
PyMem_Free(self.pk.buf)
|
||||
|
|
@ -149,7 +154,7 @@ cdef class Packer(object):
|
|||
cdef char* rawval
|
||||
cdef int ret
|
||||
cdef dict d
|
||||
cdef size_t L
|
||||
cdef Py_ssize_t L
|
||||
cdef int default_used = 0
|
||||
cdef bint strict_types = self.strict_types
|
||||
cdef Py_buffer view
|
||||
|
|
@ -203,6 +208,7 @@ cdef class Packer(object):
|
|||
elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o):
|
||||
if not self.encoding:
|
||||
raise TypeError("Can't encode unicode string: no encoding is specified")
|
||||
#TODO: Use faster API for UTF-8
|
||||
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
|
||||
L = len(o)
|
||||
if L > ITEM_LIMIT:
|
||||
|
|
|
|||
|
|
@ -43,8 +43,9 @@ from msgpack import ExtType
|
|||
cdef extern from "unpack.h":
|
||||
ctypedef struct msgpack_user:
|
||||
bint use_list
|
||||
PyObject* object_hook
|
||||
bint raw_as_bytes
|
||||
bint has_pairs_hook # call object_hook with k-v pairs
|
||||
PyObject* object_hook
|
||||
PyObject* list_hook
|
||||
PyObject* ext_hook
|
||||
char *encoding
|
||||
|
|
@ -73,12 +74,14 @@ cdef extern from "unpack.h":
|
|||
cdef inline init_ctx(unpack_context *ctx,
|
||||
object object_hook, object object_pairs_hook,
|
||||
object list_hook, object ext_hook,
|
||||
bint use_list, char* encoding, char* unicode_errors,
|
||||
bint use_list, bint raw_as_bytes,
|
||||
char* encoding, char* unicode_errors,
|
||||
Py_ssize_t max_str_len, Py_ssize_t max_bin_len,
|
||||
Py_ssize_t max_array_len, Py_ssize_t max_map_len,
|
||||
Py_ssize_t max_ext_len):
|
||||
unpack_init(ctx)
|
||||
ctx.user.use_list = use_list
|
||||
ctx.user.raw_as_bytes = raw_as_bytes
|
||||
ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL
|
||||
ctx.user.max_str_len = max_str_len
|
||||
ctx.user.max_bin_len = max_bin_len
|
||||
|
|
@ -155,7 +158,8 @@ cdef inline int get_data_from_buffer(object obj,
|
|||
return 1
|
||||
|
||||
def unpackb(object packed, object object_hook=None, object list_hook=None,
|
||||
bint use_list=1, encoding=None, unicode_errors="strict",
|
||||
bint use_list=True, bint raw_as_bytes=True,
|
||||
encoding=None, unicode_errors="strict",
|
||||
object_pairs_hook=None, ext_hook=ExtType,
|
||||
Py_ssize_t max_str_len=2147483647, # 2**31-1
|
||||
Py_ssize_t max_bin_len=2147483647,
|
||||
|
|
@ -180,21 +184,26 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
|
|||
cdef char* cerr = NULL
|
||||
cdef int new_protocol = 0
|
||||
|
||||
if encoding is not None:
|
||||
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
|
||||
if isinstance(encoding, unicode):
|
||||
encoding = encoding.encode('ascii')
|
||||
elif not isinstance(encoding, bytes):
|
||||
raise TypeError("encoding should be bytes or unicode")
|
||||
cenc = PyBytes_AsString(encoding)
|
||||
|
||||
if unicode_errors is not None:
|
||||
PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
|
||||
if isinstance(unicode_errors, unicode):
|
||||
unicode_errors = unicode_errors.encode('ascii')
|
||||
elif not isinstance(unicode_errors, bytes):
|
||||
raise TypeError("unicode_errors should be bytes or unicode")
|
||||
cerr = PyBytes_AsString(unicode_errors)
|
||||
|
||||
get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
|
||||
|
||||
try:
|
||||
if encoding is not None:
|
||||
if isinstance(encoding, unicode):
|
||||
encoding = encoding.encode('ascii')
|
||||
cenc = PyBytes_AsString(encoding)
|
||||
|
||||
if unicode_errors is not None:
|
||||
if isinstance(unicode_errors, unicode):
|
||||
unicode_errors = unicode_errors.encode('ascii')
|
||||
cerr = PyBytes_AsString(unicode_errors)
|
||||
|
||||
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
|
||||
use_list, cenc, cerr,
|
||||
use_list, raw_as_bytes, cenc, cerr,
|
||||
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
|
||||
ret = unpack_construct(&ctx, buf, buf_len, &off)
|
||||
finally:
|
||||
|
|
@ -252,6 +261,16 @@ cdef class Unpacker(object):
|
|||
If true, unpack msgpack array to Python list.
|
||||
Otherwise, unpack to Python tuple. (default: True)
|
||||
|
||||
:param bool raw_as_bytes:
|
||||
If true, unpack msgpack raw to Python bytes (default).
|
||||
Otherwise, unpack to Python str (or unicode on Python 2) by decoding
|
||||
with UTF-8 encoding (recommended).
|
||||
Currently, the default is true, but it will be changed to false in
|
||||
the near future. So you must specify it explicitly to keep backward
|
||||
compatibility.
|
||||
|
||||
The deprecated *encoding* option overrides this option.
|
||||
|
||||
:param callable object_hook:
|
||||
When specified, it should be callable.
|
||||
Unpacker calls it with a dict argument after unpacking msgpack map.
|
||||
|
|
@ -262,14 +281,6 @@ cdef class Unpacker(object):
|
|||
Unpacker calls it with a list of key-value pairs after unpacking msgpack map.
|
||||
(See also simplejson)
|
||||
|
||||
:param str encoding:
|
||||
Encoding used for decoding msgpack raw.
|
||||
If it is None (default), msgpack raw is deserialized to Python bytes.
|
||||
|
||||
:param str unicode_errors:
|
||||
Used for decoding msgpack raw with *encoding*.
|
||||
(default: `'strict'`)
|
||||
|
||||
:param int max_buffer_size:
|
||||
Limits the size of data waiting to be unpacked. 0 means system's INT_MAX (default).
|
||||
Raises `BufferFull` exception when it is insufficient.
|
||||
|
|
@ -287,16 +298,25 @@ cdef class Unpacker(object):
|
|||
:param int max_map_len:
|
||||
Limits max length of map. (default: 2**31-1)
|
||||
|
||||
:param str encoding:
|
||||
Deprecated, use raw_as_bytes instead.
|
||||
Encoding used for decoding msgpack raw.
|
||||
If it is None (default), msgpack raw is deserialized to Python bytes.
|
||||
|
||||
example of streaming deserialize from file-like object::
|
||||
:param str unicode_errors:
|
||||
Deprecated. Used for decoding msgpack raw with *encoding*.
|
||||
(default: `'strict'`)
|
||||
|
||||
unpacker = Unpacker(file_like)
|
||||
|
||||
Example of streaming deserialize from file-like object::
|
||||
|
||||
unpacker = Unpacker(file_like, raw_as_bytes=False)
|
||||
for o in unpacker:
|
||||
process(o)
|
||||
|
||||
example of streaming deserialize from socket::
|
||||
Example of streaming deserialize from socket::
|
||||
|
||||
unpacker = Unpacker()
|
||||
unpacker = Unpacker(raw_as_bytes=False)
|
||||
while True:
|
||||
buf = sock.recv(1024**2)
|
||||
if not buf:
|
||||
|
|
@ -324,7 +344,8 @@ cdef class Unpacker(object):
|
|||
PyMem_Free(self.buf)
|
||||
self.buf = NULL
|
||||
|
||||
def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
|
||||
def __init__(self, file_like=None, Py_ssize_t read_size=0,
|
||||
bint use_list=True, bint raw_as_bytes=True,
|
||||
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
|
||||
encoding=None, unicode_errors='strict', int max_buffer_size=0,
|
||||
object ext_hook=ExtType,
|
||||
|
|
@ -363,6 +384,7 @@ cdef class Unpacker(object):
|
|||
self.stream_offset = 0
|
||||
|
||||
if encoding is not None:
|
||||
PyErr_WarnEx(PendingDeprecationWarning, "encoding is deprecated, Use raw_as_bytes=False instead.", 1)
|
||||
if isinstance(encoding, unicode):
|
||||
self.encoding = encoding.encode('ascii')
|
||||
elif isinstance(encoding, bytes):
|
||||
|
|
@ -372,6 +394,7 @@ cdef class Unpacker(object):
|
|||
cenc = PyBytes_AsString(self.encoding)
|
||||
|
||||
if unicode_errors is not None:
|
||||
PyErr_WarnEx(PendingDeprecationWarning, "unicode_errors is deprecated", 1)
|
||||
if isinstance(unicode_errors, unicode):
|
||||
self.unicode_errors = unicode_errors.encode('ascii')
|
||||
elif isinstance(unicode_errors, bytes):
|
||||
|
|
@ -381,7 +404,7 @@ cdef class Unpacker(object):
|
|||
cerr = PyBytes_AsString(self.unicode_errors)
|
||||
|
||||
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
|
||||
ext_hook, use_list, cenc, cerr,
|
||||
ext_hook, use_list, raw_as_bytes, cenc, cerr,
|
||||
max_str_len, max_bin_len, max_array_len,
|
||||
max_map_len, max_ext_len)
|
||||
|
||||
|
|
|
|||
|
|
@ -145,6 +145,16 @@ class Unpacker(object):
|
|||
If true, unpack msgpack array to Python list.
|
||||
Otherwise, unpack to Python tuple. (default: True)
|
||||
|
||||
:param bool raw_as_bytes:
|
||||
If true, unpack msgpack raw to Python bytes (default).
|
||||
Otherwise, unpack to Python str (or unicode on Python 2) by decoding
|
||||
with UTF-8 encoding (recommended).
|
||||
Currently, the default is true, but it will be changed to false in
|
||||
the near future. So you must specify it explicitly to keep backward
|
||||
compatibility.
|
||||
|
||||
The deprecated *encoding* option overrides this option.
|
||||
|
||||
:param callable object_hook:
|
||||
When specified, it should be callable.
|
||||
Unpacker calls it with a dict argument after unpacking msgpack map.
|
||||
|
|
@ -183,13 +193,13 @@ class Unpacker(object):
|
|||
|
||||
Example of streaming deserialize from file-like object::
|
||||
|
||||
unpacker = Unpacker(file_like)
|
||||
unpacker = Unpacker(file_like, raw_as_bytes=False)
|
||||
for o in unpacker:
|
||||
process(o)
|
||||
|
||||
Example of streaming deserialize from socket::
|
||||
|
||||
unpacker = Unpacker()
|
||||
unpacker = Unpacker(raw_as_bytes=False)
|
||||
while True:
|
||||
buf = sock.recv(1024**2)
|
||||
if not buf:
|
||||
|
|
@ -199,15 +209,28 @@ class Unpacker(object):
|
|||
process(o)
|
||||
"""
|
||||
|
||||
def __init__(self, file_like=None, read_size=0, use_list=True,
|
||||
def __init__(self, file_like=None, read_size=0, use_list=True, raw_as_bytes=True,
|
||||
object_hook=None, object_pairs_hook=None, list_hook=None,
|
||||
encoding=None, unicode_errors='strict', max_buffer_size=0,
|
||||
encoding=None, unicode_errors=None, max_buffer_size=0,
|
||||
ext_hook=ExtType,
|
||||
max_str_len=2147483647, # 2**31-1
|
||||
max_bin_len=2147483647,
|
||||
max_array_len=2147483647,
|
||||
max_map_len=2147483647,
|
||||
max_ext_len=2147483647):
|
||||
|
||||
if encoding is not None:
|
||||
warnings.warn(
|
||||
"encoding is deprecated, Use raw_as_bytes=False instead.",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
if unicode_errors is not None:
|
||||
warnings.warn(
|
||||
"unicode_errors is deprecated.",
|
||||
PendingDeprecationWarning)
|
||||
else:
|
||||
unicode_errors = 'strict'
|
||||
|
||||
if file_like is None:
|
||||
self._feeding = True
|
||||
else:
|
||||
|
|
@ -234,6 +257,7 @@ class Unpacker(object):
|
|||
if read_size > self._max_buffer_size:
|
||||
raise ValueError("read_size must be smaller than max_buffer_size")
|
||||
self._read_size = read_size or min(self._max_buffer_size, 16*1024)
|
||||
self._raw_as_bytes = bool(raw_as_bytes)
|
||||
self._encoding = encoding
|
||||
self._unicode_errors = unicode_errors
|
||||
self._use_list = use_list
|
||||
|
|
@ -582,8 +606,10 @@ class Unpacker(object):
|
|||
if typ == TYPE_RAW:
|
||||
if self._encoding is not None:
|
||||
obj = obj.decode(self._encoding, self._unicode_errors)
|
||||
else:
|
||||
elif self._raw_as_bytes:
|
||||
obj = bytes(obj)
|
||||
else:
|
||||
obj = obj.decode('utf_8')
|
||||
return obj
|
||||
if typ == TYPE_EXT:
|
||||
return self._ext_hook(n, bytes(obj))
|
||||
|
|
@ -682,9 +708,23 @@ class Packer(object):
|
|||
:param str unicode_errors:
|
||||
(deprecated) Error handler for encoding unicode. (default: 'strict')
|
||||
"""
|
||||
def __init__(self, default=None, encoding='utf-8', unicode_errors='strict',
|
||||
def __init__(self, default=None, encoding=None, unicode_errors=None,
|
||||
use_single_float=False, autoreset=True, use_bin_type=False,
|
||||
strict_types=False):
|
||||
if encoding is None:
|
||||
encoding = 'utf_8'
|
||||
else:
|
||||
warnings.warn(
|
||||
"encoding is deprecated, Use raw_as_bytes=False instead.",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
if unicode_errors is None:
|
||||
unicode_errors = 'strict'
|
||||
else:
|
||||
warnings.warn(
|
||||
"unicode_errors is deprecated.",
|
||||
PendingDeprecationWarning)
|
||||
|
||||
self._strict_types = strict_types
|
||||
self._use_float = use_single_float
|
||||
self._autoreset = autoreset
|
||||
|
|
|
|||
|
|
@ -20,9 +20,10 @@
|
|||
#include "unpack_define.h"
|
||||
|
||||
typedef struct unpack_user {
|
||||
int use_list;
|
||||
PyObject *object_hook;
|
||||
bool use_list;
|
||||
bool raw_as_bytes;
|
||||
bool has_pairs_hook;
|
||||
PyObject *object_hook;
|
||||
PyObject *list_hook;
|
||||
PyObject *ext_hook;
|
||||
const char *encoding;
|
||||
|
|
@ -225,10 +226,13 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
|
|||
}
|
||||
|
||||
PyObject *py;
|
||||
if(u->encoding) {
|
||||
|
||||
if (u->encoding) {
|
||||
py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors);
|
||||
} else {
|
||||
} else if (u->raw_as_bytes) {
|
||||
py = PyBytes_FromStringAndSize(p, l);
|
||||
} else {
|
||||
py = PyUnicode_DecodeUTF8(p, l, NULL);
|
||||
}
|
||||
if (!py)
|
||||
return -1;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue