msgpack-python/msgpack/fallback.py

1070 lines
36 KiB
Python
Raw Normal View History

2013-01-29 14:33:37 +09:00
"""Fallback pure Python implementation of msgpack"""
2019-12-11 23:48:16 +09:00
from datetime import datetime as _DateTime
import sys
import struct
2018-11-30 16:28:41 +09:00
# True when running under a Python 2 interpreter.
PY2 = sys.version_info[0] == 2

if not PY2:
    # Python 3: provide the Python 2 spellings the rest of this module uses.
    int_types = int
    unicode = str
    xrange = range

    def dict_iteritems(d):
        """Return an iterable over the ``(key, value)`` pairs of *d*."""
        return d.items()


else:
    int_types = (int, long)

    def dict_iteritems(d):
        """Return an iterator over the ``(key, value)`` pairs of *d*."""
        return d.iteritems()


if sys.version_info >= (3, 5):

    def _is_recursionerror(e):
        """Tell whether *e* signals recursion depth exhaustion.

        A dedicated ``RecursionError`` class exists on this interpreter, so
        anything caught under that name qualifies.
        """
        return True


else:
    # Ugly hack: there is no RecursionError class here, so alias it to
    # RuntimeError and recognise the real thing by its message.
    RecursionError = RuntimeError

    def _is_recursionerror(e):
        """Tell whether *e* signals recursion depth exhaustion.

        The exception must carry exactly one ``str`` argument that starts
        with the interpreter's recursion-limit message.
        """
        if len(e.args) != 1:
            return False
        message = e.args[0]
        if not isinstance(message, str):
            return False
        return message.startswith("maximum recursion depth exceeded")
2013-01-29 15:10:22 +09:00
2019-12-05 18:50:13 +09:00
if not hasattr(sys, "pypy_version_info"):
    # Not PyPy: the standard in-memory bytes buffer is used as-is.
    USING_STRINGBUILDER = False
    from io import BytesIO as StringIO

    def newlist_hint(size):
        """Stand-in for PyPy's ``newlist_hint``; the size hint is ignored."""
        return []


else:
    # PyPy: accumulate output with the interpreter's own builder type
    # instead of an io-style buffer.
    from __pypy__ import newlist_hint

    try:
        from __pypy__.builders import BytesBuilder as StringBuilder
    except ImportError:
        from __pypy__.builders import StringBuilder
    USING_STRINGBUILDER = True

    class StringIO(object):
        """Minimal write-only buffer backed by a PyPy builder."""

        def __init__(self, s=b""):
            if not s:
                self.builder = StringBuilder()
            else:
                # Pass the initial length to the builder, then seed it.
                self.builder = StringBuilder(len(s))
                self.builder.append(s)

        def write(self, s):
            """Append *s*, converting memoryview/bytearray to bytes first."""
            if isinstance(s, memoryview):
                s = s.tobytes()
            elif isinstance(s, bytearray):
                s = bytes(s)
            self.builder.append(s)

        def getvalue(self):
            """Return everything written so far."""
            return self.builder.build()
2016-05-07 15:18:20 +02:00
from .exceptions import (
2013-10-20 15:40:20 +09:00
BufferFull,
OutOfData,
2018-11-20 13:12:49 +09:00
ExtraData,
FormatError,
StackError,
)
2019-12-05 18:29:15 +09:00
from .ext import ExtType, Timestamp
2019-12-05 18:50:13 +09:00
# Execution modes accepted by Unpacker._unpack().
EX_SKIP, EX_CONSTRUCT, EX_READ_ARRAY_HEADER, EX_READ_MAP_HEADER = 0, 1, 2, 3

# Type categories returned by Unpacker._read_header().
(
    TYPE_IMMEDIATE,
    TYPE_ARRAY,
    TYPE_MAP,
    TYPE_RAW,
    TYPE_BIN,
    TYPE_EXT,
) = (0, 1, 2, 3, 4, 5)

# Default nesting budget used by Packer._pack().
DEFAULT_RECURSE_LIMIT = 511
2013-10-20 15:40:20 +09:00
2015-11-10 03:33:50 +09:00
def _check_type_strict(obj, t, type=type, tuple=tuple):
    """Exact-type counterpart of ``isinstance``.

    Returns True only when ``type(obj)`` is *t* itself or, when *t* is a
    tuple, one of its members; subclasses do not match.  The ``type`` and
    ``tuple`` parameters only bind the builtins as locals.
    """
    actual = type(obj)
    if type(t) is not tuple:
        return actual is t
    return actual in t
2016-05-07 15:18:20 +02:00
def _get_data_from_buffer(obj):
    """Return a ``memoryview`` over *obj*.

    Raises ``ValueError`` when the view's item size is not one byte.
    """
    buf = memoryview(obj)
    if buf.itemsize == 1:
        return buf
    raise ValueError("cannot unpack from multi-byte object")
2013-02-25 18:23:42 +09:00
def unpackb(packed, **kwargs):
    """
    Unpack a single object from `packed`.

    Raises ``ExtraData`` when *packed* contains extra bytes.
    Raises ``ValueError`` when *packed* is incomplete.
    Raises ``FormatError`` when *packed* is not valid msgpack.
    Raises ``StackError`` when *packed* is too deeply nested.
    Other exceptions can be raised during unpacking.

    See :class:`Unpacker` for options.
    """
    # The whole message is fed in one go, so the buffer limit is its length.
    decoder = Unpacker(None, max_buffer_size=len(packed), **kwargs)
    decoder.feed(packed)
    try:
        result = decoder._unpack()
    except OutOfData:
        raise ValueError("Unpack failed: incomplete input")
    except RecursionError as e:
        # Where RecursionError is only an alias, unrelated errors of the
        # aliased type must propagate unchanged.
        if not _is_recursionerror(e):
            raise
        raise StackError
    if decoder._got_extradata():
        raise ExtraData(result, decoder._get_extradata())
    return result
2013-10-20 15:40:20 +09:00
if sys.version_info >= (2, 7, 6):
    _unpack_from = struct.unpack_from
else:

    def _unpack_from(f, b, o=0):
        """Explicit type cast for legacy struct.unpack_from"""
        # The buffer is converted to ``bytes`` before unpacking.
        return struct.unpack_from(f, bytes(b), o)
class Unpacker(object):
    """Streaming unpacker.

    Arguments:

    :param file_like:
        File-like object having `.read(n)` method.
        If specified, unpacker reads serialized data from it and
        :meth:`feed()` is not usable.

    :param int read_size:
        Used as `file_like.read(read_size)`.
        (default: `min(16*1024, max_buffer_size)`)

    :param bool use_list:
        If true, unpack msgpack array to Python list.
        Otherwise, unpack to Python tuple. (default: True)

    :param bool raw:
        If true, unpack msgpack raw to Python bytes.
        Otherwise, unpack to Python str by decoding with UTF-8 encoding
        (default).

    :param int timestamp:
        Control how timestamp type is unpacked:

            0 - Timestamp
            1 - float  (Seconds from the EPOCH)
            2 - int  (Nanoseconds from the EPOCH)
            3 - datetime.datetime  (UTC).  Python 2 is not supported.

    :param bool strict_map_key:
        If true (default), only str or bytes are accepted for map (dict) keys.

    :param callable object_hook:
        When specified, it should be callable.
        Unpacker calls it with a dict argument after unpacking msgpack map.
        (See also simplejson)

    :param callable object_pairs_hook:
        When specified, it should be callable.
        Unpacker calls it with an iterable of key-value pairs instead of
        building a dict for a msgpack map.
        (See also simplejson)

    :param callable list_hook:
        When specified, it should be callable.
        Unpacker calls it with the list built from a msgpack array and uses
        its return value.

    :param callable ext_hook:
        Called as ``ext_hook(code, data)`` for every ext type whose code is
        not -1 (timestamp). (default: ``ExtType``)

    :param str unicode_errors:
        The error handler for decoding unicode. (default: 'strict')
        This option should be used only when you have msgpack data which
        contains invalid UTF-8 string.

    :param int max_buffer_size:
        Limits size of data waiting unpacked.  0 means 2**31-1.
        The default value is 100*1024*1024 (100MiB).
        Raises `BufferFull` exception when it is insufficient.
        You should set this parameter when unpacking data from untrusted
        source.

    :param int max_str_len:
        Deprecated, use *max_buffer_size* instead.
        Limits max length of str. (default: max_buffer_size)

    :param int max_bin_len:
        Deprecated, use *max_buffer_size* instead.
        Limits max length of bin. (default: max_buffer_size)

    :param int max_array_len:
        Limits max length of array.
        (default: max_buffer_size)

    :param int max_map_len:
        Limits max length of map.
        (default: max_buffer_size//2)

    :param int max_ext_len:
        Deprecated, use *max_buffer_size* instead.
        Limits max size of ext type. (default: max_buffer_size)

    Example of streaming deserialize from file-like object::

        unpacker = Unpacker(file_like)
        for o in unpacker:
            process(o)

    Example of streaming deserialize from socket::

        unpacker = Unpacker(max_buffer_size=max_buffer_size)
        while True:
            buf = sock.recv(1024**2)
            if not buf:
                break
            unpacker.feed(buf)
            for o in unpacker:
                process(o)

    Raises ``BufferFull`` from :meth:`feed` when the buffer limit is exceeded.
    Raises ``OutOfData`` when the available data is incomplete (iteration
    stops instead of raising it).
    Raises ``FormatError`` when the data is not valid msgpack.
    Raises ``StackError`` when the data is too deeply nested.
    Other exceptions can be raised during unpacking.
    """

    def __init__(
        self,
        file_like=None,
        read_size=0,
        use_list=True,
        raw=False,
        timestamp=0,
        strict_map_key=True,
        object_hook=None,
        object_pairs_hook=None,
        list_hook=None,
        unicode_errors=None,
        max_buffer_size=100 * 1024 * 1024,
        ext_hook=ExtType,
        max_str_len=-1,
        max_bin_len=-1,
        max_array_len=-1,
        max_map_len=-1,
        max_ext_len=-1,
    ):
        if unicode_errors is None:
            unicode_errors = "strict"

        if file_like is None:
            # No source object: data must be supplied through feed().
            self._feeding = True
        else:
            if not callable(file_like.read):
                raise TypeError("`file_like.read` must be callable")
            self.file_like = file_like
            self._feeding = False

        #: array of bytes fed.
        self._buffer = bytearray()
        #: Which position we currently reads
        self._buff_i = 0

        # When Unpacker is used as an iterable, between the calls to next(),
        # the buffer is not "consumed" completely, for efficiency sake.
        # Instead, it is done sloppily.  To make sure we raise BufferFull at
        # the correct moments, we have to keep track of how sloppy we were.
        # Furthermore, when the buffer is incomplete (that is: in the case
        # we raise an OutOfData) we need to rollback the buffer to the correct
        # state, which _buf_checkpoint records.
        self._buf_checkpoint = 0

        if not max_buffer_size:
            max_buffer_size = 2 ** 31 - 1
        # -1 means "not given": derive each limit from max_buffer_size.
        if max_str_len == -1:
            max_str_len = max_buffer_size
        if max_bin_len == -1:
            max_bin_len = max_buffer_size
        if max_array_len == -1:
            max_array_len = max_buffer_size
        if max_map_len == -1:
            max_map_len = max_buffer_size // 2
        if max_ext_len == -1:
            max_ext_len = max_buffer_size

        self._max_buffer_size = max_buffer_size
        if read_size > self._max_buffer_size:
            raise ValueError("read_size must be smaller than max_buffer_size")
        self._read_size = read_size or min(self._max_buffer_size, 16 * 1024)
        self._raw = bool(raw)
        self._strict_map_key = bool(strict_map_key)
        self._unicode_errors = unicode_errors
        self._use_list = use_list
        if not (0 <= timestamp <= 3):
            raise ValueError("timestamp must be 0..3")
        self._timestamp = timestamp
        self._list_hook = list_hook
        self._object_hook = object_hook
        self._object_pairs_hook = object_pairs_hook
        self._ext_hook = ext_hook
        self._max_str_len = max_str_len
        self._max_bin_len = max_bin_len
        self._max_array_len = max_array_len
        self._max_map_len = max_map_len
        self._max_ext_len = max_ext_len
        self._stream_offset = 0

        if list_hook is not None and not callable(list_hook):
            raise TypeError("`list_hook` is not callable")
        if object_hook is not None and not callable(object_hook):
            raise TypeError("`object_hook` is not callable")
        if object_pairs_hook is not None and not callable(object_pairs_hook):
            raise TypeError("`object_pairs_hook` is not callable")
        if object_hook is not None and object_pairs_hook is not None:
            raise TypeError(
                "object_pairs_hook and object_hook are mutually exclusive"
            )
        if not callable(ext_hook):
            raise TypeError("`ext_hook` is not callable")

    def feed(self, next_bytes):
        """Append *next_bytes* to the internal buffer.

        Only usable when no ``file_like`` was given.  Raises ``BufferFull``
        when the unread data plus *next_bytes* exceeds ``max_buffer_size``.
        """
        assert self._feeding
        view = _get_data_from_buffer(next_bytes)
        if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size:
            raise BufferFull

        # Strip buffer before checkpoint before reading file.
        if self._buf_checkpoint > 0:
            del self._buffer[: self._buf_checkpoint]
            self._buff_i -= self._buf_checkpoint
            self._buf_checkpoint = 0

        # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython
        self._buffer.extend(view)

    def _consume(self):
        """ Gets rid of the used parts of the buffer. """
        self._stream_offset += self._buff_i - self._buf_checkpoint
        self._buf_checkpoint = self._buff_i

    def _got_extradata(self):
        """Return True when unread bytes remain in the buffer."""
        return self._buff_i < len(self._buffer)

    def _get_extradata(self):
        """Return the unread tail of the buffer."""
        return self._buffer[self._buff_i :]

    def read_bytes(self, n):
        """Read *n* raw bytes and mark them as consumed."""
        ret = self._read(n)
        self._consume()
        return ret

    def _read(self, n):
        """Return the next *n* buffered bytes and advance the read position."""
        # (int) -> bytearray
        self._reserve(n)
        i = self._buff_i
        self._buff_i = i + n
        return self._buffer[i : i + n]

    def _reserve(self, n):
        """Make sure *n* unread bytes are buffered, or raise ``OutOfData``."""
        remain_bytes = len(self._buffer) - self._buff_i - n

        # Fast path: buffer has n bytes already
        if remain_bytes >= 0:
            return

        if self._feeding:
            # Nothing to read from: rewind to the last checkpoint so the
            # caller can feed more data and retry.
            self._buff_i = self._buf_checkpoint
            raise OutOfData

        # Strip buffer before checkpoint before reading file.
        if self._buf_checkpoint > 0:
            del self._buffer[: self._buf_checkpoint]
            self._buff_i -= self._buf_checkpoint
            self._buf_checkpoint = 0

        # Read from file
        remain_bytes = -remain_bytes
        while remain_bytes > 0:
            to_read_bytes = max(self._read_size, remain_bytes)
            read_data = self.file_like.read(to_read_bytes)
            if not read_data:
                break
            assert isinstance(read_data, bytes)
            self._buffer += read_data
            remain_bytes -= len(read_data)

        if len(self._buffer) < n + self._buff_i:
            self._buff_i = 0  # rollback
            raise OutOfData

    def _read_header(self, execute=EX_CONSTRUCT):
        """Decode one header and return ``(typ, n, obj)``.

        ``typ`` is one of the ``TYPE_*`` constants.  ``n`` is the length for
        raw/bin/array/map and the type code for ext.  ``obj`` is the scalar
        value or the raw payload; it stays ``None`` for arrays and maps.
        Raises ``ValueError`` when a length exceeds its configured limit and
        ``FormatError`` for an unknown header byte.
        """
        typ = TYPE_IMMEDIATE
        n = 0
        obj = None
        self._reserve(1)
        b = self._buffer[self._buff_i]
        self._buff_i += 1
        if b & 0b10000000 == 0:
            obj = b
        elif b & 0b11100000 == 0b11100000:
            obj = -1 - (b ^ 0xFF)
        elif b & 0b11100000 == 0b10100000:
            n = b & 0b00011111
            typ = TYPE_RAW
            if n > self._max_str_len:
                raise ValueError(
                    "%s exceeds max_str_len(%s)" % (n, self._max_str_len)
                )
            obj = self._read(n)
        elif b & 0b11110000 == 0b10010000:
            n = b & 0b00001111
            typ = TYPE_ARRAY
            if n > self._max_array_len:
                raise ValueError(
                    "%s exceeds max_array_len(%s)" % (n, self._max_array_len)
                )
        elif b & 0b11110000 == 0b10000000:
            n = b & 0b00001111
            typ = TYPE_MAP
            if n > self._max_map_len:
                raise ValueError(
                    "%s exceeds max_map_len(%s)" % (n, self._max_map_len)
                )
        elif b == 0xC0:
            obj = None
        elif b == 0xC2:
            obj = False
        elif b == 0xC3:
            obj = True
        elif b == 0xC4:
            typ = TYPE_BIN
            self._reserve(1)
            n = self._buffer[self._buff_i]
            self._buff_i += 1
            if n > self._max_bin_len:
                raise ValueError(
                    "%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)
                )
            obj = self._read(n)
        elif b == 0xC5:
            typ = TYPE_BIN
            self._reserve(2)
            n = _unpack_from(">H", self._buffer, self._buff_i)[0]
            self._buff_i += 2
            if n > self._max_bin_len:
                raise ValueError(
                    "%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)
                )
            obj = self._read(n)
        elif b == 0xC6:
            typ = TYPE_BIN
            self._reserve(4)
            n = _unpack_from(">I", self._buffer, self._buff_i)[0]
            self._buff_i += 4
            if n > self._max_bin_len:
                raise ValueError(
                    "%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)
                )
            obj = self._read(n)
        elif b == 0xC7:  # ext 8
            typ = TYPE_EXT
            self._reserve(2)
            L, n = _unpack_from("Bb", self._buffer, self._buff_i)
            self._buff_i += 2
            if L > self._max_ext_len:
                raise ValueError(
                    "%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)
                )
            obj = self._read(L)
        elif b == 0xC8:  # ext 16
            typ = TYPE_EXT
            self._reserve(3)
            L, n = _unpack_from(">Hb", self._buffer, self._buff_i)
            self._buff_i += 3
            if L > self._max_ext_len:
                raise ValueError(
                    "%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)
                )
            obj = self._read(L)
        elif b == 0xC9:  # ext 32
            typ = TYPE_EXT
            self._reserve(5)
            L, n = _unpack_from(">Ib", self._buffer, self._buff_i)
            self._buff_i += 5
            if L > self._max_ext_len:
                raise ValueError(
                    "%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)
                )
            obj = self._read(L)
        elif b == 0xCA:
            self._reserve(4)
            obj = _unpack_from(">f", self._buffer, self._buff_i)[0]
            self._buff_i += 4
        elif b == 0xCB:
            self._reserve(8)
            obj = _unpack_from(">d", self._buffer, self._buff_i)[0]
            self._buff_i += 8
        elif b == 0xCC:
            self._reserve(1)
            obj = self._buffer[self._buff_i]
            self._buff_i += 1
        elif b == 0xCD:
            self._reserve(2)
            obj = _unpack_from(">H", self._buffer, self._buff_i)[0]
            self._buff_i += 2
        elif b == 0xCE:
            self._reserve(4)
            obj = _unpack_from(">I", self._buffer, self._buff_i)[0]
            self._buff_i += 4
        elif b == 0xCF:
            self._reserve(8)
            obj = _unpack_from(">Q", self._buffer, self._buff_i)[0]
            self._buff_i += 8
        elif b == 0xD0:
            self._reserve(1)
            obj = _unpack_from("b", self._buffer, self._buff_i)[0]
            self._buff_i += 1
        elif b == 0xD1:
            self._reserve(2)
            obj = _unpack_from(">h", self._buffer, self._buff_i)[0]
            self._buff_i += 2
        elif b == 0xD2:
            self._reserve(4)
            obj = _unpack_from(">i", self._buffer, self._buff_i)[0]
            self._buff_i += 4
        elif b == 0xD3:
            self._reserve(8)
            obj = _unpack_from(">q", self._buffer, self._buff_i)[0]
            self._buff_i += 8
        elif b == 0xD4:  # fixext 1
            typ = TYPE_EXT
            if self._max_ext_len < 1:
                raise ValueError(
                    "%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)
                )
            self._reserve(2)
            n, obj = _unpack_from("b1s", self._buffer, self._buff_i)
            self._buff_i += 2
        elif b == 0xD5:  # fixext 2
            typ = TYPE_EXT
            if self._max_ext_len < 2:
                raise ValueError(
                    "%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)
                )
            self._reserve(3)
            n, obj = _unpack_from("b2s", self._buffer, self._buff_i)
            self._buff_i += 3
        elif b == 0xD6:  # fixext 4
            typ = TYPE_EXT
            if self._max_ext_len < 4:
                raise ValueError(
                    "%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)
                )
            self._reserve(5)
            n, obj = _unpack_from("b4s", self._buffer, self._buff_i)
            self._buff_i += 5
        elif b == 0xD7:  # fixext 8
            typ = TYPE_EXT
            if self._max_ext_len < 8:
                raise ValueError(
                    "%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)
                )
            self._reserve(9)
            n, obj = _unpack_from("b8s", self._buffer, self._buff_i)
            self._buff_i += 9
        elif b == 0xD8:  # fixext 16
            typ = TYPE_EXT
            if self._max_ext_len < 16:
                raise ValueError(
                    "%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)
                )
            self._reserve(17)
            n, obj = _unpack_from("b16s", self._buffer, self._buff_i)
            self._buff_i += 17
        elif b == 0xD9:
            typ = TYPE_RAW
            self._reserve(1)
            n = self._buffer[self._buff_i]
            self._buff_i += 1
            if n > self._max_str_len:
                raise ValueError(
                    "%s exceeds max_str_len(%s)" % (n, self._max_str_len)
                )
            obj = self._read(n)
        elif b == 0xDA:
            typ = TYPE_RAW
            self._reserve(2)
            (n,) = _unpack_from(">H", self._buffer, self._buff_i)
            self._buff_i += 2
            if n > self._max_str_len:
                raise ValueError(
                    "%s exceeds max_str_len(%s)" % (n, self._max_str_len)
                )
            obj = self._read(n)
        elif b == 0xDB:
            typ = TYPE_RAW
            self._reserve(4)
            (n,) = _unpack_from(">I", self._buffer, self._buff_i)
            self._buff_i += 4
            if n > self._max_str_len:
                raise ValueError(
                    "%s exceeds max_str_len(%s)" % (n, self._max_str_len)
                )
            obj = self._read(n)
        elif b == 0xDC:
            typ = TYPE_ARRAY
            self._reserve(2)
            (n,) = _unpack_from(">H", self._buffer, self._buff_i)
            self._buff_i += 2
            if n > self._max_array_len:
                raise ValueError(
                    "%s exceeds max_array_len(%s)" % (n, self._max_array_len)
                )
        elif b == 0xDD:
            typ = TYPE_ARRAY
            self._reserve(4)
            (n,) = _unpack_from(">I", self._buffer, self._buff_i)
            self._buff_i += 4
            if n > self._max_array_len:
                raise ValueError(
                    "%s exceeds max_array_len(%s)" % (n, self._max_array_len)
                )
        elif b == 0xDE:
            self._reserve(2)
            (n,) = _unpack_from(">H", self._buffer, self._buff_i)
            self._buff_i += 2
            if n > self._max_map_len:
                raise ValueError(
                    "%s exceeds max_map_len(%s)" % (n, self._max_map_len)
                )
            typ = TYPE_MAP
        elif b == 0xDF:
            self._reserve(4)
            (n,) = _unpack_from(">I", self._buffer, self._buff_i)
            self._buff_i += 4
            if n > self._max_map_len:
                raise ValueError(
                    "%s exceeds max_map_len(%s)" % (n, self._max_map_len)
                )
            typ = TYPE_MAP
        else:
            raise FormatError("Unknown header: 0x%x" % b)
        return typ, n, obj

    def _unpack(self, execute=EX_CONSTRUCT):
        """Decode one object according to *execute* (an ``EX_*`` mode).

        ``EX_READ_ARRAY_HEADER`` / ``EX_READ_MAP_HEADER`` return only the
        container length, ``EX_SKIP`` decodes and discards (returns None),
        and ``EX_CONSTRUCT`` returns the decoded Python object.
        """
        typ, n, obj = self._read_header(execute)

        if execute == EX_READ_ARRAY_HEADER:
            if typ != TYPE_ARRAY:
                raise ValueError("Expected array")
            return n
        if execute == EX_READ_MAP_HEADER:
            if typ != TYPE_MAP:
                raise ValueError("Expected map")
            return n
        # TODO should we eliminate the recursion?
        if typ == TYPE_ARRAY:
            if execute == EX_SKIP:
                for i in xrange(n):
                    # TODO check whether we need to call `list_hook`
                    self._unpack(EX_SKIP)
                return
            ret = newlist_hint(n)
            for i in xrange(n):
                ret.append(self._unpack(EX_CONSTRUCT))
            if self._list_hook is not None:
                ret = self._list_hook(ret)
            # TODO is the interaction between `list_hook` and `use_list` ok?
            return ret if self._use_list else tuple(ret)
        if typ == TYPE_MAP:
            if execute == EX_SKIP:
                for i in xrange(n):
                    # TODO check whether we need to call hooks
                    self._unpack(EX_SKIP)
                    self._unpack(EX_SKIP)
                return
            if self._object_pairs_hook is not None:
                # The hook receives a generator; pairs are decoded as the
                # hook iterates over it.
                ret = self._object_pairs_hook(
                    (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT))
                    for _ in xrange(n)
                )
            else:
                ret = {}
                for _ in xrange(n):
                    key = self._unpack(EX_CONSTRUCT)
                    if self._strict_map_key and type(key) not in (unicode, bytes):
                        raise ValueError(
                            "%s is not allowed for map key" % str(type(key))
                        )
                    if not PY2 and type(key) is str:
                        key = sys.intern(key)
                    ret[key] = self._unpack(EX_CONSTRUCT)
                if self._object_hook is not None:
                    ret = self._object_hook(ret)
            return ret
        if execute == EX_SKIP:
            return
        if typ == TYPE_RAW:
            if self._raw:
                obj = bytes(obj)
            else:
                obj = obj.decode("utf_8", self._unicode_errors)
            return obj
        if typ == TYPE_BIN:
            return bytes(obj)
        if typ == TYPE_EXT:
            if n == -1:  # timestamp
                ts = Timestamp.from_bytes(bytes(obj))
                if self._timestamp == 1:
                    return ts.to_unix()
                elif self._timestamp == 2:
                    return ts.to_unix_nano()
                elif self._timestamp == 3:
                    return ts.to_datetime()
                else:
                    return ts
            else:
                return self._ext_hook(n, bytes(obj))
        assert typ == TYPE_IMMEDIATE
        return obj

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next object; stop iterating when data runs out.

        Raises ``StackError`` when decoding exhausts the recursion limit.
        """
        try:
            ret = self._unpack(EX_CONSTRUCT)
            self._consume()
            return ret
        except OutOfData:
            self._consume()
            raise StopIteration
        except RecursionError as e:
            # Same rule as unpackb(): where RecursionError is only an alias,
            # unrelated errors of the aliased type propagate unchanged.
            if _is_recursionerror(e):
                raise StackError
            raise

    next = __next__

    def skip(self):
        """Decode and discard the next object."""
        self._unpack(EX_SKIP)
        self._consume()

    def unpack(self):
        """Decode and return the next object.

        Raises ``StackError`` when decoding exhausts the recursion limit.
        """
        try:
            ret = self._unpack(EX_CONSTRUCT)
        except RecursionError as e:
            # See __next__ for why the exception is inspected first.
            if _is_recursionerror(e):
                raise StackError
            raise
        self._consume()
        return ret

    def read_array_header(self):
        """Read an array header and return its length."""
        ret = self._unpack(EX_READ_ARRAY_HEADER)
        self._consume()
        return ret

    def read_map_header(self):
        """Read a map header and return its number of pairs."""
        ret = self._unpack(EX_READ_MAP_HEADER)
        self._consume()
        return ret

    def tell(self):
        """Return the number of bytes consumed so far."""
        return self._stream_offset
class Packer(object):
    """
    MessagePack Packer

    Usage::

        packer = Packer()
        astream.write(packer.pack(a))
        astream.write(packer.pack(b))

    Keyword arguments accepted by the constructor:

    :param callable default:
        Called with an object Packer cannot serialize (or an int outside the
        64-bit range); its return value is packed instead.
        See also simplejson's document.

    :param bool use_single_float:
        Pack float as single precision. (default: False)

    :param bool autoreset:
        After each pack call, return the buffer content as `bytes` and start
        a fresh buffer. (default: True)
        When false, read the content with `bytes()` and clear it with
        `.reset()`.

    :param bool use_bin_type:
        Use bin type introduced in msgpack spec 2.0 for bytes.
        It also enables str8 type for unicode. (default: True)

    :param bool strict_types:
        If true, types are matched exactly: instances of subclasses of the
        serializable types are handed to *default* instead, and tuples are
        not serialized as lists.

    :param bool datetime:
        If true, datetime objects are converted with
        ``Timestamp.from_datetime`` and packed as Timestamp.
        (Python 2 is not supported).

    :param str unicode_errors:
        The error handler for encoding unicode. (default: 'strict')
        DO NOT USE THIS!!  This option is kept for very specific usage.
    """

    # Single-byte headers of the fixext family, keyed by payload length.
    _FIXEXT_MARKERS = {
        1: b"\xd4",
        2: b"\xd5",
        4: b"\xd6",
        8: b"\xd7",
        16: b"\xd8",
    }

    def __init__(
        self,
        default=None,
        use_single_float=False,
        autoreset=True,
        use_bin_type=True,
        strict_types=False,
        datetime=False,
        unicode_errors=None,
    ):
        self._strict_types = strict_types
        self._use_float = use_single_float
        self._autoreset = autoreset
        self._use_bin_type = use_bin_type
        self._buffer = StringIO()
        if PY2 and datetime:
            raise ValueError("datetime is not supported in Python 2")
        self._datetime = bool(datetime)
        self._unicode_errors = unicode_errors or "strict"
        if default is not None and not callable(default):
            raise TypeError("default must be callable")
        self._default = default

    def _take_if_autoreset(self):
        """With autoreset on, swap in a fresh buffer and return the old bytes."""
        if self._autoreset:
            packed = self._buffer.getvalue()
            self._buffer = StringIO()
            return packed

    def _write_ext_length(self, size):
        """Write the ext header (marker plus length) for a *size*-byte payload."""
        marker = self._FIXEXT_MARKERS.get(size)
        if marker is not None:
            self._buffer.write(marker)
        elif size <= 0xFF:
            self._buffer.write(struct.pack(">BB", 0xC7, size))
        elif size <= 0xFFFF:
            self._buffer.write(struct.pack(">BH", 0xC8, size))
        else:
            self._buffer.write(struct.pack(">BI", 0xC9, size))

    def _pack(
        self,
        obj,
        nest_limit=DEFAULT_RECURSE_LIMIT,
        check=isinstance,
        check_type_strict=_check_type_strict,
    ):
        """Serialize *obj* into the internal buffer.

        *nest_limit* is the remaining nesting budget; ``ValueError`` is raised
        once it drops below zero.  Objects of unsupported type are passed
        through ``default`` once and retried; ``TypeError`` is raised when
        that is not possible.
        """
        if self._strict_types:
            check = check_type_strict
            list_types = list
        else:
            list_types = (list, tuple)

        # Set once obj has been replaced by a converted value, so the
        # ``default`` hook is not applied again on the next pass.
        default_used = False
        while True:
            if nest_limit < 0:
                raise ValueError("recursion limit exceeded")

            if obj is None:
                return self._buffer.write(b"\xc0")

            # bool is tested before int because bool is an int subclass.
            if check(obj, bool):
                return self._buffer.write(b"\xc3" if obj else b"\xc2")

            if check(obj, int_types):
                encoded = None
                if obj >= 0:
                    if obj < 0x80:
                        encoded = struct.pack("B", obj)
                    elif obj <= 0xFF:
                        encoded = struct.pack("BB", 0xCC, obj)
                    elif obj <= 0xFFFF:
                        encoded = struct.pack(">BH", 0xCD, obj)
                    elif obj <= 0xFFFFFFFF:
                        encoded = struct.pack(">BI", 0xCE, obj)
                    elif obj <= 0xFFFFFFFFFFFFFFFF:
                        encoded = struct.pack(">BQ", 0xCF, obj)
                else:
                    if obj >= -0x20:
                        encoded = struct.pack("b", obj)
                    elif obj >= -0x80:
                        encoded = struct.pack(">Bb", 0xD0, obj)
                    elif obj >= -0x8000:
                        encoded = struct.pack(">Bh", 0xD1, obj)
                    elif obj >= -0x80000000:
                        encoded = struct.pack(">Bi", 0xD2, obj)
                    elif obj >= -0x8000000000000000:
                        encoded = struct.pack(">Bq", 0xD3, obj)
                if encoded is not None:
                    return self._buffer.write(encoded)
                # Outside the 64-bit range: give ``default`` one chance.
                if self._default is not None and not default_used:
                    obj = self._default(obj)
                    default_used = True
                    continue
                raise OverflowError("Integer value out of range")

            if check(obj, (bytes, bytearray)):
                size = len(obj)
                if size >= 2 ** 32:
                    raise ValueError("%s is too large" % type(obj).__name__)
                self._pack_bin_header(size)
                return self._buffer.write(obj)

            if check(obj, unicode):
                obj = obj.encode("utf-8", self._unicode_errors)
                size = len(obj)
                if size >= 2 ** 32:
                    raise ValueError("String is too large")
                self._pack_raw_header(size)
                return self._buffer.write(obj)

            if check(obj, memoryview):
                size = len(obj) * obj.itemsize
                if size >= 2 ** 32:
                    raise ValueError("Memoryview is too large")
                self._pack_bin_header(size)
                return self._buffer.write(obj)

            if check(obj, float):
                if self._use_float:
                    return self._buffer.write(struct.pack(">Bf", 0xCA, obj))
                return self._buffer.write(struct.pack(">Bd", 0xCB, obj))

            if check(obj, (ExtType, Timestamp)):
                if check(obj, Timestamp):
                    # Timestamps are written with ext type code -1.
                    code = -1
                    data = obj.to_bytes()
                else:
                    code = obj.code
                    data = obj.data
                assert isinstance(code, int)
                assert isinstance(data, bytes)
                self._write_ext_length(len(data))
                self._buffer.write(struct.pack("b", code))
                self._buffer.write(data)
                return

            if check(obj, list_types):
                size = len(obj)
                self._pack_array_header(size)
                # Indexed on purpose: exactly ``size`` items follow the header.
                for index in xrange(size):
                    self._pack(obj[index], nest_limit - 1)
                return

            if check(obj, dict):
                return self._pack_map_pairs(
                    len(obj), dict_iteritems(obj), nest_limit - 1
                )

            if self._datetime and check(obj, _DateTime):
                obj = Timestamp.from_datetime(obj)
                default_used = True
                continue

            if self._default is not None and not default_used:
                obj = self._default(obj)
                default_used = True
                continue

            raise TypeError("Cannot serialize %r" % (obj,))

    def pack(self, obj):
        """Pack *obj*; with autoreset on, return the packed bytes."""
        try:
            self._pack(obj)
        except:
            # Any failure discards the partially written output.
            self._buffer = StringIO()  # force reset
            raise
        return self._take_if_autoreset()

    def pack_map_pairs(self, pairs):
        """Pack *pairs*, a sized iterable of ``(key, value)``, as a map."""
        self._pack_map_pairs(len(pairs), pairs)
        return self._take_if_autoreset()

    def pack_array_header(self, n):
        """Write an array header announcing *n* items."""
        if n >= 2 ** 32:
            raise ValueError
        self._pack_array_header(n)
        return self._take_if_autoreset()

    def pack_map_header(self, n):
        """Write a map header announcing *n* pairs."""
        if n >= 2 ** 32:
            raise ValueError
        self._pack_map_header(n)
        return self._take_if_autoreset()

    def pack_ext_type(self, typecode, data):
        """Write an ext value with type code *typecode* (0-127) and *data*."""
        if not isinstance(typecode, int):
            raise TypeError("typecode must have int type.")
        if not 0 <= typecode <= 127:
            raise ValueError("typecode should be 0-127")
        if not isinstance(data, bytes):
            raise TypeError("data must have bytes type")
        size = len(data)
        if size > 0xFFFFFFFF:
            raise ValueError("Too large data")
        self._write_ext_length(size)
        self._buffer.write(struct.pack("B", typecode))
        self._buffer.write(data)

    def _pack_array_header(self, n):
        """Write the shortest array header able to hold *n*."""
        if n <= 0x0F:
            header = struct.pack("B", 0x90 + n)
        elif n <= 0xFFFF:
            header = struct.pack(">BH", 0xDC, n)
        elif n <= 0xFFFFFFFF:
            header = struct.pack(">BI", 0xDD, n)
        else:
            raise ValueError("Array is too large")
        return self._buffer.write(header)

    def _pack_map_header(self, n):
        """Write the shortest map header able to hold *n*."""
        if n <= 0x0F:
            header = struct.pack("B", 0x80 + n)
        elif n <= 0xFFFF:
            header = struct.pack(">BH", 0xDE, n)
        elif n <= 0xFFFFFFFF:
            header = struct.pack(">BI", 0xDF, n)
        else:
            raise ValueError("Dict is too large")
        return self._buffer.write(header)

    def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT):
        """Write a map header for *n* pairs, then each key and value."""
        self._pack_map_header(n)
        child_limit = nest_limit - 1
        for key, value in pairs:
            self._pack(key, child_limit)
            self._pack(value, child_limit)

    def _pack_raw_header(self, n):
        """Write a str header; str8 is only used when use_bin_type is on."""
        if n <= 0x1F:
            header = struct.pack("B", 0xA0 + n)
        elif self._use_bin_type and n <= 0xFF:
            header = struct.pack(">BB", 0xD9, n)
        elif n <= 0xFFFF:
            header = struct.pack(">BH", 0xDA, n)
        elif n <= 0xFFFFFFFF:
            header = struct.pack(">BI", 0xDB, n)
        else:
            raise ValueError("Raw is too large")
        self._buffer.write(header)

    def _pack_bin_header(self, n):
        """Write a bin header, or a str header when use_bin_type is off."""
        if not self._use_bin_type:
            return self._pack_raw_header(n)
        if n <= 0xFF:
            header = struct.pack(">BB", 0xC4, n)
        elif n <= 0xFFFF:
            header = struct.pack(">BH", 0xC5, n)
        elif n <= 0xFFFFFFFF:
            header = struct.pack(">BI", 0xC6, n)
        else:
            raise ValueError("Bin is too large")
        return self._buffer.write(header)

    def bytes(self):
        """Return internal buffer contents as bytes object"""
        return self._buffer.getvalue()

    def reset(self):
        """Reset internal buffer.

        This method is useful only when autoreset=False.
        """
        self._buffer = StringIO()

    def getbuffer(self):
        """Return view of internal buffer."""
        if not (USING_STRINGBUILDER or PY2):
            return self._buffer.getbuffer()
        # In these configurations the view is built over a bytes snapshot.
        return memoryview(self.bytes())