Merge pull request #79 from msgpack/newspec

[WIP] Newspec stage 2.
This commit is contained in:
INADA Naoki 2013-10-20 09:18:50 -07:00
commit e802abebf1
14 changed files with 530 additions and 207 deletions

View file

@ -3,8 +3,8 @@ MessagePack for Python
=======================
:author: INADA Naoki
:version: 0.3.0
:date: 2012-12-07
:version: 0.4.0
:date: 2013-10-21
.. image:: https://secure.travis-ci.org/msgpack/msgpack-python.png
:target: https://travis-ci.org/#!/msgpack/msgpack-python
@ -39,8 +39,40 @@ amd64. Windows SDK is recommanded way to build amd64 msgpack without any fee.)
Without extension, using pure python implementation on CPython runs slowly.
Notes
-----
Note for msgpack 2.0 support
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
msgpack 2.0 adds two types: *bin* and *ext*.
*raw* was bytes or string type like Python 2's ``str``.
To distinguish string and bytes, msgpack 2.0 adds *bin*.
It is non-string binary like Python 3's ``bytes``.
To pack ``bytes`` as the *bin* type, pass ``use_bin_type=True`` to the
packer.
>>> import msgpack
>>> packed = msgpack.packb([b'spam', u'egg'], use_bin_type=True)
>>> msgpack.unpackb(packed, encoding='utf-8')
['spam', u'egg']
You should use it carefully. When you use ``use_bin_type=True``, the packed
binary can be unpacked only by unpackers supporting msgpack-2.0.
To use *ext* type, pass ``msgpack.ExtType`` object to packer.
>>> import msgpack
>>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy'))
>>> msgpack.unpackb(packed)
ExtType(code=42, data='xyzzy')
You can use it with ``default`` and ``ext_hook``. See below.
Note for msgpack 0.2.x users
----------------------------
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
msgpack 0.3 has some incompatible changes.
@ -140,6 +172,31 @@ It is also possible to pack/unpack custom data types. Here is an example for
``object_pairs_hook`` callback may instead be used to receive a list of
key-value pairs.
Extended types
^^^^^^^^^^^^^^^
It is also possible to pack/unpack custom data types using the msgpack 2.0 feature.
>>> import msgpack
>>> import array
>>> def default(obj):
... if isinstance(obj, array.array) and obj.typecode == 'd':
... return msgpack.ExtType(42, obj.tostring())
... raise TypeError("Unknown type: %r" % (obj,))
...
>>> def ext_hook(code, data):
... if code == 42:
... a = array.array('d')
... a.fromstring(data)
... return a
... return msgpack.ExtType(code, data)
...
>>> data = array.array('d', [1.2, 3.4])
>>> packed = msgpack.packb(data, default=default)
>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook)
>>> data == unpacked
True
Advanced unpacking control
^^^^^^^^^^^^^^^^^^^^^^^^^^

View file

@ -4,7 +4,17 @@ from msgpack.exceptions import *
from collections import namedtuple
ExtType = namedtuple('ExtType', 'code data')
class ExtType(namedtuple('ExtType', 'code data')):
    """An application-defined msgpack *ext* value.

    A two-field named tuple: ``code`` is the extension type code and
    ``data`` is the raw payload.  Both fields are validated when the
    instance is created.

    Raises:
        TypeError: ``code`` is not an ``int`` or ``data`` is not ``bytes``.
        ValueError: ``code`` is outside the range 0..127.
    """

    def __new__(cls, code, data):
        # Type checks come first so that a wrong type is reported as
        # TypeError rather than failing inside the range comparison.
        if not isinstance(code, int):
            raise TypeError("code must be int")
        if not isinstance(data, bytes):
            raise TypeError("data must be bytes")
        if not 0 <= code <= 127:
            raise ValueError("code must be 0~127")
        return super(ExtType, cls).__new__(cls, code, data)
import os
if os.environ.get('MSGPACK_PUREPYTHON'):
@ -26,6 +36,7 @@ def pack(o, stream, **kwargs):
packer = Packer(**kwargs)
stream.write(packer.pack(o))
def packb(o, **kwargs):
"""
Pack object `o` and return packed bytes
@ -40,4 +51,3 @@ loads = unpackb
dump = pack
dumps = packb

View file

@ -5,8 +5,11 @@ from cpython cimport *
from libc.stdlib cimport *
from libc.string cimport *
from libc.limits cimport *
from libc.stdint cimport int8_t
from msgpack.exceptions import PackValueError
from msgpack import ExtType
cdef extern from "pack.h":
struct msgpack_packer:
@ -29,11 +32,11 @@ cdef extern from "pack.h":
int msgpack_pack_raw(msgpack_packer* pk, size_t l)
int msgpack_pack_bin(msgpack_packer* pk, size_t l)
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l)
cdef int DEFAULT_RECURSE_LIMIT=511
cdef class Packer(object):
"""
MessagePack Packer
@ -118,77 +121,87 @@ cdef class Packer(object):
cdef int ret
cdef dict d
cdef size_t L
cdef int default_used = 0
if nest_limit < 0:
raise PackValueError("recursion limit exceeded.")
if o is None:
ret = msgpack_pack_nil(&self.pk)
elif isinstance(o, bool):
if o:
ret = msgpack_pack_true(&self.pk)
else:
ret = msgpack_pack_false(&self.pk)
elif PyLong_Check(o):
if o > 0:
ullval = o
ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
else:
llval = o
ret = msgpack_pack_long_long(&self.pk, llval)
elif PyInt_Check(o):
longval = o
ret = msgpack_pack_long(&self.pk, longval)
elif PyFloat_Check(o):
if self.use_float:
fval = o
ret = msgpack_pack_float(&self.pk, fval)
else:
dval = o
ret = msgpack_pack_double(&self.pk, dval)
elif PyBytes_Check(o):
rawval = o
L = len(o)
ret = msgpack_pack_bin(&self.pk, L)
if ret == 0:
while True:
if o is None:
ret = msgpack_pack_nil(&self.pk)
elif isinstance(o, bool):
if o:
ret = msgpack_pack_true(&self.pk)
else:
ret = msgpack_pack_false(&self.pk)
elif PyLong_Check(o):
if o > 0:
ullval = o
ret = msgpack_pack_unsigned_long_long(&self.pk, ullval)
else:
llval = o
ret = msgpack_pack_long_long(&self.pk, llval)
elif PyInt_Check(o):
longval = o
ret = msgpack_pack_long(&self.pk, longval)
elif PyFloat_Check(o):
if self.use_float:
fval = o
ret = msgpack_pack_float(&self.pk, fval)
else:
dval = o
ret = msgpack_pack_double(&self.pk, dval)
elif PyBytes_Check(o):
rawval = o
L = len(o)
ret = msgpack_pack_bin(&self.pk, L)
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyUnicode_Check(o):
if not self.encoding:
raise TypeError("Can't encode unicode string: no encoding is specified")
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
rawval = o
ret = msgpack_pack_raw(&self.pk, len(o))
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
elif PyDict_CheckExact(o):
d = <dict>o
ret = msgpack_pack_map(&self.pk, len(d))
if ret == 0:
for k, v in d.iteritems():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif PyDict_Check(o):
ret = msgpack_pack_map(&self.pk, len(o))
if ret == 0:
for k, v in o.items():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif isinstance(o, ExtType):
# This should be before Tuple because ExtType is namedtuple.
longval = o.code
rawval = o.data
L = len(o.data)
ret = msgpack_pack_ext(&self.pk, longval, L)
ret = msgpack_pack_raw_body(&self.pk, rawval, L)
elif PyUnicode_Check(o):
if not self.encoding:
raise TypeError("Can't encode unicode string: no encoding is specified")
o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors)
rawval = o
ret = msgpack_pack_raw(&self.pk, len(o))
if ret == 0:
ret = msgpack_pack_raw_body(&self.pk, rawval, len(o))
elif PyDict_CheckExact(o):
d = <dict>o
ret = msgpack_pack_map(&self.pk, len(d))
if ret == 0:
for k, v in d.iteritems():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif PyDict_Check(o):
ret = msgpack_pack_map(&self.pk, len(o))
if ret == 0:
for k, v in o.items():
ret = self._pack(k, nest_limit-1)
if ret != 0: break
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif PyTuple_Check(o) or PyList_Check(o):
ret = msgpack_pack_array(&self.pk, len(o))
if ret == 0:
for v in o:
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif self._default:
o = self._default(o)
ret = self._pack(o, nest_limit-1)
else:
raise TypeError("can't serialize %r" % (o,))
return ret
elif PyTuple_Check(o) or PyList_Check(o):
ret = msgpack_pack_array(&self.pk, len(o))
if ret == 0:
for v in o:
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif not default_used and self._default:
o = self._default(o)
default_used = 1
continue
else:
raise TypeError("can't serialize %r" % (o,))
return ret
cpdef pack(self, object obj):
cdef int ret
@ -202,6 +215,10 @@ cdef class Packer(object):
self.pk.length = 0
return buf
# Pack a single ext value: emit the ext header for `typecode` and
# len(data), then append the payload bytes.
# NOTE(review): the int results of msgpack_pack_ext() and
# msgpack_pack_raw_body() are discarded here, and `typecode` / `data` are
# not validated the way the pure-Python pack_ext_type does -- confirm this
# is intended.
def pack_ext_type(self, typecode, data):
msgpack_pack_ext(&self.pk, typecode, len(data))
msgpack_pack_raw_body(&self.pk, data, len(data))
def pack_array_header(self, size_t size):
cdef int ret = msgpack_pack_array(&self.pk, size)
if ret == -1:

View file

@ -16,6 +16,7 @@ from msgpack.exceptions import (
UnpackValueError,
ExtraData,
)
from msgpack import ExtType
cdef extern from "unpack.h":
@ -24,6 +25,7 @@ cdef extern from "unpack.h":
PyObject* object_hook
bint has_pairs_hook # call object_hook with k-v pairs
PyObject* list_hook
PyObject* ext_hook
char *encoding
char *unicode_errors
@ -31,8 +33,6 @@ cdef extern from "unpack.h":
msgpack_user user
PyObject* obj
size_t count
unsigned int ct
PyObject* key
ctypedef int (*execute_fn)(unpack_context* ctx, const char* data,
size_t len, size_t* off) except? -1
@ -44,7 +44,8 @@ cdef extern from "unpack.h":
object unpack_data(unpack_context* ctx)
cdef inline init_ctx(unpack_context *ctx,
object object_hook, object object_pairs_hook, object list_hook,
object object_hook, object object_pairs_hook,
object list_hook, object ext_hook,
bint use_list, char* encoding, char* unicode_errors):
unpack_init(ctx)
ctx.user.use_list = use_list
@ -71,13 +72,20 @@ cdef inline init_ctx(unpack_context *ctx,
raise TypeError("list_hook must be a callable.")
ctx.user.list_hook = <PyObject*>list_hook
if ext_hook is not None:
if not PyCallable_Check(ext_hook):
raise TypeError("ext_hook must be a callable.")
ctx.user.ext_hook = <PyObject*>ext_hook
ctx.user.encoding = encoding
ctx.user.unicode_errors = unicode_errors
# Helper with the (typecode, data) hook signature that rejects every ext
# value by raising NotImplementedError.
# NOTE(review): nothing in the visible hunks references this function
# (unpackb and Unpacker default ext_hook to ExtType) -- possibly a leftover.
def default_read_extended_type(typecode, data):
raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode)
def unpackb(object packed, object object_hook=None, object list_hook=None,
bint use_list=1, encoding=None, unicode_errors="strict",
object_pairs_hook=None,
):
object_pairs_hook=None, ext_hook=ExtType):
"""
Unpack packed_bytes to object. Returns an unpacked object.
@ -106,7 +114,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
unicode_errors = unicode_errors.encode('ascii')
cerr = PyBytes_AsString(unicode_errors)
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook,
use_list, cenc, cerr)
ret = unpack_construct(&ctx, buf, buf_len, &off)
if ret == 1:
obj = unpack_data(&ctx)
@ -211,7 +220,7 @@ cdef class Unpacker(object):
def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1,
object object_hook=None, object object_pairs_hook=None, object list_hook=None,
str encoding=None, str unicode_errors='strict', int max_buffer_size=0,
):
object ext_hook=ExtType):
cdef char *cenc=NULL, *cerr=NULL
self.file_like = file_like
@ -248,7 +257,8 @@ cdef class Unpacker(object):
self.unicode_errors = unicode_errors
cerr = PyBytes_AsString(self.unicode_errors)
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
ext_hook, use_list, cenc, cerr)
def feed(self, object next_bytes):
"""Append `next_bytes` to internal buffer."""

View file

@ -42,11 +42,11 @@ else:
newlist_hint = lambda size: []
from msgpack.exceptions import (
BufferFull,
OutOfData,
UnpackValueError,
PackValueError,
ExtraData)
BufferFull,
OutOfData,
UnpackValueError,
PackValueError,
ExtraData)
from msgpack import ExtType
@ -65,6 +65,7 @@ TYPE_EXT = 5
DEFAULT_RECURSE_LIMIT = 511
def unpack(stream, **kwargs):
"""
Unpack an object from `stream`.
@ -78,6 +79,7 @@ def unpack(stream, **kwargs):
raise ExtraData(ret, unpacker._fb_get_extradata())
return ret
def unpackb(packed, **kwargs):
"""
Unpack an object from `packed`.
@ -95,6 +97,7 @@ def unpackb(packed, **kwargs):
raise ExtraData(ret, unpacker._fb_get_extradata())
return ret
class Unpacker(object):
"""
Streaming unpacker.
@ -503,82 +506,111 @@ class Packer(object):
self._default = default
def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance):
if nest_limit < 0:
raise PackValueError("recursion limit exceeded")
if obj is None:
return self._buffer.write(b"\xc0")
if isinstance(obj, bool):
if obj:
return self._buffer.write(b"\xc3")
return self._buffer.write(b"\xc2")
if isinstance(obj, int_types):
if 0 <= obj < 0x80:
return self._buffer.write(struct.pack("B", obj))
if -0x20 <= obj < 0:
return self._buffer.write(struct.pack("b", obj))
if 0x80 <= obj <= 0xff:
return self._buffer.write(struct.pack("BB", 0xcc, obj))
if -0x80 <= obj < 0:
return self._buffer.write(struct.pack(">Bb", 0xd0, obj))
if 0xff < obj <= 0xffff:
return self._buffer.write(struct.pack(">BH", 0xcd, obj))
if -0x8000 <= obj < -0x80:
return self._buffer.write(struct.pack(">Bh", 0xd1, obj))
if 0xffff < obj <= 0xffffffff:
return self._buffer.write(struct.pack(">BI", 0xce, obj))
if -0x80000000 <= obj < -0x8000:
return self._buffer.write(struct.pack(">Bi", 0xd2, obj))
if 0xffffffff < obj <= 0xffffffffffffffff:
return self._buffer.write(struct.pack(">BQ", 0xcf, obj))
if -0x8000000000000000 <= obj < -0x80000000:
return self._buffer.write(struct.pack(">Bq", 0xd3, obj))
raise PackValueError("Integer value out of range")
if self._use_bin_type and isinstance(obj, bytes):
n = len(obj)
if n <= 0xff:
self._buffer.write(struct.pack('>BB', 0xc4, n))
elif n <= 0xffff:
self._buffer.write(struct.pack(">BH", 0xc5, n))
elif n <= 0xffffffff:
self._buffer.write(struct.pack(">BI", 0xc6, n))
else:
raise PackValueError("Bytes is too large")
return self._buffer.write(obj)
if isinstance(obj, (Unicode, bytes)):
if isinstance(obj, Unicode):
if self._encoding is None:
raise TypeError(
default_used = False
while True:
if nest_limit < 0:
raise PackValueError("recursion limit exceeded")
if obj is None:
return self._buffer.write(b"\xc0")
if isinstance(obj, bool):
if obj:
return self._buffer.write(b"\xc3")
return self._buffer.write(b"\xc2")
if isinstance(obj, int_types):
if 0 <= obj < 0x80:
return self._buffer.write(struct.pack("B", obj))
if -0x20 <= obj < 0:
return self._buffer.write(struct.pack("b", obj))
if 0x80 <= obj <= 0xff:
return self._buffer.write(struct.pack("BB", 0xcc, obj))
if -0x80 <= obj < 0:
return self._buffer.write(struct.pack(">Bb", 0xd0, obj))
if 0xff < obj <= 0xffff:
return self._buffer.write(struct.pack(">BH", 0xcd, obj))
if -0x8000 <= obj < -0x80:
return self._buffer.write(struct.pack(">Bh", 0xd1, obj))
if 0xffff < obj <= 0xffffffff:
return self._buffer.write(struct.pack(">BI", 0xce, obj))
if -0x80000000 <= obj < -0x8000:
return self._buffer.write(struct.pack(">Bi", 0xd2, obj))
if 0xffffffff < obj <= 0xffffffffffffffff:
return self._buffer.write(struct.pack(">BQ", 0xcf, obj))
if -0x8000000000000000 <= obj < -0x80000000:
return self._buffer.write(struct.pack(">Bq", 0xd3, obj))
raise PackValueError("Integer value out of range")
if self._use_bin_type and isinstance(obj, bytes):
n = len(obj)
if n <= 0xff:
self._buffer.write(struct.pack('>BB', 0xc4, n))
elif n <= 0xffff:
self._buffer.write(struct.pack(">BH", 0xc5, n))
elif n <= 0xffffffff:
self._buffer.write(struct.pack(">BI", 0xc6, n))
else:
raise PackValueError("Bytes is too large")
return self._buffer.write(obj)
if isinstance(obj, (Unicode, bytes)):
if isinstance(obj, Unicode):
if self._encoding is None:
raise TypeError(
"Can't encode unicode string: "
"no encoding is specified")
obj = obj.encode(self._encoding, self._unicode_errors)
n = len(obj)
if n <= 0x1f:
self._buffer.write(struct.pack('B', 0xa0 + n))
elif self._use_bin_type and n <= 0xff:
self._buffer.write(struct.pack('>BB', 0xd9, n))
elif n <= 0xffff:
self._buffer.write(struct.pack(">BH", 0xda, n))
elif n <= 0xffffffff:
self._buffer.write(struct.pack(">BI", 0xdb, n))
else:
raise PackValueError("String is too large")
return self._buffer.write(obj)
if isinstance(obj, float):
if self._use_float:
return self._buffer.write(struct.pack(">Bf", 0xca, obj))
return self._buffer.write(struct.pack(">Bd", 0xcb, obj))
if isinstance(obj, (list, tuple)):
n = len(obj)
self._fb_pack_array_header(n)
for i in xrange(n):
self._pack(obj[i], nest_limit - 1)
return
if isinstance(obj, dict):
return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj),
nest_limit - 1)
if self._default is not None:
return self._pack(self._default(obj), nest_limit - 1)
raise TypeError("Cannot serialize %r" % obj)
obj = obj.encode(self._encoding, self._unicode_errors)
n = len(obj)
if n <= 0x1f:
self._buffer.write(struct.pack('B', 0xa0 + n))
elif self._use_bin_type and n <= 0xff:
self._buffer.write(struct.pack('>BB', 0xd9, n))
elif n <= 0xffff:
self._buffer.write(struct.pack(">BH", 0xda, n))
elif n <= 0xffffffff:
self._buffer.write(struct.pack(">BI", 0xdb, n))
else:
raise PackValueError("String is too large")
return self._buffer.write(obj)
if isinstance(obj, float):
if self._use_float:
return self._buffer.write(struct.pack(">Bf", 0xca, obj))
return self._buffer.write(struct.pack(">Bd", 0xcb, obj))
if isinstance(obj, ExtType):
code = obj.code
data = obj.data
assert isinstance(code, int)
assert isinstance(data, bytes)
L = len(data)
if L == 1:
self._buffer.write(b'\xd4')
elif L == 2:
self._buffer.write(b'\xd5')
elif L == 4:
self._buffer.write(b'\xd6')
elif L == 8:
self._buffer.write(b'\xd7')
elif L == 16:
self._buffer.write(b'\xd8')
elif L <= 0xff:
self._buffer.write(struct.pack(">BB", 0xc7, L))
elif L <= 0xffff:
self._buffer.write(struct.pack(">BH", 0xc8, L))
else:
self._buffer.write(struct.pack(">BI", 0xc9, L))
self._buffer.write(struct.pack("b", code))
self._buffer.write(data)
return
if isinstance(obj, (list, tuple)):
n = len(obj)
self._fb_pack_array_header(n)
for i in xrange(n):
self._pack(obj[i], nest_limit - 1)
return
if isinstance(obj, dict):
return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj),
nest_limit - 1)
if not default_used and self._default is not None:
obj = self._default(obj)
default_used = 1
continue
raise TypeError("Cannot serialize %r" % obj)
def pack(self, obj):
self._pack(obj)
@ -616,6 +648,35 @@ class Packer(object):
self._buffer = StringIO(ret)
return ret
def pack_ext_type(self, typecode, data):
    """Append one msgpack *ext* value to the internal buffer.

    The smallest encoding that fits is chosen: a fixext marker when the
    payload is exactly 1, 2, 4, 8 or 16 bytes long, otherwise ext 8,
    ext 16 or ext 32 depending on the payload length.

    Args:
        typecode: extension type code, an ``int`` in 0..127.
        data: payload, a ``bytes`` object of at most 0xffffffff bytes.

    Raises:
        TypeError: ``typecode`` is not an ``int`` or ``data`` is not
            ``bytes``.
        ValueError: ``typecode`` is out of range or ``data`` is too long.
    """
    if not isinstance(typecode, int):
        raise TypeError("typecode must have int type.")
    if not 0 <= typecode <= 127:
        raise ValueError("typecode should be 0-127")
    if not isinstance(data, bytes):
        raise TypeError("data must have bytes type")
    L = len(data)
    if L > 0xffffffff:
        raise ValueError("Too large data")

    # Payload lengths that have a dedicated one-byte fixext marker.
    fixext_markers = {
        1: b'\xd4',
        2: b'\xd5',
        4: b'\xd6',
        8: b'\xd7',
        16: b'\xd8',
    }
    header = fixext_markers.get(L)
    if header is None:
        # Variable-length forms: marker followed by a big-endian length.
        if L <= 0xff:
            header = b'\xc7' + struct.pack('B', L)
        elif L <= 0xffff:
            header = b'\xc8' + struct.pack('>H', L)
        else:
            header = b'\xc9' + struct.pack('>I', L)

    self._buffer.write(header)
    self._buffer.write(struct.pack('B', typecode))
    self._buffer.write(data)
def _fb_pack_array_header(self, n):
if n <= 0x0f:
return self._buffer.write(struct.pack('B', 0x90 + n))

View file

@ -71,6 +71,8 @@ static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l);
static inline int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l);
static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l)
{
char* buf = pk->buf;

View file

@ -687,7 +687,7 @@ static inline int msgpack_pack_raw(msgpack_packer* x, size_t l)
static inline int msgpack_pack_bin(msgpack_packer *x, size_t l)
{
if (!x->use_bin_type) {
return msgpack_pack_raw(x, l)
return msgpack_pack_raw(x, l);
}
if (l < 256) {
unsigned char buf[2] = {0xc4, (unsigned char)l};
@ -705,9 +705,69 @@ static inline int msgpack_pack_bin(msgpack_packer *x, size_t l)
static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t l)
{
msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
if (l > 0) msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
return 0;
}
/*
* Ext
*/
/*
 * Write the header of an ext value whose payload is `l` bytes long and
 * whose type code is `typecode`.  The payload itself is not written here.
 *
 * Payloads of exactly 1, 2, 4, 8 or 16 bytes use the two-byte fixext
 * header (marker, type code).  Everything else uses marker, length, type
 * code, with an 8-, 16- or 32-bit length chosen by the size of `l`.
 */
static inline int msgpack_pack_ext(msgpack_packer* x, int8_t typecode, size_t l)
{
    /* Sized for the largest form: marker + 32-bit length + type code. */
    unsigned char hdr[6];
    size_t hdr_len;
    unsigned char fix_marker = 0;

    switch (l) {
    case 1:  fix_marker = 0xd4; break;
    case 2:  fix_marker = 0xd5; break;
    case 4:  fix_marker = 0xd6; break;
    case 8:  fix_marker = 0xd7; break;
    case 16: fix_marker = 0xd8; break;
    default: break;
    }

    if (fix_marker != 0) {
        hdr[0] = fix_marker;
        hdr[1] = (unsigned char)typecode;
        hdr_len = 2;
    } else if (l < 256) {
        hdr[0] = 0xc7;
        hdr[1] = (unsigned char)l;
        hdr[2] = (unsigned char)typecode;
        hdr_len = 3;
    } else if (l < 65536) {
        hdr[0] = 0xc8;
        _msgpack_store16(&hdr[1], (uint16_t)l);
        hdr[3] = (unsigned char)typecode;
        hdr_len = 4;
    } else {
        hdr[0] = 0xc9;
        _msgpack_store32(&hdr[1], (uint32_t)l);
        hdr[5] = (unsigned char)typecode;
        hdr_len = 6;
    }

    /* NOTE(review): as in the original there is no explicit return
     * statement; presumably msgpack_pack_append_buffer supplies the
     * return value -- confirm against the macro definition. */
    msgpack_pack_append_buffer(x, hdr, hdr_len);
}
#undef msgpack_pack_append_buffer
#undef TAKE8_8

View file

@ -24,6 +24,7 @@ typedef struct unpack_user {
PyObject *object_hook;
bool has_pairs_hook;
PyObject *list_hook;
PyObject *ext_hook;
const char *encoding;
const char *unicode_errors;
} unpack_user;
@ -156,7 +157,7 @@ static inline int unpack_callback_array_item(unpack_user* u, unsigned int curren
static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c)
{
if (u->list_hook) {
PyObject *new_c = PyEval_CallFunction(u->list_hook, "(O)", *c);
PyObject *new_c = PyObject_CallFunctionObjArgs(u->list_hook, *c, NULL);
if (!new_c)
return -1;
Py_DECREF(*c);
@ -202,7 +203,7 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current,
static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c)
{
if (u->object_hook) {
PyObject *new_c = PyEval_CallFunction(u->object_hook, "(O)", *c);
PyObject *new_c = PyObject_CallFunctionObjArgs(u->object_hook, *c, NULL);
if (!new_c)
return -1;
@ -235,4 +236,25 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char*
return 0;
}
/*
 * Build the Python object for one ext value by calling the user's
 * ext_hook as ext_hook(typecode, data).
 *
 * `pos` points at the type-code byte, followed by the payload.  `length`
 * counts the type-code byte as well, so the payload is length-1 bytes.
 * `base` is not used here.
 *
 * Returns 0 and stores the hook's result in *o on success.  Returns -1
 * with a Python exception set if no hook is installed or the hook call
 * fails.
 */
static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
                                      unsigned int length, msgpack_unpack_object* o)
{
    int8_t typecode = (int8_t)pos[0];
    const char *payload = pos + 1;
    PyObject *result;

    if (u->ext_hook == NULL) {
        PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL");
        return -1;
    }

    /* The payload is handed over with "s#" on Python 2 and "y#" on
     * Python 3. */
#if PY_MAJOR_VERSION == 2
    result = PyObject_CallFunction(u->ext_hook, "(is#)", typecode, payload, length-1);
#else
    result = PyObject_CallFunction(u->ext_hook, "(iy#)", typecode, payload, length-1);
#endif
    if (result == NULL) {
        return -1;
    }

    *o = result;
    return 0;
}
#include "unpack_template.h"

View file

@ -45,10 +45,11 @@ typedef enum {
CS_BIN_8 = 0x04,
CS_BIN_16 = 0x05,
CS_BIN_32 = 0x06,
//CS_ = 0x07,
//CS_ = 0x08,
//CS_ = 0x09,
CS_EXT_8 = 0x07,
CS_EXT_16 = 0x08,
CS_EXT_32 = 0x09,
CS_FLOAT = 0x0a,
CS_DOUBLE = 0x0b,
CS_UINT_8 = 0x0c,
@ -60,6 +61,12 @@ typedef enum {
CS_INT_32 = 0x12,
CS_INT_64 = 0x13,
//CS_FIXEXT1 = 0x14,
//CS_FIXEXT2 = 0x15,
//CS_FIXEXT4 = 0x16,
//CS_FIXEXT8 = 0x17,
//CS_FIXEXT16 = 0x18,
CS_RAW_8 = 0x19,
CS_RAW_16 = 0x1a,
CS_RAW_32 = 0x1b,
@ -70,6 +77,7 @@ typedef enum {
ACS_RAW_VALUE,
ACS_BIN_VALUE,
ACS_EXT_VALUE,
} msgpack_unpack_state;
@ -85,4 +93,3 @@ typedef enum {
#endif
#endif /* msgpack/unpack_define.h */

View file

@ -178,15 +178,23 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
switch(*p) {
case 0xc0: // nil
push_simple_value(_nil);
//case 0xc1: // string
// again_terminal_trail(NEXT_CS(p), p+1);
//case 0xc1: // never used
case 0xc2: // false
push_simple_value(_false);
case 0xc3: // true
push_simple_value(_true);
//case 0xc7:
//case 0xc8:
//case 0xc9:
case 0xc4: // bin 8
again_fixed_trail(NEXT_CS(p), 1);
case 0xc5: // bin 16
again_fixed_trail(NEXT_CS(p), 2);
case 0xc6: // bin 32
again_fixed_trail(NEXT_CS(p), 4);
case 0xc7: // ext 8
again_fixed_trail(NEXT_CS(p), 1);
case 0xc8: // ext 16
again_fixed_trail(NEXT_CS(p), 2);
case 0xc9: // ext 32
again_fixed_trail(NEXT_CS(p), 4);
case 0xca: // float
case 0xcb: // double
case 0xcc: // unsigned int 8
@ -198,15 +206,17 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case 0xd2: // signed int 32
case 0xd3: // signed int 64
again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03));
case 0xc4: // bin 8
case 0xc5: // bin 16
case 0xc6: // bin 32
//case 0xd4:
//case 0xd5:
//case 0xd6: // big integer 16
//case 0xd7: // big integer 32
//case 0xd8: // big float 16
case 0xd9: // raw 8
case 0xd4: // fixext 1
case 0xd5: // fixext 2
case 0xd6: // fixext 4
case 0xd7: // fixext 8
again_fixed_trail_if_zero(ACS_EXT_VALUE,
(1 << (((unsigned int)*p) & 0x03))+1,
_ext_zero);
case 0xd8: // fixext 16
again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero);
case 0xd9: // str 8
again_fixed_trail(NEXT_CS(p), 1);
case 0xda: // raw 16
case 0xdb: // raw 32
case 0xdc: // array 16
@ -237,8 +247,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
if((size_t)(pe - p) < trail) { goto _out; }
n = p; p += trail - 1;
switch(cs) {
//case CS_
//case CS_
case CS_EXT_8:
again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero);
case CS_EXT_16:
again_fixed_trail_if_zero(ACS_EXT_VALUE,
_msgpack_load16(uint16_t,n)+1,
_ext_zero);
case CS_EXT_32:
again_fixed_trail_if_zero(ACS_EXT_VALUE,
_msgpack_load32(uint32_t,n)+1,
_ext_zero);
case CS_FLOAT: {
union { uint32_t i; float f; } mem;
mem.i = _msgpack_load32(uint32_t,n);
@ -269,26 +287,6 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case CS_INT_64:
push_fixed_value(_int64, _msgpack_load64(int64_t,n));
//case CS_
//case CS_
//case CS_BIG_INT_16:
// again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load16(uint16_t,n), _big_int_zero);
//case CS_BIG_INT_32:
// again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load32(uint32_t,n), _big_int_zero);
//case ACS_BIG_INT_VALUE:
//_big_int_zero:
// // FIXME
// push_variable_value(_big_int, data, n, trail);
//case CS_BIG_FLOAT_16:
// again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load16(uint16_t,n), _big_float_zero);
//case CS_BIG_FLOAT_32:
// again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load32(uint32_t,n), _big_float_zero);
//case ACS_BIG_FLOAT_VALUE:
//_big_float_zero:
// // FIXME
// push_variable_value(_big_float, data, n, trail);
case CS_BIN_8:
again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero);
case CS_BIN_16:
@ -309,6 +307,10 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
_raw_zero:
push_variable_value(_raw, data, n, trail);
case ACS_EXT_VALUE:
_ext_zero:
push_variable_value(_ext, data, n, trail);
case CS_ARRAY_16:
start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM);
case CS_ARRAY_32:

57
test/test_extension.py Normal file
View file

@ -0,0 +1,57 @@
from __future__ import print_function
import array
import msgpack
from msgpack import ExtType
def test_pack_ext_type():
    """Packer.pack_ext_type picks the right ext encoding for each length."""

    def pack_with_code_0x42(payload):
        packer = msgpack.Packer()
        packer.pack_ext_type(0x42, payload)
        return packer.bytes()

    cases = [
        (b'A', b'\xd4\x42A'),                                # fixext 1
        (b'AB', b'\xd5\x42AB'),                              # fixext 2
        (b'ABCD', b'\xd6\x42ABCD'),                          # fixext 4
        (b'ABCDEFGH', b'\xd7\x42ABCDEFGH'),                  # fixext 8
        (b'A'*16, b'\xd8\x42' + b'A'*16),                    # fixext 16
        (b'ABC', b'\xc7\x03\x42ABC'),                        # ext 8
        (b'A'*0x0123, b'\xc8\x01\x23\x42' + b'A'*0x0123),    # ext 16
        (b'A'*0x00012345,
         b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345),     # ext 32
    ]
    for payload, expected in cases:
        assert pack_with_code_0x42(payload) == expected
def test_unpack_ext_type():
    """unpackb decodes every ext encoding into the matching ExtType."""
    cases = [
        (b'\xd4\x42A', ExtType(0x42, b'A')),                  # fixext 1
        (b'\xd5\x42AB', ExtType(0x42, b'AB')),                # fixext 2
        (b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')),            # fixext 4
        (b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')),    # fixext 8
        (b'\xd8\x42' + b'A'*16, ExtType(0x42, b'A'*16)),      # fixext 16
        (b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')),          # ext 8
        (b'\xc8\x01\x23\x42' + b'A'*0x0123,
         ExtType(0x42, b'A'*0x0123)),                         # ext 16
        (b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345,
         ExtType(0x42, b'A'*0x00012345)),                     # ext 32
    ]
    for packed, expected in cases:
        assert msgpack.unpackb(packed) == expected
def test_extension_type():
    """Round-trip an ``array.array`` through ``default`` and ``ext_hook``."""

    def default(obj):
        print('default called', obj)
        if isinstance(obj, array.array):
            typecode = 123  # application specific typecode
            # array.tostring() was removed in Python 3.9; prefer tobytes()
            # and fall back only where it does not exist (Python 2).
            to_bytes = getattr(obj, 'tobytes', None) or obj.tostring
            data = to_bytes()
            return ExtType(typecode, data)
        raise TypeError("Unknown type object %r" % (obj,))

    def ext_hook(code, data):
        print('ext_hook called', code, data)
        assert code == 123
        obj = array.array('d')
        # Same compatibility shim as above, for the decoding direction.
        from_bytes = getattr(obj, 'frombytes', None) or obj.fromstring
        from_bytes(data)
        return obj

    obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])]
    s = msgpack.packb(obj, default=default)
    obj2 = msgpack.unpackb(s, ext_hook=ext_hook)
    assert obj == obj2

View file

@ -1,6 +1,6 @@
# coding: utf-8
from msgpack import packb, unpackb
from msgpack import packb, unpackb, ExtType
def test_str8():
@ -66,4 +66,23 @@ def test_bin32():
assert b[5:] == data
assert unpackb(b) == data
def test_ext():
    """packb/unpackb round-trip ExtType at every ext size boundary."""
    cases = [
        (ExtType(0x42, b'Z'), b'\xd4\x42Z'),                  # fixext 1
        (ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ'),                # fixext 2
        (ExtType(0x42, b'Z'*4), b'\xd6\x42' + b'Z'*4),        # fixext 4
        (ExtType(0x42, b'Z'*8), b'\xd7\x42' + b'Z'*8),        # fixext 8
        (ExtType(0x42, b'Z'*16), b'\xd8\x42' + b'Z'*16),      # fixext 16
        # ext 8
        (ExtType(0x42, b''), b'\xc7\x00\x42'),
        (ExtType(0x42, b'Z'*255), b'\xc7\xff\x42' + b'Z'*255),
        # ext 16
        (ExtType(0x42, b'Z'*256), b'\xc8\x01\x00\x42' + b'Z'*256),
        (ExtType(0x42, b'Z'*0xffff), b'\xc8\xff\xff\x42' + b'Z'*0xffff),
        # ext 32
        (ExtType(0x42, b'Z'*0x10000),
         b'\xc9\x00\x01\x00\x00\x42' + b'Z'*0x10000),
        # The 0xffffffff-byte payload case is left out: it needs large memory.
    ]
    for ext, packed in cases:
        # Pack first, then unpack, for each case in turn.
        assert packb(ext) == packed
        assert unpackb(packed) == ext

View file

@ -35,7 +35,7 @@ def test_only_one_obj_hook():
unpackb(b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x)
def test_bad_hook():
with raises(ValueError):
with raises(TypeError):
packed = packb([3, 1+2j], default=lambda o: o)
unpacked = unpackb(packed, use_list=1)

View file

@ -84,4 +84,3 @@ def test_readbytes():
assert unpacker.read_bytes(3) == b'oob'
assert unpacker.unpack() == ord(b'a')
assert unpacker.unpack() == ord(b'r')