Merge branch 'master' of https://github.com/antocuni/msgpack-python into newspec

Conflicts:
	msgpack/fallback.py
	msgpack/unpack.h
	msgpack/unpack_define.h
	msgpack/unpack_template.h
This commit is contained in:
INADA Naoki 2013-10-20 15:08:31 +09:00
commit 27f0cba8a5
11 changed files with 261 additions and 41 deletions

View file

@ -140,6 +140,14 @@ It is also possible to pack/unpack custom data types. Here is an example for
``object_pairs_hook`` callback may instead be used to receive a list of ``object_pairs_hook`` callback may instead be used to receive a list of
key-value pairs. key-value pairs.
Extended types
^^^^^^^^^^^^^^^
It is also possible to pack/unpack custom data types using the msgpack feature
of "extended types". For example, msgpack-pypy uses it to provide very fast serialization of int/float lists on top of PyPy (experimental for now):
https://bitbucket.org/antocuni/msgpack-pypy/src/default/msgpack_pypy.py
Advanced unpacking control Advanced unpacking control
^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^

View file

@ -5,6 +5,7 @@ from cpython cimport *
from libc.stdlib cimport * from libc.stdlib cimport *
from libc.string cimport * from libc.string cimport *
from libc.limits cimport * from libc.limits cimport *
from libc.stdint cimport int8_t
from msgpack.exceptions import PackValueError from msgpack.exceptions import PackValueError
@ -29,6 +30,7 @@ cdef extern from "pack.h":
int msgpack_pack_raw(msgpack_packer* pk, size_t l) int msgpack_pack_raw(msgpack_packer* pk, size_t l)
int msgpack_pack_bin(msgpack_packer* pk, size_t l) int msgpack_pack_bin(msgpack_packer* pk, size_t l)
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l)
cdef int DEFAULT_RECURSE_LIMIT=511 cdef int DEFAULT_RECURSE_LIMIT=511
@ -183,6 +185,9 @@ cdef class Packer(object):
for v in o: for v in o:
ret = self._pack(v, nest_limit-1) ret = self._pack(v, nest_limit-1)
if ret != 0: break if ret != 0: break
elif self.handle_unknown_type(o):
# a truthy result means that obj was successfully packed, so we are done
return 0
elif self._default: elif self._default:
o = self._default(o) o = self._default(o)
ret = self._pack(o, nest_limit-1) ret = self._pack(o, nest_limit-1)
@ -202,6 +207,13 @@ cdef class Packer(object):
self.pk.length = 0 self.pk.length = 0
return buf return buf
def handle_unknown_type(self, obj):
    """Hook called by the packer for objects it has no built-in branch for.

    Subclasses may override this, pack `obj` themselves and return a truthy
    value; the packer then treats the object as done.  This base
    implementation packs nothing and reports "not handled".
    """
    return None
def pack_extended_type(self, typecode, data):
    """Append one msgpack "ext" value to the internal buffer.

    `typecode` is the application-specific type code (passed to C as an
    int8_t) and `data` is the raw payload.  The ext header is written first,
    followed by the payload bytes.

    Raises MemoryError if either C writer reports failure.
    """
    cdef size_t size = len(data)
    cdef int ret = msgpack_pack_ext(&self.pk, typecode, size)
    if ret == 0:
        # Only append the payload once the header went in, so a failed
        # header write is not followed by a dangling body.
        ret = msgpack_pack_raw_body(&self.pk, data, size)
    # NOTE(review): -1 is treated as failure, mirroring the check in
    # pack_array_header; confirm MemoryError matches what the rest of the
    # class raises for this condition.
    if ret == -1:
        raise MemoryError
def pack_array_header(self, size_t size): def pack_array_header(self, size_t size):
cdef int ret = msgpack_pack_array(&self.pk, size) cdef int ret = msgpack_pack_array(&self.pk, size)
if ret == -1: if ret == -1:

View file

@ -24,6 +24,7 @@ cdef extern from "unpack.h":
PyObject* object_hook PyObject* object_hook
bint has_pairs_hook # call object_hook with k-v pairs bint has_pairs_hook # call object_hook with k-v pairs
PyObject* list_hook PyObject* list_hook
PyObject* ext_type_hook
char *encoding char *encoding
char *unicode_errors char *unicode_errors
@ -45,6 +46,7 @@ cdef extern from "unpack.h":
cdef inline init_ctx(unpack_context *ctx, cdef inline init_ctx(unpack_context *ctx,
object object_hook, object object_pairs_hook, object list_hook, object object_hook, object object_pairs_hook, object list_hook,
object ext_type_hook,
bint use_list, char* encoding, char* unicode_errors): bint use_list, char* encoding, char* unicode_errors):
unpack_init(ctx) unpack_init(ctx)
ctx.user.use_list = use_list ctx.user.use_list = use_list
@ -71,9 +73,17 @@ cdef inline init_ctx(unpack_context *ctx,
raise TypeError("list_hook must be a callable.") raise TypeError("list_hook must be a callable.")
ctx.user.list_hook = <PyObject*>list_hook ctx.user.list_hook = <PyObject*>list_hook
if ext_type_hook is not None:
if not PyCallable_Check(ext_type_hook):
raise TypeError("ext_type_hook must be a callable.")
ctx.user.ext_type_hook = <PyObject*>ext_type_hook
ctx.user.encoding = encoding ctx.user.encoding = encoding
ctx.user.unicode_errors = unicode_errors ctx.user.unicode_errors = unicode_errors
def default_read_extended_type(typecode, data):
    """Fallback ext hook that rejects every extended type.

    Always raises NotImplementedError naming the offending typecode; `data`
    is accepted only to match the hook signature and is not inspected.
    """
    message = "Cannot decode extended type with typecode=%d" % typecode
    raise NotImplementedError(message)
def unpackb(object packed, object object_hook=None, object list_hook=None, def unpackb(object packed, object object_hook=None, object list_hook=None,
bint use_list=1, encoding=None, unicode_errors="strict", bint use_list=1, encoding=None, unicode_errors="strict",
object_pairs_hook=None, object_pairs_hook=None,
@ -106,7 +116,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
unicode_errors = unicode_errors.encode('ascii') unicode_errors = unicode_errors.encode('ascii')
cerr = PyBytes_AsString(unicode_errors) cerr = PyBytes_AsString(unicode_errors)
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, default_read_extended_type,
use_list, cenc, cerr)
ret = unpack_construct(&ctx, buf, buf_len, &off) ret = unpack_construct(&ctx, buf, buf_len, &off)
if ret == 1: if ret == 1:
obj = unpack_data(&ctx) obj = unpack_data(&ctx)
@ -248,7 +259,10 @@ cdef class Unpacker(object):
self.unicode_errors = unicode_errors self.unicode_errors = unicode_errors
cerr = PyBytes_AsString(self.unicode_errors) cerr = PyBytes_AsString(self.unicode_errors)
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr) ext_type_hook = self.read_extended_type
Py_INCREF(ext_type_hook)
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
ext_type_hook, use_list, cenc, cerr)
def feed(self, object next_bytes): def feed(self, object next_bytes):
"""Append `next_bytes` to internal buffer.""" """Append `next_bytes` to internal buffer."""
@ -358,6 +372,24 @@ cdef class Unpacker(object):
""" """
return self._unpack(unpack_construct, write_bytes) return self._unpack(unpack_construct, write_bytes)
def unpack_one(self, object write_bytes=None):
    """
    Unpack exactly one object and require that nothing follows it.

    If write_bytes is not None, it will be called with parts of the raw
    message as it is unpacked.

    Raises `UnpackValueError` if there are no more bytes to unpack.
    Raises ``ExtraData`` if there are still bytes left after the unpacking.
    """
    try:
        # Forward write_bytes; it was previously accepted but never used, so
        # the documented callback was silently ignored.
        result = self.unpack(write_bytes)
    except OutOfData:
        raise UnpackValueError("Data is not enough")
    if self.buf_head < self.buf_tail:
        # Pending input is the range [buf_head, buf_tail); bound the slice
        # explicitly instead of leaving its end open.
        raise ExtraData(result, self.buf[self.buf_head:self.buf_tail])
    return result
def skip(self, object write_bytes=None): def skip(self, object write_bytes=None):
""" """
read and ignore one object, returning None read and ignore one object, returning None
@ -385,6 +417,9 @@ cdef class Unpacker(object):
""" """
return self._unpack(read_map_header, write_bytes) return self._unpack(read_map_header, write_bytes)
def read_extended_type(self, typecode, data):
    """Hook invoked for every ext value encountered while unpacking.

    Override in a subclass to turn (typecode, data) into an object.  This
    base implementation delegates to the module-level
    default_read_extended_type and returns whatever that call yields.
    """
    decoded = default_read_extended_type(typecode, data)
    return decoded
def __iter__(self): def __iter__(self):
return self return self

View file

@ -71,6 +71,8 @@ static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l); static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l);
static inline int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l);
static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l)
{ {
char* buf = pk->buf; char* buf = pk->buf;

View file

@ -708,6 +708,66 @@ static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t
msgpack_pack_append_buffer(x, (const unsigned char*)b, l); msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
} }
/*
* Ext
*/
/*
 * Emit the header of an msgpack "ext" value: the format marker, the payload
 * length (for the formats that carry one) and the application typecode.
 * Only the header is written here; `l` is the size of the payload that the
 * caller is expected to append afterwards.
 */
static inline int msgpack_pack_ext(msgpack_packer* x, int8_t typecode, size_t l)
{
    unsigned char head[6];
    size_t head_len;
    unsigned char fix_marker = 0;

    /* Payload sizes 1, 2, 4, 8 and 16 have dedicated fixext markers. */
    switch (l) {
    case 1:  fix_marker = 0xd4; break;
    case 2:  fix_marker = 0xd5; break;
    case 4:  fix_marker = 0xd6; break;
    case 8:  fix_marker = 0xd7; break;
    case 16: fix_marker = 0xd8; break;
    default: break;
    }

    if (fix_marker != 0) {
        /* fixext: marker + typecode, the size is implied by the marker */
        head[0] = fix_marker;
        head[1] = (unsigned char)typecode;
        head_len = 2;
    } else if (l < 256) {
        /* ext 8: marker, 1-byte size, typecode */
        head[0] = 0xc7;
        head[1] = l;
        head[2] = (unsigned char)typecode;
        head_len = 3;
    } else if (l < 65536) {
        /* ext 16: marker, 2-byte size, typecode */
        head[0] = 0xc8;
        _msgpack_store16(&head[1], (uint16_t)l);
        head[3] = (unsigned char)typecode;
        head_len = 4;
    } else {
        /* ext 32: marker, 4-byte size, typecode */
        head[0] = 0xc9;
        _msgpack_store32(&head[1], (uint32_t)l);
        head[5] = (unsigned char)typecode;
        head_len = 6;
    }

    /* NOTE(review): as in every branch of the previous version, there is no
     * explicit return; presumably msgpack_pack_append_buffer expands to a
     * return statement -- confirm against its definition. */
    msgpack_pack_append_buffer(x, head, head_len);
}
#undef msgpack_pack_append_buffer #undef msgpack_pack_append_buffer
#undef TAKE8_8 #undef TAKE8_8

View file

@ -24,6 +24,7 @@ typedef struct unpack_user {
PyObject *object_hook; PyObject *object_hook;
bool has_pairs_hook; bool has_pairs_hook;
PyObject *list_hook; PyObject *list_hook;
PyObject *ext_type_hook;
const char *encoding; const char *encoding;
const char *unicode_errors; const char *unicode_errors;
} unpack_user; } unpack_user;
@ -235,4 +236,21 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char*
return 0; return 0;
} }
/*
 * Unpack callback for msgpack "ext" values.
 *
 * `pos` points at the typecode byte, which is immediately followed by the
 * payload; `length` counts the typecode byte plus the payload.  The
 * configured ext_type_hook is called as hook(typecode, payload) and its
 * result is stored in *o.
 *
 * Returns 0 on success, or -1 with a Python exception set when no hook is
 * configured or the hook call fails.
 */
static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
                                      unsigned int length, msgpack_unpack_object* o)
{
    PyObject *py;
    int8_t typecode;

    /* Validate the hook before touching the input. */
    if (!u->ext_type_hook) {
        PyErr_SetString(PyExc_AssertionError, "u->ext_type_hook cannot be NULL");
        return -1;
    }
    typecode = (int8_t)*pos++;
    // length also includes the typecode, so the actual data is length-1
#if PY_MAJOR_VERSION >= 3
    /* On Python 3 "s#" builds a str by decoding the buffer as UTF-8, which
     * fails or corrupts arbitrary binary payloads; "y#" hands over bytes. */
    py = PyEval_CallFunction(u->ext_type_hook, "(iy#)", typecode, pos, length-1);
#else
    py = PyEval_CallFunction(u->ext_type_hook, "(is#)", typecode, pos, length-1);
#endif
    if (!py)
        return -1;
    *o = py;
    return 0;
}
#include "unpack_template.h" #include "unpack_template.h"

View file

@ -45,10 +45,11 @@ typedef enum {
CS_BIN_8 = 0x04, CS_BIN_8 = 0x04,
CS_BIN_16 = 0x05, CS_BIN_16 = 0x05,
CS_BIN_32 = 0x06, CS_BIN_32 = 0x06,
//CS_ = 0x07,
//CS_ = 0x08, CS_EXT_8 = 0x07,
//CS_ = 0x09, CS_EXT_16 = 0x08,
CS_EXT_32 = 0x09,
CS_FLOAT = 0x0a, CS_FLOAT = 0x0a,
CS_DOUBLE = 0x0b, CS_DOUBLE = 0x0b,
CS_UINT_8 = 0x0c, CS_UINT_8 = 0x0c,
@ -60,6 +61,12 @@ typedef enum {
CS_INT_32 = 0x12, CS_INT_32 = 0x12,
CS_INT_64 = 0x13, CS_INT_64 = 0x13,
//CS_FIXEXT1 = 0x14,
//CS_FIXEXT2 = 0x15,
//CS_FIXEXT4 = 0x16,
//CS_FIXEXT8 = 0x17,
//CS_FIXEXT16 = 0x18,
CS_RAW_8 = 0x19, CS_RAW_8 = 0x19,
CS_RAW_16 = 0x1a, CS_RAW_16 = 0x1a,
CS_RAW_32 = 0x1b, CS_RAW_32 = 0x1b,
@ -70,6 +77,7 @@ typedef enum {
ACS_RAW_VALUE, ACS_RAW_VALUE,
ACS_BIN_VALUE, ACS_BIN_VALUE,
ACS_EXT_VALUE,
} msgpack_unpack_state; } msgpack_unpack_state;

View file

@ -184,9 +184,15 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
push_simple_value(_false); push_simple_value(_false);
case 0xc3: // true case 0xc3: // true
push_simple_value(_true); push_simple_value(_true);
//case 0xc7: //case 0xc4:
//case 0xc8: //case 0xc5:
//case 0xc9: //case 0xc6:
case 0xc7: // ext 8
again_fixed_trail(NEXT_CS(p), 1);
case 0xc8: // ext 16
again_fixed_trail(NEXT_CS(p), 2);
case 0xc9: // ext 32
again_fixed_trail(NEXT_CS(p), 4);
case 0xca: // float case 0xca: // float
case 0xcb: // double case 0xcb: // double
case 0xcc: // unsigned int 8 case 0xcc: // unsigned int 8
@ -198,15 +204,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case 0xd2: // signed int 32 case 0xd2: // signed int 32
case 0xd3: // signed int 64 case 0xd3: // signed int 64
again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03));
case 0xc4: // bin 8 case 0xd4: // fixext 1
case 0xc5: // bin 16 case 0xd5: // fixext 2
case 0xc6: // bin 32 case 0xd6: // fixext 4
//case 0xd4: case 0xd7: // fixext 8
//case 0xd5: again_fixed_trail_if_zero(ACS_EXT_VALUE,
//case 0xd6: // big integer 16 (1 << (((unsigned int)*p) & 0x03))+1,
//case 0xd7: // big integer 32 _ext_zero);
//case 0xd8: // big float 16 case 0xd8: // fixext 16
case 0xd9: // raw 8 again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero);
//case 0xd9:
case 0xda: // raw 16 case 0xda: // raw 16
case 0xdb: // raw 32 case 0xdb: // raw 32
case 0xdc: // array 16 case 0xdc: // array 16
@ -237,8 +244,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
if((size_t)(pe - p) < trail) { goto _out; } if((size_t)(pe - p) < trail) { goto _out; }
n = p; p += trail - 1; n = p; p += trail - 1;
switch(cs) { switch(cs) {
//case CS_ case CS_EXT_8:
//case CS_ again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero);
case CS_EXT_16:
again_fixed_trail_if_zero(ACS_EXT_VALUE,
_msgpack_load16(uint16_t,n)+1,
_ext_zero);
case CS_EXT_32:
again_fixed_trail_if_zero(ACS_EXT_VALUE,
_msgpack_load32(uint32_t,n)+1,
_ext_zero);
case CS_FLOAT: { case CS_FLOAT: {
union { uint32_t i; float f; } mem; union { uint32_t i; float f; } mem;
mem.i = _msgpack_load32(uint32_t,n); mem.i = _msgpack_load32(uint32_t,n);
@ -269,26 +284,6 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case CS_INT_64: case CS_INT_64:
push_fixed_value(_int64, _msgpack_load64(int64_t,n)); push_fixed_value(_int64, _msgpack_load64(int64_t,n));
//case CS_
//case CS_
//case CS_BIG_INT_16:
// again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load16(uint16_t,n), _big_int_zero);
//case CS_BIG_INT_32:
// again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load32(uint32_t,n), _big_int_zero);
//case ACS_BIG_INT_VALUE:
//_big_int_zero:
// // FIXME
// push_variable_value(_big_int, data, n, trail);
//case CS_BIG_FLOAT_16:
// again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load16(uint16_t,n), _big_float_zero);
//case CS_BIG_FLOAT_32:
// again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load32(uint32_t,n), _big_float_zero);
//case ACS_BIG_FLOAT_VALUE:
//_big_float_zero:
// // FIXME
// push_variable_value(_big_float, data, n, trail);
case CS_BIN_8: case CS_BIN_8:
again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero); again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero);
case CS_BIN_16: case CS_BIN_16:
@ -309,6 +304,10 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
_raw_zero: _raw_zero:
push_variable_value(_raw, data, n, trail); push_variable_value(_raw, data, n, trail);
case ACS_EXT_VALUE:
_ext_zero:
push_variable_value(_ext, data, n, trail);
case CS_ARRAY_16: case CS_ARRAY_16:
start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM);
case CS_ARRAY_32: case CS_ARRAY_32:
@ -320,7 +319,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case CS_MAP_32: case CS_MAP_32:
/* FIXME security guard */ /* FIXME security guard */
start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY);
default: default:
goto _failed; goto _failed;
} }

View file

@ -92,6 +92,7 @@ if not hasattr(sys, 'pypy_version_info'):
libraries=libraries, libraries=libraries,
include_dirs=['.'], include_dirs=['.'],
define_macros=macros, define_macros=macros,
extra_compile_args=['-O0'],
)) ))
del libraries, macros del libraries, macros

64
test/test_extension.py Normal file
View file

@ -0,0 +1,64 @@
import py
import array
import struct
import msgpack
def test_pack_extended_type():
    """Check the exact bytes emitted by pack_extended_type for each ext format.

    Byte-string literals are used throughout: they are identical to the
    native str literals on Python 2 and are the correct binary type on
    Python 3.
    """
    def p(s):
        packer = msgpack.Packer()
        packer.pack_extended_type(0x42, s)
        return packer.bytes()
    assert p(b'A') == b'\xd4\x42A'                  # fixext 1
    assert p(b'AB') == b'\xd5\x42AB'                # fixext 2
    assert p(b'ABCD') == b'\xd6\x42ABCD'            # fixext 4
    assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH'    # fixext 8
    assert p(b'A'*16) == b'\xd8\x42' + b'A'*16      # fixext 16
    assert p(b'ABC') == b'\xc7\x03\x42ABC'          # ext 8
    assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123  # ext 16
    assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345  # ext 32
def test_unpack_extended_type():
    """Check that every ext format is decoded into (typecode, payload).

    Inputs and expected payloads are byte-string literals so the test feeds
    and compares binary data on both Python 2 and Python 3.
    """
    class MyUnpacker(msgpack.Unpacker):
        def read_extended_type(self, typecode, data):
            # Return the raw pieces so the assertions can inspect them.
            return (typecode, data)

    def u(s):
        unpacker = MyUnpacker()
        unpacker.feed(s)
        return unpacker.unpack_one()

    assert u(b'\xd4\x42A') == (0x42, b'A')                # fixext 1
    assert u(b'\xd5\x42AB') == (0x42, b'AB')              # fixext 2
    assert u(b'\xd6\x42ABCD') == (0x42, b'ABCD')          # fixext 4
    assert u(b'\xd7\x42ABCDEFGH') == (0x42, b'ABCDEFGH')  # fixext 8
    assert u(b'\xd8\x42' + b'A'*16) == (0x42, b'A'*16)    # fixext 16
    assert u(b'\xc7\x03\x42ABC') == (0x42, b'ABC')        # ext 8
    assert (u(b'\xc8\x01\x23\x42' + b'A'*0x0123) ==
            (0x42, b'A'*0x0123))                          # ext 16
    assert (u(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345) ==
            (0x42, b'A'*0x00012345))                      # ext 32
def test_extension_type():
    """Round-trip an array.array through Packer/Unpacker subclasses via ext typecode 123."""
    class MyPacker(msgpack.Packer):
        def handle_unknown_type(self, obj):
            # A falsy result tells the packer this object was not packed here.
            if not isinstance(obj, array.array):
                return None
            # 123 is an application specific typecode
            self.pack_extended_type(123, obj.tostring())
            return True

    class MyUnpacker(msgpack.Unpacker):
        def read_extended_type(self, typecode, data):
            assert typecode == 123
            decoded = array.array('d')
            decoded.fromstring(data)
            return decoded

    original = [42, 'hello', array.array('d', [1.1, 2.2, 3.3])]
    packed = MyPacker().pack(original)
    unpacker = MyUnpacker(None)
    unpacker.feed(packed)
    assert original == unpacker.unpack_one()

View file

@ -1,9 +1,10 @@
#!/usr/bin/env python #!/usr/bin/env python
# coding: utf-8 # coding: utf-8
import py
import six import six
from msgpack import Unpacker, BufferFull from msgpack import Unpacker, BufferFull
from msgpack.exceptions import OutOfData from msgpack.exceptions import OutOfData, ExtraData, UnpackValueError
from pytest import raises from pytest import raises
@ -85,3 +86,15 @@ def test_readbytes():
assert unpacker.unpack() == ord(b'a') assert unpacker.unpack() == ord(b'a')
assert unpacker.unpack() == ord(b'r') assert unpacker.unpack() == ord(b'r')
def test_unpack_one():
    """unpack_one returns a lone object and rejects trailing or missing bytes."""
    # Exactly one complete raw-16 value: returned as-is.
    unpacker = Unpacker()
    unpacker.feed(b'\xda\x00\x03abc')
    assert unpacker.unpack_one() == b'abc'
    # One complete value followed by a stray byte: ExtraData.
    unpacker = Unpacker()
    unpacker.feed(b'\xda\x00\x03abcd')
    raises(ExtraData, unpacker.unpack_one)
    # Header announces 3 bytes but only 2 are present: UnpackValueError.
    unpacker = Unpacker()
    unpacker.feed(b'\xda\x00\x03ab')
    raises(UnpackValueError, unpacker.unpack_one)