Merge branch 'master' of https://github.com/antocuni/msgpack-python into newspec

Conflicts:
	msgpack/fallback.py
	msgpack/unpack.h
	msgpack/unpack_define.h
	msgpack/unpack_template.h
This commit is contained in:
INADA Naoki 2013-10-20 15:08:31 +09:00
commit 27f0cba8a5
11 changed files with 261 additions and 41 deletions

View file

@ -140,6 +140,14 @@ It is also possible to pack/unpack custom data types. Here is an example for
``object_pairs_hook`` callback may instead be used to receive a list of
key-value pairs.
Extended types
^^^^^^^^^^^^^^^
It is also possible to pack/unpack custom data types using the msgpack feature
of "extended types". For example, msgpack-pypy uses it to provide very fast serialization of int/float lists on top of PyPy (experimental for now):
https://bitbucket.org/antocuni/msgpack-pypy/src/default/msgpack_pypy.py
Advanced unpacking control
^^^^^^^^^^^^^^^^^^^^^^^^^^

View file

@ -5,6 +5,7 @@ from cpython cimport *
from libc.stdlib cimport *
from libc.string cimport *
from libc.limits cimport *
from libc.stdint cimport int8_t
from msgpack.exceptions import PackValueError
@ -29,6 +30,7 @@ cdef extern from "pack.h":
int msgpack_pack_raw(msgpack_packer* pk, size_t l)
int msgpack_pack_bin(msgpack_packer* pk, size_t l)
int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l)
int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l)
cdef int DEFAULT_RECURSE_LIMIT=511
@ -183,6 +185,9 @@ cdef class Packer(object):
for v in o:
ret = self._pack(v, nest_limit-1)
if ret != 0: break
elif self.handle_unknown_type(o):
# it means that obj was successfully packed, so we are done
return 0
elif self._default:
o = self._default(o)
ret = self._pack(o, nest_limit-1)
@ -202,6 +207,13 @@ cdef class Packer(object):
self.pk.length = 0
return buf
def handle_unknown_type(self, obj):
    """Hook for objects whose type the packer has no built-in support for.

    The packer treats a true return value as "``obj`` has already been
    packed" and stops there; a false value lets it continue with its other
    fallbacks. This base implementation handles nothing, so it returns
    ``None``. Subclasses may override it.
    """
    return None
def pack_extended_type(self, typecode, data):
    """Pack ``data`` as a msgpack extended-type value tagged with ``typecode``.

    Writes the ext header for ``len(data)`` payload bytes, then the payload
    itself, into this packer's buffer.

    :param typecode: application-specific type code (passed to C as ``int8_t``).
    :param data: the payload bytes.
    :raises MemoryError: if either write reports the ``-1`` failure status.
    """
    cdef size_t size = len(data)
    cdef int ret = msgpack_pack_ext(&self.pk, typecode, size)
    if ret == 0:
        # Only append the payload once the header went in, so a failed
        # header write cannot leave a body without its header.
        ret = msgpack_pack_raw_body(&self.pk, data, size)
    if ret == -1:
        # NOTE(review): -1 is the failure status pack_array_header tests
        # for; assumed here to mean the buffer could not grow -- confirm.
        raise MemoryError
def pack_array_header(self, size_t size):
cdef int ret = msgpack_pack_array(&self.pk, size)
if ret == -1:

View file

@ -24,6 +24,7 @@ cdef extern from "unpack.h":
PyObject* object_hook
bint has_pairs_hook # call object_hook with k-v pairs
PyObject* list_hook
PyObject* ext_type_hook
char *encoding
char *unicode_errors
@ -45,6 +46,7 @@ cdef extern from "unpack.h":
cdef inline init_ctx(unpack_context *ctx,
object object_hook, object object_pairs_hook, object list_hook,
object ext_type_hook,
bint use_list, char* encoding, char* unicode_errors):
unpack_init(ctx)
ctx.user.use_list = use_list
@ -71,9 +73,17 @@ cdef inline init_ctx(unpack_context *ctx,
raise TypeError("list_hook must be a callable.")
ctx.user.list_hook = <PyObject*>list_hook
if ext_type_hook is not None:
if not PyCallable_Check(ext_type_hook):
raise TypeError("ext_type_hook must be a callable.")
ctx.user.ext_type_hook = <PyObject*>ext_type_hook
ctx.user.encoding = encoding
ctx.user.unicode_errors = unicode_errors
def default_read_extended_type(typecode, data):
    """Default handler for extended types: always refuse to decode.

    :param typecode: integer type code of the extended value.
    :param data: payload of the extended value (unused here).
    :raises NotImplementedError: always, naming the offending typecode.
    """
    message = "Cannot decode extended type with typecode=%d" % typecode
    raise NotImplementedError(message)
def unpackb(object packed, object object_hook=None, object list_hook=None,
bint use_list=1, encoding=None, unicode_errors="strict",
object_pairs_hook=None,
@ -106,7 +116,8 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
unicode_errors = unicode_errors.encode('ascii')
cerr = PyBytes_AsString(unicode_errors)
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, default_read_extended_type,
use_list, cenc, cerr)
ret = unpack_construct(&ctx, buf, buf_len, &off)
if ret == 1:
obj = unpack_data(&ctx)
@ -248,7 +259,10 @@ cdef class Unpacker(object):
self.unicode_errors = unicode_errors
cerr = PyBytes_AsString(self.unicode_errors)
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, cenc, cerr)
ext_type_hook = self.read_extended_type
Py_INCREF(ext_type_hook)
init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook,
ext_type_hook, use_list, cenc, cerr)
def feed(self, object next_bytes):
"""Append `next_bytes` to internal buffer."""
@ -358,6 +372,24 @@ cdef class Unpacker(object):
"""
return self._unpack(unpack_construct, write_bytes)
def unpack_one(self, object write_bytes=None):
    """
    Unpack exactly one object from the buffered data.

    If write_bytes is not None, it will be called with parts of the raw
    message as it is unpacked.

    Raises `UnpackValueError` if there are no more bytes to unpack.
    Raises ``ExtraData`` if there are still bytes left after the unpacking.
    """
    try:
        # Forward write_bytes so the callback documented above is honoured
        # instead of being silently dropped.
        result = self.unpack(write_bytes)
    except OutOfData:
        raise UnpackValueError("Data is not enough")
    if self.buf_head < self.buf_tail:
        # The unread data is what lies between buf_head and buf_tail, so
        # bound the slice explicitly rather than leaving it open-ended.
        raise ExtraData(result, self.buf[self.buf_head:self.buf_tail])
    return result
def skip(self, object write_bytes=None):
"""
read and ignore one object, returning None
@ -385,6 +417,9 @@ cdef class Unpacker(object):
"""
return self._unpack(read_map_header, write_bytes)
def read_extended_type(self, typecode, data):
    """Decode one extended-type value; meant to be overridden by subclasses.

    This base implementation simply delegates to
    ``default_read_extended_type`` and returns (or propagates) whatever
    that function does.

    :param typecode: integer type code of the extended value.
    :param data: payload of the extended value.
    """
    return default_read_extended_type(typecode, data)
def __iter__(self):
return self

View file

@ -71,6 +71,8 @@ static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l);
static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l);
static inline int msgpack_pack_ext(msgpack_packer* pk, int8_t typecode, size_t l);
static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l)
{
char* buf = pk->buf;

View file

@ -708,6 +708,66 @@ static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t
msgpack_pack_append_buffer(x, (const unsigned char*)b, l);
}
/*
* Ext
*/
/*
 * Append the header of an ext value whose payload is `l` bytes long.
 * Only the header is produced here; the payload bytes are appended by the
 * caller afterwards.
 *
 * Header layouts (type = the typecode byte):
 *   l == 1, 2, 4, 8, 16 : 0xd4 .. 0xd8, type            (fixext)
 *   l < 256             : 0xc7, 1-byte length, type      (ext 8)
 *   l < 65536           : 0xc8, 2-byte length, type      (ext 16)
 *   otherwise           : 0xc9, 4-byte length, type      (ext 32)
 */
static inline int msgpack_pack_ext(msgpack_packer* x, int8_t typecode, size_t l)
{
    unsigned char head[6];
    size_t head_len;
    unsigned char fixext = 0;   /* stays 0 when `l` has no fixext form */

    switch (l) {
    case 1:  fixext = 0xd4; break;
    case 2:  fixext = 0xd5; break;
    case 4:  fixext = 0xd6; break;
    case 8:  fixext = 0xd7; break;
    case 16: fixext = 0xd8; break;
    default: break;
    }

    if (fixext != 0) {
        /* fixext: the length is implied by the marker byte */
        head[0] = fixext;
        head[1] = (unsigned char)typecode;
        head_len = 2;
    } else if (l < 256) {
        head[0] = 0xc7;
        head[1] = (unsigned char)l;
        head[2] = (unsigned char)typecode;
        head_len = 3;
    } else if (l < 65536) {
        head[0] = 0xc8;
        _msgpack_store16(&head[1], (uint16_t)l);
        head[3] = (unsigned char)typecode;
        head_len = 4;
    } else {
        head[0] = 0xc9;
        _msgpack_store32(&head[1], (uint32_t)l);
        head[5] = (unsigned char)typecode;
        head_len = 6;
    }

    msgpack_pack_append_buffer(x, head, head_len);
}
#undef msgpack_pack_append_buffer
#undef TAKE8_8

View file

@ -24,6 +24,7 @@ typedef struct unpack_user {
PyObject *object_hook;
bool has_pairs_hook;
PyObject *list_hook;
PyObject *ext_type_hook;
const char *encoding;
const char *unicode_errors;
} unpack_user;
@ -235,4 +236,21 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char*
return 0;
}
/*
 * Build the Python object for an ext value by calling the user's hook.
 *
 * `pos` points at the typecode byte, which is immediately followed by the
 * payload; `length` counts the typecode byte plus the payload. The hook is
 * called as hook(typecode, payload) and its result is stored in *o.
 *
 * Returns 0 on success, or -1 with a Python exception set.
 */
static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos,
                                      unsigned int length, msgpack_unpack_object* o)
{
    PyObject *payload;
    PyObject *py;
    int typecode;

    if (!u->ext_type_hook) {
        PyErr_SetString(PyExc_AssertionError, "u->ext_type_hook cannot be NULL");
        return -1;
    }
    /* NOTE(review): a zero length looks reachable when a 32-bit ext length
     * wraps after 1 is added for the typecode -- confirm in the template.
     * Without this guard, length-1 would underflow. */
    if (length == 0) {
        PyErr_SetString(PyExc_ValueError, "ext value has no typecode byte");
        return -1;
    }
    typecode = (int8_t)*pos++;

    /* length also includes the typecode, so the actual data is length-1.
     * Build the payload as a bytes object explicitly: the "s#" format unit
     * would produce a decoded text string on Python 3, which is wrong for
     * binary data, and its length argument type depends on PY_SSIZE_T_CLEAN. */
    payload = PyBytes_FromStringAndSize(pos, (Py_ssize_t)length - 1);
    if (!payload)
        return -1;
    py = PyObject_CallFunction(u->ext_type_hook, "(iO)", typecode, payload);
    Py_DECREF(payload);  /* the argument tuple held its own reference */
    if (!py)
        return -1;
    *o = py;
    return 0;
}
#include "unpack_template.h"

View file

@ -45,10 +45,11 @@ typedef enum {
CS_BIN_8 = 0x04,
CS_BIN_16 = 0x05,
CS_BIN_32 = 0x06,
//CS_ = 0x07,
//CS_ = 0x08,
//CS_ = 0x09,
CS_EXT_8 = 0x07,
CS_EXT_16 = 0x08,
CS_EXT_32 = 0x09,
CS_FLOAT = 0x0a,
CS_DOUBLE = 0x0b,
CS_UINT_8 = 0x0c,
@ -60,6 +61,12 @@ typedef enum {
CS_INT_32 = 0x12,
CS_INT_64 = 0x13,
//CS_FIXEXT1 = 0x14,
//CS_FIXEXT2 = 0x15,
//CS_FIXEXT4 = 0x16,
//CS_FIXEXT8 = 0x17,
//CS_FIXEXT16 = 0x18,
CS_RAW_8 = 0x19,
CS_RAW_16 = 0x1a,
CS_RAW_32 = 0x1b,
@ -70,6 +77,7 @@ typedef enum {
ACS_RAW_VALUE,
ACS_BIN_VALUE,
ACS_EXT_VALUE,
} msgpack_unpack_state;

View file

@ -184,9 +184,15 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
push_simple_value(_false);
case 0xc3: // true
push_simple_value(_true);
//case 0xc7:
//case 0xc8:
//case 0xc9:
//case 0xc4:
//case 0xc5:
//case 0xc6:
case 0xc7: // ext 8
again_fixed_trail(NEXT_CS(p), 1);
case 0xc8: // ext 16
again_fixed_trail(NEXT_CS(p), 2);
case 0xc9: // ext 32
again_fixed_trail(NEXT_CS(p), 4);
case 0xca: // float
case 0xcb: // double
case 0xcc: // unsigned int 8
@ -198,15 +204,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case 0xd2: // signed int 32
case 0xd3: // signed int 64
again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03));
case 0xc4: // bin 8
case 0xc5: // bin 16
case 0xc6: // bin 32
//case 0xd4:
//case 0xd5:
//case 0xd6: // big integer 16
//case 0xd7: // big integer 32
//case 0xd8: // big float 16
case 0xd9: // raw 8
case 0xd4: // fixext 1
case 0xd5: // fixext 2
case 0xd6: // fixext 4
case 0xd7: // fixext 8
again_fixed_trail_if_zero(ACS_EXT_VALUE,
(1 << (((unsigned int)*p) & 0x03))+1,
_ext_zero);
case 0xd8: // fixext 16
again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero);
//case 0xd9:
case 0xda: // raw 16
case 0xdb: // raw 32
case 0xdc: // array 16
@ -237,8 +244,16 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
if((size_t)(pe - p) < trail) { goto _out; }
n = p; p += trail - 1;
switch(cs) {
//case CS_
//case CS_
case CS_EXT_8:
again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero);
case CS_EXT_16:
again_fixed_trail_if_zero(ACS_EXT_VALUE,
_msgpack_load16(uint16_t,n)+1,
_ext_zero);
case CS_EXT_32:
again_fixed_trail_if_zero(ACS_EXT_VALUE,
_msgpack_load32(uint32_t,n)+1,
_ext_zero);
case CS_FLOAT: {
union { uint32_t i; float f; } mem;
mem.i = _msgpack_load32(uint32_t,n);
@ -269,26 +284,6 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case CS_INT_64:
push_fixed_value(_int64, _msgpack_load64(int64_t,n));
//case CS_
//case CS_
//case CS_BIG_INT_16:
// again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load16(uint16_t,n), _big_int_zero);
//case CS_BIG_INT_32:
// again_fixed_trail_if_zero(ACS_BIG_INT_VALUE, _msgpack_load32(uint32_t,n), _big_int_zero);
//case ACS_BIG_INT_VALUE:
//_big_int_zero:
// // FIXME
// push_variable_value(_big_int, data, n, trail);
//case CS_BIG_FLOAT_16:
// again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load16(uint16_t,n), _big_float_zero);
//case CS_BIG_FLOAT_32:
// again_fixed_trail_if_zero(ACS_BIG_FLOAT_VALUE, _msgpack_load32(uint32_t,n), _big_float_zero);
//case ACS_BIG_FLOAT_VALUE:
//_big_float_zero:
// // FIXME
// push_variable_value(_big_float, data, n, trail);
case CS_BIN_8:
again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero);
case CS_BIN_16:
@ -309,6 +304,10 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
_raw_zero:
push_variable_value(_raw, data, n, trail);
case ACS_EXT_VALUE:
_ext_zero:
push_variable_value(_ext, data, n, trail);
case CS_ARRAY_16:
start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM);
case CS_ARRAY_32:
@ -320,7 +319,7 @@ static inline int unpack_execute(unpack_context* ctx, const char* data, size_t l
case CS_MAP_32:
/* FIXME security guard */
start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY);
default:
goto _failed;
}

View file

@ -92,6 +92,7 @@ if not hasattr(sys, 'pypy_version_info'):
libraries=libraries,
include_dirs=['.'],
define_macros=macros,
extra_compile_args=['-O0'],
))
del libraries, macros

64
test/test_extension.py Normal file
View file

@ -0,0 +1,64 @@
import py
import array
import struct
import msgpack
def test_pack_extended_type():
    """Check the ext header chosen by ``pack_extended_type`` for each size."""
    def p(s):
        # Pack `s` as an ext value with typecode 0x42 and return the bytes.
        packer = msgpack.Packer()
        packer.pack_extended_type(0x42, s)
        return packer.bytes()

    # Byte-string literals keep payloads and expectations binary on
    # Python 3 too; on Python 2 they are identical to plain str literals.
    assert p(b'A') == b'\xd4\x42A'  # fixext 1
    assert p(b'AB') == b'\xd5\x42AB'  # fixext 2
    assert p(b'ABCD') == b'\xd6\x42ABCD'  # fixext 4
    assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH'  # fixext 8
    assert p(b'A'*16) == b'\xd8\x42' + b'A'*16  # fixext 16
    assert p(b'ABC') == b'\xc7\x03\x42ABC'  # ext 8
    assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123  # ext 16
    assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345  # ext 32
def test_unpack_extended_type():
    """Check that every ext header form reaches ``read_extended_type``."""
    class MyUnpacker(msgpack.Unpacker):
        def read_extended_type(self, typecode, data):
            # Echo the arguments back so the assertions can inspect them.
            return (typecode, data)

    def u(s):
        # Feed `s` to a fresh unpacker and return the single decoded object.
        unpacker = MyUnpacker()
        unpacker.feed(s)
        return unpacker.unpack_one()

    # Byte-string literals keep inputs and expectations binary on
    # Python 3 too; on Python 2 they are identical to plain str literals.
    assert u(b'\xd4\x42A') == (0x42, b'A')  # fixext 1
    assert u(b'\xd5\x42AB') == (0x42, b'AB')  # fixext 2
    assert u(b'\xd6\x42ABCD') == (0x42, b'ABCD')  # fixext 4
    assert u(b'\xd7\x42ABCDEFGH') == (0x42, b'ABCDEFGH')  # fixext 8
    assert u(b'\xd8\x42' + b'A'*16) == (0x42, b'A'*16)  # fixext 16
    assert u(b'\xc7\x03\x42ABC') == (0x42, b'ABC')  # ext 8
    assert (u(b'\xc8\x01\x23\x42' + b'A'*0x0123) ==
            (0x42, b'A'*0x0123))  # ext 16
    assert (u(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345) ==
            (0x42, b'A'*0x00012345))  # ext 32
def test_extension_type():
    """Round-trip an ``array.array`` through an application ext type (123)."""
    class MyPacker(msgpack.Packer):
        def handle_unknown_type(self, obj):
            if isinstance(obj, array.array):
                typecode = 123  # application specific typecode
                # array.tostring() was removed in Python 3.9; tobytes() is
                # its replacement but does not exist on Python 2.
                if hasattr(obj, 'tobytes'):
                    data = obj.tobytes()
                else:
                    data = obj.tostring()
                self.pack_extended_type(typecode, data)
                return True

    class MyUnpacker(msgpack.Unpacker):
        def read_extended_type(self, typecode, data):
            assert typecode == 123
            obj = array.array('d')
            # Same compatibility split as on the packing side.
            if hasattr(obj, 'frombytes'):
                obj.frombytes(data)
            else:
                obj.fromstring(data)
            return obj

    # NOTE(review): b'hello' instead of 'hello' -- presumably the unpacker
    # returns raw data as bytes when no encoding is given; confirm.
    obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])]
    packer = MyPacker()
    unpacker = MyUnpacker(None)
    s = packer.pack(obj)
    unpacker.feed(s)
    obj2 = unpacker.unpack_one()
    assert obj == obj2

View file

@ -1,9 +1,10 @@
#!/usr/bin/env python
# coding: utf-8
import py
import six
from msgpack import Unpacker, BufferFull
from msgpack.exceptions import OutOfData
from msgpack.exceptions import OutOfData, ExtraData, UnpackValueError
from pytest import raises
@ -85,3 +86,15 @@ def test_readbytes():
assert unpacker.unpack() == ord(b'a')
assert unpacker.unpack() == ord(b'r')
def test_unpack_one():
    """``unpack_one`` returns a lone object and rejects extra or missing bytes."""
    # Exactly one complete object in the buffer.
    unpacker = Unpacker()
    unpacker.feed(b'\xda\x00\x03abc')
    assert unpacker.unpack_one() == b'abc'
    # One complete object followed by a stray byte.
    unpacker = Unpacker()
    unpacker.feed(b'\xda\x00\x03abcd')
    # Pass the callable itself: the string-evaluated form of raises() is
    # not available in current pytest releases.
    raises(ExtraData, unpacker.unpack_one)
    # Truncated object: header promises 3 bytes, only 2 are present.
    unpacker = Unpacker()
    unpacker.feed(b'\xda\x00\x03ab')
    raises(UnpackValueError, unpacker.unpack_one)