Support object_pairs_hook

Merge remote-tracking branch 'jnothman/object_pairs_hook' into 0.2-maint
Conflicts:
	msgpack/_msgpack.pyx
	test/test_pack.py
	test/test_sequnpack.py
This commit is contained in:
INADA Naoki 2012-09-24 03:05:39 +09:00
commit e381032641
7 changed files with 143 additions and 90 deletions

View file

@ -1,3 +1,12 @@
0.3.0
=====
:release date: in development
Changes
-------
* Add ``.skip()`` method to ``Unpacker`` (thanks to jnothman)
0.2.3 0.2.3
======= =======
:release date: in development :release date: in development

View file

@ -201,6 +201,7 @@ cdef extern from "unpack.h":
ctypedef struct msgpack_user: ctypedef struct msgpack_user:
bint use_list bint use_list
PyObject* object_hook PyObject* object_hook
bint has_pairs_hook # call object_hook with k-v pairs
PyObject* list_hook PyObject* list_hook
char *encoding char *encoding
char *unicode_errors char *unicode_errors
@ -213,13 +214,54 @@ cdef extern from "unpack.h":
PyObject* key PyObject* key
int template_execute(template_context* ctx, const_char_ptr data, int template_execute(template_context* ctx, const_char_ptr data,
size_t len, size_t* off) except -1 size_t len, size_t* off, bint construct) except -1
void template_init(template_context* ctx) void template_init(template_context* ctx)
object template_data(template_context* ctx) object template_data(template_context* ctx)
# Byte strings whose internal buffers have been handed to a C-level
# ``template_context`` as ``char*``.  The context only borrows the pointer, so
# the owning bytes object must outlive every context that uses it.  Keeping one
# canonical copy per distinct name here guarantees that; the set of codec and
# error-handler names used by a program is expected to be tiny.
cdef dict _ctx_cstrings = {}


# Convert `name` (bytes, or unicode containing ASCII only) into a ``char*``
# that stays valid after the caller returns.
# Raises TypeError for any other type and UnicodeEncodeError for non-ASCII text.
cdef inline char* _ctx_cstring(object name) except NULL:
    if isinstance(name, unicode):
        name = name.encode('ascii')
    if not isinstance(name, bytes):
        raise TypeError("expected bytes or unicode, got %r" % (type(name),))
    # Reuse the copy already stored (if any) so the returned pointer refers to
    # an object owned by the module-level dict, not by a local variable.
    kept = _ctx_cstrings.setdefault(name, name)
    return PyBytes_AsString(kept)


# Reset `ctx` and fill in its user options.
#
#   object_hook / object_pairs_hook -- optional callables applied to each
#       decoded map; at most one of them may be given (ValueError otherwise).
#       With object_pairs_hook the `has_pairs_hook` flag is set and the
#       callable is stored in the same `object_hook` slot.
#   list_hook -- optional callable applied to each decoded array.
#   use_list -- copied verbatim into ctx.user.use_list.
#   encoding / unicode_errors -- when `encoding` is None both C strings are
#       NULL; otherwise both are converted with _ctx_cstring().
#
# Raises TypeError if a hook is given but is not callable.
#
# NOTE(review): the hooks are stored as borrowed ``PyObject*`` without taking a
# reference, so the caller presumably has to keep them alive for as long as
# `ctx` is in use -- confirm for long-lived Unpacker instances.
cdef inline init_ctx(template_context *ctx, object object_hook, object object_pairs_hook, object list_hook, bint use_list, encoding, unicode_errors):
    template_init(ctx)
    ctx.user.use_list = use_list
    ctx.user.object_hook = ctx.user.list_hook = <PyObject*>NULL
    ctx.user.has_pairs_hook = False
    ctx.user.encoding = NULL
    ctx.user.unicode_errors = NULL

    if object_hook is not None and object_pairs_hook is not None:
        raise ValueError("object_pairs_hook and object_hook are mutually exclusive.")

    if object_hook is not None:
        if not PyCallable_Check(object_hook):
            raise TypeError("object_hook must be a callable.")
        ctx.user.object_hook = <PyObject*>object_hook
    elif object_pairs_hook is not None:
        if not PyCallable_Check(object_pairs_hook):
            raise TypeError("object_pairs_hook must be a callable.")
        ctx.user.object_hook = <PyObject*>object_pairs_hook
        ctx.user.has_pairs_hook = True

    if list_hook is not None:
        if not PyCallable_Check(list_hook):
            raise TypeError("list_hook must be a callable.")
        ctx.user.list_hook = <PyObject*>list_hook

    if encoding is not None:
        # The original kept the encoded bytes in function locals, which are
        # released on return and would leave these pointers dangling.
        ctx.user.encoding = _ctx_cstring(encoding)
        ctx.user.unicode_errors = _ctx_cstring(unicode_errors)
def unpackb(object packed, object object_hook=None, object list_hook=None, def unpackb(object packed, object object_hook=None, object list_hook=None,
use_list=None, encoding=None, unicode_errors="strict", use_list=None, encoding=None, unicode_errors="strict",
object_pairs_hook=None,
): ):
"""Unpack packed_bytes to object. Returns an unpacked object. """Unpack packed_bytes to object. Returns an unpacked object.
@ -234,39 +276,11 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
PyObject_AsReadBuffer(packed, <const_void_ptr*>&buf, &buf_len) PyObject_AsReadBuffer(packed, <const_void_ptr*>&buf, &buf_len)
if encoding is None:
enc = NULL
err = NULL
else:
if isinstance(encoding, unicode):
bencoding = encoding.encode('ascii')
else:
bencoding = encoding
if isinstance(unicode_errors, unicode):
berrors = unicode_errors.encode('ascii')
else:
berrors = unicode_errors
enc = PyBytes_AsString(bencoding)
err = PyBytes_AsString(berrors)
template_init(&ctx)
if use_list is None: if use_list is None:
warnings.warn("Set use_list explicitly.", category=DeprecationWarning, stacklevel=1) warnings.warn("Set use_list explicitly.", category=DeprecationWarning, stacklevel=1)
ctx.user.use_list = 0 use_list = 0
else: init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, use_list, encoding, unicode_errors)
ctx.user.use_list = use_list ret = template_execute(&ctx, buf, buf_len, &off, 1)
ctx.user.object_hook = ctx.user.list_hook = NULL
ctx.user.encoding = <const_char_ptr>enc
ctx.user.unicode_errors = <const_char_ptr>err
if object_hook is not None:
if not PyCallable_Check(object_hook):
raise TypeError("object_hook must be a callable.")
ctx.user.object_hook = <PyObject*>object_hook
if list_hook is not None:
if not PyCallable_Check(list_hook):
raise TypeError("list_hook must be a callable.")
ctx.user.list_hook = <PyObject*>list_hook
ret = template_execute(&ctx, buf, buf_len, &off)
if ret == 1: if ret == 1:
obj = template_data(&ctx) obj = template_data(&ctx)
if off < buf_len: if off < buf_len:
@ -278,6 +292,7 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
def unpack(object stream, object object_hook=None, object list_hook=None, def unpack(object stream, object object_hook=None, object list_hook=None,
use_list=None, encoding=None, unicode_errors="strict", use_list=None, encoding=None, unicode_errors="strict",
object_pairs_hook=None,
): ):
"""Unpack an object from `stream`. """Unpack an object from `stream`.
@ -287,7 +302,7 @@ def unpack(object stream, object object_hook=None, object list_hook=None,
warnings.warn("Set use_list explicitly.", category=DeprecationWarning, stacklevel=1) warnings.warn("Set use_list explicitly.", category=DeprecationWarning, stacklevel=1)
use_list = 0 use_list = 0
return unpackb(stream.read(), use_list=use_list, return unpackb(stream.read(), use_list=use_list,
object_hook=object_hook, list_hook=list_hook, object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook,
encoding=encoding, unicode_errors=unicode_errors, encoding=encoding, unicode_errors=unicode_errors,
) )
@ -307,7 +322,10 @@ cdef class Unpacker(object):
Otherwise, it is deserialized to Python tuple. Otherwise, it is deserialized to Python tuple.
`object_hook` is same to simplejson. If it is not None, it should be callable `object_hook` is same to simplejson. If it is not None, it should be callable
and Unpacker calls it when deserializing key-value. and Unpacker calls it with a dict argument after deserializing a map.
`object_pairs_hook` works the same way as in simplejson. If it is not None, it must be callable,
and Unpacker calls it with a list of (key, value) pairs after deserializing a map. It cannot be combined with `object_hook`.
`encoding` is encoding used for decoding msgpack bytes. If it is None (default), `encoding` is encoding used for decoding msgpack bytes. If it is None (default),
msgpack bytes is deserialized to Python bytes. msgpack bytes is deserialized to Python bytes.
@ -357,9 +375,8 @@ cdef class Unpacker(object):
self.buf = NULL self.buf = NULL
def __init__(self, file_like=None, Py_ssize_t read_size=0, use_list=None, def __init__(self, file_like=None, Py_ssize_t read_size=0, use_list=None,
object object_hook=None, object list_hook=None, object object_hook=None, object object_pairs_hook=None, object list_hook=None,
encoding=None, unicode_errors='strict', int max_buffer_size=0, encoding=None, unicode_errors='strict', int max_buffer_size=0,
object object_pairs_hook=None,
): ):
if use_list is None: if use_list is None:
warnings.warn("Set use_list explicitly.", category=DeprecationWarning, stacklevel=1) warnings.warn("Set use_list explicitly.", category=DeprecationWarning, stacklevel=1)
@ -384,31 +401,7 @@ cdef class Unpacker(object):
self.buf_size = read_size self.buf_size = read_size
self.buf_head = 0 self.buf_head = 0
self.buf_tail = 0 self.buf_tail = 0
template_init(&self.ctx) init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, use_list, encoding, unicode_errors)
self.ctx.user.use_list = use_list
self.ctx.user.object_hook = self.ctx.user.list_hook = <PyObject*>NULL
if object_hook is not None:
if not PyCallable_Check(object_hook):
raise TypeError("object_hook must be a callable.")
self.ctx.user.object_hook = <PyObject*>object_hook
if list_hook is not None:
if not PyCallable_Check(list_hook):
raise TypeError("list_hook must be a callable.")
self.ctx.user.list_hook = <PyObject*>list_hook
if encoding is None:
self.ctx.user.encoding = NULL
self.ctx.user.unicode_errors = NULL
else:
if isinstance(encoding, unicode):
self._bencoding = encoding.encode('ascii')
else:
self._bencoding = encoding
self.ctx.user.encoding = PyBytes_AsString(self._bencoding)
if isinstance(unicode_errors, unicode):
self._berrors = unicode_errors.encode('ascii')
else:
self._berrors = unicode_errors
self.ctx.user.unicode_errors = PyBytes_AsString(self._berrors)
def feed(self, object next_bytes): def feed(self, object next_bytes):
cdef char* buf cdef char* buf
@ -469,15 +462,18 @@ cdef class Unpacker(object):
else: else:
self.file_like = None self.file_like = None
cpdef unpack(self): cdef object _unpack(self, bint construct):
"""unpack one object"""
cdef int ret cdef int ret
cdef object obj
while 1: while 1:
ret = template_execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) ret = template_execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head, construct)
if ret == 1: if ret == 1:
o = template_data(&self.ctx) if construct:
obj = template_data(&self.ctx)
else:
obj = None
template_init(&self.ctx) template_init(&self.ctx)
return o return obj
elif ret == 0: elif ret == 0:
if self.file_like is not None: if self.file_like is not None:
self.read_from_file() self.read_from_file()
@ -486,11 +482,19 @@ cdef class Unpacker(object):
else: else:
raise ValueError("Unpack failed: error = %d" % (ret,)) raise ValueError("Unpack failed: error = %d" % (ret,))
def unpack(self):
    """Unpack and return the next object."""
    # construct=True: build the Python object for the decoded message.
    return self._unpack(True)
def skip(self):
    """Consume the next object without building it; the result is None."""
    # construct=False: walk over the message but skip the object callbacks.
    return self._unpack(False)
def __iter__(self): def __iter__(self):
return self return self
def __next__(self): def __next__(self):
return self.unpack() return self._unpack(1)
# for debug. # for debug.
#def _buf(self): #def _buf(self):

View file

@ -22,6 +22,7 @@
typedef struct unpack_user { typedef struct unpack_user {
int use_list; int use_list;
PyObject *object_hook; PyObject *object_hook;
bool has_pairs_hook;
PyObject *list_hook; PyObject *list_hook;
const char *encoding; const char *encoding;
const char *unicode_errors; const char *unicode_errors;
@ -160,9 +161,7 @@ static inline int template_callback_array_item(unpack_user* u, unsigned int curr
static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_object* c) static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_object* c)
{ {
if (u->list_hook) { if (u->list_hook) {
PyObject *arglist = Py_BuildValue("(O)", *c); PyObject *new_c = PyEval_CallFunction(u->list_hook, "(O)", *c);
PyObject *new_c = PyEval_CallObject(u->list_hook, arglist);
Py_DECREF(arglist);
Py_DECREF(*c); Py_DECREF(*c);
*c = new_c; *c = new_c;
} }
@ -171,16 +170,31 @@ static inline int template_callback_array_end(unpack_user* u, msgpack_unpack_obj
static inline int template_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o) static inline int template_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o)
{ {
PyObject *p = PyDict_New(); PyObject *p;
if (u->has_pairs_hook) {
p = PyList_New(n); // Or use tuple?
}
else {
p = PyDict_New();
}
if (!p) if (!p)
return -1; return -1;
*o = p; *o = p;
return 0; return 0;
} }
static inline int template_callback_map_item(unpack_user* u, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) static inline int template_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v)
{ {
if (PyDict_SetItem(*c, k, v) == 0) { if (u->has_pairs_hook) {
msgpack_unpack_object item = PyTuple_Pack(2, k, v);
if (!item)
return -1;
Py_DECREF(k);
Py_DECREF(v);
PyList_SET_ITEM(*c, current, item);
return 0;
}
else if (PyDict_SetItem(*c, k, v) == 0) {
Py_DECREF(k); Py_DECREF(k);
Py_DECREF(v); Py_DECREF(v);
return 0; return 0;
@ -191,9 +205,7 @@ static inline int template_callback_map_item(unpack_user* u, msgpack_unpack_obje
static inline int template_callback_map_end(unpack_user* u, msgpack_unpack_object* c) static inline int template_callback_map_end(unpack_user* u, msgpack_unpack_object* c)
{ {
if (u->object_hook) { if (u->object_hook) {
PyObject *arglist = Py_BuildValue("(O)", *c); PyObject *new_c = PyEval_CallFunction(u->object_hook, "(O)", *c);
PyObject *new_c = PyEval_CallObject(u->object_hook, arglist);
Py_DECREF(arglist);
Py_DECREF(*c); Py_DECREF(*c);
*c = new_c; *c = new_c;
} }

View file

@ -95,7 +95,7 @@ msgpack_unpack_func(msgpack_unpack_object, _data)(msgpack_unpack_struct(_context
} }
msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off) msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const char* data, size_t len, size_t* off, int construct)
{ {
assert(len >= *off); assert(len >= *off);
@ -117,14 +117,17 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c
int ret; int ret;
/*
 * Guard a callback with the `construct` flag of the enclosing _execute().
 * Expands to `construct && <callback>`; the call site then appends the
 * argument list (and, inside conditions, `< 0`).  Because `<` binds tighter
 * than `&&`, a condition reads as `construct && (<callback>(...) < 0)`, so
 * with construct == 0 the callback is short-circuited and never invoked.
 */
#define construct_cb(name) \
construct && msgpack_unpack_callback(name)
#define push_simple_value(func) \ #define push_simple_value(func) \
if(msgpack_unpack_callback(func)(user, &obj) < 0) { goto _failed; } \ if(construct_cb(func)(user, &obj) < 0) { goto _failed; } \
goto _push goto _push
#define push_fixed_value(func, arg) \ #define push_fixed_value(func, arg) \
if(msgpack_unpack_callback(func)(user, arg, &obj) < 0) { goto _failed; } \ if(construct_cb(func)(user, arg, &obj) < 0) { goto _failed; } \
goto _push goto _push
#define push_variable_value(func, base, pos, len) \ #define push_variable_value(func, base, pos, len) \
if(msgpack_unpack_callback(func)(user, \ if(construct_cb(func)(user, \
(const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \ (const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \
goto _push goto _push
@ -140,9 +143,9 @@ msgpack_unpack_func(int, _execute)(msgpack_unpack_struct(_context)* ctx, const c
#define start_container(func, count_, ct_) \ #define start_container(func, count_, ct_) \
if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \
if(msgpack_unpack_callback(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \
if((count_) == 0) { obj = stack[top].obj; \ if((count_) == 0) { obj = stack[top].obj; \
msgpack_unpack_callback(func##_end)(user, &obj); \ construct_cb(func##_end)(user, &obj); \
goto _push; } \ goto _push; } \
stack[top].ct = ct_; \ stack[top].ct = ct_; \
stack[top].size = count_; \ stack[top].size = count_; \
@ -340,10 +343,10 @@ _push:
c = &stack[top-1]; c = &stack[top-1];
switch(c->ct) { switch(c->ct) {
case CT_ARRAY_ITEM: case CT_ARRAY_ITEM:
if(msgpack_unpack_callback(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; } if(construct_cb(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; }
if(++c->count == c->size) { if(++c->count == c->size) {
obj = c->obj; obj = c->obj;
msgpack_unpack_callback(_array_end)(user, &obj); construct_cb(_array_end)(user, &obj);
--top; --top;
/*printf("stack pop %d\n", top);*/ /*printf("stack pop %d\n", top);*/
goto _push; goto _push;
@ -354,10 +357,10 @@ _push:
c->ct = CT_MAP_VALUE; c->ct = CT_MAP_VALUE;
goto _header_again; goto _header_again;
case CT_MAP_VALUE: case CT_MAP_VALUE:
if(msgpack_unpack_callback(_map_item)(user, &c->obj, c->map_key, obj) < 0) { goto _failed; } if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; }
if(++c->count == c->size) { if(++c->count == c->size) {
obj = c->obj; obj = c->obj;
msgpack_unpack_callback(_map_end)(user, &obj); construct_cb(_map_end)(user, &obj);
--top; --top;
/*printf("stack pop %d\n", top);*/ /*printf("stack pop %d\n", top);*/
goto _push; goto _push;
@ -399,6 +402,7 @@ _end:
*off = p - (const unsigned char*)data; *off = p - (const unsigned char*)data;
return ret; return ret;
#undef construct_cb
} }

View file

@ -26,6 +26,16 @@ def test_decode_hook():
unpacked = unpackb(packed, object_hook=_decode_complex, use_list=1) unpacked = unpackb(packed, object_hook=_decode_complex, use_list=1)
eq_(unpacked[1], 1+2j) eq_(unpacked[1], 1+2j)
def test_decode_pairs_hook():
    """object_pairs_hook receives each map as an iterable of (key, value) pairs."""
    packed = packb([3, {1: 2, 3: 4}])
    prod_sum = 1 * 2 + 3 * 4
    # use_list is passed explicitly, as in test_decode_hook, so the call does
    # not go through the "Set use_list explicitly." deprecation path.
    unpacked = unpackb(
        packed,
        object_pairs_hook=lambda pairs: sum(k * v for k, v in pairs),
        use_list=1,
    )
    eq_(unpacked[1], prod_sum)
@raises(ValueError)
def test_only_one_obj_hook():
    """Passing both object_hook and object_pairs_hook is rejected."""
    # Feed a complete message (0xc0 is msgpack nil) rather than an empty
    # buffer, so a ValueError can only come from the hook validation and not
    # from a failure to decode the input.
    unpackb(b'\xc0', object_hook=lambda x: x, object_pairs_hook=lambda x: x, use_list=1)
@raises(ValueError) @raises(ValueError)
def test_bad_hook(): def test_bad_hook():
packed = packb([3, 1+2j], default=lambda o: o) packed = packb([3, 1+2j], default=lambda o: o)

View file

@ -110,10 +110,9 @@ def test_odict():
seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)] seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)]
od = odict(seq) od = odict(seq)
assert_equal(unpackb(packb(od), use_list=1), dict(seq)) assert_equal(unpackb(packb(od), use_list=1), dict(seq))
# After object_pairs_hook is implemented. def pair_hook(seq):
#def pair_hook(seq): return seq
# return seq assert_equal(unpackb(packb(od), object_pairs_hook=pair_hook), seq)
#assert_equal(unpackb(packb(od), object_pairs_hook=pair_hook), seq)
if __name__ == '__main__': if __name__ == '__main__':

View file

@ -28,6 +28,21 @@ def test_foobar():
k += 1 k += 1
assert k == len(b'foobar') assert k == len(b'foobar')
def test_foobar_skip():
    """skip() discards exactly one object between successive unpack() calls."""
    unpacker = Unpacker(read_size=3, use_list=1)
    unpacker.feed(b'foobar')
    # Read every other byte of b'foobar': unpack one object, skip the next.
    for expected in (b'f', b'o', b'a'):
        assert unpacker.unpack() == ord(expected)
        unpacker.skip()
    # All six objects are consumed now; one more unpack() must signal the end.
    try:
        unpacker.unpack()
    except StopIteration:
        pass
    else:
        assert 0, "should raise exception"
def test_maxbuffersize(): def test_maxbuffersize():
nose.tools.assert_raises(ValueError, Unpacker, read_size=5, max_buffer_size=3) nose.tools.assert_raises(ValueError, Unpacker, read_size=5, max_buffer_size=3)
unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1)