enable unpacking from memoryview

This commit is contained in:
folz 2016-05-07 15:18:20 +02:00
parent b887c1a4ad
commit 2b63e9fbbb
3 changed files with 106 additions and 32 deletions

View file

@ -8,16 +8,23 @@ from cpython.bytes cimport (
) )
from cpython.buffer cimport ( from cpython.buffer cimport (
Py_buffer, Py_buffer,
PyBuffer_Release, PyObject_CheckBuffer,
PyObject_GetBuffer, PyObject_GetBuffer,
PyBuffer_Release,
PyBuffer_IsContiguous,
PyBUF_READ,
PyBUF_SIMPLE, PyBUF_SIMPLE,
PyBUF_FULL_RO,
) )
from cpython.mem cimport PyMem_Malloc, PyMem_Free from cpython.mem cimport PyMem_Malloc, PyMem_Free
from cpython.object cimport PyCallable_Check from cpython.object cimport PyCallable_Check
from cpython.ref cimport Py_DECREF
from cpython.exc cimport PyErr_WarnEx
cdef extern from "Python.h": cdef extern from "Python.h":
ctypedef struct PyObject ctypedef struct PyObject
cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1
object PyMemoryView_GetContiguous(object obj, int buffertype, char order)
from libc.stdlib cimport * from libc.stdlib cimport *
from libc.string cimport * from libc.string cimport *
@ -110,6 +117,42 @@ cdef inline init_ctx(unpack_context *ctx,
def default_read_extended_type(typecode, data): def default_read_extended_type(typecode, data):
raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode)
# Expose the bytes of `obj` as one contiguous C buffer.
#
# Outputs (written through the pointer arguments):
#   view         -- filled in when the new buffer protocol is used; the caller
#                   must then call PyBuffer_Release(view) once it is done
#   buf          -- start of the readable data
#   buffer_len   -- number of readable bytes at `buf`
#   new_protocol -- 1 if `view` was acquired (and must be released), else 0
#
# Returns 1 on success. A return of 0 signals a raised exception (`except 0`).
# Raises BufferError when the exporter's item size is not 1 byte; errors from
# the underlying C-API calls propagate unchanged.
cdef inline int get_data_from_buffer(object obj,
                                     Py_buffer *view,
                                     char **buf,
                                     Py_ssize_t *buffer_len,
                                     int *new_protocol) except 0:
    cdef object contiguous
    if PyObject_CheckBuffer(obj):
        new_protocol[0] = 1
        # The cimported PyObject_GetBuffer is declared `except -1`, so a
        # failure already propagates as a Python exception; no manual
        # return-code check (and no bare `raise`) is needed.
        PyObject_GetBuffer(obj, view, PyBUF_FULL_RO)
        if view.itemsize != 1:
            PyBuffer_Release(view)
            raise BufferError("cannot unpack from multi-byte object")
        if PyBuffer_IsContiguous(view, 'A') == 0:
            PyBuffer_Release(view)
            # create a contiguous copy and get buffer
            contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, 'C')
            PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE)
            # Do NOT Py_DECREF(contiguous) here. `contiguous` is an owned
            # reference that Cython releases when this function returns;
            # the reference taken by PyObject_GetBuffer (view.obj) then keeps
            # the copy alive until the caller runs PyBuffer_Release(view).
            # An extra manual decref would free the copy on return while
            # `view` still points into it.
        buffer_len[0] = view.len
        buf[0] = <char*> view.buf
        return 1
    else:
        new_protocol[0] = 0
        # Declared `except -1` in this file's extern block, so errors
        # propagate on their own.
        PyObject_AsReadBuffer(obj, <const void**> buf, buffer_len)
        PyErr_WarnEx(RuntimeWarning,
                     "using old buffer interface to unpack %s; "
                     "this leads to unpacking errors if slicing is used and "
                     "will be removed in a future version" % type(obj),
                     1)
        return 1
def unpackb(object packed, object object_hook=None, object list_hook=None, def unpackb(object packed, object object_hook=None, object list_hook=None,
bint use_list=1, encoding=None, unicode_errors="strict", bint use_list=1, encoding=None, unicode_errors="strict",
object_pairs_hook=None, ext_hook=ExtType, object_pairs_hook=None, ext_hook=ExtType,
@ -129,13 +172,16 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
cdef Py_ssize_t off = 0 cdef Py_ssize_t off = 0
cdef int ret cdef int ret
cdef char* buf cdef Py_buffer view
cdef char* buf = NULL
cdef Py_ssize_t buf_len cdef Py_ssize_t buf_len
cdef char* cenc = NULL cdef char* cenc = NULL
cdef char* cerr = NULL cdef char* cerr = NULL
cdef int new_protocol = 0
PyObject_AsReadBuffer(packed, <const void**>&buf, &buf_len) get_data_from_buffer(packed, &view, &buf, &buf_len, &new_protocol)
try:
if encoding is not None: if encoding is not None:
if isinstance(encoding, unicode): if isinstance(encoding, unicode):
encoding = encoding.encode('ascii') encoding = encoding.encode('ascii')
@ -150,6 +196,10 @@ def unpackb(object packed, object object_hook=None, object list_hook=None,
use_list, cenc, cerr, use_list, cenc, cerr,
max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len)
ret = unpack_construct(&ctx, buf, buf_len, &off) ret = unpack_construct(&ctx, buf, buf_len, &off)
finally:
if new_protocol:
PyBuffer_Release(&view);
if ret == 1: if ret == 1:
obj = unpack_data(&ctx) obj = unpack_data(&ctx)
if off < buf_len: if off < buf_len:
@ -335,13 +385,19 @@ cdef class Unpacker(object):
def feed(self, object next_bytes): def feed(self, object next_bytes):
"""Append `next_bytes` to internal buffer.""" """Append `next_bytes` to internal buffer."""
cdef Py_buffer pybuff cdef Py_buffer pybuff
cdef int new_protocol = 0
cdef char* buf
cdef Py_ssize_t buf_len
if self.file_like is not None: if self.file_like is not None:
raise AssertionError( raise AssertionError(
"unpacker.feed() is not be able to use with `file_like`.") "unpacker.feed() is not be able to use with `file_like`.")
PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE)
get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len, &new_protocol)
try: try:
self.append_buffer(<char*>pybuff.buf, pybuff.len) self.append_buffer(buf, buf_len)
finally: finally:
if new_protocol:
PyBuffer_Release(&pybuff) PyBuffer_Release(&pybuff)
cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len):

View file

@ -1,8 +1,8 @@
"""Fallback pure Python implementation of msgpack""" """Fallback pure Python implementation of msgpack"""
import sys import sys
import array
import struct import struct
import warnings
if sys.version_info[0] == 3: if sys.version_info[0] == 3:
PY3 = True PY3 = True
@ -46,6 +46,7 @@ else:
from io import BytesIO as StringIO from io import BytesIO as StringIO
newlist_hint = lambda size: [] newlist_hint = lambda size: []
from msgpack.exceptions import ( from msgpack.exceptions import (
BufferFull, BufferFull,
OutOfData, OutOfData,
@ -79,6 +80,24 @@ def _check_type_strict(obj, t, type=type, tuple=tuple):
return type(obj) is t return type(obj) is t
def _get_data_from_buffer(obj):
    """Return a ``memoryview`` with one-byte items over *obj*.

    If ``memoryview(obj)`` raises ``TypeError`` and ``PY3`` is false, the
    object is wrapped with the legacy ``buffer()`` builtin instead and a
    ``RuntimeWarning`` is issued; when ``PY3`` is true the ``TypeError`` is
    re-raised.

    Raises ``ValueError`` if the resulting view's ``itemsize`` is not 1.
    """
    try:
        view = memoryview(obj)
    except TypeError:
        if PY3:
            # No legacy buffer protocol to fall back on: propagate as-is.
            raise
        view = memoryview(buffer(obj))
        message = ("using old buffer interface to unpack %s; "
                   "this leads to unpacking errors if slicing is used and "
                   "will be removed in a future version" % type(obj))
        warnings.warn(message, RuntimeWarning)
    if view.itemsize == 1:
        return view
    raise ValueError("cannot unpack from multi-byte object")
def unpack(stream, **kwargs): def unpack(stream, **kwargs):
""" """
Unpack an object from `stream`. Unpack an object from `stream`.
@ -239,17 +258,11 @@ class Unpacker(object):
raise TypeError("`ext_hook` is not callable") raise TypeError("`ext_hook` is not callable")
def feed(self, next_bytes): def feed(self, next_bytes):
if isinstance(next_bytes, array.array):
next_bytes = next_bytes.tostring()
if not isinstance(next_bytes, (bytes, bytearray)):
raise TypeError("next_bytes should be bytes, bytearray or array.array")
assert self._feeding assert self._feeding
view = _get_data_from_buffer(next_bytes)
if (len(self._buffer) - self._buff_i + len(next_bytes) > self._max_buffer_size): if (len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size):
raise BufferFull raise BufferFull
# bytes + bytearray -> bytearray self._buffer += view
# So cast before append
self._buffer += next_bytes
def _consume(self): def _consume(self):
""" Gets rid of the used parts of the buffer. """ """ Gets rid of the used parts of the buffer. """
@ -308,7 +321,6 @@ class Unpacker(object):
n = 0 n = 0
obj = None obj = None
self._reserve(1) self._reserve(1)
#b = struct.unpack_from("B", self._buffer, self._buff_i)[0]
b = self._buffer[self._buff_i] b = self._buffer[self._buff_i]
self._buff_i += 1 self._buff_i += 1
if b & 0b10000000 == 0: if b & 0b10000000 == 0:
@ -340,7 +352,6 @@ class Unpacker(object):
elif b == 0xc4: elif b == 0xc4:
typ = TYPE_BIN typ = TYPE_BIN
self._reserve(1) self._reserve(1)
#n = struct.unpack_from("B", self._buffer, self._buff_i)[0]
n = self._buffer[self._buff_i] n = self._buffer[self._buff_i]
self._buff_i += 1 self._buff_i += 1
if n > self._max_bin_len: if n > self._max_bin_len:
@ -396,7 +407,6 @@ class Unpacker(object):
self._buff_i += 8 self._buff_i += 8
elif b == 0xcc: elif b == 0xcc:
self._reserve(1) self._reserve(1)
#obj = struct.unpack_from("B", self._buffer, self._buff_i)[0]
obj = self._buffer[self._buff_i] obj = self._buffer[self._buff_i]
self._buff_i += 1 self._buff_i += 1
elif b == 0xcd: elif b == 0xcd:
@ -465,7 +475,6 @@ class Unpacker(object):
elif b == 0xd9: elif b == 0xd9:
typ = TYPE_RAW typ = TYPE_RAW
self._reserve(1) self._reserve(1)
#n, = struct.unpack_from("B", self._buffer, self._buff_i)
n = self._buffer[self._buff_i] n = self._buffer[self._buff_i]
self._buff_i += 1 self._buff_i += 1
if n > self._max_str_len: if n > self._max_str_len:

View file

@ -18,3 +18,12 @@ def test_unpack_bytearray():
assert [b'foo', b'bar'] == obj assert [b'foo', b'bar'] == obj
expected_type = bytes expected_type = bytes
assert all(type(s) == expected_type for s in obj) assert all(type(s) == expected_type for s in obj)
def test_unpack_memoryview():
    """unpackb() accepts a memoryview over a bytearray and returns bytes items."""
    packed = bytearray(packb(('foo', 'bar')))
    unpacked = unpackb(memoryview(packed), use_list=1)
    assert [b'foo', b'bar'] == unpacked
    # Exact type comparison on purpose: each item must be `bytes` itself.
    for item in unpacked:
        assert type(item) == bytes