From 0ec2e3534f9b7751be484bd2f1344e24c49bb24f Mon Sep 17 00:00:00 2001 From: folz Date: Thu, 28 Apr 2016 15:08:28 +0200 Subject: [PATCH 1/5] fix problems associated with packing memoryviews fix wrong length when packing multibyte memoryviews in fallback add tests for memoryviews of different types and sizes and check contents of packed data --- msgpack/_packer.pyx | 23 +++---- msgpack/fallback.py | 26 +++++++- test/test_memoryview.py | 133 ++++++++++++++++++++++++++++++++++++++-- 3 files changed, 164 insertions(+), 18 deletions(-) diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index b1a912b..e07b194 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -39,6 +39,7 @@ cdef extern from "pack.h": int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) cdef int DEFAULT_RECURSE_LIMIT=511 +cdef size_t ITEM_LIMIT = (2**32)-1 cdef class Packer(object): @@ -178,7 +179,7 @@ cdef class Packer(object): ret = msgpack_pack_double(&self.pk, dval) elif PyBytes_CheckExact(o) if strict_types else PyBytes_Check(o): L = len(o) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("bytes is too large") rawval = o ret = msgpack_pack_bin(&self.pk, L) @@ -189,7 +190,7 @@ cdef class Packer(object): raise TypeError("Can't encode unicode string: no encoding is specified") o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) L = len(o) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("unicode string is too large") rawval = o ret = msgpack_pack_raw(&self.pk, L) @@ -198,7 +199,7 @@ cdef class Packer(object): elif PyDict_CheckExact(o): d = o L = len(d) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: @@ -209,7 +210,7 @@ cdef class Packer(object): if ret != 0: break elif not strict_types and PyDict_Check(o): L = len(o) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("dict is too large") ret = msgpack_pack_map(&self.pk, L) if ret == 0: @@ -223,13 +224,13 @@ cdef class Packer(object): longval = o.code rawval = o.data L = len(o.data) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("EXT data is too large") ret = msgpack_pack_ext(&self.pk, longval, L) ret = msgpack_pack_raw_body(&self.pk, rawval, L) elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): L = len(o) - if L > (2**32)-1: + if L > ITEM_LIMIT: raise PackValueError("list is too large") ret = msgpack_pack_array(&self.pk, L) if ret == 0: @@ -240,7 +241,7 @@ cdef class Packer(object): if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: raise PackValueError("could not get buffer for memoryview") L = view.len - if L > (2**32)-1: + if L > ITEM_LIMIT: PyBuffer_Release(&view); raise PackValueError("memoryview is too large") ret = msgpack_pack_bin(&self.pk, L) @@ -271,8 +272,8 @@ cdef class Packer(object): msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) - def pack_array_header(self, long long size): - if size > (2**32-1): + def pack_array_header(self, size_t size): + if size > ITEM_LIMIT: raise PackValueError cdef int ret = msgpack_pack_array(&self.pk, size) if ret == -1: @@ -284,8 +285,8 @@ cdef class Packer(object): self.pk.length = 0 return buf - def pack_map_header(self, long long size): - if size > (2**32-1): + def pack_map_header(self, size_t size): + if size > ITEM_LIMIT: raise PackValueError cdef int ret = msgpack_pack_map(&self.pk, size) if ret == -1: diff --git a/msgpack/fallback.py b/msgpack/fallback.py index d8c5d73..db47d5c 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -685,7 +685,7 @@ class Packer(object): default_used = True continue raise PackOverflowError("Integer value out of range") - if self._use_bin_type and check(obj, (bytes, memoryview)): + if self._use_bin_type and check(obj, bytes): n = len(obj) if n <= 0xff: self._buffer.write(struct.pack('>BB', 0xc4, n)) @@ -696,7 +696,7 @@ class Packer(object): else: raise PackValueError("Bytes is too large") return self._buffer.write(obj) - if check(obj, (Unicode, bytes, memoryview)): + if check(obj, (Unicode, bytes)): if check(obj, Unicode): if self._encoding is None: raise TypeError( @@ -715,6 +715,28 @@ class Packer(object): else: raise PackValueError("String is too large") return self._buffer.write(obj) + if check(obj, memoryview): + n = len(obj) * obj.itemsize + if self._use_bin_type: + if n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xc4, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xc5, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xc6, n)) + else: + raise PackValueError("memoryview is too large") + return self._buffer.write(obj) + else: + if n <= 0x1f: + self._buffer.write(struct.pack('B', 0xa0 + n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xda, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xdb, n)) + else: + raise PackValueError("memoryview is too large") + return self._buffer.write(obj) if check(obj, float): if self._use_float: return self._buffer.write(struct.pack(">Bf", 0xca, obj)) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index aed5069..f555c5b 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -2,11 +2,134 @@ # coding: utf-8 +from array import array from msgpack import packb, unpackb +import sys -def test_pack_memoryview(): - data = bytearray(range(256)) - view = memoryview(data) - unpacked = unpackb(packb(view)) - assert data == unpacked +# For Python < 3: +# - array type only supports old buffer interface +# - array.frombytes is not available, must use deprecated array.fromstring +if sys.version_info[0] < 3: + def __memoryview(obj): + return memoryview(buffer(obj)) + + def __make_array(f, data): + a = array(f) + a.fromstring(data) + return a + + def __get_data(a): + return a.tostring() +else: + __memoryview = memoryview + + def __make_array(f, data): + a = array(f) + a.frombytes(data) + return a + + def __get_data(a): + return a.tobytes() + + +def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): + # create a new array + original_array = array(format) + original_array.fromlist([255] * (nbytes // original_array.itemsize)) + original_data = __get_data(original_array) + view = __memoryview(original_array) + + # pack, unpack, and reconstruct array + packed = packb(view, use_bin_type=use_bin_type) + unpacked = unpackb(packed) + reconstructed_array = __make_array(format, unpacked) + + # check that we got the right amount of data + assert len(original_data) == nbytes + # check packed header + assert packed[:1] == expected_header + # check packed length prefix, if any + assert packed[1:1+len(expected_prefix)] == expected_prefix + # check packed data + assert packed[1+len(expected_prefix):] == original_data + # check array unpacked correctly + assert original_array == reconstructed_array + + +# ----------- +# test fixstr +# ----------- + + +def test_memoryview_byte_fixstr(): + __run_test('B', 31, b'\xbf', b'', False) + + +def test_memoryview_float_fixstr(): + __run_test('f', 28, b'\xbc', b'', False) + + +# ---------- +# test str16 +# ---------- + + +def test_memoryview_byte_str16(): + __run_test('B', 2**8, b'\xda', b'\x01\x00', False) + + +def test_memoryview_float_str16(): + __run_test('f', 2**8, b'\xda', b'\x01\x00', False) + + +# ---------- +# test str32 +# ---------- + + +def test_memoryview_byte_str32(): + __run_test('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + + +def test_memoryview_float_str32(): + __run_test('f', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + + +# --------- +# test bin8 +# --------- + + +def test_memoryview_byte_bin8(): + __run_test('B', 1, b'\xc4', b'\x01', True) + + +def test_memoryview_float_bin8(): + __run_test('f', 4, b'\xc4', b'\x04', True) + + +# ---------- +# test bin16 +# ---------- + + +def test_memoryview_byte_bin16(): + __run_test('B', 2**8, b'\xc5', b'\x01\x00', True) + + +def test_memoryview_float_bin16(): + __run_test('f', 2**8, b'\xc5', b'\x01\x00', True) + + +# ---------- +# test bin32 +# ---------- + + +def test_memoryview_byte_bin32(): + __run_test('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + + +def test_memoryview_float_bin32(): + __run_test('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) From 0b55989f0b045f1a77d4230bea3b6da70eb3d840 Mon Sep 17 00:00:00 2001 From: folz Date: Wed, 4 May 2016 10:04:09 +0200 Subject: [PATCH 2/5] more descriptive test names --- test/test_memoryview.py | 72 ++++++++++++----------------------------- 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index f555c5b..2768867 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -11,25 +11,25 @@ import sys # - array type only supports old buffer interface # - array.frombytes is not available, must use deprecated array.fromstring if sys.version_info[0] < 3: - def __memoryview(obj): + def make_memoryview(obj): return memoryview(buffer(obj)) - def __make_array(f, data): + def make_array(f, data): a = array(f) a.fromstring(data) return a - def __get_data(a): + def get_data(a): return a.tostring() else: - __memoryview = memoryview + make_memoryview = memoryview - def __make_array(f, data): + def make_array(f, data): a = array(f) a.frombytes(data) return a - def __get_data(a): + def get_data(a): return a.tobytes() @@ -37,13 +37,13 @@ def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): # create a new array original_array = array(format) original_array.fromlist([255] * (nbytes // original_array.itemsize)) - original_data = __get_data(original_array) - view = __memoryview(original_array) + original_data = get_data(original_array) + view = make_memoryview(original_array) # pack, unpack, and reconstruct array packed = packb(view, use_bin_type=use_bin_type) unpacked = unpackb(packed) - reconstructed_array = __make_array(format, unpacked) + reconstructed_array = make_array(format, unpacked) # check that we got the right amount of data assert len(original_data) == nbytes @@ -57,79 +57,49 @@ def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): assert original_array == reconstructed_array -# ----------- -# test fixstr -# ----------- - - -def test_memoryview_byte_fixstr(): +def test_fixstr_from_byte(): __run_test('B', 31, b'\xbf', b'', False) -def test_memoryview_float_fixstr(): +def test_fixstr_from_float(): __run_test('f', 28, b'\xbc', b'', False) -# ---------- -# test str16 -# ---------- - - -def test_memoryview_byte_str16(): +def test_str16_from_byte(): __run_test('B', 2**8, b'\xda', b'\x01\x00', False) -def test_memoryview_float_str16(): +def test_str16_from_float(): __run_test('f', 2**8, b'\xda', b'\x01\x00', False) -# ---------- -# test str32 -# ---------- - - -def test_memoryview_byte_str32(): +def test_str32_from_byte(): __run_test('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) -def test_memoryview_float_str32(): +def test_str32_from_float(): __run_test('f', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) -# --------- -# test bin8 -# --------- - - -def test_memoryview_byte_bin8(): +def test_bin8_from_byte(): __run_test('B', 1, b'\xc4', b'\x01', True) -def test_memoryview_float_bin8(): +def test_bin8_from_float(): __run_test('f', 4, b'\xc4', b'\x04', True) -# ---------- -# test bin16 -# ---------- - - -def test_memoryview_byte_bin16(): +def test_bin16_from_byte(): __run_test('B', 2**8, b'\xc5', b'\x01\x00', True) -def test_memoryview_float_bin16(): +def test_bin16_from_float(): __run_test('f', 2**8, b'\xc5', b'\x01\x00', True) -# ---------- -# test bin32 -# ---------- - - -def test_memoryview_byte_bin32(): +def test_bin32_from_byte(): __run_test('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) -def test_memoryview_float_bin32(): +def test_bin32_from_float(): __run_test('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) From 5860af953ae1c3f459ddc589cd815ec195db46a9 Mon Sep 17 00:00:00 2001 From: folz Date: Wed, 4 May 2016 11:01:27 +0200 Subject: [PATCH 3/5] refactor header packing for str and bin types --- msgpack/fallback.py | 83 +++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index db47d5c..abed3d9 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -685,58 +685,29 @@ class Packer(object): default_used = True continue raise PackOverflowError("Integer value out of range") - if self._use_bin_type and check(obj, bytes): + if check(obj, bytes): n = len(obj) - if n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xc4, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xc5, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xc6, n)) - else: + if n >= 2**32: raise PackValueError("Bytes is too large") + self._fb_pack_bin_header(n) return self._buffer.write(obj) - if check(obj, (Unicode, bytes)): - if check(obj, Unicode): - if self._encoding is None: - raise TypeError( - "Can't encode unicode string: " - "no encoding is specified") - obj = obj.encode(self._encoding, self._unicode_errors) + if check(obj, Unicode): + if self._encoding is None: + raise TypeError( + "Can't encode unicode string: " + "no encoding is specified") + obj = obj.encode(self._encoding, self._unicode_errors) n = len(obj) - if n <= 0x1f: - self._buffer.write(struct.pack('B', 0xa0 + n)) - elif self._use_bin_type and n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xd9, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xda, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xdb, n)) - else: + if n >= 2**32: raise PackValueError("String is too large") + self._fb_pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): n = len(obj) * obj.itemsize - if self._use_bin_type: - if n <= 0xff: - self._buffer.write(struct.pack('>BB', 0xc4, n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xc5, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xc6, n)) - else: - raise PackValueError("memoryview is too large") - return self._buffer.write(obj) - else: - if n <= 0x1f: - self._buffer.write(struct.pack('B', 0xa0 + n)) - elif n <= 0xffff: - self._buffer.write(struct.pack(">BH", 0xda, n)) - elif n <= 0xffffffff: - self._buffer.write(struct.pack(">BI", 0xdb, n)) - else: - raise PackValueError("memoryview is too large") - return self._buffer.write(obj) + if n >= 2**32: + raise PackValueError("Memoryview is too large") + self._fb_pack_bin_header(n) + return self._buffer.write(obj) if check(obj, float): if self._use_float: return self._buffer.write(struct.pack(">Bf", 0xca, obj)) @@ -874,6 +845,30 @@ class Packer(object): self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) + def _fb_pack_raw_header(self, n): + if n <= 0x1f: + self._buffer.write(struct.pack('B', 0xa0 + n)) + elif self._use_bin_type and n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xd9, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xda, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xdb, n)) + else: + raise PackValueError('Raw is too large') + + def _fb_pack_bin_header(self, n): + if not self._use_bin_type: + return self._fb_pack_raw_header(n) + elif n <= 0xff: + return self._buffer.write(struct.pack('>BB', 0xc4, n)) + elif n <= 0xffff: + return self._buffer.write(struct.pack(">BH", 0xc5, n)) + elif n <= 0xffffffff: + return self._buffer.write(struct.pack(">BI", 0xc6, n)) + else: + raise PackValueError('Bin is too large') + def bytes(self): return self._buffer.getvalue() From a91d5c538ea5bbee0f00ff180a8e72d27df6cfc1 Mon Sep 17 00:00:00 2001 From: folz Date: Wed, 4 May 2016 12:03:37 +0200 Subject: [PATCH 4/5] add lower bound tests for memoryviews --- test/test_memoryview.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index 2768867..7ce6bfc 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -58,19 +58,23 @@ def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): def test_fixstr_from_byte(): + __run_test('B', 1, b'\xa1', b'', False) __run_test('B', 31, b'\xbf', b'', False) def test_fixstr_from_float(): + __run_test('f', 4, b'\xa4', b'', False) __run_test('f', 28, b'\xbc', b'', False) def test_str16_from_byte(): __run_test('B', 2**8, b'\xda', b'\x01\x00', False) + __run_test('B', 2**16-1, b'\xda', b'\xff\xff', False) def test_str16_from_float(): __run_test('f', 2**8, b'\xda', b'\x01\x00', False) + __run_test('f', 2**16-4, b'\xda', b'\xff\xfc', False) def test_str32_from_byte(): @@ -83,18 +87,22 @@ def test_str32_from_float(): def test_bin8_from_byte(): __run_test('B', 1, b'\xc4', b'\x01', True) + __run_test('B', 2**8-1, b'\xc4', b'\xff', True) def test_bin8_from_float(): __run_test('f', 4, b'\xc4', b'\x04', True) + __run_test('f', 2**8-4, b'\xc4', b'\xfc', True) def test_bin16_from_byte(): __run_test('B', 2**8, b'\xc5', b'\x01\x00', True) + __run_test('B', 2**16-1, b'\xc5', b'\xff\xff', True) def test_bin16_from_float(): __run_test('f', 2**8, b'\xc5', b'\x01\x00', True) + __run_test('f', 2**16-4, b'\xc5', b'\xff\xfc', True) def test_bin32_from_byte(): From 53f47ef55d8d93e276ecf9041a9a8b43fc041aef Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Thu, 5 May 2016 00:49:48 +0900 Subject: [PATCH 5/5] Remove double underscore prefix --- test/test_memoryview.py | 43 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/test/test_memoryview.py b/test/test_memoryview.py index 7ce6bfc..f6d74ed 100644 --- a/test/test_memoryview.py +++ b/test/test_memoryview.py @@ -1,7 +1,6 @@ #!/usr/bin/env python # coding: utf-8 - from array import array from msgpack import packb, unpackb import sys @@ -33,7 +32,7 @@ else: return a.tobytes() -def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): +def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type): # create a new array original_array = array(format) original_array.fromlist([255] * (nbytes // original_array.itemsize)) @@ -58,56 +57,56 @@ def __run_test(format, nbytes, expected_header, expected_prefix, use_bin_type): def test_fixstr_from_byte(): - __run_test('B', 1, b'\xa1', b'', False) - __run_test('B', 31, b'\xbf', b'', False) + _runtest('B', 1, b'\xa1', b'', False) + _runtest('B', 31, b'\xbf', b'', False) def test_fixstr_from_float(): - __run_test('f', 4, b'\xa4', b'', False) - __run_test('f', 28, b'\xbc', b'', False) + _runtest('f', 4, b'\xa4', b'', False) + _runtest('f', 28, b'\xbc', b'', False) def test_str16_from_byte(): - __run_test('B', 2**8, b'\xda', b'\x01\x00', False) - __run_test('B', 2**16-1, b'\xda', b'\xff\xff', False) + _runtest('B', 2**8, b'\xda', b'\x01\x00', False) + _runtest('B', 2**16-1, b'\xda', b'\xff\xff', False) def test_str16_from_float(): - __run_test('f', 2**8, b'\xda', b'\x01\x00', False) - __run_test('f', 2**16-4, b'\xda', b'\xff\xfc', False) + _runtest('f', 2**8, b'\xda', b'\x01\x00', False) + _runtest('f', 2**16-4, b'\xda', b'\xff\xfc', False) def test_str32_from_byte(): - __run_test('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + _runtest('B', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) def test_str32_from_float(): - __run_test('f', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) + _runtest('f', 2**16, b'\xdb', b'\x00\x01\x00\x00', False) def test_bin8_from_byte(): - __run_test('B', 1, b'\xc4', b'\x01', True) - __run_test('B', 2**8-1, b'\xc4', b'\xff', True) + _runtest('B', 1, b'\xc4', b'\x01', True) + _runtest('B', 2**8-1, b'\xc4', b'\xff', True) def test_bin8_from_float(): - __run_test('f', 4, b'\xc4', b'\x04', True) - __run_test('f', 2**8-4, b'\xc4', b'\xfc', True) + _runtest('f', 4, b'\xc4', b'\x04', True) + _runtest('f', 2**8-4, b'\xc4', b'\xfc', True) def test_bin16_from_byte(): - __run_test('B', 2**8, b'\xc5', b'\x01\x00', True) - __run_test('B', 2**16-1, b'\xc5', b'\xff\xff', True) + _runtest('B', 2**8, b'\xc5', b'\x01\x00', True) + _runtest('B', 2**16-1, b'\xc5', b'\xff\xff', True) def test_bin16_from_float(): - __run_test('f', 2**8, b'\xc5', b'\x01\x00', True) - __run_test('f', 2**16-4, b'\xc5', b'\xff\xfc', True) + _runtest('f', 2**8, b'\xc5', b'\x01\x00', True) + _runtest('f', 2**16-4, b'\xc5', b'\xff\xfc', True) def test_bin32_from_byte(): - __run_test('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + _runtest('B', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) def test_bin32_from_float(): - __run_test('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True) + _runtest('f', 2**16, b'\xc6', b'\x00\x01\x00\x00', True)