From 95dfec808a700c99144e454fbf9d98be420f2c51 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 3 Feb 2013 00:02:37 +0900 Subject: [PATCH 1/5] Add simple benchmark. --- benchmark/benchmark.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 benchmark/benchmark.py diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py new file mode 100644 index 0000000..1439289 --- /dev/null +++ b/benchmark/benchmark.py @@ -0,0 +1,32 @@ +from msgpack import fallback +try: + from msgpack import _unpacker, _packer + has_ext = True +except ImportError: + has_ext = False +import timeit + + +def profile(name, func): + times = timeit.repeat(func, number=1000, repeat=4) + times = ', '.join(["%8f" % t for t in times]) + print("%-30s %40s" % (name, times)) + + +def simple(name, data): + if has_ext: + profile("packing %s (ext)" % name, lambda: _packer.packb(data)) + profile('packing %s (fallback)' % name, lambda: fallback.packb(data)) + + data = fallback.packb(data) + if has_ext: + profile('unpacking %s (ext)' % name, lambda: _unpacker.unpackb(data)) + profile('unpacking %s (fallback)' % name, lambda: fallback.unpackb(data)) + +def main(): + simple("integers", [7]*10000) + simple("bytes", [b'x'*n for n in range(100)]*10) + simple("lists", [[]]*10000) + simple("dicts", [{}]*10000) + +main() From 0536d1bd0cb5c781b41e8cf7ede66448396dc993 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 3 Feb 2013 00:11:26 +0900 Subject: [PATCH 2/5] Don't compile extension module when running on pypy --- setup.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index d4808d6..888de09 100644 --- a/setup.py +++ b/setup.py @@ -75,18 +75,19 @@ else: macros = [('__LITTLE_ENDIAN__', '1')] ext_modules = [] -ext_modules.append(Extension('msgpack._packer', - sources=['msgpack/_packer.cpp'], - libraries=libraries, - include_dirs=['.'], - define_macros=macros, - )) -ext_modules.append(Extension('msgpack._unpacker', - sources=['msgpack/_unpacker.cpp'], - libraries=libraries, - include_dirs=['.'], - define_macros=macros, - )) +if not hasattr(sys, 'pypy_version_info'): + ext_modules.append(Extension('msgpack._packer', + sources=['msgpack/_packer.cpp'], + libraries=libraries, + include_dirs=['.'], + define_macros=macros, + )) + ext_modules.append(Extension('msgpack._unpacker', + sources=['msgpack/_unpacker.cpp'], + libraries=libraries, + include_dirs=['.'], + define_macros=macros, + )) del libraries, macros From 22920baae6957e2259a82d0595c2b97fc58fcd02 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 3 Feb 2013 00:20:00 +0900 Subject: [PATCH 3/5] Fix minor bugs and tuning unpacking dict. 
--- msgpack/fallback.py | 25 ++++++++++++++++--------- test/test_pack.py | 2 +- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index ac6dbf9..c10c6ac 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -360,16 +360,19 @@ class Unpacker(object): self._fb_unpack(EX_SKIP, write_bytes) self._fb_unpack(EX_SKIP, write_bytes) return - ret = [] - for i in xrange(n): - ret.append((self._fb_unpack(EX_CONSTRUCT, write_bytes), - self._fb_unpack(EX_CONSTRUCT, write_bytes))) if self.object_pairs_hook is not None: - ret = self.object_pairs_hook(ret) + ret = self.object_pairs_hook( + (self._fb_unpack(EX_CONSTRUCT, write_bytes), + self._fb_unpack(EX_CONSTRUCT, write_bytes)) + for _ in xrange(n) + ) else: - ret = dict(ret) - if self.object_hook is not None: - ret = self.object_hook(ret) + ret = {} + for _ in xrange(n): + key = self._fb_unpack(EX_CONSTRUCT, write_bytes) + ret[key] = self._fb_unpack(EX_CONSTRUCT, write_bytes) + if self.object_hook is not None: + ret = self.object_hook(ret) return ret if execute == EX_SKIP: return @@ -421,7 +424,7 @@ class Packer(object): raise TypeError("default must be callable") self._default = default - def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT): + def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): if nest_limit < 0: raise PackValueError("recursion limit exceeded") if obj is None: @@ -454,6 +457,10 @@ class Packer(object): raise PackValueError("Integer value out of range") if isinstance(obj, (Unicode, bytes)): if isinstance(obj, Unicode): + if self.encoding is None: + raise TypeError( + "Can't encode unicode string: " + "no encoding is specified") obj = obj.encode(self.encoding, self.unicode_errors) n = len(obj) if n <= 0x1f: diff --git a/test/test_pack.py b/test/test_pack.py index 8f4117c..3225f41 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -151,7 +151,7 @@ def test_odict(): od = odict(seq) assert unpackb(packb(od), use_list=1) == dict(seq) def pair_hook(seq): - return seq + return list(seq) assert unpackb(packb(od), object_pairs_hook=pair_hook, use_list=1) == seq From 1951b197b547c3f12b755790717d799272fbeb34 Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Sun, 3 Feb 2013 00:52:05 +0900 Subject: [PATCH 4/5] Skip compile error for extension modules. --- setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 888de09..1055a61 100644 --- a/setup.py +++ b/setup.py @@ -46,7 +46,12 @@ class BuildExt(build_ext): print("Install Cython >= 0.16 or install msgpack from PyPI.") print("Falling back to pure Python implementation.") return - return build_ext.build_extension(self, ext) + try: + return build_ext.build_extension(self, ext) + except Exception as e: + print("WARNING: Failed to compile extensiom modules.") + print("msgpack uses fallback pure python implementation.") + print(e) exec(open('msgpack/_version.py').read()) From a865f8f7e94ea5b484771c1be3fe57ff3a63aa2a Mon Sep 17 00:00:00 2001 From: INADA Naoki Date: Mon, 4 Feb 2013 15:14:30 +0900 Subject: [PATCH 5/5] Use _private names for non public data members. 
(fix #44) --- msgpack/fallback.py | 151 ++++++++++++++++++++++---------------------- 1 file changed, 76 insertions(+), 75 deletions(-) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index c10c6ac..a834229 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -168,18 +168,18 @@ class Unpacker(object): self._fb_buf_o = 0 self._fb_buf_i = 0 self._fb_buf_n = 0 - self.max_buffer_size = (2**31-1 if max_buffer_size == 0 - else max_buffer_size) - self.read_size = (read_size if read_size != 0 - else min(self.max_buffer_size, 2048)) - if read_size > self.max_buffer_size: + self._max_buffer_size = (2**31-1 if max_buffer_size == 0 + else max_buffer_size) + self._read_size = (read_size if read_size != 0 + else min(self._max_buffer_size, 2048)) + if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") - self.encoding = encoding - self.unicode_errors = unicode_errors - self.use_list = use_list - self.list_hook = list_hook - self.object_hook = object_hook - self.object_pairs_hook = object_pairs_hook + self._encoding = encoding + self._unicode_errors = unicode_errors + self._use_list = use_list + self._list_hook = list_hook + self._object_hook = object_hook + self._object_pairs_hook = object_pairs_hook if list_hook is not None and not callable(list_hook): raise ValueError('`list_hook` is not callable') @@ -195,7 +195,7 @@ class Unpacker(object): if isinstance(next_bytes, array.array): next_bytes = next_bytes.tostring() assert self._fb_feeding - if self._fb_buf_n + len(next_bytes) > self.max_buffer_size: + if self._fb_buf_n + len(next_bytes) > self._max_buffer_size: raise BufferFull self._fb_buf_n += len(next_bytes) self._fb_buffers.append(next_bytes) @@ -246,7 +246,7 @@ class Unpacker(object): if self._fb_buf_i == len(self._fb_buffers): if self._fb_feeding: break - tmp = self.file_like.read(self.read_size) + tmp = self.file_like.read(self._read_size) if not tmp: break self._fb_buffers.append(tmp) @@ -349,10 +349,10 @@ class Unpacker(object): ret = [] for i in xrange(n): ret.append(self._fb_unpack(EX_CONSTRUCT, write_bytes)) - if self.list_hook is not None: - ret = self.list_hook(ret) + if self._list_hook is not None: + ret = self._list_hook(ret) # TODO is the interaction between `list_hook` and `use_list` ok? 
- return ret if self.use_list else tuple(ret) + return ret if self._use_list else tuple(ret) if typ == TYPE_MAP: if execute == EX_SKIP: for i in xrange(n): @@ -360,8 +360,8 @@ class Unpacker(object): self._fb_unpack(EX_SKIP, write_bytes) self._fb_unpack(EX_SKIP, write_bytes) return - if self.object_pairs_hook is not None: - ret = self.object_pairs_hook( + if self._object_pairs_hook is not None: + ret = self._object_pairs_hook( (self._fb_unpack(EX_CONSTRUCT, write_bytes), self._fb_unpack(EX_CONSTRUCT, write_bytes)) for _ in xrange(n) @@ -371,14 +371,14 @@ class Unpacker(object): for _ in xrange(n): key = self._fb_unpack(EX_CONSTRUCT, write_bytes) ret[key] = self._fb_unpack(EX_CONSTRUCT, write_bytes) - if self.object_hook is not None: - ret = self.object_hook(ret) + if self._object_hook is not None: + ret = self._object_hook(ret) return ret if execute == EX_SKIP: return if typ == TYPE_RAW: - if self.encoding is not None: - obj = obj.decode(self.encoding, self.unicode_errors) + if self._encoding is not None: + obj = obj.decode(self._encoding, self._unicode_errors) return obj assert typ == TYPE_IMMEDIATE return obj @@ -411,14 +411,15 @@ class Unpacker(object): self._fb_consume() return ret + class Packer(object): def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', use_single_float=False, autoreset=True): - self.use_float = use_single_float - self.autoreset = autoreset - self.encoding = encoding - self.unicode_errors = unicode_errors - self.buffer = StringIO() + self._use_float = use_single_float + self._autoreset = autoreset + self._encoding = encoding + self._unicode_errors = unicode_errors + self._buffer = StringIO() if default is not None: if not callable(default): raise TypeError("default must be callable") @@ -428,55 +429,55 @@ class Packer(object): if nest_limit < 0: raise PackValueError("recursion limit exceeded") if obj is None: - return self.buffer.write(b"\xc0") + return self._buffer.write(b"\xc0") if isinstance(obj, bool): if obj: - return self.buffer.write(b"\xc3") - return self.buffer.write(b"\xc2") + return self._buffer.write(b"\xc3") + return self._buffer.write(b"\xc2") if isinstance(obj, int_types): if 0 <= obj < 0x80: - return self.buffer.write(struct.pack("B", obj)) + return self._buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: - return self.buffer.write(struct.pack("b", obj)) + return self._buffer.write(struct.pack("b", obj)) if 0x80 <= obj <= 0xff: - return self.buffer.write(struct.pack("BB", 0xcc, obj)) + return self._buffer.write(struct.pack("BB", 0xcc, obj)) if -0x80 <= obj < 0: - return self.buffer.write(struct.pack(">Bb", 0xd0, obj)) + return self._buffer.write(struct.pack(">Bb", 0xd0, obj)) if 0xff < obj <= 0xffff: - return self.buffer.write(struct.pack(">BH", 0xcd, obj)) + return self._buffer.write(struct.pack(">BH", 0xcd, obj)) if -0x8000 <= obj < -0x80: - return self.buffer.write(struct.pack(">Bh", 0xd1, obj)) + return self._buffer.write(struct.pack(">Bh", 0xd1, obj)) if 0xffff < obj <= 0xffffffff: - return self.buffer.write(struct.pack(">BI", 0xce, obj)) + return self._buffer.write(struct.pack(">BI", 0xce, obj)) if -0x80000000 <= obj < -0x8000: - return self.buffer.write(struct.pack(">Bi", 0xd2, obj)) + return self._buffer.write(struct.pack(">Bi", 0xd2, obj)) if 0xffffffff < obj <= 0xffffffffffffffff: - return self.buffer.write(struct.pack(">BQ", 0xcf, obj)) + return self._buffer.write(struct.pack(">BQ", 0xcf, obj)) if -0x8000000000000000 <= obj < -0x80000000: - return self.buffer.write(struct.pack(">Bq", 0xd3, obj)) + return 
self._buffer.write(struct.pack(">Bq", 0xd3, obj)) raise PackValueError("Integer value out of range") if isinstance(obj, (Unicode, bytes)): if isinstance(obj, Unicode): - if self.encoding is None: + if self._encoding is None: raise TypeError( "Can't encode unicode string: " "no encoding is specified") - obj = obj.encode(self.encoding, self.unicode_errors) + obj = obj.encode(self._encoding, self._unicode_errors) n = len(obj) if n <= 0x1f: - self.buffer.write(struct.pack('B', 0xa0 + n)) - return self.buffer.write(obj) + self._buffer.write(struct.pack('B', 0xa0 + n)) + return self._buffer.write(obj) if n <= 0xffff: - self.buffer.write(struct.pack(">BH", 0xda, n)) - return self.buffer.write(obj) + self._buffer.write(struct.pack(">BH", 0xda, n)) + return self._buffer.write(obj) if n <= 0xffffffff: - self.buffer.write(struct.pack(">BI", 0xdb, n)) - return self.buffer.write(obj) + self._buffer.write(struct.pack(">BI", 0xdb, n)) + return self._buffer.write(obj) raise PackValueError("String is too large") if isinstance(obj, float): - if self.use_float: - return self.buffer.write(struct.pack(">Bf", 0xca, obj)) - return self.buffer.write(struct.pack(">Bd", 0xcb, obj)) + if self._use_float: + return self._buffer.write(struct.pack(">Bf", 0xca, obj)) + return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) if isinstance(obj, list) or isinstance(obj, tuple): n = len(obj) self._fb_pack_array_header(n) @@ -492,56 +493,56 @@ class Packer(object): def pack(self, obj): self._pack(obj) - ret = self.buffer.getvalue() - if self.autoreset: - self.buffer = StringIO() + ret = self._buffer.getvalue() + if self._autoreset: + self._buffer = StringIO() elif USING_STRINGBUILDER: - self.buffer = StringIO(ret) + self._buffer = StringIO(ret) return ret def pack_map_pairs(self, pairs): self._fb_pack_map_pairs(len(pairs), pairs) - ret = self.buffer.getvalue() - if self.autoreset: - self.buffer = StringIO() + ret = self._buffer.getvalue() + if self._autoreset: + self._buffer = StringIO() elif USING_STRINGBUILDER: - self.buffer = StringIO(ret) + self._buffer = StringIO(ret) return ret def pack_array_header(self, n): self._fb_pack_array_header(n) - ret = self.buffer.getvalue() - if self.autoreset: - self.buffer = StringIO() + ret = self._buffer.getvalue() + if self._autoreset: + self._buffer = StringIO() elif USING_STRINGBUILDER: - self.buffer = StringIO(ret) + self._buffer = StringIO(ret) return ret def pack_map_header(self, n): self._fb_pack_map_header(n) - ret = self.buffer.getvalue() - if self.autoreset: - self.buffer = StringIO() + ret = self._buffer.getvalue() + if self._autoreset: + self._buffer = StringIO() elif USING_STRINGBUILDER: - self.buffer = StringIO(ret) + self._buffer = StringIO(ret) return ret def _fb_pack_array_header(self, n): if n <= 0x0f: - return self.buffer.write(struct.pack('B', 0x90 + n)) + return self._buffer.write(struct.pack('B', 0x90 + n)) if n <= 0xffff: - return self.buffer.write(struct.pack(">BH", 0xdc, n)) + return self._buffer.write(struct.pack(">BH", 0xdc, n)) if n <= 0xffffffff: - return self.buffer.write(struct.pack(">BI", 0xdd, n)) + return self._buffer.write(struct.pack(">BI", 0xdd, n)) raise PackValueError("Array is too large") def _fb_pack_map_header(self, n): if n <= 0x0f: - return self.buffer.write(struct.pack('B', 0x80 + n)) + return self._buffer.write(struct.pack('B', 0x80 + n)) if n <= 0xffff: - return self.buffer.write(struct.pack(">BH", 0xde, n)) + return self._buffer.write(struct.pack(">BH", 0xde, n)) if n <= 0xffffffff: - return self.buffer.write(struct.pack(">BI", 0xdf, n)) + 
return self._buffer.write(struct.pack(">BI", 0xdf, n)) raise PackValueError("Dict is too large") def _fb_pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): @@ -551,7 +552,7 @@ class Packer(object): self._pack(v, nest_limit - 1) def bytes(self): - return self.buffer.getvalue() + return self._buffer.getvalue() def reset(self): - self.buffer = StringIO() + self._buffer = StringIO()
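
Notes on the techniques used in the patches above. The sketches below are editorial illustrations only: they are not part of the commits, and any name that does not appear in the diffs is an assumption.

PATCH 1/5 adds benchmark/benchmark.py, which times packb/unpackb for the Cython extension and the pure Python fallback with timeit.repeat and prints the four repetitions side by side; it is run directly, e.g. "python benchmark/benchmark.py". A minimal standalone sketch of the same measurement for one payload, using only APIs the script itself imports:

    import timeit
    from msgpack import fallback

    data = [7] * 10000
    # Four repetitions of 1000 packb calls, matching profile() in benchmark.py.
    times = timeit.repeat(lambda: fallback.packb(data), number=1000, repeat=4)
    print("packing integers (fallback)", ", ".join("%8f" % t for t in times))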
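
PATCH 2/5 skips building the C extensions on PyPy by testing for sys.pypy_version_info, an attribute that only PyPy defines. A minimal sketch of that detection idiom (the print calls stand in for the Extension setup done in setup.py):

    import sys

    # Only PyPy defines sys.pypy_version_info, so this is False on CPython.
    on_pypy = hasattr(sys, 'pypy_version_info')

    ext_modules = []
    if not on_pypy:
        print("would append msgpack._packer and msgpack._unpacker here")
    else:
        print("PyPy detected; relying on the pure Python fallback")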
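
PATCH 3/5 makes the fallback Unpacker hand the key/value pairs to object_pairs_hook as a generator instead of a prebuilt list, which is why the test's pair_hook now returns list(seq): a hook that needs indexing or a second pass has to materialize the pairs itself. A hedged usage sketch (OrderedDict is just an example hook, not something the patch introduces):

    from collections import OrderedDict
    from msgpack import fallback

    packed = fallback.packb({'a': 1, 'b': 2})

    # OrderedDict consumes the iterable of (key, value) pairs directly.
    od = fallback.unpackb(packed, object_pairs_hook=OrderedDict)

    # A hook that wants a real sequence must copy the iterable, e.g. list().
    pairs = fallback.unpackb(packed, object_pairs_hook=list)
    print(od, pairs)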
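
The same patch adds an explicit TypeError when a unicode string is packed while encoding is None, instead of failing deeper inside encode() with a less helpful message. A short sketch of the behaviour:

    from msgpack import fallback

    packer = fallback.Packer(encoding=None)
    packer.pack(b'raw bytes')       # bytes need no encoding
    try:
        packer.pack(u'text')        # unicode without an encoding now raises
    except TypeError as e:
        print(e)                    # "Can't encode unicode string: no encoding is specified"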
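
PATCH 3/5 also adds isinstance=isinstance to the signature of Packer._pack. Binding a builtin as a default argument makes it a local variable of the function, so the many isinstance() checks in the packing hot path avoid repeated global/builtins lookups. A generic sketch of the idiom (count_ints is an illustrative name):

    def count_ints(items, isinstance=isinstance):
        # isinstance is now a fast local lookup inside the loop.
        n = 0
        for item in items:
            if isinstance(item, int):
                n += 1
        return n

    print(count_ints([1, 'a', 2, 3.0, 4]))   # -> 3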
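
PATCH 4/5 keeps setup.py going when the compiler fails: BuildExt.build_extension wraps the real build in try/except, prints a warning, and lets installation finish with only the pure Python implementation. A condensed sketch of the pattern; the import line and message wording are assumptions, and the real setup.py also performs the Cython check visible in the hunk context:

    from distutils.command.build_ext import build_ext

    class BuildExt(build_ext):
        def build_extension(self, ext):
            try:
                return build_ext.build_extension(self, ext)
            except Exception as e:
                print("WARNING: Failed to compile extension modules.")
                print("msgpack will fall back to the pure Python implementation.")
                print(e)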
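
PATCH 5/5 renames the Unpacker and Packer data members (encoding, use_list, buffer, ...) to underscored _private names (fix #44), marking them as implementation details. Callers should configure behaviour through constructor arguments and use the public methods rather than reading or assigning those attributes. A short sketch of the supported surface (Unpacker.unpack() is the existing public msgpack API, not something added by this patch):

    from msgpack import fallback

    packer = fallback.Packer(use_single_float=False)
    payload = packer.pack({'answer': 42})

    unpacker = fallback.Unpacker(use_list=False)
    unpacker.feed(payload)
    print(unpacker.unpack())    # configuration came from the constructors,
                                # not from poking packer.buffer and friends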