diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml deleted file mode 100644 index b696b92..0000000 --- a/.github/workflows/docs.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: docs - -on: ["push", "pull_request"] - -jobs: - docs: - # We want to run on external PRs, but not on our own internal PRs as they'll be run - # by the push to the branch. - if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository - - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.x' - cache: "pip" - cache-dependency-path: | - requirements.txt - docs/requirements.txt - - - name: Build - run: | - pip install -r requirements.txt - make cython - - - name: Sphinx Documentation Generator - run: | - pip install -r docs/requirements.txt - make docs diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml deleted file mode 100644 index 198cf7b..0000000 --- a/.github/workflows/lint.yaml +++ /dev/null @@ -1,22 +0,0 @@ -name: lint - -on: ["push", "pull_request"] - -jobs: - lint: - # We want to run on external PRs, but not on our own internal PRs as they'll be run - # by the push to the branch. - if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository - - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: ruff check - run: | - pipx run ruff check --diff msgpack/ test/ setup.py - - - name: ruff format - run: | - pipx run ruff format --diff msgpack/ test/ setup.py diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index 6b1664a..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: Run tests -on: - push: - branches: [main] - pull_request: - create: - -jobs: - test: - strategy: - matrix: - os: ["ubuntu-latest", "windows-latest", "windows-11-arm", "macos-latest"] - py: ["3.14", "3.14t", "3.13", "3.12", "3.11", "3.10"] - exclude: - - os: windows-11-arm - py: "3.10" - runs-on: ${{ matrix.os }} - name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} - - steps: - - name: Checkout - uses: actions/checkout@v5 - - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.py }} - allow-prereleases: true - cache: "pip" - - - name: Prepare - shell: bash - run: | - python -m pip install -r requirements.txt pytest - - - name: Build - shell: bash - run: | - make cython - pip install . - - - name: Test (C extension) - shell: bash - run: | - pytest -v test - - - name: Test (pure Python fallback) - shell: bash - run: | - MSGPACK_PUREPYTHON=1 pytest -v test - - - name: build packages - shell: bash - run: | - python -m build -nv - - - name: upload packages - uses: actions/upload-artifact@v4 - with: - name: dist-${{ matrix.os }}-${{ matrix.py }} - path: dist diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml deleted file mode 100644 index 531abbc..0000000 --- a/.github/workflows/wheel.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: Build sdist and Wheels -on: - push: - branches: [main] - release: - types: - - published - workflow_dispatch: - -jobs: - build_wheels: - strategy: - matrix: - # macos-13 is for intel - os: ["ubuntu-24.04", "ubuntu-24.04-arm", "windows-latest", "windows-11-arm", "macos-13", "macos-latest"] - runs-on: ${{ matrix.os }} - name: Build wheels on ${{ matrix.os }} - - steps: - - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 - with: - python-version: "3.x" - cache: "pip" - - name: Cythonize - shell: bash - run: | - pip install -r requirements.txt - make cython - - - name: Build - uses: pypa/cibuildwheel@v3.3.0 - env: - CIBW_TEST_REQUIRES: "pytest" - CIBW_TEST_COMMAND: "pytest {package}/test" - CIBW_SKIP: "pp* cp38-* cp39-* cp310-win_arm64" - - - name: Build sdist - if: runner.os == 'Linux' && runner.arch == 'X64' - run: | - pip install build - python -m build -s -o wheelhouse - - - name: Upload Wheels to artifact - uses: actions/upload-artifact@v4 - with: - name: wheels-${{ matrix.os }} - path: wheelhouse - - # combine all wheels into one artifact - combine_wheels: - needs: [build_wheels] - runs-on: ubuntu-latest - steps: - - uses: actions/download-artifact@v4 - with: - # unpacks all CIBW artifacts into dist/ - pattern: wheels-* - path: dist - merge-multiple: true - - - name: Upload Wheels to artifact - uses: actions/upload-artifact@v4 - with: - name: wheels-all - path: dist - - # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml - upload_pypi: - needs: [build_wheels] - runs-on: ubuntu-latest - environment: pypi - permissions: - id-token: write - if: github.event_name == 'release' && github.event.action == 'published' - # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) - # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') - steps: - - uses: actions/download-artifact@v4 - with: - # unpacks all CIBW artifacts into dist/ - pattern: wheels-* - path: dist - merge-multiple: true - - - uses: pypa/gh-action-pypi-publish@release/v1 - #with: - # To test: repository-url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore index 341be63..1bd68b4 100644 --- a/.gitignore +++ b/.gitignore @@ -2,16 +2,10 @@ MANIFEST build/* dist/* .tox -.python-version *.pyc *.pyo *.so *~ msgpack/__version__.py -msgpack/*.c msgpack/*.cpp *.egg-info -/venv -/tags -/docs/_build -.cache diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 88d8718..0000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Read the Docs configuration file for Sphinx projects. -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details. - -version: 2 - -build: - os: ubuntu-22.04 - tools: - python: "3.11" - apt_packages: - - build-essential - jobs: - pre_install: - - pip install -r requirements.txt - - make cython - -python: - install: - - method: pip - path: . - - requirements: docs/requirements.txt - -sphinx: - configuration: docs/conf.py diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..dad7e87 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,24 @@ +sudo: false +cache: + directories: + - wheelhouse + +language: python +python: + - 2.7 + +env: + - TOXENV=py26-c,py27-c + - TOXENV=py32-c,py33-c,py34-c + - TOXENV=py26-pure,py27-pure + - TOXENV=py32-pure,py33-pure,py34-pure + - TOXENV=pypy-pure,pypy3-pure + +install: + - pip install wheel tox + - ls -la wheelhouse + - if [ ! -f wheelhouse/Cython-0.21.2-cp27-none-linux_x86_64.whl ] ; then pip wheel cython ; fi + - pip install wheelhouse/Cython-0.21.2-cp27-none-linux_x86_64.whl + - cython --cplus msgpack/_packer.pyx msgpack/_unpacker.pyx + +script: tox diff --git a/ChangeLog.rst b/ChangeLog.rst index beeab15..34f4cd4 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,318 +1,3 @@ -1.1.2 -===== - -Release Date: 2025-10-08 - -This release does not change source code. It updates only building wheels: - -* Update Cython to v3.1.4 -* Update cibuildwheel to v3.2.0 -* Drop Python 3.8 -* Add Python 3.14 -* Add windows-arm - -1.1.1 -===== - -Release Date: 2025-06-13 - -* No change from 1.1.1rc1. - -1.1.1rc1 -======== - -Release Date: 2025-06-06 - -* Update Cython to 3.1.1 and cibuildwheel to 2.23.3. - -1.1.0 -===== - -Release Date: 2024-09-10 - -* use ``PyLong_*`` instead of ``PyInt_*`` for compatibility with - future Cython. (#620) - -1.1.0rc2 -======== - -Release Date: 2024-08-19 - -* Update Cython to 3.0.11 for better Python 3.13 support. -* Update cibuildwheel to 2.20.0 to build Python 3.13 wheels. - -1.1.0rc1 -======== - -Release Date: 2024-05-07 - -* Update Cython to 3.0.10 to reduce C warnings and future support for Python 3.13. -* Stop using C++ mode in Cython to reduce compile error on some compilers. -* ``Packer()`` has ``buf_size`` option to specify initial size of - internal buffer to reduce reallocation. -* The default internal buffer size of ``Packer()`` is reduced from - 1MiB to 256KiB to optimize for common use cases. Use ``buf_size`` - if you are packing large data. -* ``Timestamp.to_datetime()`` and ``Timestamp.from_datetime()`` become - more accurate by avoiding floating point calculations. (#591) -* The Cython code for ``Unpacker`` has been slightly rewritten for maintainability. -* The fallback implementation of ``Packer()`` and ``Unpacker()`` now uses keyword-only - arguments to improve compatibility with the Cython implementation. - -1.0.8 -===== - -Release Date: 2024-03-01 - -* Update Cython to 3.0.8. This fixes memory leak when iterating - ``Unpacker`` object on Python 3.12. -* Do not include C/Cython files in binary wheels. - - -1.0.7 -===== - -Release Date: 2023-09-28 - -* Fix build error of extension module on Windows. (#567) -* ``setup.py`` doesn't skip build error of extension module. (#568) - - -1.0.6 -===== - -Release Date: 2023-09-21 - -.. note:: - v1.0.6 Wheels for Windows don't contain extension module. - Please upgrade to v1.0.7 or newer. - -* Add Python 3.12 wheels (#517) -* Remove Python 2.7, 3.6, and 3.7 support - - -1.0.5 -===== - -Release Date: 2023-03-08 - -* Use ``__BYTE_ORDER__`` instead of ``__BYTE_ORDER`` for portability. (#513, #514) -* Add Python 3.11 wheels (#517) -* fallback: Fix packing multidimensional memoryview (#527) - -1.0.4 -===== - -Release Date: 2022-06-03 - -* Support Python 3.11 (beta). -* Don't define `__*_ENDIAN__` macro on Unix. by @methane in https://github.com/msgpack/msgpack-python/pull/495 -* Use PyFloat_Pack8() on Python 3.11a7 by @vstinner in https://github.com/msgpack/msgpack-python/pull/499 -* Fix Unpacker max_buffer_length handling by @methane in https://github.com/msgpack/msgpack-python/pull/506 - -1.0.3 -===== - -Release Date: 2021-11-24 JST - -* Fix Docstring (#459) -* Fix error formatting (#463) -* Improve error message about strict_map_key (#485) - -1.0.2 -===== - -* Fix year 2038 problem regression in 1.0.1. (#451) - -1.0.1 -===== - -* Add Python 3.9 and linux/arm64 wheels. (#439) -* Fixed Unpacker.tell() after read_bytes() (#426) -* Fixed unpacking datetime before epoch on Windows (#433) -* Fixed fallback Packer didn't check DateTime.tzinfo (#434) - -1.0.0 -===== - -Release Date: 2020-02-17 - -* Remove Python 2 support from the ``msgpack/_cmsgpack``. - ``msgpack/fallback`` still supports Python 2. -* Remove ``encoding`` option from the Packer and Unpacker. -* Unpacker: The default value of ``max_buffer_size`` is changed to 100MiB. -* Unpacker: ``strict_map_key`` is True by default now. -* Unpacker: String map keys are interned. -* Drop old buffer protocol support. -* Support Timestamp type. -* Support serializing and decerializing ``datetime`` object - with tzinfo. -* Unpacker: ``Fix Unpacker.read_bytes()`` in fallback implementation. (#352) - - -0.6.2 -===== - -Release Date: 2019-09-20 - -* Support Python 3.8. -* Update Cython to 0.29.13 for support Python 3.8. -* Some small optimizations. - - -0.6.1 -====== - -Release Date: 2019-01-25 - -This release is for mitigating pain caused by v0.6.0 reduced max input limits -for security reason. - -* ``unpackb(data)`` configures ``max_*_len`` options from ``len(data)``, - instead of static default sizes. - -* ``Unpacker(max_buffer_len=N)`` configures ``max_*_len`` options from ``N``, - instead of static default sizes. - -* ``max_bin_len``, ``max_str_len``, and ``max_ext_len`` are deprecated. - Since this is minor release, it's document only deprecation. - - -0.6.0 -====== - -Release Date: 2018-11-30 - -This release contains some backward incompatible changes for security reason (DoS). - -Important changes ------------------ - -* unpacker: Default value of input limits are smaller than before to avoid DoS attack. - If you need to handle large data, you need to specify limits manually. (#319) - -* Unpacker doesn't wrap underlying ``ValueError`` (including ``UnicodeError``) into - ``UnpackValueError``. If you want to catch all exception during unpack, you need - to use ``try ... except Exception`` with minimum try code block. (#323, #233) - -* ``PackValueError`` and ``PackOverflowError`` are also removed. You need to catch - normal ``ValueError`` and ``OverflowError``. (#323, #233) - -* Unpacker has ``strict_map_key`` option now. When it is true, only bytes and str - (unicode in Python 2) are allowed for map keys. It is recommended to avoid - hashdos. Default value of this option is False for backward compatibility reason. - But it will be changed True in 1.0. (#296, #334) - -Other changes -------------- - -* Extension modules are merged. There is ``msgpack._cmsgpack`` instead of - ``msgpack._packer`` and ``msgpack._unpacker``. (#314, #328) - -* Add ``Unpacker.getbuffer()`` method. (#320) - -* unpacker: ``msgpack.StackError`` is raised when input data contains too - nested data. (#331) - -* unpacker: ``msgpack.FormatError`` is raised when input data is not valid - msgpack format. (#331) - - -0.5.6 -====== - -* Fix fallback.Unpacker.feed() dropped unused data from buffer (#287) -* Resurrect fallback.unpack() and _unpacker.unpack(). - They were removed at 0.5.5 but it breaks backward compatibility. (#288, #290) - -0.5.5 -====== - -* Fix memory leak in pure Python Unpacker.feed() (#283) -* Fix unpack() didn't support `raw` option (#285) - -0.5.4 -====== - -* Undeprecate ``unicode_errors`` option. (#278) - -0.5.3 -====== - -* Fixed regression when passing ``unicode_errors`` to Packer but not ``encoding``. (#277) - -0.5.2 -====== - -* Add ``raw`` option to Unpacker. It is preferred way than ``encoding`` option. - -* Packer.pack() reset buffer on exception (#274) - - -0.5.1 -====== - -* Remove FutureWarning about use_bin_type option (#271) - -0.5.0 -====== - -There are some deprecations. Please read changes carefully. - -Changes -------- - -* Drop Python 2.6 and ~3.4 support. Python 2.7 and 3.5+ are supported. - -* Deprecate useless custom exceptions. Use ValueError instead of PackValueError, - Exception instead of PackException and UnpackException, etc... - See msgpack/exceptions.py - -* Add *strict_types* option to packer. It can be used to serialize subclass of - builtin types. For example, when packing object which type is subclass of dict, - ``default()`` is called. ``default()`` is called for tuple too. - -* Pure Python implementation supports packing memoryview object. - -* Support packing bytearray. - -* Add ``Unpacker.tell()``. And ``write_bytes`` option is deprecated. - - -Bugs fixed ----------- - -* Fixed zero length raw can't be decoded when encoding is specified. (#236) - - -0.4.8 -===== -:release date: 2016-07-29 - -Bugs fixed ----------- - -* Calling ext_hook with wrong length. (Only on Windows, maybe. #203) - - -0.4.7 -===== -:release date: 2016-01-25 - -Bugs fixed ----------- - -* Memory leak when unpack is failed - -Changes -------- - -* Reduce compiler warnings while building extension module -* unpack() now accepts ext_hook argument like Unpacker and unpackb() -* Update Cython version to 0.23.4 -* default function is called when integer overflow - - 0.4.6 ===== :release date: 2015-03-13 @@ -448,7 +133,7 @@ Changes 0.2.4 -===== +======= :release date: 2012-12-22 Bugs fixed @@ -457,7 +142,7 @@ Bugs fixed * Fix SEGV when object_hook or object_pairs_hook raise Exception. (#39) 0.2.3 -===== +======= :release date: 2012-12-11 Changes @@ -465,11 +150,11 @@ Changes * Warn when use_list is not specified. It's default value will be changed in 0.3. Bugs fixed ----------- +----------- * Can't pack subclass of dict. 0.2.2 -===== +======= :release date: 2012-09-21 Changes @@ -478,7 +163,7 @@ Changes object in single precision format. Bugs fixed ----------- +----------- * ``unpack()`` didn't restores gc state when it called with gc disabled. ``unpack()`` doesn't control gc now instead of restoring gc state collectly. User can control gc state when gc cause performance issue. @@ -486,7 +171,7 @@ Bugs fixed * ``Unpacker``'s ``read_size`` option didn't used. 0.2.1 -===== +======= :release date: 2012-08-20 Changes @@ -494,8 +179,8 @@ Changes * Add ``max_buffer_size`` parameter to Unpacker. It limits internal buffer size and allows unpack data from untrusted source safely. -* Unpacker's buffer reallocation algorithm is less greedy now. It cause performance - decrease in rare case but memory efficient and don't allocate than ``max_buffer_size``. +* Unpacker's buffer reallocation algorithm is less greedy now. It cause perforamce + derease in rare case but memory efficient and don't allocate than ``max_buffer_size``. Bugs fixed ---------- @@ -505,7 +190,7 @@ Bugs fixed 0.2.0 -===== +======= :release date: 2012-06-27 Changes @@ -520,16 +205,16 @@ Bugs fixed 0.1.13 -====== +======= :release date: 2012-04-21 New ---- +---- * Don't accept subtype of list and tuple as msgpack list. (Steeve Morin) It allows customize how it serialized with ``default`` argument. Bugs fixed ----------- +----------- * Fix wrong error message. (David Wolever) * Fix memory leak while unpacking when ``object_hook`` or ``list_hook`` is used. (Steeve Morin) @@ -541,21 +226,21 @@ Other changes 0.1.12 -====== +======= :release date: 2011-12-27 Bugs fixed ----------- +------------- * Re-enable packs/unpacks removed at 0.1.11. It will be removed when 0.2 is released. 0.1.11 -====== +======= :release date: 2011-12-26 Bugs fixed ----------- +------------- * Include test code for Python3 to sdist. (Johan Bergström) * Fix compilation error on MSVC. (davidgaleano) @@ -573,7 +258,7 @@ New feature 0.1.9 -===== +====== :release date: 2011-01-29 New feature @@ -587,16 +272,16 @@ Bugs fixed * Add MemoryError check. 0.1.8 -===== +====== :release date: 2011-01-10 New feature ------------ +------------ * Support ``loads`` and ``dumps`` aliases for API compatibility with simplejson and pickle. * Add *object_hook* and *list_hook* option to unpacker. It allows you to - hook unpacking mapping type and array type. + hook unpacing mapping type and array type. * Add *default* option to packer. It allows you to pack unsupported types. @@ -608,13 +293,13 @@ Bugs fixed 0.1.7 -===== +====== :release date: 2010-11-02 New feature ------------ +------------ * Add *object_hook* and *list_hook* option to unpacker. It allows you to - hook unpacking mapping type and array type. + hook unpacing mapping type and array type. * Add *default* option to packer. It allows you to pack unsupported types. diff --git a/DEVELOP.md b/DEVELOP.md deleted file mode 100644 index 27adf8c..0000000 --- a/DEVELOP.md +++ /dev/null @@ -1,17 +0,0 @@ -# Developer's note - -### Build - -``` -$ make cython -``` - - -### Test - -MessagePack uses `pytest` for testing. -Run test with following command: - -``` -$ make test -``` diff --git a/MANIFEST.in b/MANIFEST.in index 6317706..e1912ca 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include setup.py include COPYING -include README.md -recursive-include msgpack *.h *.c *.pyx +include README.rst +recursive-include msgpack *.h *.c *.pyx *.cpp recursive-include test *.py diff --git a/Makefile b/Makefile index 51f3e0e..2e53d08 100644 --- a/Makefile +++ b/Makefile @@ -1,59 +1,22 @@ -PYTHON_SOURCES = msgpack test setup.py +.PHONY: test all python3 -.PHONY: all all: cython python setup.py build_ext -i -f -.PHONY: format -format: - ruff format $(PYTHON_SOURCES) - -.PHONY: lint -lint: - ruff check $(PYTHON_SOURCES) - -.PHONY: doc -doc: - cd docs && sphinx-build -n -v -W --keep-going -b html -d doctrees . html - -.PHONY: pyupgrade -pyupgrade: - @find $(PYTHON_SOURCES) -name '*.py' -type f -exec pyupgrade --py37-plus '{}' \; - -.PHONY: cython -cython: - cython msgpack/_cmsgpack.pyx - -.PHONY: test -test: cython - pip install -e . - pytest -v test - MSGPACK_PUREPYTHON=1 pytest -v test - -.PHONY: serve-doc -serve-doc: all +doc-serve: all cd docs && make serve -.PHONY: clean -clean: - rm -rf build - rm -f msgpack/_cmsgpack.cpp - rm -f msgpack/_cmsgpack.*.so - rm -f msgpack/_cmsgpack.*.pyd - rm -rf msgpack/__pycache__ - rm -rf test/__pycache__ +doc: + cd docs && make zip -.PHONY: update-docker -update-docker: - docker pull quay.io/pypa/manylinux2014_i686 - docker pull quay.io/pypa/manylinux2014_x86_64 - docker pull quay.io/pypa/manylinux2014_aarch64 +upload-doc: + python setup.py upload_docs --upload-dir docs/_build/html -.PHONY: linux-wheel -linux-wheel: - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_i686 bash docker/buildwheel.sh - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_x86_64 bash docker/buildwheel.sh +cython: + cython --cplus msgpack/*.pyx -.PHONY: linux-arm64-wheel -linux-arm64-wheel: - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_aarch64 bash docker/buildwheel.sh +python3: cython + python3 setup.py build_ext -i -f + +test: + py.test test diff --git a/README.md b/README.md deleted file mode 100644 index 1f06324..0000000 --- a/README.md +++ /dev/null @@ -1,242 +0,0 @@ -# MessagePack for Python - -[![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml) -[![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) - -## What is this? - -[MessagePack](https://msgpack.org/) is an efficient binary serialization format. -It lets you exchange data among multiple languages like JSON. -But it's faster and smaller. -This package provides CPython bindings for reading and writing MessagePack data. - -## Install - -``` -$ pip install msgpack -``` - -### Pure Python implementation - -The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy. - -But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy. - - -### Windows - -If you can't use a binary distribution, you need to install Visual Studio -or the Windows SDK on Windows. -Without the extension, the pure Python implementation on CPython runs slowly. - - -## How to use - -### One-shot pack & unpack - -Use `packb` for packing and `unpackb` for unpacking. -msgpack provides `dumps` and `loads` as aliases for compatibility with -`json` and `pickle`. - -`pack` and `dump` pack to a file-like object. -`unpack` and `load` unpack from a file-like object. - -```pycon ->>> import msgpack ->>> msgpack.packb([1, 2, 3]) -'\x93\x01\x02\x03' ->>> msgpack.unpackb(_) -[1, 2, 3] -``` - -Read the docstring for options. - - -### Streaming unpacking - -`Unpacker` is a "streaming unpacker". It unpacks multiple objects from one -stream (or from bytes provided through its `feed` method). - -```py -import msgpack -from io import BytesIO - -buf = BytesIO() -for i in range(100): - buf.write(msgpack.packb(i)) - -buf.seek(0) - -unpacker = msgpack.Unpacker(buf) -for unpacked in unpacker: - print(unpacked) -``` - - -### Packing/unpacking of custom data types - -It is also possible to pack/unpack custom data types. Here is an example for -`datetime.datetime`. - -```py -import datetime -import msgpack - -useful_dict = { - "id": 1, - "created": datetime.datetime.now(), -} - -def decode_datetime(obj): - if '__datetime__' in obj: - obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") - return obj - -def encode_datetime(obj): - if isinstance(obj, datetime.datetime): - return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")} - return obj - - -packed_dict = msgpack.packb(useful_dict, default=encode_datetime) -this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) -``` - -`Unpacker`'s `object_hook` callback receives a dict; the -`object_pairs_hook` callback may instead be used to receive a list of -key-value pairs. - -NOTE: msgpack can encode datetime with tzinfo into standard ext type for now. -See `datetime` option in `Packer` docstring. - - -### Extended types - -It is also possible to pack/unpack custom data types using the **ext** type. - -```pycon ->>> import msgpack ->>> import array ->>> def default(obj): -... if isinstance(obj, array.array) and obj.typecode == 'd': -... return msgpack.ExtType(42, obj.tostring()) -... raise TypeError("Unknown type: %r" % (obj,)) -... ->>> def ext_hook(code, data): -... if code == 42: -... a = array.array('d') -... a.fromstring(data) -... return a -... return ExtType(code, data) -... ->>> data = array.array('d', [1.2, 3.4]) ->>> packed = msgpack.packb(data, default=default) ->>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) ->>> data == unpacked -True -``` - - -### Advanced unpacking control - -As an alternative to iteration, `Unpacker` objects provide `unpack`, -`skip`, `read_array_header`, and `read_map_header` methods. The former two -read an entire message from the stream, respectively deserializing and returning -the result, or ignoring it. The latter two methods return the number of elements -in the upcoming container, so that each element in an array, or key-value pair -in a map, can be unpacked or skipped individually. - - -## Notes - -### String and binary types in the old MessagePack spec - -Early versions of msgpack didn't distinguish string and binary types. -The type for representing both string and binary types was named **raw**. - -You can pack into and unpack from this old spec using `use_bin_type=False` -and `raw=True` options. - -```pycon ->>> import msgpack ->>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True) -[b'spam', b'eggs'] ->>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False) -[b'spam', 'eggs'] -``` - -### ext type - -To use the **ext** type, pass a `msgpack.ExtType` object to the packer. - -```pycon ->>> import msgpack ->>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) ->>> msgpack.unpackb(packed) -ExtType(code=42, data='xyzzy') -``` - -You can use it with `default` and `ext_hook`. See below. - - -### Security - -When unpacking data received from an unreliable source, msgpack provides -two security options. - -`max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size. -It is also used to limit preallocated list sizes. - -`strict_map_key` (default: `True`) limits the type of map keys to bytes and str. -While the MessagePack spec doesn't limit map key types, -there is a risk of a hash DoS. -If you need to support other types for map keys, use `strict_map_key=False`. - - -### Performance tips - -CPython's GC starts when the number of allocated objects grows. -This means unpacking may trigger unnecessary GC. -You can use `gc.disable()` when unpacking a large message. - -A list is the default sequence type in Python. -However, a tuple is lighter than a list. -You can use `use_list=False` while unpacking when performance is important. - - -## Major breaking changes in the history - -### msgpack 0.5 - -The package name on PyPI was changed from `msgpack-python` to `msgpack` in 0.5. - -When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before -`pip install -U msgpack`. - - -### msgpack 1.0 - -* Python 2 support - - * The extension module no longer supports Python 2. - The pure Python implementation (`msgpack.fallback`) is used for Python 2. - - * msgpack 1.0.6 drops official support of Python 2.7, as pip and - GitHub Action "setup-python" no longer supports Python 2.7. - -* Packer - - * Packer uses `use_bin_type=True` by default. - Bytes are encoded in the bin type in MessagePack. - * The `encoding` option is removed. UTF-8 is always used. - -* Unpacker - - * Unpacker uses `raw=False` by default. It assumes str values are valid UTF-8 strings - and decodes them to Python str (Unicode) objects. - * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str). - * The default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attacks. - You need to pass `max_buffer_size=0` if you have large but safe data. - * The default value of `strict_map_key` is changed to True to avoid hash DoS. - You need to pass `strict_map_key=False` if you have data that contain map keys - whose type is neither bytes nor str. diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..3c1957d --- /dev/null +++ b/README.rst @@ -0,0 +1,269 @@ +======================= +MessagePack for Python +======================= + +:author: INADA Naoki +:version: 0.4.5 +:date: 2015-01-25 + +.. image:: https://secure.travis-ci.org/msgpack/msgpack-python.png + :target: https://travis-ci.org/#!/msgpack/msgpack-python + +.. image:: https://pypip.in/version/msgpack-python/badge.svg + :target: https://pypi.python.org/pypi/msgpack-python/ + :alt: Latest Version + +What's this +------------ + +`MessagePack `_ is a fast, compact binary serialization format, suitable for +similar data to JSON. This package provides CPython bindings for reading and +writing MessagePack data. + +Install +--------- + +:: + + $ pip install msgpack-python + +PyPy +^^^^^ + +msgpack-python provides pure python implementation. PyPy can use this. + +Windows +^^^^^^^ + +When you can't use binary distribution, you need to install Visual Studio +or Windows SDK on Windows. (NOTE: Visual C++ Express 2010 doesn't support +amd64. Windows SDK is recommanded way to build amd64 msgpack without any fee.) + +Without extension, using pure python implementation on CPython runs slowly. + +Notes +----- + +Note for msgpack 2.0 support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +msgpack 2.0 adds two types: *bin* and *ext*. + +*raw* was bytes or string type like Python 2's ``str``. +To distinguish string and bytes, msgpack 2.0 adds *bin*. +It is non-string binary like Python 3's ``bytes``. + +To use *bin* type for packing ``bytes``, pass ``use_bin_type=True`` to +packer argument. + +.. code-block:: pycon + + >>> import msgpack + >>> packed = msgpack.packb([b'spam', u'egg'], use_bin_type=True) + >>> msgpack.unpackb(packed, encoding='utf-8') + ['spam', u'egg'] + +You shoud use it carefully. When you use ``use_bin_type=True``, packed +binary can be unpacked by unpackers supporting msgpack-2.0. + +To use *ext* type, pass ``msgpack.ExtType`` object to packer. + +.. code-block:: pycon + + >>> import msgpack + >>> packed = msgpack.packb(msgpack.ExtType(42, b'xyzzy')) + >>> msgpack.unpackb(packed) + ExtType(code=42, data='xyzzy') + +You can use it with ``default`` and ``ext_hook``. See below. + +Note for msgpack 0.2.x users +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The msgpack 0.3 have some incompatible changes. + +The default value of ``use_list`` keyword argument is ``True`` from 0.3. +You should pass the argument explicitly for backward compatibility. + +`Unpacker.unpack()` and some unpack methods now raises `OutOfData` +instead of `StopIteration`. +`StopIteration` is used for iterator protocol only. + + +How to use +----------- + +One-shot pack & unpack +^^^^^^^^^^^^^^^^^^^^^^ + +Use ``packb`` for packing and ``unpackb`` for unpacking. +msgpack provides ``dumps`` and ``loads`` as alias for compatibility with +``json`` and ``pickle``. + +``pack`` and ``dump`` packs to file-like object. +``unpack`` and ``load`` unpacks from file-like object. + +.. code-block:: pycon + + >>> import msgpack + >>> msgpack.packb([1, 2, 3]) + '\x93\x01\x02\x03' + >>> msgpack.unpackb(_) + [1, 2, 3] + +``unpack`` unpacks msgpack's array to Python's list, but can unpack to tuple: + +.. code-block:: pycon + + >>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False) + (1, 2, 3) + +You should always pass the ``use_list`` keyword argument. See performance issues relating to `use_list option`_ below. + +Read the docstring for other options. + + +Streaming unpacking +^^^^^^^^^^^^^^^^^^^ + +``Unpacker`` is a "streaming unpacker". It unpacks multiple objects from one +stream (or from bytes provided through its ``feed`` method). + +.. code-block:: python + + import msgpack + from io import BytesIO + + buf = BytesIO() + for i in range(100): + buf.write(msgpack.packb(range(i))) + + buf.seek(0) + + unpacker = msgpack.Unpacker(buf) + for unpacked in unpacker: + print unpacked + + +Packing/unpacking of custom data type +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +It is also possible to pack/unpack custom data types. Here is an example for +``datetime.datetime``. + +.. code-block:: python + + import datetime + + import msgpack + + useful_dict = { + "id": 1, + "created": datetime.datetime.now(), + } + + def decode_datetime(obj): + if b'__datetime__' in obj: + obj = datetime.datetime.strptime(obj["as_str"], "%Y%m%dT%H:%M:%S.%f") + return obj + + def encode_datetime(obj): + if isinstance(obj, datetime.datetime): + return {'__datetime__': True, 'as_str': obj.strftime("%Y%m%dT%H:%M:%S.%f")} + return obj + + + packed_dict = msgpack.packb(useful_dict, default=encode_datetime) + this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) + +``Unpacker``'s ``object_hook`` callback receives a dict; the +``object_pairs_hook`` callback may instead be used to receive a list of +key-value pairs. + +Extended types +^^^^^^^^^^^^^^^ + +It is also possible to pack/unpack custom data types using the msgpack 2.0 feature. + +.. code-block:: pycon + + >>> import msgpack + >>> import array + >>> def default(obj): + ... if isinstance(obj, array.array) and obj.typecode == 'd': + ... return msgpack.ExtType(42, obj.tostring()) + ... raise TypeError("Unknown type: %r" % (obj,)) + ... + >>> def ext_hook(code, data): + ... if code == 42: + ... a = array.array('d') + ... a.fromstring(data) + ... return a + ... return ExtType(code, data) + ... + >>> data = array.array('d', [1.2, 3.4]) + >>> packed = msgpack.packb(data, default=default) + >>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) + >>> data == unpacked + True + + +Advanced unpacking control +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +As an alternative to iteration, ``Unpacker`` objects provide ``unpack``, +``skip``, ``read_array_header`` and ``read_map_header`` methods. The former two +read an entire message from the stream, respectively deserialising and returning +the result, or ignoring it. The latter two methods return the number of elements +in the upcoming container, so that each element in an array, or key-value pair +in a map, can be unpacked or skipped individually. + +Each of these methods may optionally write the packed data it reads to a +callback function: + +.. code-block:: python + + from io import BytesIO + + def distribute(unpacker, get_worker): + nelems = unpacker.read_map_header() + for i in range(nelems): + # Select a worker for the given key + key = unpacker.unpack() + worker = get_worker(key) + + # Send the value as a packed message to worker + bytestream = BytesIO() + unpacker.skip(bytestream.write) + worker.send(bytestream.getvalue()) + +Note about performance +------------------------ + +GC +^^ + +CPython's GC starts when growing allocated object. +This means unpacking may cause useless GC. +You can use ``gc.disable()`` when unpacking large message. + +use_list option +^^^^^^^^^^^^^^^^ +List is the default sequence type of Python. +But tuple is lighter than list. +You can use ``use_list=False`` while unpacking when performance is important. + +Python's dict can't use list as key and MessagePack allows array for key of mapping. +``use_list=False`` allows unpacking such message. +Another way to unpacking such object is using ``object_pairs_hook``. + + +Test +---- +MessagePack uses `pytest` for testing. +Run test with following command: + + $ py.test + +.. + vim: filetype=rst diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index 75f0c54..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,5 +0,0 @@ -## Security contact information - -To report a security vulnerability, please use the -[Tidelift security contact](https://tidelift.com/security). -Tidelift will coordinate the fix and disclosure. \ No newline at end of file diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py index 2e778dd..80819c6 100644 --- a/benchmark/benchmark.py +++ b/benchmark/benchmark.py @@ -1,8 +1,6 @@ from msgpack import fallback - try: - from msgpack import _cmsgpack - + from msgpack import _unpacker, _packer has_ext = True except ImportError: has_ext = False @@ -11,28 +9,26 @@ import timeit def profile(name, func): times = timeit.repeat(func, number=1000, repeat=4) - times = ", ".join(["%8f" % t for t in times]) + times = ', '.join(["%8f" % t for t in times]) print("%-30s %40s" % (name, times)) def simple(name, data): if has_ext: - packer = _cmsgpack.Packer() + packer = _packer.Packer() profile("packing %s (ext)" % name, lambda: packer.pack(data)) packer = fallback.Packer() - profile("packing %s (fallback)" % name, lambda: packer.pack(data)) + profile('packing %s (fallback)' % name, lambda: packer.pack(data)) data = packer.pack(data) if has_ext: - profile("unpacking %s (ext)" % name, lambda: _cmsgpack.unpackb(data)) - profile("unpacking %s (fallback)" % name, lambda: fallback.unpackb(data)) - + profile('unpacking %s (ext)' % name, lambda: _unpacker.unpackb(data)) + profile('unpacking %s (fallback)' % name, lambda: fallback.unpackb(data)) def main(): - simple("integers", [7] * 10000) - simple("bytes", [b"x" * n for n in range(100)] * 10) - simple("lists", [[]] * 10000) - simple("dicts", [{}] * 10000) - + simple("integers", [7]*10000) + simple("bytes", [b'x'*n for n in range(100)]*10) + simple("lists", [[]]*10000) + simple("dicts", [{}]*10000) main() diff --git a/build_windows.bat b/build_windows.bat new file mode 100644 index 0000000..a71c0e0 --- /dev/null +++ b/build_windows.bat @@ -0,0 +1,24 @@ +set MSSdk=1 +set DISTUTILS_USE_SDK=1 + +rem Python27 x86 +rem call "C:\Program Files\Microsoft SDKs\Windows\v6.1\Bin\SetEnv.cmd" /Release /x86 /xp +call "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin\vcvars32.bat" +c:\Python27\python setup.py build_ext -f build install +pause + +rem Python27 amd64 +rem call "C:\Program Files\Microsoft SDKs\Windows\v6.1\Bin\SetEnv.cmd" /Release /x64 /xp +call "C:\Program Files (x86)\Microsoft Visual Studio 9.0\VC\bin\vcvars64.bat" +c:\Python27_amd64\python setup.py build_ext -f build install +pause + +rem Python33 x86 +call "C:\Program Files\Microsoft SDKs\Windows\v7.1\bin\SetEnv.cmd" /Release /x86 /xp +c:\Python33\python setup.py build_ext -f build install +pause + +rem Python33 amd64 +call "C:\Program Files\Microsoft SDKs\Windows\v7.1\bin\SetEnv.cmd" /Release /x64 /xp +c:\Python33_amd64\python setup.py build_ext -f build install +pause diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh deleted file mode 100644 index ff34139..0000000 --- a/docker/buildwheel.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash -DOCKER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -source "$DOCKER_DIR/shared.env" - -set -e -x - -ARCH=`uname -p` -echo "arch=$ARCH" - -ls /opt/python - -for V in "${PYTHON_VERSIONS[@]}"; do - PYBIN=/opt/python/$V/bin - rm -rf build/ # Avoid lib build by narrow Python is used by wide python - $PYBIN/python -m build -w -done - -cd dist -for whl in *.whl; do - auditwheel repair "$whl" - rm "$whl" -done diff --git a/docker/runtests.sh b/docker/runtests.sh deleted file mode 100755 index fa7e979..0000000 --- a/docker/runtests.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash -DOCKER_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -source "$DOCKER_DIR/shared.env" - -set -e -x - -for V in "${PYTHON_VERSIONS[@]}"; do - PYBIN=/opt/python/$V/bin - $PYBIN/python setup.py install - rm -rf build/ # Avoid lib build by narrow Python is used by wide python - $PYBIN/pip install pytest - pushd test # prevent importing msgpack package in current directory. - $PYBIN/python -c 'import sys; print(hex(sys.maxsize))' - $PYBIN/python -c 'from msgpack import _cmsgpack' # Ensure extension is available - $PYBIN/pytest -v . - popd -done diff --git a/docker/shared.env b/docker/shared.env deleted file mode 100644 index 80274ac..0000000 --- a/docker/shared.env +++ /dev/null @@ -1,7 +0,0 @@ -PYTHON_VERSIONS=( - cp310-cp310 - cp39-cp39 - cp38-cp38 - cp37-cp37m - cp36-cp36m -) diff --git a/docs/Makefile b/docs/Makefile index 831a6a7..0869604 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -153,7 +153,7 @@ doctest: "results in $(BUILDDIR)/doctest/output.txt." serve: html - python3 -m http.server -d _build/html + cd _build/html && python3.3 -m http.server zip: html cd _build/html && zip -r ../../../msgpack-doc.zip . diff --git a/docs/_static/README.txt b/docs/_static/README.txt deleted file mode 100644 index 1c70594..0000000 --- a/docs/_static/README.txt +++ /dev/null @@ -1 +0,0 @@ -Sphinx will copy the contents of docs/_static/ directory to the build location. diff --git a/docs/advanced.rst b/docs/advanced.rst deleted file mode 100644 index 3837008..0000000 --- a/docs/advanced.rst +++ /dev/null @@ -1,32 +0,0 @@ -Advanced usage -=============== - -Packer ------- - -autoreset -~~~~~~~~~ - -When you used ``autoreset=False`` option of :class:`~msgpack.Packer`, -``pack()`` method doesn't return packed ``bytes``. - -You can use :meth:`~msgpack.Packer.bytes` or :meth:`~msgpack.Packer.getbuffer` to -get packed data. - -``bytes()`` returns ``bytes`` object. ``getbuffer()`` returns some bytes-like -object. It's concrete type is implement detail and it will be changed in future -versions. - -You can reduce temporary bytes object by using ``Unpacker.getbuffer()``. - -.. code-block:: python - - packer = Packer(use_bin_type=True, autoreset=False) - - packer.pack([1, 2]) - packer.pack([3, 4]) - - with open('data.bin', 'wb') as f: - f.write(packer.getbuffer()) - - packer.reset() # reset internal buffer diff --git a/docs/api.rst b/docs/api.rst index f5dfbbd..841c134 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -5,19 +5,19 @@ API reference .. autofunction:: pack -``dump()`` is an alias for :func:`pack` +:func:`dump` is alias for :func:`pack` .. autofunction:: packb -``dumps()`` is an alias for :func:`packb` +:func:`dumps` is alias for :func:`packb` .. autofunction:: unpack -``load()`` is an alias for :func:`unpack` +:func:`load` is alias for :func:`unpack` .. autofunction:: unpackb -``loads()`` is an alias for :func:`unpackb` +:func:`loads` is alias for :func:`unpackb` .. autoclass:: Packer :members: @@ -27,12 +27,8 @@ API reference .. autoclass:: ExtType -.. autoclass:: Timestamp - :members: - :special-members: __init__ - exceptions ----------- +----------- These exceptions are accessible via `msgpack` package. (For example, `msgpack.OutOfData` is shortcut for `msgpack.exceptions.OutOfData`) diff --git a/docs/conf.py b/docs/conf.py index 28116cd..0f19fcc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- +# # msgpack documentation build configuration file, created by # sphinx-quickstart on Sun Feb 24 14:20:50 2013. # @@ -9,37 +11,37 @@ # All configuration values have a default; values that are commented out # serve to show the default. +import sys, os + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -# import os -# import sys -# sys.path.insert(0, os.path.abspath('..')) +#sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +#needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.autodoc", "sphinx.ext.viewcode"] +extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode'] # Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] +templates_path = ['_templates'] # The suffix of source filenames. -source_suffix = ".rst" +source_suffix = '.rst' # The encoding of source files. -# source_encoding = 'utf-8-sig' +#source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = "index" +master_doc = 'index' # General information about the project. -project = "msgpack" -copyright = "Inada Naoki" +project = u'msgpack' +copyright = u'2013, INADA Naoki' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -47,170 +49,176 @@ copyright = "Inada Naoki" # # The short X.Y version. # The full version, including alpha/beta/rc tags. -version = release = "1.0" +version = release = '0.4' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -# language = None +#language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -# today = '' +#today = '' # Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' +#today_fmt = '%B %d, %Y' today_fmt = "%Y-%m-%d" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ["_build"] +exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. -# default_role = None +#default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -# add_function_parentheses = True +#add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -# add_module_names = True +#add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -# show_authors = False +#show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" +pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] +#modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = "sphinx_rtd_theme" +html_theme = 'sphinxdoc' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -# html_theme_options = {} +#html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] +#html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -# html_title = None +#html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -# html_short_title = None +#html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -# html_logo = None +#html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -# html_favicon = None +#html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ["_static"] +html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -# html_last_updated_fmt = '%b %d, %Y' +#html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -# html_use_smartypants = True +#html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -# html_sidebars = {} +#html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -# html_additional_pages = {} +#html_additional_pages = {} # If false, no module index is generated. -# html_domain_indices = True +#html_domain_indices = True # If false, no index is generated. -# html_use_index = True +#html_use_index = True # If true, the index is split into individual pages for each letter. -# html_split_index = False +#html_split_index = False # If true, links to the reST sources are added to the pages. -# html_show_sourcelink = True +#html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# html_show_sphinx = True +#html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -# html_show_copyright = True +#html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -# html_use_opensearch = '' +#html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None +#html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = "msgpackdoc" +htmlhelp_basename = 'msgpackdoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - #'papersize': 'letterpaper', - # The font size ('10pt', '11pt' or '12pt'). - #'pointsize': '10pt', - # Additional stuff for the LaTeX preamble. - #'preamble': '', +# The paper size ('letterpaper' or 'a4paper'). +#'papersize': 'letterpaper', + +# The font size ('10pt', '11pt' or '12pt'). +#'pointsize': '10pt', + +# Additional stuff for the LaTeX preamble. +#'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ("index", "msgpack.tex", "msgpack Documentation", "Author", "manual"), + ('index', 'msgpack.tex', u'msgpack Documentation', + u'Author', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -# latex_logo = None +#latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -# latex_use_parts = False +#latex_use_parts = False # If true, show page references after internal links. -# latex_show_pagerefs = False +#latex_show_pagerefs = False # If true, show URL addresses after external links. -# latex_show_urls = False +#latex_show_urls = False # Documents to append as an appendix to all manuals. -# latex_appendices = [] +#latex_appendices = [] # If false, no module index is generated. -# latex_domain_indices = True +#latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [("index", "msgpack", "msgpack Documentation", ["Author"], 1)] +man_pages = [ + ('index', 'msgpack', u'msgpack Documentation', + [u'Author'], 1) +] # If true, show URL addresses after external links. -# man_show_urls = False +#man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ @@ -219,65 +227,59 @@ man_pages = [("index", "msgpack", "msgpack Documentation", ["Author"], 1)] # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ( - "index", - "msgpack", - "msgpack Documentation", - "Author", - "msgpack", - "One line description of project.", - "Miscellaneous", - ), + ('index', 'msgpack', u'msgpack Documentation', + u'Author', 'msgpack', 'One line description of project.', + 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. -# texinfo_appendices = [] +#texinfo_appendices = [] # If false, no module index is generated. -# texinfo_domain_indices = True +#texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -# texinfo_show_urls = 'footnote' +#texinfo_show_urls = 'footnote' # -- Options for Epub output --------------------------------------------------- # Bibliographic Dublin Core info. -epub_title = "msgpack" -epub_author = "Author" -epub_publisher = "Author" -epub_copyright = "2013, Author" +epub_title = u'msgpack' +epub_author = u'Author' +epub_publisher = u'Author' +epub_copyright = u'2013, Author' # The language of the text. It defaults to the language option # or en if the language is not set. -# epub_language = '' +#epub_language = '' # The scheme of the identifier. Typical schemes are ISBN or URL. -# epub_scheme = '' +#epub_scheme = '' # The unique identifier of the text. This can be a ISBN number # or the project homepage. -# epub_identifier = '' +#epub_identifier = '' # A unique identification for the text. -# epub_uid = '' +#epub_uid = '' # A tuple containing the cover image and cover page html template filenames. -# epub_cover = () +#epub_cover = () # HTML files that should be inserted before the pages created by sphinx. # The format is a list of tuples containing the path and title. -# epub_pre_files = [] +#epub_pre_files = [] # HTML files shat should be inserted after the pages created by sphinx. # The format is a list of tuples containing the path and title. -# epub_post_files = [] +#epub_post_files = [] # A list of files that should not be packed into the epub file. -# epub_exclude_files = [] +#epub_exclude_files = [] # The depth of the table of contents in toc.ncx. -# epub_tocdepth = 3 +#epub_tocdepth = 3 # Allow duplicate toc entries. -# epub_tocdup = True +#epub_tocdup = True diff --git a/docs/index.rst b/docs/index.rst index e9c2ce8..72d4499 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,5 +1,5 @@ msgpack document -================ +================== `MessagePack `_ is a efficient format for inter language data exchange. @@ -8,4 +8,3 @@ language data exchange. :maxdepth: 1 api - advanced diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 26002de..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -sphinx~=7.3.7 -sphinx-rtd-theme~=2.0.0 diff --git a/msgpack/__init__.py b/msgpack/__init__.py index f3266b7..6c5ae53 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,20 +1,31 @@ -# ruff: noqa: F401 +# coding: utf-8 +from msgpack._version import version +from msgpack.exceptions import * + +from collections import namedtuple + + +class ExtType(namedtuple('ExtType', 'code data')): + """ExtType represents ext type in msgpack.""" + def __new__(cls, code, data): + if not isinstance(code, int): + raise TypeError("code must be int") + if not isinstance(data, bytes): + raise TypeError("data must be bytes") + if not 0 <= code <= 127: + raise ValueError("code must be 0~127") + return super(ExtType, cls).__new__(cls, code, data) + + import os - -from .exceptions import * # noqa: F403 -from .ext import ExtType, Timestamp - -version = (1, 1, 2) -__version__ = "1.1.2" - - -if os.environ.get("MSGPACK_PUREPYTHON"): - from .fallback import Packer, Unpacker, unpackb +if os.environ.get('MSGPACK_PUREPYTHON'): + from msgpack.fallback import Packer, unpack, unpackb, Unpacker else: try: - from ._cmsgpack import Packer, Unpacker, unpackb + from msgpack._packer import Packer + from msgpack._unpacker import unpack, unpackb, Unpacker except ImportError: - from .fallback import Packer, Unpacker, unpackb + from msgpack.fallback import Packer, unpack, unpackb, Unpacker def pack(o, stream, **kwargs): @@ -35,18 +46,6 @@ def packb(o, **kwargs): """ return Packer(**kwargs).pack(o) - -def unpack(stream, **kwargs): - """ - Unpack an object from `stream`. - - Raises `ExtraData` when `stream` contains extra bytes. - See :class:`Unpacker` for options. - """ - data = stream.read() - return unpackb(data, **kwargs) - - # alias for compatibility to simplejson/marshal/pickle. load = unpack loads = unpackb diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx deleted file mode 100644 index 9680b31..0000000 --- a/msgpack/_cmsgpack.pyx +++ /dev/null @@ -1,12 +0,0 @@ -#cython: embedsignature=True, c_string_encoding=ascii, language_level=3 -#cython: freethreading_compatible = True -import cython -from cpython.datetime cimport import_datetime, datetime_new -import_datetime() - -import datetime -cdef object utc = datetime.timezone.utc -cdef object epoch = datetime_new(1970, 1, 1, 0, 0, 0, 0, tz=utc) - -include "_packer.pyx" -include "_unpacker.pyx" diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 94d1462..fcd20a7 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -1,19 +1,14 @@ +# coding: utf-8 +#cython: embedsignature=True + from cpython cimport * -from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact -from cpython.datetime cimport ( - PyDateTime_CheckExact, PyDelta_CheckExact, - datetime_tzinfo, timedelta_days, timedelta_seconds, timedelta_microseconds, -) +from libc.stdlib cimport * +from libc.string cimport * +from libc.limits cimport * -cdef ExtType -cdef Timestamp +from msgpack.exceptions import PackValueError +from msgpack import ExtType -from .ext import ExtType, Timestamp - - -cdef extern from "Python.h": - - int PyMemoryView_Check(object obj) cdef extern from "pack.h": struct msgpack_packer: @@ -22,39 +17,30 @@ cdef extern from "pack.h": size_t buf_size bint use_bin_type - int msgpack_pack_nil(msgpack_packer* pk) except -1 - int msgpack_pack_true(msgpack_packer* pk) except -1 - int msgpack_pack_false(msgpack_packer* pk) except -1 - int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1 - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1 - int msgpack_pack_float(msgpack_packer* pk, float d) except -1 - int msgpack_pack_double(msgpack_packer* pk, double d) except -1 - int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1 - int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1 - int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1 - + int msgpack_pack_int(msgpack_packer* pk, int d) + int msgpack_pack_nil(msgpack_packer* pk) + int msgpack_pack_true(msgpack_packer* pk) + int msgpack_pack_false(msgpack_packer* pk) + int msgpack_pack_long(msgpack_packer* pk, long d) + int msgpack_pack_long_long(msgpack_packer* pk, long long d) + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) + int msgpack_pack_float(msgpack_packer* pk, float d) + int msgpack_pack_double(msgpack_packer* pk, double d) + int msgpack_pack_array(msgpack_packer* pk, size_t l) + int msgpack_pack_map(msgpack_packer* pk, size_t l) + int msgpack_pack_raw(msgpack_packer* pk, size_t l) + int msgpack_pack_bin(msgpack_packer* pk, size_t l) + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) cdef int DEFAULT_RECURSE_LIMIT=511 -cdef long long ITEM_LIMIT = (2**32)-1 -cdef inline int PyBytesLike_Check(object o): - return PyBytes_Check(o) or PyByteArray_Check(o) - - -cdef inline int PyBytesLike_CheckExact(object o): - return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o) - - -cdef class Packer: +cdef class Packer(object): """ MessagePack Packer - Usage:: + usage:: packer = Packer() astream.write(packer.pack(a)) @@ -62,303 +48,247 @@ cdef class Packer: Packer's constructor has some keyword arguments: - :param default: - When specified, it should be callable. + :param callable default: Convert user type to builtin type that Packer supports. See also simplejson's document. - + :param str encoding: + Convert unicode to bytes with this encoding. (default: 'utf-8') + :param str unicode_errors: + Error handler for encoding unicode. (default: 'strict') :param bool use_single_float: Use single precision float type for float. (default: False) - :param bool autoreset: - Reset buffer after each pack and return its content as `bytes`. (default: True). + Reset buffer after each pack and return it's content as `bytes`. (default: True). If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. - :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. - It also enables str8 type for unicode. (default: True) - - :param bool strict_types: - If set to true, types will be checked to be exact. Derived classes - from serializeable types will not be serialized and will be - treated as unsupported type and forwarded to default. - Additionally tuples will not be serialized as lists. - This is useful when trying to implement accurate serialization - for python types. - - :param bool datetime: - If set to true, datetime with tzinfo is packed into Timestamp type. - Note that the tzinfo is stripped in the timestamp. - You can get UTC datetime with `timestamp=3` option of the Unpacker. - - :param str unicode_errors: - The error handler for encoding unicode. (default: 'strict') - DO NOT USE THIS!! This option is kept for very specific usage. - - :param int buf_size: - The size of the internal buffer. (default: 256*1024) - Useful if serialisation size can be correctly estimated, - avoid unnecessary reallocations. + It also enable str8 type for unicode. """ cdef msgpack_packer pk cdef object _default + cdef object _bencoding cdef object _berrors - cdef const char *unicode_errors - cdef size_t exports # number of exported buffers - cdef bint strict_types - cdef bint use_float + cdef char *encoding + cdef char *unicode_errors + cdef bool use_float cdef bint autoreset - cdef bint datetime - def __cinit__(self, buf_size=256*1024, **_kwargs): - self.pk.buf = PyMem_Malloc(buf_size) + def __cinit__(self): + cdef int buf_size = 1024*1024 + self.pk.buf = malloc(buf_size); if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.pk.buf_size = buf_size self.pk.length = 0 - self.exports = 0 - def __dealloc__(self): - PyMem_Free(self.pk.buf) - self.pk.buf = NULL - assert self.exports == 0 - - cdef _check_exports(self): - if self.exports > 0: - raise BufferError("Existing exports of data: Packer cannot be changed") - - @cython.critical_section - def __init__(self, *, default=None, - bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, - bint strict_types=False, bint datetime=False, unicode_errors=None, - buf_size=256*1024): + def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', + use_single_float=False, bint autoreset=1, bint use_bin_type=0): + """ + """ self.use_float = use_single_float - self.strict_types = strict_types self.autoreset = autoreset - self.datetime = datetime self.pk.use_bin_type = use_bin_type if default is not None: if not PyCallable_Check(default): raise TypeError("default must be a callable.") self._default = default - - self._berrors = unicode_errors - if unicode_errors is None: + if encoding is None: + self.encoding = NULL self.unicode_errors = NULL else: - self.unicode_errors = self._berrors - - # returns -2 when default should(o) be called - cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1: - cdef long long llval - cdef unsigned long long ullval - cdef unsigned long ulval - cdef const char* rawval - cdef Py_ssize_t L - cdef Py_buffer view - cdef bint strict = self.strict_types - - if o is None: - msgpack_pack_nil(&self.pk) - elif o is True: - msgpack_pack_true(&self.pk) - elif o is False: - msgpack_pack_false(&self.pk) - elif PyLong_CheckExact(o) if strict else PyLong_Check(o): - try: - if o > 0: - ullval = o - msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - msgpack_pack_long_long(&self.pk, llval) - except OverflowError as oe: - if will_default: - return -2 - else: - raise OverflowError("Integer value out of range") - elif PyFloat_CheckExact(o) if strict else PyFloat_Check(o): - if self.use_float: - msgpack_pack_float(&self.pk, o) + if isinstance(encoding, unicode): + self._bencoding = encoding.encode('ascii') else: - msgpack_pack_double(&self.pk, o) - elif PyBytesLike_CheckExact(o) if strict else PyBytesLike_Check(o): - L = Py_SIZE(o) - if L > ITEM_LIMIT: - PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) - rawval = o - msgpack_pack_bin(&self.pk, L) - msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_CheckExact(o) if strict else PyUnicode_Check(o): - if self.unicode_errors == NULL: - rawval = PyUnicode_AsUTF8AndSize(o, &L) - if L >ITEM_LIMIT: - raise ValueError("unicode string is too large") + self._bencoding = encoding + self.encoding = PyBytes_AsString(self._bencoding) + if isinstance(unicode_errors, unicode): + self._berrors = unicode_errors.encode('ascii') else: - o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) - L = Py_SIZE(o) - if L > ITEM_LIMIT: - raise ValueError("unicode string is too large") - rawval = o - msgpack_pack_raw(&self.pk, L) - msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyDict_CheckExact(o) if strict else PyDict_Check(o): - L = len(o) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") - msgpack_pack_map(&self.pk, L) - for k, v in o.items(): - self._pack(k, nest_limit) - self._pack(v, nest_limit) - elif type(o) is ExtType if strict else isinstance(o, ExtType): - # This should be before Tuple because ExtType is namedtuple. - rawval = o.data - L = len(o.data) - if L > ITEM_LIMIT: - raise ValueError("EXT data is too large") - msgpack_pack_ext(&self.pk, o.code, L) - msgpack_pack_raw_body(&self.pk, rawval, L) - elif type(o) is Timestamp: - llval = o.seconds - ulval = o.nanoseconds - msgpack_pack_timestamp(&self.pk, llval, ulval) - elif PyList_CheckExact(o) if strict else (PyTuple_Check(o) or PyList_Check(o)): - L = Py_SIZE(o) - if L > ITEM_LIMIT: - raise ValueError("list is too large") - msgpack_pack_array(&self.pk, L) - for v in o: - self._pack(v, nest_limit) - elif PyMemoryView_Check(o): - PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) - L = view.len - if L > ITEM_LIMIT: - PyBuffer_Release(&view); - raise ValueError("memoryview is too large") - try: - msgpack_pack_bin(&self.pk, L) - msgpack_pack_raw_body(&self.pk, view.buf, L) - finally: - PyBuffer_Release(&view); - elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: - delta = o - epoch - if not PyDelta_CheckExact(delta): - raise ValueError("failed to calculate delta") - llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) - ulval = timedelta_microseconds(delta) * 1000 - msgpack_pack_timestamp(&self.pk, llval, ulval) - elif will_default: - return -2 - elif self.datetime and PyDateTime_CheckExact(o): - # this should be later than will_default - PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) - else: - PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) + self._berrors = unicode_errors + self.unicode_errors = PyBytes_AsString(self._berrors) + + def __dealloc__(self): + free(self.pk.buf); cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + cdef long long llval + cdef unsigned long long ullval + cdef long longval + cdef float fval + cdef double dval + cdef char* rawval cdef int ret - if nest_limit < 0: - raise ValueError("recursion limit exceeded.") - nest_limit -= 1 - if self._default is not None: - ret = self._pack_inner(o, 1, nest_limit) - if ret == -2: - o = self._default(o) - else: - return ret - return self._pack_inner(o, 0, nest_limit) + cdef dict d + cdef size_t L + cdef int default_used = 0 - @cython.critical_section - def pack(self, object obj): + if nest_limit < 0: + raise PackValueError("recursion limit exceeded.") + + while True: + if o is None: + ret = msgpack_pack_nil(&self.pk) + elif isinstance(o, bool): + if o: + ret = msgpack_pack_true(&self.pk) + else: + ret = msgpack_pack_false(&self.pk) + elif PyLong_Check(o): + # PyInt_Check(long) is True for Python 3. + # Sow we should test long before int. + if o > 0: + ullval = o + ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) + else: + llval = o + ret = msgpack_pack_long_long(&self.pk, llval) + elif PyInt_Check(o): + longval = o + ret = msgpack_pack_long(&self.pk, longval) + elif PyFloat_Check(o): + if self.use_float: + fval = o + ret = msgpack_pack_float(&self.pk, fval) + else: + dval = o + ret = msgpack_pack_double(&self.pk, dval) + elif PyBytes_Check(o): + L = len(o) + if L > (2**32)-1: + raise ValueError("bytes is too large") + rawval = o + ret = msgpack_pack_bin(&self.pk, L) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_Check(o): + if not self.encoding: + raise TypeError("Can't encode unicode string: no encoding is specified") + o = PyUnicode_AsEncodedString(o, self.encoding, self.unicode_errors) + L = len(o) + if L > (2**32)-1: + raise ValueError("dict is too large") + rawval = o + ret = msgpack_pack_raw(&self.pk, len(o)) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) + elif PyDict_CheckExact(o): + d = o + L = len(d) + if L > (2**32)-1: + raise ValueError("dict is too large") + ret = msgpack_pack_map(&self.pk, L) + if ret == 0: + for k, v in d.iteritems(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif PyDict_Check(o): + L = len(o) + if L > (2**32)-1: + raise ValueError("dict is too large") + ret = msgpack_pack_map(&self.pk, L) + if ret == 0: + for k, v in o.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. + longval = o.code + rawval = o.data + L = len(o.data) + if L > (2**32)-1: + raise ValueError("EXT data is too large") + ret = msgpack_pack_ext(&self.pk, longval, L) + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyTuple_Check(o) or PyList_Check(o): + L = len(o) + if L > (2**32)-1: + raise ValueError("list is too large") + ret = msgpack_pack_array(&self.pk, L) + if ret == 0: + for v in o: + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif not default_used and self._default: + o = self._default(o) + default_used = 1 + continue + else: + raise TypeError("can't serialize %r" % (o,)) + return ret + + cpdef pack(self, object obj): cdef int ret - self._check_exports() - try: - ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) - except: - self.pk.length = 0 - raise - if ret: # should not happen. - raise RuntimeError("internal error") + ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) + if ret == -1: + raise MemoryError + elif ret: # should not happen. + raise TypeError if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf - @cython.critical_section def pack_ext_type(self, typecode, data): - self._check_exports() - if len(data) > ITEM_LIMIT: - raise ValueError("ext data too large") msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) - @cython.critical_section - def pack_array_header(self, long long size): - self._check_exports() - if size > ITEM_LIMIT: - raise ValueError("array too large") - msgpack_pack_array(&self.pk, size) + def pack_array_header(self, size_t size): + if size > (2**32-1): + raise ValueError + cdef int ret = msgpack_pack_array(&self.pk, size) + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf - @cython.critical_section - def pack_map_header(self, long long size): - self._check_exports() - if size > ITEM_LIMIT: - raise ValueError("map too learge") - msgpack_pack_map(&self.pk, size) + def pack_map_header(self, size_t size): + if size > (2**32-1): + raise ValueError + cdef int ret = msgpack_pack_map(&self.pk, size) + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf - @cython.critical_section def pack_map_pairs(self, object pairs): """ Pack *pairs* as msgpack map type. - *pairs* should be a sequence of pairs. + *pairs* should sequence of pair. (`len(pairs)` and `for k, v in pairs:` should be supported.) """ - self._check_exports() - size = len(pairs) - if size > ITEM_LIMIT: - raise ValueError("map too large") - msgpack_pack_map(&self.pk, size) - for k, v in pairs: - self._pack(k) - self._pack(v) + cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) + if ret == 0: + for k, v in pairs: + ret = self._pack(k) + if ret != 0: break + ret = self._pack(v) + if ret != 0: break + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf - @cython.critical_section def reset(self): - """Reset internal buffer. - - This method is useful only when autoreset=False. - """ - self._check_exports() + """Clear internal buffer.""" self.pk.length = 0 - @cython.critical_section def bytes(self): - """Return internal buffer contents as bytes object""" + """Return buffer content.""" return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - - def getbuffer(self): - """Return memoryview of internal buffer. - - Note: Packer now supports buffer protocol. You can use memoryview(packer). - """ - return memoryview(self) - - def __getbuffer__(self, Py_buffer *buffer, int flags): - PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags) - self.exports += 1 - - def __releasebuffer__(self, Py_buffer *buffer): - self.exports -= 1 diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index f0cf96d..f5e7d95 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,39 +1,33 @@ +# coding: utf-8 +#cython: embedsignature=True + from cpython cimport * cdef extern from "Python.h": ctypedef struct PyObject - object PyMemoryView_GetContiguous(object obj, int buffertype, char order) + cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -from libc.stdint cimport uint64_t -from .exceptions import ( - BufferFull, - OutOfData, - ExtraData, - FormatError, - StackError, -) -from .ext import ExtType, Timestamp - -cdef object giga = 1_000_000_000 +from msgpack.exceptions import ( + BufferFull, + OutOfData, + UnpackValueError, + ExtraData, + ) +from msgpack import ExtType cdef extern from "unpack.h": ctypedef struct msgpack_user: bint use_list - bint raw - bint has_pairs_hook # call object_hook with k-v pairs - bint strict_map_key - int timestamp PyObject* object_hook + bint has_pairs_hook # call object_hook with k-v pairs PyObject* list_hook PyObject* ext_hook - PyObject* timestamp_t - PyObject *giga; - PyObject *utc; - const char *unicode_errors + char *encoding + char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len Py_ssize_t max_array_len @@ -43,31 +37,26 @@ cdef extern from "unpack.h": ctypedef struct unpack_context: msgpack_user user PyObject* obj - Py_ssize_t count + size_t count ctypedef int (*execute_fn)(unpack_context* ctx, const char* data, - Py_ssize_t len, Py_ssize_t* off) except? -1 + size_t len, size_t* off) except? -1 execute_fn unpack_construct execute_fn unpack_skip execute_fn read_array_header execute_fn read_map_header void unpack_init(unpack_context* ctx) object unpack_data(unpack_context* ctx) - void unpack_clear(unpack_context* ctx) cdef inline init_ctx(unpack_context *ctx, object object_hook, object object_pairs_hook, object list_hook, object ext_hook, - bint use_list, bint raw, int timestamp, - bint strict_map_key, - const char* unicode_errors, + bint use_list, char* encoding, char* unicode_errors, Py_ssize_t max_str_len, Py_ssize_t max_bin_len, Py_ssize_t max_array_len, Py_ssize_t max_map_len, Py_ssize_t max_ext_len): unpack_init(ctx) ctx.user.use_list = use_list - ctx.user.raw = raw - ctx.user.strict_map_key = strict_map_key ctx.user.object_hook = ctx.user.list_hook = NULL ctx.user.max_str_len = max_str_len ctx.user.max_bin_len = max_bin_len @@ -101,192 +90,137 @@ cdef inline init_ctx(unpack_context *ctx, raise TypeError("ext_hook must be a callable.") ctx.user.ext_hook = ext_hook - if timestamp < 0 or 3 < timestamp: - raise ValueError("timestamp must be 0..3") - - # Add Timestamp type to the user object so it may be used in unpack.h - ctx.user.timestamp = timestamp - ctx.user.timestamp_t = Timestamp - ctx.user.giga = giga - ctx.user.utc = utc + ctx.user.encoding = encoding ctx.user.unicode_errors = unicode_errors def default_read_extended_type(typecode, data): raise NotImplementedError("Cannot decode extended type with typecode=%d" % typecode) -cdef inline int get_data_from_buffer(object obj, - Py_buffer *view, - char **buf, - Py_ssize_t *buffer_len) except 0: - cdef object contiguous - cdef Py_buffer tmp - if PyObject_GetBuffer(obj, view, PyBUF_FULL_RO) == -1: - raise - if view.itemsize != 1: - PyBuffer_Release(view) - raise BufferError("cannot unpack from multi-byte object") - if PyBuffer_IsContiguous(view, b'A') == 0: - PyBuffer_Release(view) - # create a contiguous copy and get buffer - contiguous = PyMemoryView_GetContiguous(obj, PyBUF_READ, b'C') - PyObject_GetBuffer(contiguous, view, PyBUF_SIMPLE) - # view must hold the only reference to contiguous, - # so memory is freed when view is released - Py_DECREF(contiguous) - buffer_len[0] = view.len - buf[0] = view.buf - return 1 - - -def unpackb(object packed, *, object object_hook=None, object list_hook=None, - bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, - unicode_errors=None, +def unpackb(object packed, object object_hook=None, object list_hook=None, + bint use_list=1, encoding=None, unicode_errors="strict", object_pairs_hook=None, ext_hook=ExtType, - Py_ssize_t max_str_len=-1, - Py_ssize_t max_bin_len=-1, - Py_ssize_t max_array_len=-1, - Py_ssize_t max_map_len=-1, - Py_ssize_t max_ext_len=-1): + Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_bin_len=2147483647, + Py_ssize_t max_array_len=2147483647, + Py_ssize_t max_map_len=2147483647, + Py_ssize_t max_ext_len=2147483647): """ Unpack packed_bytes to object. Returns an unpacked object. - Raises ``ExtraData`` when *packed* contains extra bytes. - Raises ``ValueError`` when *packed* is incomplete. - Raises ``FormatError`` when *packed* is not valid msgpack. - Raises ``StackError`` when *packed* contains too nested. - Other exceptions can be raised during unpacking. + Raises `ValueError` when `packed` contains extra bytes. See :class:`Unpacker` for options. - - *max_xxx_len* options are configured automatically from ``len(packed)``. """ cdef unpack_context ctx - cdef Py_ssize_t off = 0 + cdef size_t off = 0 cdef int ret - cdef Py_buffer view - cdef char* buf = NULL + cdef char* buf cdef Py_ssize_t buf_len - cdef const char* cerr = NULL + cdef char* cenc = NULL + cdef char* cerr = NULL + + PyObject_AsReadBuffer(packed, &buf, &buf_len) + + if encoding is not None: + if isinstance(encoding, unicode): + encoding = encoding.encode('ascii') + cenc = PyBytes_AsString(encoding) if unicode_errors is not None: - cerr = unicode_errors - - get_data_from_buffer(packed, &view, &buf, &buf_len) - - if max_str_len == -1: - max_str_len = buf_len - if max_bin_len == -1: - max_bin_len = buf_len - if max_array_len == -1: - max_array_len = buf_len - if max_map_len == -1: - max_map_len = buf_len//2 - if max_ext_len == -1: - max_ext_len = buf_len - - try: - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, raw, timestamp, strict_map_key, cerr, - max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) - ret = unpack_construct(&ctx, buf, buf_len, &off) - finally: - PyBuffer_Release(&view); + if isinstance(unicode_errors, unicode): + unicode_errors = unicode_errors.encode('ascii') + cerr = PyBytes_AsString(unicode_errors) + init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, + use_list, cenc, cerr, + max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) + ret = unpack_construct(&ctx, buf, buf_len, &off) if ret == 1: obj = unpack_data(&ctx) if off < buf_len: raise ExtraData(obj, PyBytes_FromStringAndSize(buf+off, buf_len-off)) return obj - unpack_clear(&ctx) - if ret == 0: - raise ValueError("Unpack failed: incomplete input") - elif ret == -2: - raise FormatError - elif ret == -3: - raise StackError - raise ValueError("Unpack failed: error = %d" % (ret,)) + else: + raise UnpackValueError("Unpack failed: error = %d" % (ret,)) -cdef class Unpacker: +def unpack(object stream, object object_hook=None, object list_hook=None, + bint use_list=1, encoding=None, unicode_errors="strict", + object_pairs_hook=None, + ): + """ + Unpack an object from `stream`. + + Raises `ValueError` when `stream` has extra bytes. + + See :class:`Unpacker` for options. + """ + return unpackb(stream.read(), use_list=use_list, + object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook, + encoding=encoding, unicode_errors=unicode_errors, + ) + + +cdef class Unpacker(object): """Streaming unpacker. - Arguments: + arguments: :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and `.feed()` is not usable. + If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. :param int read_size: - Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) + Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`) :param bool use_list: If true, unpack msgpack array to Python list. Otherwise, unpack to Python tuple. (default: True) - :param bool raw: - If true, unpack msgpack raw to Python bytes. - Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). - - :param int timestamp: - Control how timestamp type is unpacked: - - 0 - Timestamp - 1 - float (Seconds from the EPOCH) - 2 - int (Nanoseconds from the EPOCH) - 3 - datetime.datetime (UTC). - - :param bool strict_map_key: - If true (default), only str or bytes are accepted for map (dict) keys. - - :param object_hook: + :param callable object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param object_pairs_hook: + :param callable object_pairs_hook: When specified, it should be callable. Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) + :param str encoding: + Encoding used for decoding msgpack raw. + If it is None (default), msgpack raw is deserialized to Python bytes. + :param str unicode_errors: - The error handler for decoding unicode. (default: 'strict') - This option should be used only when you have msgpack data which - contains invalid UTF-8 string. + Used for decoding msgpack raw with *encoding*. + (default: `'strict'`) :param int max_buffer_size: - Limits size of data waiting unpacked. 0 means 2**32-1. - The default value is 100*1024*1024 (100MiB). + Limits size of data waiting unpacked. 0 means system's INT_MAX (default). Raises `BufferFull` exception when it is insufficient. - You should set this parameter when unpacking data from untrusted source. + You shoud set this parameter when unpacking data from untrasted source. :param int max_str_len: - Deprecated, use *max_buffer_size* instead. - Limits max length of str. (default: max_buffer_size) + Limits max length of str. (default: 2**31-1) :param int max_bin_len: - Deprecated, use *max_buffer_size* instead. - Limits max length of bin. (default: max_buffer_size) + Limits max length of bin. (default: 2**31-1) :param int max_array_len: - Limits max length of array. - (default: max_buffer_size) + Limits max length of array. (default: 2**31-1) :param int max_map_len: - Limits max length of map. - (default: max_buffer_size//2) + Limits max length of map. (default: 2**31-1) - :param int max_ext_len: - Deprecated, use *max_buffer_size* instead. - Limits max size of ext type. (default: max_buffer_size) - Example of streaming deserialize from file-like object:: + example of streaming deserialize from file-like object:: unpacker = Unpacker(file_like) for o in unpacker: process(o) - Example of streaming deserialize from socket:: + example of streaming deserialize from socket:: unpacker = Unpacker() while True: @@ -296,44 +230,36 @@ cdef class Unpacker: unpacker.feed(buf) for o in unpacker: process(o) - - Raises ``ExtraData`` when *packed* contains extra bytes. - Raises ``OutOfData`` when *packed* is incomplete. - Raises ``FormatError`` when *packed* is not valid msgpack. - Raises ``StackError`` when *packed* contains too nested. - Other exceptions can be raised during unpacking. """ cdef unpack_context ctx cdef char* buf - cdef Py_ssize_t buf_size, buf_head, buf_tail + cdef size_t buf_size, buf_head, buf_tail cdef object file_like cdef object file_like_read cdef Py_ssize_t read_size # To maintain refcnt. cdef object object_hook, object_pairs_hook, list_hook, ext_hook - cdef object unicode_errors - cdef Py_ssize_t max_buffer_size - cdef uint64_t stream_offset + cdef object encoding, unicode_errors + cdef size_t max_buffer_size def __cinit__(self): self.buf = NULL def __dealloc__(self): - PyMem_Free(self.buf) + free(self.buf) self.buf = NULL - @cython.critical_section - def __init__(self, file_like=None, *, Py_ssize_t read_size=0, - bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, + def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, object object_hook=None, object object_pairs_hook=None, object list_hook=None, - unicode_errors=None, Py_ssize_t max_buffer_size=100*1024*1024, + encoding=None, unicode_errors='strict', int max_buffer_size=0, object ext_hook=ExtType, - Py_ssize_t max_str_len=-1, - Py_ssize_t max_bin_len=-1, - Py_ssize_t max_array_len=-1, - Py_ssize_t max_map_len=-1, - Py_ssize_t max_ext_len=-1): - cdef const char *cerr=NULL + Py_ssize_t max_str_len=2147483647, # 2**32-1 + Py_ssize_t max_bin_len=2147483647, + Py_ssize_t max_array_len=2147483647, + Py_ssize_t max_map_len=2147483647, + Py_ssize_t max_ext_len=2147483647): + cdef char *cenc=NULL, + cdef char *cerr=NULL self.object_hook = object_hook self.object_pairs_hook = object_pairs_hook @@ -345,58 +271,53 @@ cdef class Unpacker: self.file_like_read = file_like.read if not PyCallable_Check(self.file_like_read): raise TypeError("`file_like.read` must be a callable.") - if not max_buffer_size: max_buffer_size = INT_MAX - if max_str_len == -1: - max_str_len = max_buffer_size - if max_bin_len == -1: - max_bin_len = max_buffer_size - if max_array_len == -1: - max_array_len = max_buffer_size - if max_map_len == -1: - max_map_len = max_buffer_size//2 - if max_ext_len == -1: - max_ext_len = max_buffer_size - if read_size > max_buffer_size: raise ValueError("read_size should be less or equal to max_buffer_size") if not read_size: read_size = min(max_buffer_size, 1024**2) - self.max_buffer_size = max_buffer_size self.read_size = read_size - self.buf = PyMem_Malloc(read_size) + self.buf = malloc(read_size) if self.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.buf_size = read_size self.buf_head = 0 self.buf_tail = 0 - self.stream_offset = 0 + + if encoding is not None: + if isinstance(encoding, unicode): + self.encoding = encoding.encode('ascii') + elif isinstance(encoding, bytes): + self.encoding = encoding + else: + raise TypeError("encoding should be bytes or unicode") + cenc = PyBytes_AsString(self.encoding) if unicode_errors is not None: - self.unicode_errors = unicode_errors - cerr = unicode_errors + if isinstance(unicode_errors, unicode): + self.unicode_errors = unicode_errors.encode('ascii') + elif isinstance(unicode_errors, bytes): + self.unicode_errors = unicode_errors + else: + raise TypeError("unicode_errors should be bytes or unicode") + cerr = PyBytes_AsString(self.unicode_errors) init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, raw, timestamp, strict_map_key, cerr, + ext_hook, use_list, cenc, cerr, max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) - @cython.critical_section def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff - cdef char* buf - cdef Py_ssize_t buf_len - if self.file_like is not None: raise AssertionError( "unpacker.feed() is not be able to use with `file_like`.") - - get_data_from_buffer(next_bytes, &pybuff, &buf, &buf_len) + PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE) try: - self.append_buffer(buf, buf_len) + self.append_buffer(pybuff.buf, pybuff.len) finally: PyBuffer_Release(&pybuff) @@ -404,10 +325,10 @@ cdef class Unpacker: cdef: char* buf = self.buf char* new_buf - Py_ssize_t head = self.buf_head - Py_ssize_t tail = self.buf_tail - Py_ssize_t buf_size = self.buf_size - Py_ssize_t new_size + size_t head = self.buf_head + size_t tail = self.buf_tail + size_t buf_size = self.buf_size + size_t new_size if tail + _buf_len > buf_size: if ((tail - head) + _buf_len) <= buf_size: @@ -421,13 +342,13 @@ cdef class Unpacker: if new_size > self.max_buffer_size: raise BufferFull new_size = min(new_size*2, self.max_buffer_size) - new_buf = PyMem_Malloc(new_size) + new_buf = malloc(new_size) if new_buf == NULL: # self.buf still holds old buffer and will be freed during # obj destruction raise MemoryError("Unable to enlarge internal buffer.") memcpy(new_buf, buf + head, tail - head) - PyMem_Free(buf) + free(buf) buf = new_buf buf_size = new_size @@ -440,30 +361,35 @@ cdef class Unpacker: self.buf_size = buf_size self.buf_tail = tail + _buf_len - cdef int read_from_file(self) except -1: - cdef Py_ssize_t remains = self.max_buffer_size - (self.buf_tail - self.buf_head) - if remains <= 0: - raise BufferFull - - next_bytes = self.file_like_read(min(self.read_size, remains)) + cdef read_from_file(self): + next_bytes = self.file_like_read( + min(self.read_size, + self.max_buffer_size - (self.buf_tail - self.buf_head) + )) if next_bytes: self.append_buffer(PyBytes_AsString(next_bytes), PyBytes_Size(next_bytes)) else: self.file_like = None - return 0 - cdef object _unpack(self, execute_fn execute, bint iter=0): + cdef object _unpack(self, execute_fn execute, object write_bytes, bint iter=0): cdef int ret cdef object obj - cdef Py_ssize_t prev_head + cdef size_t prev_head + + if self.buf_head >= self.buf_tail and self.file_like is not None: + self.read_from_file() while 1: prev_head = self.buf_head - if prev_head < self.buf_tail: - ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - self.stream_offset += self.buf_head - prev_head - else: - ret = 0 + if prev_head >= self.buf_tail: + if iter: + raise StopIteration("No more data to unpack.") + else: + raise OutOfData("No more data to unpack.") + + ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + if write_bytes is not None: + write_bytes(PyBytes_FromStringAndSize(self.buf + prev_head, self.buf_head - prev_head)) if ret == 1: obj = unpack_data(&self.ctx) @@ -477,74 +403,60 @@ cdef class Unpacker: raise StopIteration("No more data to unpack.") else: raise OutOfData("No more data to unpack.") - elif ret == -2: - raise FormatError - elif ret == -3: - raise StackError else: raise ValueError("Unpack failed: error = %d" % (ret,)) - @cython.critical_section def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" - cdef Py_ssize_t nread + cdef size_t nread nread = min(self.buf_tail - self.buf_head, nbytes) ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread) self.buf_head += nread - if nread < nbytes and self.file_like is not None: - ret += self.file_like.read(nbytes - nread) - nread = len(ret) - self.stream_offset += nread + if len(ret) < nbytes and self.file_like is not None: + ret += self.file_like.read(nbytes - len(ret)) return ret - @cython.critical_section - def unpack(self): + def unpack(self, object write_bytes=None): """Unpack one object + If write_bytes is not None, it will be called with parts of the raw + message as it is unpacked. + Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(unpack_construct) + return self._unpack(unpack_construct, write_bytes) - @cython.critical_section - def skip(self): + def skip(self, object write_bytes=None): """Read and ignore one object, returning None + If write_bytes is not None, it will be called with parts of the raw + message as it is unpacked. + Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(unpack_skip) + return self._unpack(unpack_skip, write_bytes) - @cython.critical_section - def read_array_header(self): + def read_array_header(self, object write_bytes=None): """assuming the next object is an array, return its size n, such that the next n unpack() calls will iterate over its contents. Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(read_array_header) + return self._unpack(read_array_header, write_bytes) - @cython.critical_section - def read_map_header(self): + def read_map_header(self, object write_bytes=None): """assuming the next object is a map, return its size n, such that the next n * 2 unpack() calls will iterate over its key-value pairs. Raises `OutOfData` when there are no more bytes to unpack. """ - return self._unpack(read_map_header) - - @cython.critical_section - def tell(self): - """Returns the current position of the Unpacker in bytes, i.e., the - number of bytes that were read from the input, also the starting - position of the next object. - """ - return self.stream_offset + return self._unpack(read_map_header, write_bytes) def __iter__(self): return self - @cython.critical_section def __next__(self): - return self._unpack(unpack_construct, 1) + return self._unpack(unpack_construct, None, 1) # for debug. #def _buf(self): diff --git a/msgpack/_version.py b/msgpack/_version.py new file mode 100644 index 0000000..2c1c96c --- /dev/null +++ b/msgpack/_version.py @@ -0,0 +1 @@ +version = (0, 4, 6) diff --git a/msgpack/exceptions.py b/msgpack/exceptions.py index d6d2615..f7678f1 100644 --- a/msgpack/exceptions.py +++ b/msgpack/exceptions.py @@ -1,10 +1,5 @@ class UnpackException(Exception): - """Base class for some exceptions raised while unpacking. - - NOTE: unpack may raise exception other than subclass of - UnpackException. If you want to catch all error, catch - Exception instead. - """ + pass class BufferFull(UnpackException): @@ -15,25 +10,11 @@ class OutOfData(UnpackException): pass -class FormatError(ValueError, UnpackException): - """Invalid msgpack format""" +class UnpackValueError(UnpackException, ValueError): + pass -class StackError(ValueError, UnpackException): - """Too nested""" - - -# Deprecated. Use ValueError instead -UnpackValueError = ValueError - - -class ExtraData(UnpackValueError): - """ExtraData is raised when there is trailing data. - - This exception is raised while only one-shot (not streaming) - unpack. - """ - +class ExtraData(ValueError): def __init__(self, unpacked, extra): self.unpacked = unpacked self.extra = extra @@ -41,8 +22,8 @@ class ExtraData(UnpackValueError): def __str__(self): return "unpack(b) received extra data." +class PackException(Exception): + pass -# Deprecated. Use Exception instead to catch all exception during packing. -PackException = Exception -PackValueError = ValueError -PackOverflowError = OverflowError +class PackValueError(PackException, ValueError): + pass diff --git a/msgpack/ext.py b/msgpack/ext.py deleted file mode 100644 index 9694819..0000000 --- a/msgpack/ext.py +++ /dev/null @@ -1,170 +0,0 @@ -import datetime -import struct -from collections import namedtuple - - -class ExtType(namedtuple("ExtType", "code data")): - """ExtType represents ext type in msgpack.""" - - def __new__(cls, code, data): - if not isinstance(code, int): - raise TypeError("code must be int") - if not isinstance(data, bytes): - raise TypeError("data must be bytes") - if not 0 <= code <= 127: - raise ValueError("code must be 0~127") - return super().__new__(cls, code, data) - - -class Timestamp: - """Timestamp represents the Timestamp extension type in msgpack. - - When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. - When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and - unpack `Timestamp`. - - This class is immutable: Do not override seconds and nanoseconds. - """ - - __slots__ = ["seconds", "nanoseconds"] - - def __init__(self, seconds, nanoseconds=0): - """Initialize a Timestamp object. - - :param int seconds: - Number of seconds since the UNIX epoch (00:00:00 UTC Jan 1 1970, minus leap seconds). - May be negative. - - :param int nanoseconds: - Number of nanoseconds to add to `seconds` to get fractional time. - Maximum is 999_999_999. Default is 0. - - Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns. - """ - if not isinstance(seconds, int): - raise TypeError("seconds must be an integer") - if not isinstance(nanoseconds, int): - raise TypeError("nanoseconds must be an integer") - if not (0 <= nanoseconds < 10**9): - raise ValueError("nanoseconds must be a non-negative integer less than 999999999.") - self.seconds = seconds - self.nanoseconds = nanoseconds - - def __repr__(self): - """String representation of Timestamp.""" - return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})" - - def __eq__(self, other): - """Check for equality with another Timestamp object""" - if type(other) is self.__class__: - return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds - return False - - def __ne__(self, other): - """not-equals method (see :func:`__eq__()`)""" - return not self.__eq__(other) - - def __hash__(self): - return hash((self.seconds, self.nanoseconds)) - - @staticmethod - def from_bytes(b): - """Unpack bytes into a `Timestamp` object. - - Used for pure-Python msgpack unpacking. - - :param b: Payload from msgpack ext message with code -1 - :type b: bytes - - :returns: Timestamp object unpacked from msgpack ext payload - :rtype: Timestamp - """ - if len(b) == 4: - seconds = struct.unpack("!L", b)[0] - nanoseconds = 0 - elif len(b) == 8: - data64 = struct.unpack("!Q", b)[0] - seconds = data64 & 0x00000003FFFFFFFF - nanoseconds = data64 >> 34 - elif len(b) == 12: - nanoseconds, seconds = struct.unpack("!Iq", b) - else: - raise ValueError( - "Timestamp type can only be created from 32, 64, or 96-bit byte objects" - ) - return Timestamp(seconds, nanoseconds) - - def to_bytes(self): - """Pack this Timestamp object into bytes. - - Used for pure-Python msgpack packing. - - :returns data: Payload for EXT message with code -1 (timestamp type) - :rtype: bytes - """ - if (self.seconds >> 34) == 0: # seconds is non-negative and fits in 34 bits - data64 = self.nanoseconds << 34 | self.seconds - if data64 & 0xFFFFFFFF00000000 == 0: - # nanoseconds is zero and seconds < 2**32, so timestamp 32 - data = struct.pack("!L", data64) - else: - # timestamp 64 - data = struct.pack("!Q", data64) - else: - # timestamp 96 - data = struct.pack("!Iq", self.nanoseconds, self.seconds) - return data - - @staticmethod - def from_unix(unix_sec): - """Create a Timestamp from posix timestamp in seconds. - - :param unix_float: Posix timestamp in seconds. - :type unix_float: int or float - """ - seconds = int(unix_sec // 1) - nanoseconds = int((unix_sec % 1) * 10**9) - return Timestamp(seconds, nanoseconds) - - def to_unix(self): - """Get the timestamp as a floating-point value. - - :returns: posix timestamp - :rtype: float - """ - return self.seconds + self.nanoseconds / 1e9 - - @staticmethod - def from_unix_nano(unix_ns): - """Create a Timestamp from posix timestamp in nanoseconds. - - :param int unix_ns: Posix timestamp in nanoseconds. - :rtype: Timestamp - """ - return Timestamp(*divmod(unix_ns, 10**9)) - - def to_unix_nano(self): - """Get the timestamp as a unixtime in nanoseconds. - - :returns: posix timestamp in nanoseconds - :rtype: int - """ - return self.seconds * 10**9 + self.nanoseconds - - def to_datetime(self): - """Get the timestamp as a UTC datetime. - - :rtype: `datetime.datetime` - """ - utc = datetime.timezone.utc - return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( - seconds=self.seconds, microseconds=self.nanoseconds // 1000 - ) - - @staticmethod - def from_datetime(dt): - """Create a Timestamp from datetime with tzinfo. - - :rtype: Timestamp - """ - return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index b02e47c..235c201 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,211 +1,165 @@ """Fallback pure Python implementation of msgpack""" -import struct import sys -from datetime import datetime as _DateTime +import array +import struct -if hasattr(sys, "pypy_version_info"): +if sys.version_info[0] == 3: + PY3 = True + int_types = int + Unicode = str + xrange = range + def dict_iteritems(d): + return d.items() +else: + PY3 = False + int_types = (int, long) + Unicode = unicode + def dict_iteritems(d): + return d.iteritems() + + +if hasattr(sys, 'pypy_version_info'): + # cStringIO is slow on PyPy, StringIO is faster. However: PyPy's own + # StringBuilder is fastest. from __pypy__ import newlist_hint - from __pypy__.builders import BytesBuilder - - _USING_STRINGBUILDER = True - - class BytesIO: - def __init__(self, s=b""): + try: + from __pypy__.builders import BytesBuilder as StringBuilder + except ImportError: + from __pypy__.builders import StringBuilder + USING_STRINGBUILDER = True + class StringIO(object): + def __init__(self, s=b''): if s: - self.builder = BytesBuilder(len(s)) + self.builder = StringBuilder(len(s)) self.builder.append(s) else: - self.builder = BytesBuilder() - + self.builder = StringBuilder() def write(self, s): - if isinstance(s, memoryview): - s = s.tobytes() - elif isinstance(s, bytearray): - s = bytes(s) self.builder.append(s) - def getvalue(self): return self.builder.build() - else: - from io import BytesIO + USING_STRINGBUILDER = False + from io import BytesIO as StringIO + newlist_hint = lambda size: [] - _USING_STRINGBUILDER = False +from msgpack.exceptions import ( + BufferFull, + OutOfData, + UnpackValueError, + PackValueError, + ExtraData) - def newlist_hint(size): - return [] +from msgpack import ExtType -from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError -from .ext import ExtType, Timestamp +EX_SKIP = 0 +EX_CONSTRUCT = 1 +EX_READ_ARRAY_HEADER = 2 +EX_READ_MAP_HEADER = 3 -EX_SKIP = 0 -EX_CONSTRUCT = 1 -EX_READ_ARRAY_HEADER = 2 -EX_READ_MAP_HEADER = 3 - -TYPE_IMMEDIATE = 0 -TYPE_ARRAY = 1 -TYPE_MAP = 2 -TYPE_RAW = 3 -TYPE_BIN = 4 -TYPE_EXT = 5 +TYPE_IMMEDIATE = 0 +TYPE_ARRAY = 1 +TYPE_MAP = 2 +TYPE_RAW = 3 +TYPE_BIN = 4 +TYPE_EXT = 5 DEFAULT_RECURSE_LIMIT = 511 -def _check_type_strict(obj, t, type=type, tuple=tuple): - if type(t) is tuple: - return type(obj) in t - else: - return type(obj) is t +def unpack(stream, **kwargs): + """ + Unpack an object from `stream`. - -def _get_data_from_buffer(obj): - view = memoryview(obj) - if view.itemsize != 1: - raise ValueError("cannot unpack from multi-byte object") - return view + Raises `ExtraData` when `packed` contains extra bytes. + See :class:`Unpacker` for options. + """ + unpacker = Unpacker(stream, **kwargs) + ret = unpacker._fb_unpack() + if unpacker._fb_got_extradata(): + raise ExtraData(ret, unpacker._fb_get_extradata()) + return ret def unpackb(packed, **kwargs): """ Unpack an object from `packed`. - Raises ``ExtraData`` when *packed* contains extra bytes. - Raises ``ValueError`` when *packed* is incomplete. - Raises ``FormatError`` when *packed* is not valid msgpack. - Raises ``StackError`` when *packed* contains too nested. - Other exceptions can be raised during unpacking. - + Raises `ExtraData` when `packed` contains extra bytes. See :class:`Unpacker` for options. """ - unpacker = Unpacker(None, max_buffer_size=len(packed), **kwargs) + unpacker = Unpacker(None, **kwargs) unpacker.feed(packed) try: - ret = unpacker._unpack() + ret = unpacker._fb_unpack() except OutOfData: - raise ValueError("Unpack failed: incomplete input") - except RecursionError: - raise StackError - if unpacker._got_extradata(): - raise ExtraData(ret, unpacker._get_extradata()) + raise UnpackValueError("Data is not enough.") + if unpacker._fb_got_extradata(): + raise ExtraData(ret, unpacker._fb_get_extradata()) return ret -_NO_FORMAT_USED = "" -_MSGPACK_HEADERS = { - 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN), - 0xC5: (2, ">H", TYPE_BIN), - 0xC6: (4, ">I", TYPE_BIN), - 0xC7: (2, "Bb", TYPE_EXT), - 0xC8: (3, ">Hb", TYPE_EXT), - 0xC9: (5, ">Ib", TYPE_EXT), - 0xCA: (4, ">f"), - 0xCB: (8, ">d"), - 0xCC: (1, _NO_FORMAT_USED), - 0xCD: (2, ">H"), - 0xCE: (4, ">I"), - 0xCF: (8, ">Q"), - 0xD0: (1, "b"), - 0xD1: (2, ">h"), - 0xD2: (4, ">i"), - 0xD3: (8, ">q"), - 0xD4: (1, "b1s", TYPE_EXT), - 0xD5: (2, "b2s", TYPE_EXT), - 0xD6: (4, "b4s", TYPE_EXT), - 0xD7: (8, "b8s", TYPE_EXT), - 0xD8: (16, "b16s", TYPE_EXT), - 0xD9: (1, _NO_FORMAT_USED, TYPE_RAW), - 0xDA: (2, ">H", TYPE_RAW), - 0xDB: (4, ">I", TYPE_RAW), - 0xDC: (2, ">H", TYPE_ARRAY), - 0xDD: (4, ">I", TYPE_ARRAY), - 0xDE: (2, ">H", TYPE_MAP), - 0xDF: (4, ">I", TYPE_MAP), -} - - -class Unpacker: +class Unpacker(object): """Streaming unpacker. - Arguments: + arguments: :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and `.feed()` is not usable. + If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. :param int read_size: - Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) + Used as `file_like.read(read_size)`. (default: `min(1024**2, max_buffer_size)`) :param bool use_list: If true, unpack msgpack array to Python list. Otherwise, unpack to Python tuple. (default: True) - :param bool raw: - If true, unpack msgpack raw to Python bytes. - Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). - - :param int timestamp: - Control how timestamp type is unpacked: - - 0 - Timestamp - 1 - float (Seconds from the EPOCH) - 2 - int (Nanoseconds from the EPOCH) - 3 - datetime.datetime (UTC). - - :param bool strict_map_key: - If true (default), only str or bytes are accepted for map (dict) keys. - - :param object_hook: + :param callable object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param object_pairs_hook: + :param callable object_pairs_hook: When specified, it should be callable. Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) + :param str encoding: + Encoding used for decoding msgpack raw. + If it is None (default), msgpack raw is deserialized to Python bytes. + :param str unicode_errors: - The error handler for decoding unicode. (default: 'strict') - This option should be used only when you have msgpack data which - contains invalid UTF-8 string. + Used for decoding msgpack raw with *encoding*. + (default: `'strict'`) :param int max_buffer_size: - Limits size of data waiting unpacked. 0 means 2**32-1. - The default value is 100*1024*1024 (100MiB). + Limits size of data waiting unpacked. 0 means system's INT_MAX (default). Raises `BufferFull` exception when it is insufficient. - You should set this parameter when unpacking data from untrusted source. + You shoud set this parameter when unpacking data from untrasted source. :param int max_str_len: - Deprecated, use *max_buffer_size* instead. - Limits max length of str. (default: max_buffer_size) + Limits max length of str. (default: 2**31-1) :param int max_bin_len: - Deprecated, use *max_buffer_size* instead. - Limits max length of bin. (default: max_buffer_size) + Limits max length of bin. (default: 2**31-1) :param int max_array_len: - Limits max length of array. - (default: max_buffer_size) + Limits max length of array. (default: 2**31-1) :param int max_map_len: - Limits max length of map. - (default: max_buffer_size//2) + Limits max length of map. (default: 2**31-1) - :param int max_ext_len: - Deprecated, use *max_buffer_size* instead. - Limits max size of ext type. (default: max_buffer_size) - Example of streaming deserialize from file-like object:: + example of streaming deserialize from file-like object:: unpacker = Unpacker(file_like) for o in unpacker: process(o) - Example of streaming deserialize from socket:: + example of streaming deserialize from socket:: unpacker = Unpacker() while True: @@ -215,50 +169,33 @@ class Unpacker: unpacker.feed(buf) for o in unpacker: process(o) - - Raises ``ExtraData`` when *packed* contains extra bytes. - Raises ``OutOfData`` when *packed* is incomplete. - Raises ``FormatError`` when *packed* is not valid msgpack. - Raises ``StackError`` when *packed* contains too nested. - Other exceptions can be raised during unpacking. """ - def __init__( - self, - file_like=None, - *, - read_size=0, - use_list=True, - raw=False, - timestamp=0, - strict_map_key=True, - object_hook=None, - object_pairs_hook=None, - list_hook=None, - unicode_errors=None, - max_buffer_size=100 * 1024 * 1024, - ext_hook=ExtType, - max_str_len=-1, - max_bin_len=-1, - max_array_len=-1, - max_map_len=-1, - max_ext_len=-1, - ): - if unicode_errors is None: - unicode_errors = "strict" - + def __init__(self, file_like=None, read_size=0, use_list=True, + object_hook=None, object_pairs_hook=None, list_hook=None, + encoding=None, unicode_errors='strict', max_buffer_size=0, + ext_hook=ExtType, + max_str_len=2147483647, # 2**32-1 + max_bin_len=2147483647, + max_array_len=2147483647, + max_map_len=2147483647, + max_ext_len=2147483647): if file_like is None: - self._feeding = True + self._fb_feeding = True else: if not callable(file_like.read): raise TypeError("`file_like.read` must be callable") self.file_like = file_like - self._feeding = False + self._fb_feeding = False - #: array of bytes fed. - self._buffer = bytearray() + #: array of bytes feeded. + self._fb_buffers = [] + #: Which buffer we currently reads + self._fb_buf_i = 0 #: Which position we currently reads - self._buff_i = 0 + self._fb_buf_o = 0 + #: Total size of _fb_bufferes + self._fb_buf_n = 0 # When Unpacker is used as an iterable, between the calls to next(), # the buffer is not "consumed" completely, for efficiency sake. @@ -266,33 +203,16 @@ class Unpacker: # the correct moments, we have to keep track of how sloppy we were. # Furthermore, when the buffer is incomplete (that is: in the case # we raise an OutOfData) we need to rollback the buffer to the correct - # state, which _buf_checkpoint records. - self._buf_checkpoint = 0 + # state, which _fb_slopiness records. + self._fb_sloppiness = 0 - if not max_buffer_size: - max_buffer_size = 2**31 - 1 - if max_str_len == -1: - max_str_len = max_buffer_size - if max_bin_len == -1: - max_bin_len = max_buffer_size - if max_array_len == -1: - max_array_len = max_buffer_size - if max_map_len == -1: - max_map_len = max_buffer_size // 2 - if max_ext_len == -1: - max_ext_len = max_buffer_size - - self._max_buffer_size = max_buffer_size + self._max_buffer_size = max_buffer_size or 2**31-1 if read_size > self._max_buffer_size: raise ValueError("read_size must be smaller than max_buffer_size") - self._read_size = read_size or min(self._max_buffer_size, 16 * 1024) - self._raw = bool(raw) - self._strict_map_key = bool(strict_map_key) + self._read_size = read_size or min(self._max_buffer_size, 4096) + self._encoding = encoding self._unicode_errors = unicode_errors self._use_list = use_list - if not (0 <= timestamp <= 3): - raise ValueError("timestamp must be 0..3") - self._timestamp = timestamp self._list_hook = list_hook self._object_hook = object_hook self._object_pairs_hook = object_pairs_hook @@ -302,310 +222,371 @@ class Unpacker: self._max_array_len = max_array_len self._max_map_len = max_map_len self._max_ext_len = max_ext_len - self._stream_offset = 0 if list_hook is not None and not callable(list_hook): - raise TypeError("`list_hook` is not callable") + raise TypeError('`list_hook` is not callable') if object_hook is not None and not callable(object_hook): - raise TypeError("`object_hook` is not callable") + raise TypeError('`object_hook` is not callable') if object_pairs_hook is not None and not callable(object_pairs_hook): - raise TypeError("`object_pairs_hook` is not callable") + raise TypeError('`object_pairs_hook` is not callable') if object_hook is not None and object_pairs_hook is not None: - raise TypeError("object_pairs_hook and object_hook are mutually exclusive") + raise TypeError("object_pairs_hook and object_hook are mutually " + "exclusive") if not callable(ext_hook): raise TypeError("`ext_hook` is not callable") def feed(self, next_bytes): - assert self._feeding - view = _get_data_from_buffer(next_bytes) - if len(self._buffer) - self._buff_i + len(view) > self._max_buffer_size: + if isinstance(next_bytes, array.array): + next_bytes = next_bytes.tostring() + elif isinstance(next_bytes, bytearray): + next_bytes = bytes(next_bytes) + assert self._fb_feeding + if (self._fb_buf_n + len(next_bytes) - self._fb_sloppiness + > self._max_buffer_size): raise BufferFull + self._fb_buf_n += len(next_bytes) + self._fb_buffers.append(next_bytes) - # Strip buffer before checkpoint before reading file. - if self._buf_checkpoint > 0: - del self._buffer[: self._buf_checkpoint] - self._buff_i -= self._buf_checkpoint - self._buf_checkpoint = 0 + def _fb_sloppy_consume(self): + """ Gets rid of some of the used parts of the buffer. """ + if self._fb_buf_i: + for i in xrange(self._fb_buf_i): + self._fb_buf_n -= len(self._fb_buffers[i]) + self._fb_buffers = self._fb_buffers[self._fb_buf_i:] + self._fb_buf_i = 0 + if self._fb_buffers: + self._fb_sloppiness = self._fb_buf_o + else: + self._fb_sloppiness = 0 - # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython - self._buffer.extend(view) - view.release() + def _fb_consume(self): + """ Gets rid of the used parts of the buffer. """ + if self._fb_buf_i: + for i in xrange(self._fb_buf_i): + self._fb_buf_n -= len(self._fb_buffers[i]) + self._fb_buffers = self._fb_buffers[self._fb_buf_i:] + self._fb_buf_i = 0 + if self._fb_buffers: + self._fb_buffers[0] = self._fb_buffers[0][self._fb_buf_o:] + self._fb_buf_n -= self._fb_buf_o + else: + self._fb_buf_n = 0 + self._fb_buf_o = 0 + self._fb_sloppiness = 0 - def _consume(self): - """Gets rid of the used parts of the buffer.""" - self._stream_offset += self._buff_i - self._buf_checkpoint - self._buf_checkpoint = self._buff_i + def _fb_got_extradata(self): + if self._fb_buf_i != len(self._fb_buffers): + return True + if self._fb_feeding: + return False + if not self.file_like: + return False + if self.file_like.read(1): + return True + return False - def _got_extradata(self): - return self._buff_i < len(self._buffer) - - def _get_extradata(self): - return self._buffer[self._buff_i :] + def __iter__(self): + return self def read_bytes(self, n): - ret = self._read(n, raise_outofdata=False) - self._consume() + return self._fb_read(n) + + def _fb_rollback(self): + self._fb_buf_i = 0 + self._fb_buf_o = self._fb_sloppiness + + def _fb_get_extradata(self): + bufs = self._fb_buffers[self._fb_buf_i:] + if bufs: + bufs[0] = bufs[0][self._fb_buf_o:] + return b''.join(bufs) + + def _fb_read(self, n, write_bytes=None): + buffs = self._fb_buffers + # We have a redundant codepath for the most common case, such that + # pypy optimizes it properly. This is the case that the read fits + # in the current buffer. + if (write_bytes is None and self._fb_buf_i < len(buffs) and + self._fb_buf_o + n < len(buffs[self._fb_buf_i])): + self._fb_buf_o += n + return buffs[self._fb_buf_i][self._fb_buf_o - n:self._fb_buf_o] + + # The remaining cases. + ret = b'' + while len(ret) != n: + sliced = n - len(ret) + if self._fb_buf_i == len(buffs): + if self._fb_feeding: + break + to_read = sliced + if self._read_size > to_read: + to_read = self._read_size + tmp = self.file_like.read(to_read) + if not tmp: + break + buffs.append(tmp) + self._fb_buf_n += len(tmp) + continue + ret += buffs[self._fb_buf_i][self._fb_buf_o:self._fb_buf_o + sliced] + self._fb_buf_o += sliced + if self._fb_buf_o >= len(buffs[self._fb_buf_i]): + self._fb_buf_o = 0 + self._fb_buf_i += 1 + if len(ret) != n: + self._fb_rollback() + raise OutOfData + if write_bytes is not None: + write_bytes(ret) return ret - def _read(self, n, raise_outofdata=True): - # (int) -> bytearray - self._reserve(n, raise_outofdata=raise_outofdata) - i = self._buff_i - ret = self._buffer[i : i + n] - self._buff_i = i + len(ret) - return ret - - def _reserve(self, n, raise_outofdata=True): - remain_bytes = len(self._buffer) - self._buff_i - n - - # Fast path: buffer has n bytes already - if remain_bytes >= 0: - return - - if self._feeding: - self._buff_i = self._buf_checkpoint - raise OutOfData - - # Strip buffer before checkpoint before reading file. - if self._buf_checkpoint > 0: - del self._buffer[: self._buf_checkpoint] - self._buff_i -= self._buf_checkpoint - self._buf_checkpoint = 0 - - # Read from file - remain_bytes = -remain_bytes - if remain_bytes + len(self._buffer) > self._max_buffer_size: - raise BufferFull - while remain_bytes > 0: - to_read_bytes = max(self._read_size, remain_bytes) - read_data = self.file_like.read(to_read_bytes) - if not read_data: - break - assert isinstance(read_data, bytes) - self._buffer += read_data - remain_bytes -= len(read_data) - - if len(self._buffer) < n + self._buff_i and raise_outofdata: - self._buff_i = 0 # rollback - raise OutOfData - - def _read_header(self): + def _read_header(self, execute=EX_CONSTRUCT, write_bytes=None): typ = TYPE_IMMEDIATE n = 0 obj = None - self._reserve(1) - b = self._buffer[self._buff_i] - self._buff_i += 1 - if b & 0b10000000 == 0: + c = self._fb_read(1, write_bytes) + b = ord(c) + if b & 0b10000000 == 0: obj = b elif b & 0b11100000 == 0b11100000: - obj = -1 - (b ^ 0xFF) + obj = struct.unpack("b", c)[0] elif b & 0b11100000 == 0b10100000: n = b & 0b00011111 + obj = self._fb_read(n, write_bytes) typ = TYPE_RAW if n > self._max_str_len: - raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") - obj = self._read(n) + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY if n > self._max_array_len: - raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") + raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b & 0b11110000 == 0b10000000: n = b & 0b00001111 typ = TYPE_MAP if n > self._max_map_len: - raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") - elif b == 0xC0: + raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + elif b == 0xc0: obj = None - elif b == 0xC2: + elif b == 0xc2: obj = False - elif b == 0xC3: + elif b == 0xc3: obj = True - elif 0xC4 <= b <= 0xC6: - size, fmt, typ = _MSGPACK_HEADERS[b] - self._reserve(size) - if len(fmt) > 0: - n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] - else: - n = self._buffer[self._buff_i] - self._buff_i += size + elif b == 0xc4: + typ = TYPE_BIN + n = struct.unpack("B", self._fb_read(1, write_bytes))[0] if n > self._max_bin_len: - raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})") - obj = self._read(n) - elif 0xC7 <= b <= 0xC9: - size, fmt, typ = _MSGPACK_HEADERS[b] - self._reserve(size) - L, n = struct.unpack_from(fmt, self._buffer, self._buff_i) - self._buff_i += size + raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + obj = self._fb_read(n, write_bytes) + elif b == 0xc5: + typ = TYPE_BIN + n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + if n > self._max_bin_len: + raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + obj = self._fb_read(n, write_bytes) + elif b == 0xc6: + typ = TYPE_BIN + n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + if n > self._max_bin_len: + raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + obj = self._fb_read(n, write_bytes) + elif b == 0xc7: # ext 8 + typ = TYPE_EXT + L, n = struct.unpack('Bb', self._fb_read(2, write_bytes)) if L > self._max_ext_len: - raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})") - obj = self._read(L) - elif 0xCA <= b <= 0xD3: - size, fmt = _MSGPACK_HEADERS[b] - self._reserve(size) - if len(fmt) > 0: - obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] - else: - obj = self._buffer[self._buff_i] - self._buff_i += size - elif 0xD4 <= b <= 0xD8: - size, fmt, typ = _MSGPACK_HEADERS[b] - if self._max_ext_len < size: - raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})") - self._reserve(size + 1) - n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i) - self._buff_i += size + 1 - elif 0xD9 <= b <= 0xDB: - size, fmt, typ = _MSGPACK_HEADERS[b] - self._reserve(size) - if len(fmt) > 0: - (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) - else: - n = self._buffer[self._buff_i] - self._buff_i += size + raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + obj = self._fb_read(L, write_bytes) + elif b == 0xc8: # ext 16 + typ = TYPE_EXT + L, n = struct.unpack('>Hb', self._fb_read(3, write_bytes)) + if L > self._max_ext_len: + raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + obj = self._fb_read(L, write_bytes) + elif b == 0xc9: # ext 32 + typ = TYPE_EXT + L, n = struct.unpack('>Ib', self._fb_read(5, write_bytes)) + if L > self._max_ext_len: + raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + obj = self._fb_read(L, write_bytes) + elif b == 0xca: + obj = struct.unpack(">f", self._fb_read(4, write_bytes))[0] + elif b == 0xcb: + obj = struct.unpack(">d", self._fb_read(8, write_bytes))[0] + elif b == 0xcc: + obj = struct.unpack("B", self._fb_read(1, write_bytes))[0] + elif b == 0xcd: + obj = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + elif b == 0xce: + obj = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + elif b == 0xcf: + obj = struct.unpack(">Q", self._fb_read(8, write_bytes))[0] + elif b == 0xd0: + obj = struct.unpack("b", self._fb_read(1, write_bytes))[0] + elif b == 0xd1: + obj = struct.unpack(">h", self._fb_read(2, write_bytes))[0] + elif b == 0xd2: + obj = struct.unpack(">i", self._fb_read(4, write_bytes))[0] + elif b == 0xd3: + obj = struct.unpack(">q", self._fb_read(8, write_bytes))[0] + elif b == 0xd4: # fixext 1 + typ = TYPE_EXT + if self._max_ext_len < 1: + raise ValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) + n, obj = struct.unpack('b1s', self._fb_read(2, write_bytes)) + elif b == 0xd5: # fixext 2 + typ = TYPE_EXT + if self._max_ext_len < 2: + raise ValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) + n, obj = struct.unpack('b2s', self._fb_read(3, write_bytes)) + elif b == 0xd6: # fixext 4 + typ = TYPE_EXT + if self._max_ext_len < 4: + raise ValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) + n, obj = struct.unpack('b4s', self._fb_read(5, write_bytes)) + elif b == 0xd7: # fixext 8 + typ = TYPE_EXT + if self._max_ext_len < 8: + raise ValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) + n, obj = struct.unpack('b8s', self._fb_read(9, write_bytes)) + elif b == 0xd8: # fixext 16 + typ = TYPE_EXT + if self._max_ext_len < 16: + raise ValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) + n, obj = struct.unpack('b16s', self._fb_read(17, write_bytes)) + elif b == 0xd9: + typ = TYPE_RAW + n = struct.unpack("B", self._fb_read(1, write_bytes))[0] if n > self._max_str_len: - raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") - obj = self._read(n) - elif 0xDC <= b <= 0xDD: - size, fmt, typ = _MSGPACK_HEADERS[b] - self._reserve(size) - (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) - self._buff_i += size + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + obj = self._fb_read(n, write_bytes) + elif b == 0xda: + typ = TYPE_RAW + n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] + if n > self._max_str_len: + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + obj = self._fb_read(n, write_bytes) + elif b == 0xdb: + typ = TYPE_RAW + n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + if n > self._max_str_len: + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + obj = self._fb_read(n, write_bytes) + elif b == 0xdc: + n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] if n > self._max_array_len: - raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") - elif 0xDE <= b <= 0xDF: - size, fmt, typ = _MSGPACK_HEADERS[b] - self._reserve(size) - (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) - self._buff_i += size + raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + typ = TYPE_ARRAY + elif b == 0xdd: + n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + if n > self._max_array_len: + raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + typ = TYPE_ARRAY + elif b == 0xde: + n = struct.unpack(">H", self._fb_read(2, write_bytes))[0] if n > self._max_map_len: - raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") + raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + typ = TYPE_MAP + elif b == 0xdf: + n = struct.unpack(">I", self._fb_read(4, write_bytes))[0] + if n > self._max_map_len: + raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + typ = TYPE_MAP else: - raise FormatError("Unknown header: 0x%x" % b) + raise UnpackValueError("Unknown header: 0x%x" % b) return typ, n, obj - def _unpack(self, execute=EX_CONSTRUCT): - typ, n, obj = self._read_header() + def _fb_unpack(self, execute=EX_CONSTRUCT, write_bytes=None): + typ, n, obj = self._read_header(execute, write_bytes) if execute == EX_READ_ARRAY_HEADER: if typ != TYPE_ARRAY: - raise ValueError("Expected array") + raise UnpackValueError("Expected array") return n if execute == EX_READ_MAP_HEADER: if typ != TYPE_MAP: - raise ValueError("Expected map") + raise UnpackValueError("Expected map") return n # TODO should we eliminate the recursion? if typ == TYPE_ARRAY: if execute == EX_SKIP: - for i in range(n): + for i in xrange(n): # TODO check whether we need to call `list_hook` - self._unpack(EX_SKIP) + self._fb_unpack(EX_SKIP, write_bytes) return ret = newlist_hint(n) - for i in range(n): - ret.append(self._unpack(EX_CONSTRUCT)) + for i in xrange(n): + ret.append(self._fb_unpack(EX_CONSTRUCT, write_bytes)) if self._list_hook is not None: ret = self._list_hook(ret) # TODO is the interaction between `list_hook` and `use_list` ok? return ret if self._use_list else tuple(ret) if typ == TYPE_MAP: if execute == EX_SKIP: - for i in range(n): + for i in xrange(n): # TODO check whether we need to call hooks - self._unpack(EX_SKIP) - self._unpack(EX_SKIP) + self._fb_unpack(EX_SKIP, write_bytes) + self._fb_unpack(EX_SKIP, write_bytes) return if self._object_pairs_hook is not None: ret = self._object_pairs_hook( - (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n) - ) + (self._fb_unpack(EX_CONSTRUCT, write_bytes), + self._fb_unpack(EX_CONSTRUCT, write_bytes)) + for _ in xrange(n)) else: ret = {} - for _ in range(n): - key = self._unpack(EX_CONSTRUCT) - if self._strict_map_key and type(key) not in (str, bytes): - raise ValueError("%s is not allowed for map key" % str(type(key))) - if isinstance(key, str): - key = sys.intern(key) - ret[key] = self._unpack(EX_CONSTRUCT) + for _ in xrange(n): + key = self._fb_unpack(EX_CONSTRUCT, write_bytes) + ret[key] = self._fb_unpack(EX_CONSTRUCT, write_bytes) if self._object_hook is not None: ret = self._object_hook(ret) return ret if execute == EX_SKIP: return if typ == TYPE_RAW: - if self._raw: - obj = bytes(obj) - else: - obj = obj.decode("utf_8", self._unicode_errors) + if self._encoding is not None: + obj = obj.decode(self._encoding, self._unicode_errors) return obj - if typ == TYPE_BIN: - return bytes(obj) if typ == TYPE_EXT: - if n == -1: # timestamp - ts = Timestamp.from_bytes(bytes(obj)) - if self._timestamp == 1: - return ts.to_unix() - elif self._timestamp == 2: - return ts.to_unix_nano() - elif self._timestamp == 3: - return ts.to_datetime() - else: - return ts - else: - return self._ext_hook(n, bytes(obj)) + return self._ext_hook(n, obj) + if typ == TYPE_BIN: + return obj assert typ == TYPE_IMMEDIATE return obj - def __iter__(self): - return self - - def __next__(self): + def next(self): try: - ret = self._unpack(EX_CONSTRUCT) - self._consume() + ret = self._fb_unpack(EX_CONSTRUCT, None) + self._fb_sloppy_consume() return ret except OutOfData: - self._consume() + self._fb_consume() raise StopIteration - except RecursionError: - raise StackError + __next__ = next - next = __next__ + def skip(self, write_bytes=None): + self._fb_unpack(EX_SKIP, write_bytes) + self._fb_consume() - def skip(self): - self._unpack(EX_SKIP) - self._consume() - - def unpack(self): - try: - ret = self._unpack(EX_CONSTRUCT) - except RecursionError: - raise StackError - self._consume() + def unpack(self, write_bytes=None): + ret = self._fb_unpack(EX_CONSTRUCT, write_bytes) + self._fb_consume() return ret - def read_array_header(self): - ret = self._unpack(EX_READ_ARRAY_HEADER) - self._consume() + def read_array_header(self, write_bytes=None): + ret = self._fb_unpack(EX_READ_ARRAY_HEADER, write_bytes) + self._fb_consume() return ret - def read_map_header(self): - ret = self._unpack(EX_READ_MAP_HEADER) - self._consume() + def read_map_header(self, write_bytes=None): + ret = self._fb_unpack(EX_READ_MAP_HEADER, write_bytes) + self._fb_consume() return ret - def tell(self): - return self._stream_offset - -class Packer: +class Packer(object): """ MessagePack Packer - Usage:: + usage: packer = Packer() astream.write(packer.pack(a)) @@ -613,225 +594,181 @@ class Packer: Packer's constructor has some keyword arguments: - :param default: - When specified, it should be callable. + :param callable default: Convert user type to builtin type that Packer supports. See also simplejson's document. - + :param str encoding: + Convert unicode to bytes with this encoding. (default: 'utf-8') + :param str unicode_errors: + Error handler for encoding unicode. (default: 'strict') :param bool use_single_float: Use single precision float type for float. (default: False) - :param bool autoreset: - Reset buffer after each pack and return its content as `bytes`. (default: True). + Reset buffer after each pack and return it's content as `bytes`. (default: True). If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. - :param bool use_bin_type: Use bin type introduced in msgpack spec 2.0 for bytes. - It also enables str8 type for unicode. (default: True) - - :param bool strict_types: - If set to true, types will be checked to be exact. Derived classes - from serializable types will not be serialized and will be - treated as unsupported type and forwarded to default. - Additionally tuples will not be serialized as lists. - This is useful when trying to implement accurate serialization - for python types. - - :param bool datetime: - If set to true, datetime with tzinfo is packed into Timestamp type. - Note that the tzinfo is stripped in the timestamp. - You can get UTC datetime with `timestamp=3` option of the Unpacker. - - :param str unicode_errors: - The error handler for encoding unicode. (default: 'strict') - DO NOT USE THIS!! This option is kept for very specific usage. - - :param int buf_size: - Internal buffer size. This option is used only for C implementation. + It also enable str8 type for unicode. """ - - def __init__( - self, - *, - default=None, - use_single_float=False, - autoreset=True, - use_bin_type=True, - strict_types=False, - datetime=False, - unicode_errors=None, - buf_size=None, - ): - self._strict_types = strict_types + def __init__(self, default=None, encoding='utf-8', unicode_errors='strict', + use_single_float=False, autoreset=True, use_bin_type=False): self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type - self._buffer = BytesIO() - self._datetime = bool(datetime) - self._unicode_errors = unicode_errors or "strict" - if default is not None and not callable(default): - raise TypeError("default must be callable") + self._encoding = encoding + self._unicode_errors = unicode_errors + self._buffer = StringIO() + if default is not None: + if not callable(default): + raise TypeError("default must be callable") self._default = default - def _pack( - self, - obj, - nest_limit=DEFAULT_RECURSE_LIMIT, - check=isinstance, - check_type_strict=_check_type_strict, - ): + def _pack(self, obj, nest_limit=DEFAULT_RECURSE_LIMIT, isinstance=isinstance): default_used = False - if self._strict_types: - check = check_type_strict - list_types = list - else: - list_types = (list, tuple) while True: if nest_limit < 0: - raise ValueError("recursion limit exceeded") + raise PackValueError("recursion limit exceeded") if obj is None: return self._buffer.write(b"\xc0") - if check(obj, bool): + if isinstance(obj, bool): if obj: return self._buffer.write(b"\xc3") return self._buffer.write(b"\xc2") - if check(obj, int): + if isinstance(obj, int_types): if 0 <= obj < 0x80: return self._buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: return self._buffer.write(struct.pack("b", obj)) - if 0x80 <= obj <= 0xFF: - return self._buffer.write(struct.pack("BB", 0xCC, obj)) + if 0x80 <= obj <= 0xff: + return self._buffer.write(struct.pack("BB", 0xcc, obj)) if -0x80 <= obj < 0: - return self._buffer.write(struct.pack(">Bb", 0xD0, obj)) - if 0xFF < obj <= 0xFFFF: - return self._buffer.write(struct.pack(">BH", 0xCD, obj)) + return self._buffer.write(struct.pack(">Bb", 0xd0, obj)) + if 0xff < obj <= 0xffff: + return self._buffer.write(struct.pack(">BH", 0xcd, obj)) if -0x8000 <= obj < -0x80: - return self._buffer.write(struct.pack(">Bh", 0xD1, obj)) - if 0xFFFF < obj <= 0xFFFFFFFF: - return self._buffer.write(struct.pack(">BI", 0xCE, obj)) + return self._buffer.write(struct.pack(">Bh", 0xd1, obj)) + if 0xffff < obj <= 0xffffffff: + return self._buffer.write(struct.pack(">BI", 0xce, obj)) if -0x80000000 <= obj < -0x8000: - return self._buffer.write(struct.pack(">Bi", 0xD2, obj)) - if 0xFFFFFFFF < obj <= 0xFFFFFFFFFFFFFFFF: - return self._buffer.write(struct.pack(">BQ", 0xCF, obj)) + return self._buffer.write(struct.pack(">Bi", 0xd2, obj)) + if 0xffffffff < obj <= 0xffffffffffffffff: + return self._buffer.write(struct.pack(">BQ", 0xcf, obj)) if -0x8000000000000000 <= obj < -0x80000000: - return self._buffer.write(struct.pack(">Bq", 0xD3, obj)) - if not default_used and self._default is not None: - obj = self._default(obj) - default_used = True - continue - raise OverflowError("Integer value out of range") - if check(obj, (bytes, bytearray)): + return self._buffer.write(struct.pack(">Bq", 0xd3, obj)) + raise PackValueError("Integer value out of range") + if self._use_bin_type and isinstance(obj, bytes): n = len(obj) - if n >= 2**32: - raise ValueError("%s is too large" % type(obj).__name__) - self._pack_bin_header(n) - return self._buffer.write(obj) - if check(obj, str): - obj = obj.encode("utf-8", self._unicode_errors) - n = len(obj) - if n >= 2**32: - raise ValueError("String is too large") - self._pack_raw_header(n) - return self._buffer.write(obj) - if check(obj, memoryview): - n = obj.nbytes - if n >= 2**32: - raise ValueError("Memoryview is too large") - self._pack_bin_header(n) - return self._buffer.write(obj) - if check(obj, float): - if self._use_float: - return self._buffer.write(struct.pack(">Bf", 0xCA, obj)) - return self._buffer.write(struct.pack(">Bd", 0xCB, obj)) - if check(obj, (ExtType, Timestamp)): - if check(obj, Timestamp): - code = -1 - data = obj.to_bytes() + if n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xc4, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xc5, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xc6, n)) else: - code = obj.code - data = obj.data + raise PackValueError("Bytes is too large") + return self._buffer.write(obj) + if isinstance(obj, (Unicode, bytes)): + if isinstance(obj, Unicode): + if self._encoding is None: + raise TypeError( + "Can't encode unicode string: " + "no encoding is specified") + obj = obj.encode(self._encoding, self._unicode_errors) + n = len(obj) + if n <= 0x1f: + self._buffer.write(struct.pack('B', 0xa0 + n)) + elif self._use_bin_type and n <= 0xff: + self._buffer.write(struct.pack('>BB', 0xd9, n)) + elif n <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xda, n)) + elif n <= 0xffffffff: + self._buffer.write(struct.pack(">BI", 0xdb, n)) + else: + raise PackValueError("String is too large") + return self._buffer.write(obj) + if isinstance(obj, float): + if self._use_float: + return self._buffer.write(struct.pack(">Bf", 0xca, obj)) + return self._buffer.write(struct.pack(">Bd", 0xcb, obj)) + if isinstance(obj, ExtType): + code = obj.code + data = obj.data assert isinstance(code, int) assert isinstance(data, bytes) L = len(data) if L == 1: - self._buffer.write(b"\xd4") + self._buffer.write(b'\xd4') elif L == 2: - self._buffer.write(b"\xd5") + self._buffer.write(b'\xd5') elif L == 4: - self._buffer.write(b"\xd6") + self._buffer.write(b'\xd6') elif L == 8: - self._buffer.write(b"\xd7") + self._buffer.write(b'\xd7') elif L == 16: - self._buffer.write(b"\xd8") - elif L <= 0xFF: - self._buffer.write(struct.pack(">BB", 0xC7, L)) - elif L <= 0xFFFF: - self._buffer.write(struct.pack(">BH", 0xC8, L)) + self._buffer.write(b'\xd8') + elif L <= 0xff: + self._buffer.write(struct.pack(">BB", 0xc7, L)) + elif L <= 0xffff: + self._buffer.write(struct.pack(">BH", 0xc8, L)) else: - self._buffer.write(struct.pack(">BI", 0xC9, L)) + self._buffer.write(struct.pack(">BI", 0xc9, L)) self._buffer.write(struct.pack("b", code)) self._buffer.write(data) return - if check(obj, list_types): + if isinstance(obj, (list, tuple)): n = len(obj) - self._pack_array_header(n) - for i in range(n): + self._fb_pack_array_header(n) + for i in xrange(n): self._pack(obj[i], nest_limit - 1) return - if check(obj, dict): - return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1) - - if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None: - obj = Timestamp.from_datetime(obj) - default_used = 1 - continue - + if isinstance(obj, dict): + return self._fb_pack_map_pairs(len(obj), dict_iteritems(obj), + nest_limit - 1) if not default_used and self._default is not None: obj = self._default(obj) default_used = 1 continue - - if self._datetime and check(obj, _DateTime): - raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None") - - raise TypeError(f"Cannot serialize {obj!r}") + raise TypeError("Cannot serialize %r" % obj) def pack(self, obj): - try: - self._pack(obj) - except: - self._buffer = BytesIO() # force reset - raise + self._pack(obj) + ret = self._buffer.getvalue() if self._autoreset: - ret = self._buffer.getvalue() - self._buffer = BytesIO() - return ret + self._buffer = StringIO() + elif USING_STRINGBUILDER: + self._buffer = StringIO(ret) + return ret def pack_map_pairs(self, pairs): - self._pack_map_pairs(len(pairs), pairs) + self._fb_pack_map_pairs(len(pairs), pairs) + ret = self._buffer.getvalue() if self._autoreset: - ret = self._buffer.getvalue() - self._buffer = BytesIO() - return ret + self._buffer = StringIO() + elif USING_STRINGBUILDER: + self._buffer = StringIO(ret) + return ret def pack_array_header(self, n): if n >= 2**32: raise ValueError - self._pack_array_header(n) + self._fb_pack_array_header(n) + ret = self._buffer.getvalue() if self._autoreset: - ret = self._buffer.getvalue() - self._buffer = BytesIO() - return ret + self._buffer = StringIO() + elif USING_STRINGBUILDER: + self._buffer = StringIO(ret) + return ret def pack_map_header(self, n): if n >= 2**32: raise ValueError - self._pack_map_header(n) + self._fb_pack_map_header(n) + ret = self._buffer.getvalue() if self._autoreset: - ret = self._buffer.getvalue() - self._buffer = BytesIO() - return ret + self._buffer = StringIO() + elif USING_STRINGBUILDER: + self._buffer = StringIO(ret) + return ret def pack_ext_type(self, typecode, data): if not isinstance(typecode, int): @@ -841,89 +778,53 @@ class Packer: if not isinstance(data, bytes): raise TypeError("data must have bytes type") L = len(data) - if L > 0xFFFFFFFF: + if L > 0xffffffff: raise ValueError("Too large data") if L == 1: - self._buffer.write(b"\xd4") + self._buffer.write(b'\xd4') elif L == 2: - self._buffer.write(b"\xd5") + self._buffer.write(b'\xd5') elif L == 4: - self._buffer.write(b"\xd6") + self._buffer.write(b'\xd6') elif L == 8: - self._buffer.write(b"\xd7") + self._buffer.write(b'\xd7') elif L == 16: - self._buffer.write(b"\xd8") - elif L <= 0xFF: - self._buffer.write(b"\xc7" + struct.pack("B", L)) - elif L <= 0xFFFF: - self._buffer.write(b"\xc8" + struct.pack(">H", L)) + self._buffer.write(b'\xd8') + elif L <= 0xff: + self._buffer.write(b'\xc7' + struct.pack('B', L)) + elif L <= 0xffff: + self._buffer.write(b'\xc8' + struct.pack('>H', L)) else: - self._buffer.write(b"\xc9" + struct.pack(">I", L)) - self._buffer.write(struct.pack("B", typecode)) + self._buffer.write(b'\xc9' + struct.pack('>I', L)) + self._buffer.write(struct.pack('B', typecode)) self._buffer.write(data) - def _pack_array_header(self, n): - if n <= 0x0F: - return self._buffer.write(struct.pack("B", 0x90 + n)) - if n <= 0xFFFF: - return self._buffer.write(struct.pack(">BH", 0xDC, n)) - if n <= 0xFFFFFFFF: - return self._buffer.write(struct.pack(">BI", 0xDD, n)) - raise ValueError("Array is too large") + def _fb_pack_array_header(self, n): + if n <= 0x0f: + return self._buffer.write(struct.pack('B', 0x90 + n)) + if n <= 0xffff: + return self._buffer.write(struct.pack(">BH", 0xdc, n)) + if n <= 0xffffffff: + return self._buffer.write(struct.pack(">BI", 0xdd, n)) + raise PackValueError("Array is too large") - def _pack_map_header(self, n): - if n <= 0x0F: - return self._buffer.write(struct.pack("B", 0x80 + n)) - if n <= 0xFFFF: - return self._buffer.write(struct.pack(">BH", 0xDE, n)) - if n <= 0xFFFFFFFF: - return self._buffer.write(struct.pack(">BI", 0xDF, n)) - raise ValueError("Dict is too large") + def _fb_pack_map_header(self, n): + if n <= 0x0f: + return self._buffer.write(struct.pack('B', 0x80 + n)) + if n <= 0xffff: + return self._buffer.write(struct.pack(">BH", 0xde, n)) + if n <= 0xffffffff: + return self._buffer.write(struct.pack(">BI", 0xdf, n)) + raise PackValueError("Dict is too large") - def _pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): - self._pack_map_header(n) - for k, v in pairs: + def _fb_pack_map_pairs(self, n, pairs, nest_limit=DEFAULT_RECURSE_LIMIT): + self._fb_pack_map_header(n) + for (k, v) in pairs: self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) - def _pack_raw_header(self, n): - if n <= 0x1F: - self._buffer.write(struct.pack("B", 0xA0 + n)) - elif self._use_bin_type and n <= 0xFF: - self._buffer.write(struct.pack(">BB", 0xD9, n)) - elif n <= 0xFFFF: - self._buffer.write(struct.pack(">BH", 0xDA, n)) - elif n <= 0xFFFFFFFF: - self._buffer.write(struct.pack(">BI", 0xDB, n)) - else: - raise ValueError("Raw is too large") - - def _pack_bin_header(self, n): - if not self._use_bin_type: - return self._pack_raw_header(n) - elif n <= 0xFF: - return self._buffer.write(struct.pack(">BB", 0xC4, n)) - elif n <= 0xFFFF: - return self._buffer.write(struct.pack(">BH", 0xC5, n)) - elif n <= 0xFFFFFFFF: - return self._buffer.write(struct.pack(">BI", 0xC6, n)) - else: - raise ValueError("Bin is too large") - def bytes(self): - """Return internal buffer contents as bytes object""" return self._buffer.getvalue() def reset(self): - """Reset internal buffer. - - This method is useful only when autoreset=False. - """ - self._buffer = BytesIO() - - def getbuffer(self): - """Return view of internal buffer.""" - if _USING_STRINGBUILDER: - return memoryview(self.bytes()) - else: - return self._buffer.getbuffer() + self._buffer = StringIO() diff --git a/msgpack/pack.h b/msgpack/pack.h index edf3a3f..971065c 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -21,12 +21,15 @@ #include "sysdep.h" #include #include -#include #ifdef __cplusplus extern "C" { #endif +#ifdef _MSC_VER +#define inline __inline +#endif + typedef struct msgpack_packer { char *buf; size_t length; @@ -36,6 +39,40 @@ typedef struct msgpack_packer { typedef struct Packer Packer; +static inline int msgpack_pack_int(msgpack_packer* pk, int d); +static inline int msgpack_pack_long(msgpack_packer* pk, long d); +static inline int msgpack_pack_long_long(msgpack_packer* pk, long long d); +static inline int msgpack_pack_unsigned_short(msgpack_packer* pk, unsigned short d); +static inline int msgpack_pack_unsigned_int(msgpack_packer* pk, unsigned int d); +static inline int msgpack_pack_unsigned_long(msgpack_packer* pk, unsigned long d); +//static inline int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d); + +static inline int msgpack_pack_uint8(msgpack_packer* pk, uint8_t d); +static inline int msgpack_pack_uint16(msgpack_packer* pk, uint16_t d); +static inline int msgpack_pack_uint32(msgpack_packer* pk, uint32_t d); +static inline int msgpack_pack_uint64(msgpack_packer* pk, uint64_t d); +static inline int msgpack_pack_int8(msgpack_packer* pk, int8_t d); +static inline int msgpack_pack_int16(msgpack_packer* pk, int16_t d); +static inline int msgpack_pack_int32(msgpack_packer* pk, int32_t d); +static inline int msgpack_pack_int64(msgpack_packer* pk, int64_t d); + +static inline int msgpack_pack_float(msgpack_packer* pk, float d); +static inline int msgpack_pack_double(msgpack_packer* pk, double d); + +static inline int msgpack_pack_nil(msgpack_packer* pk); +static inline int msgpack_pack_true(msgpack_packer* pk); +static inline int msgpack_pack_false(msgpack_packer* pk); + +static inline int msgpack_pack_array(msgpack_packer* pk, unsigned int n); + +static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n); + +static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); +static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l); +static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); + +static inline int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l); + static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) { char* buf = pk->buf; @@ -44,11 +81,8 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ if (len + l > bs) { bs = (len + l) * 2; - buf = (char*)PyMem_Realloc(buf, bs); - if (!buf) { - PyErr_NoMemory(); - return -1; - } + buf = (char*)realloc(buf, bs); + if (!buf) return -1; } memcpy(buf + len, data, l); len += l; diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index b8959f0..5d1088f 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -37,6 +37,18 @@ * Integer */ +#define msgpack_pack_real_uint8(x, d) \ +do { \ + if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ +} while(0) + #define msgpack_pack_real_uint16(x, d) \ do { \ if(d < (1<<7)) { \ @@ -111,6 +123,18 @@ do { \ } \ } while(0) +#define msgpack_pack_real_int8(x, d) \ +do { \ + if(d < -(1<<5)) { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ + } \ +} while(0) + #define msgpack_pack_real_int16(x, d) \ do { \ if(d < -(1<<5)) { \ @@ -240,6 +264,49 @@ do { \ } while(0) +static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d) +{ + msgpack_pack_real_uint8(x, d); +} + +static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d) +{ + msgpack_pack_real_uint16(x, d); +} + +static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d) +{ + msgpack_pack_real_uint32(x, d); +} + +static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d) +{ + msgpack_pack_real_uint64(x, d); +} + +static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d) +{ + msgpack_pack_real_int8(x, d); +} + +static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d) +{ + msgpack_pack_real_int16(x, d); +} + +static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d) +{ + msgpack_pack_real_int32(x, d); +} + +static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d) +{ + msgpack_pack_real_int64(x, d); +} + + +//#ifdef msgpack_pack_inline_func_cint + static inline int msgpack_pack_short(msgpack_packer* x, short d) { #if defined(SIZEOF_SHORT) @@ -305,37 +372,192 @@ if(sizeof(int) == 2) { static inline int msgpack_pack_long(msgpack_packer* x, long d) { #if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 4 +#if SIZEOF_LONG == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_LONG == 4 msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #elif defined(LONG_MAX) -#if LONG_MAX == 0x7fffffffL +#if LONG_MAX == 0x7fffL + msgpack_pack_real_int16(x, d); +#elif LONG_MAX == 0x7fffffffL msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #else - if (sizeof(long) == 4) { - msgpack_pack_real_int32(x, d); - } else { - msgpack_pack_real_int64(x, d); - } +if(sizeof(long) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(long) == 4) { + msgpack_pack_real_int32(x, d); +} else { + msgpack_pack_real_int64(x, d); +} #endif } static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) { +#if defined(SIZEOF_LONG_LONG) +#if SIZEOF_LONG_LONG == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_LONG_LONG == 4 + msgpack_pack_real_int32(x, d); +#else msgpack_pack_real_int64(x, d); +#endif + +#elif defined(LLONG_MAX) +#if LLONG_MAX == 0x7fffL + msgpack_pack_real_int16(x, d); +#elif LLONG_MAX == 0x7fffffffL + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#else +if(sizeof(long long) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(long long) == 4) { + msgpack_pack_real_int32(x, d); +} else { + msgpack_pack_real_int64(x, d); +} +#endif +} + +static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d) +{ +#if defined(SIZEOF_SHORT) +#if SIZEOF_SHORT == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_SHORT == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(USHRT_MAX) +#if USHRT_MAX == 0xffffU + msgpack_pack_real_uint16(x, d); +#elif USHRT_MAX == 0xffffffffU + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned short) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned short) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif +} + +static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d) +{ +#if defined(SIZEOF_INT) +#if SIZEOF_INT == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_INT == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(UINT_MAX) +#if UINT_MAX == 0xffffU + msgpack_pack_real_uint16(x, d); +#elif UINT_MAX == 0xffffffffU + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned int) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned int) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif +} + +static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d) +{ +#if defined(SIZEOF_LONG) +#if SIZEOF_LONG == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_LONG == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(ULONG_MAX) +#if ULONG_MAX == 0xffffUL + msgpack_pack_real_uint16(x, d); +#elif ULONG_MAX == 0xffffffffUL + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned long) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned long) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif } static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d) { +#if defined(SIZEOF_LONG_LONG) +#if SIZEOF_LONG_LONG == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_LONG_LONG == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(ULLONG_MAX) +#if ULLONG_MAX == 0xffffUL + msgpack_pack_real_uint16(x, d); +#elif ULLONG_MAX == 0xffffffffUL + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned long long) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned long long) == 4) { + msgpack_pack_real_uint32(x, d); +} else { msgpack_pack_real_uint64(x, d); } +#endif +} + +//#undef msgpack_pack_inline_func_cint +//#endif + /* @@ -344,26 +566,24 @@ static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned lo static inline int msgpack_pack_float(msgpack_packer* x, float d) { + union { float f; uint32_t i; } mem; + mem.f = d; unsigned char buf[5]; - buf[0] = 0xca; - -#if PY_VERSION_HEX >= 0x030B00A7 - PyFloat_Pack4(d, (char *)&buf[1], 0); -#else - _PyFloat_Pack4(d, &buf[1], 0); -#endif + buf[0] = 0xca; _msgpack_store32(&buf[1], mem.i); msgpack_pack_append_buffer(x, buf, 5); } static inline int msgpack_pack_double(msgpack_packer* x, double d) { + union { double f; uint64_t i; } mem; + mem.f = d; unsigned char buf[9]; buf[0] = 0xcb; -#if PY_VERSION_HEX >= 0x030B00A7 - PyFloat_Pack8(d, (char *)&buf[1], 0); -#else - _PyFloat_Pack8(d, &buf[1], 0); +#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi + // https://github.com/msgpack/msgpack-perl/pull/1 + mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); #endif + _msgpack_store64(&buf[1], mem.i); msgpack_pack_append_buffer(x, buf, 9); } @@ -546,39 +766,6 @@ static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l) } -/* - * Pack Timestamp extension type. Follows msgpack-c pack_template.h. - */ -static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uint32_t nanoseconds) -{ - if ((seconds >> 34) == 0) { - /* seconds is unsigned and fits in 34 bits */ - uint64_t data64 = ((uint64_t)nanoseconds << 34) | (uint64_t)seconds; - if ((data64 & 0xffffffff00000000L) == 0) { - /* no nanoseconds and seconds is 32bits or smaller. timestamp32. */ - unsigned char buf[4]; - uint32_t data32 = (uint32_t)data64; - msgpack_pack_ext(x, -1, 4); - _msgpack_store32(buf, data32); - msgpack_pack_raw_body(x, buf, 4); - } else { - /* timestamp64 */ - unsigned char buf[8]; - msgpack_pack_ext(x, -1, 8); - _msgpack_store64(buf, data64); - msgpack_pack_raw_body(x, buf, 8); - - } - } else { - /* seconds is signed or >34bits */ - unsigned char buf[12]; - _msgpack_store32(&buf[0], nanoseconds); - _msgpack_store64(&buf[4], seconds); - msgpack_pack_ext(x, -1, 12); - msgpack_pack_raw_body(x, buf, 12); - } - return 0; -} #undef msgpack_pack_append_buffer @@ -588,9 +775,11 @@ static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uin #undef TAKE8_32 #undef TAKE8_64 +#undef msgpack_pack_real_uint8 #undef msgpack_pack_real_uint16 #undef msgpack_pack_real_uint32 #undef msgpack_pack_real_uint64 +#undef msgpack_pack_real_int8 #undef msgpack_pack_real_int16 #undef msgpack_pack_real_int32 #undef msgpack_pack_real_int64 diff --git a/msgpack/sysdep.h b/msgpack/sysdep.h index 7067300..ed9c1bc 100644 --- a/msgpack/sysdep.h +++ b/msgpack/sysdep.h @@ -61,14 +61,14 @@ typedef unsigned int _msgpack_atomic_counter_t; #endif #endif -#else /* _WIN32 */ -#include /* ntohs, ntohl */ +#else +#include /* __BYTE_ORDER */ #endif #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN #define __LITTLE_ENDIAN__ -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#elif __BYTE_ORDER == __BIG_ENDIAN #define __BIG_ENDIAN__ #elif _WIN32 #define __LITTLE_ENDIAN__ @@ -95,7 +95,7 @@ typedef unsigned int _msgpack_atomic_counter_t; #ifdef _WIN32 # if defined(ntohl) # define _msgpack_be32(x) ntohl(x) -# elif defined(_byteswap_ulong) || defined(_MSC_VER) +# elif defined(_byteswap_ulong) || (defined(_MSC_VER) && _MSC_VER >= 1400) # define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) # else # define _msgpack_be32(x) \ @@ -108,7 +108,7 @@ typedef unsigned int _msgpack_atomic_counter_t; # define _msgpack_be32(x) ntohl(x) #endif -#if defined(_byteswap_uint64) || defined(_MSC_VER) +#if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400) # define _msgpack_be64(x) (_byteswap_uint64(x)) #elif defined(bswap_64) # define _msgpack_be64(x) bswap_64(x) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 58a2f4f..5deb7cd 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -20,17 +20,12 @@ #include "unpack_define.h" typedef struct unpack_user { - bool use_list; - bool raw; - bool has_pairs_hook; - bool strict_map_key; - int timestamp; + int use_list; PyObject *object_hook; + bool has_pairs_hook; PyObject *list_hook; PyObject *ext_hook; - PyObject *timestamp_t; - PyObject *giga; - PyObject *utc; + const char *encoding; const char *unicode_errors; Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; } unpack_user; @@ -38,7 +33,7 @@ typedef struct unpack_user { typedef PyObject* msgpack_unpack_object; struct unpack_context; typedef struct unpack_context unpack_context; -typedef int (*execute_fn)(unpack_context *ctx, const char* data, Py_ssize_t len, Py_ssize_t* off); +typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off); static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) { @@ -47,7 +42,7 @@ static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromLong((long)d); + PyObject *p = PyInt_FromLong((long)d); if (!p) return -1; *o = p; @@ -61,7 +56,7 @@ static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpac static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromSize_t((size_t)d); + PyObject *p = PyInt_FromSize_t((size_t)d); if (!p) return -1; *o = p; @@ -74,7 +69,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp if (d > LONG_MAX) { p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); } else { - p = PyLong_FromLong((long)d); + p = PyInt_FromSize_t((size_t)d); } if (!p) return -1; @@ -84,7 +79,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromLong(d); + PyObject *p = PyInt_FromLong(d); if (!p) return -1; *o = p; @@ -105,9 +100,9 @@ static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpac { PyObject *p; if (d > LONG_MAX || d < LONG_MIN) { - p = PyLong_FromLongLong((PY_LONG_LONG)d); + p = PyLong_FromLongLong((unsigned PY_LONG_LONG)d); } else { - p = PyLong_FromLong((long)d); + p = PyInt_FromLong((long)d); } *o = p; return 0; @@ -192,13 +187,6 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_un static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) { - if (u->strict_map_key && !PyUnicode_CheckExact(k) && !PyBytes_CheckExact(k)) { - PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key when strict_map_key=True", Py_TYPE(k)->tp_name); - return -1; - } - if (PyUnicode_CheckExact(k)) { - PyUnicode_InternInPlace(&k); - } if (u->has_pairs_hook) { msgpack_unpack_object item = PyTuple_Pack(2, k, v); if (!item) @@ -237,11 +225,10 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* } PyObject *py; - - if (u->raw) { - py = PyBytes_FromStringAndSize(p, l); + if(u->encoding) { + py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); } else { - py = PyUnicode_DecodeUTF8(p, l, u->unicode_errors); + py = PyBytes_FromStringAndSize(p, l); } if (!py) return -1; @@ -263,43 +250,10 @@ static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* return 0; } -typedef struct msgpack_timestamp { - int64_t tv_sec; - uint32_t tv_nsec; -} msgpack_timestamp; - -/* - * Unpack ext buffer to a timestamp. Pulled from msgpack-c timestamp.h. - */ -static int unpack_timestamp(const char* buf, unsigned int buflen, msgpack_timestamp* ts) { - switch (buflen) { - case 4: - ts->tv_nsec = 0; - { - uint32_t v = _msgpack_load32(uint32_t, buf); - ts->tv_sec = (int64_t)v; - } - return 0; - case 8: { - uint64_t value =_msgpack_load64(uint64_t, buf); - ts->tv_nsec = (uint32_t)(value >> 34); - ts->tv_sec = value & 0x00000003ffffffffLL; - return 0; - } - case 12: - ts->tv_nsec = _msgpack_load32(uint32_t, buf); - ts->tv_sec = _msgpack_load64(int64_t, buf + 4); - return 0; - default: - return -1; - } -} - -#include "datetime.h" - -static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, - unsigned int length, msgpack_unpack_object* o) +static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, + unsigned int length, msgpack_unpack_object* o) { + PyObject *py; int8_t typecode = (int8_t)*pos++; if (!u->ext_hook) { PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); @@ -309,79 +263,12 @@ static int unpack_callback_ext(unpack_user* u, const char* base, const char* pos PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len); return -1; } - - PyObject *py = NULL; // length also includes the typecode, so the actual data is length-1 - if (typecode == -1) { - msgpack_timestamp ts; - if (unpack_timestamp(pos, length-1, &ts) < 0) { - return -1; - } - - if (u->timestamp == 2) { // int - PyObject *a = PyLong_FromLongLong(ts.tv_sec); - if (a == NULL) return -1; - - PyObject *c = PyNumber_Multiply(a, u->giga); - Py_DECREF(a); - if (c == NULL) { - return -1; - } - - PyObject *b = PyLong_FromUnsignedLong(ts.tv_nsec); - if (b == NULL) { - Py_DECREF(c); - return -1; - } - - py = PyNumber_Add(c, b); - Py_DECREF(c); - Py_DECREF(b); - } - else if (u->timestamp == 0) { // Timestamp - py = PyObject_CallFunction(u->timestamp_t, "(Lk)", ts.tv_sec, ts.tv_nsec); - } - else if (u->timestamp == 3) { // datetime - // Calculate datetime using epoch + delta - // due to limitations PyDateTime_FromTimestamp on Windows with negative timestamps - PyObject *epoch = PyDateTimeAPI->DateTime_FromDateAndTime(1970, 1, 1, 0, 0, 0, 0, u->utc, PyDateTimeAPI->DateTimeType); - if (epoch == NULL) { - return -1; - } - - PyObject* d = PyDelta_FromDSU(ts.tv_sec/(24*3600), ts.tv_sec%(24*3600), ts.tv_nsec / 1000); - if (d == NULL) { - Py_DECREF(epoch); - return -1; - } - - py = PyNumber_Add(epoch, d); - - Py_DECREF(epoch); - Py_DECREF(d); - } - else { // float - PyObject *a = PyFloat_FromDouble((double)ts.tv_nsec); - if (a == NULL) return -1; - - PyObject *b = PyNumber_TrueDivide(a, u->giga); - Py_DECREF(a); - if (b == NULL) return -1; - - PyObject *c = PyLong_FromLongLong(ts.tv_sec); - if (c == NULL) { - Py_DECREF(b); - return -1; - } - - a = PyNumber_Add(b, c); - Py_DECREF(b); - Py_DECREF(c); - py = a; - } - } else { - py = PyObject_CallFunction(u->ext_hook, "(iy#)", (int)typecode, pos, (Py_ssize_t)length-1); - } +#if PY_MAJOR_VERSION == 2 + py = PyObject_CallFunction(u->ext_hook, "(is#)", typecode, pos, length-1); +#else + py = PyObject_CallFunction(u->ext_hook, "(iy#)", typecode, pos, length-1); +#endif if (!py) return -1; *o = py; diff --git a/msgpack/unpack_container_header.h b/msgpack/unpack_container_header.h deleted file mode 100644 index c14a3c2..0000000 --- a/msgpack/unpack_container_header.h +++ /dev/null @@ -1,51 +0,0 @@ -static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) -{ - assert(len >= *off); - uint32_t size; - const unsigned char *const p = (unsigned char*)data + *off; - -#define inc_offset(inc) \ - if (len - *off < inc) \ - return 0; \ - *off += inc; - - switch (*p) { - case var_offset: - inc_offset(3); - size = _msgpack_load16(uint16_t, p + 1); - break; - case var_offset + 1: - inc_offset(5); - size = _msgpack_load32(uint32_t, p + 1); - break; -#ifdef USE_CASE_RANGE - case fixed_offset + 0x0 ... fixed_offset + 0xf: -#else - case fixed_offset + 0x0: - case fixed_offset + 0x1: - case fixed_offset + 0x2: - case fixed_offset + 0x3: - case fixed_offset + 0x4: - case fixed_offset + 0x5: - case fixed_offset + 0x6: - case fixed_offset + 0x7: - case fixed_offset + 0x8: - case fixed_offset + 0x9: - case fixed_offset + 0xa: - case fixed_offset + 0xb: - case fixed_offset + 0xc: - case fixed_offset + 0xd: - case fixed_offset + 0xe: - case fixed_offset + 0xf: -#endif - ++*off; - size = ((unsigned int)*p) & 0x0f; - break; - default: - PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); - return -1; - } - unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); - return 1; -} - diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index cce29e7..d34eced 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -24,8 +24,8 @@ typedef struct unpack_stack { PyObject* obj; - Py_ssize_t size; - Py_ssize_t count; + size_t size; + size_t count; unsigned int ct; PyObject* map_key; } unpack_stack; @@ -70,18 +70,15 @@ static inline PyObject* unpack_data(unpack_context* ctx) return (ctx)->stack[0].obj; } -static inline void unpack_clear(unpack_context *ctx) -{ - Py_CLEAR(ctx->stack[0].obj); -} -static inline int unpack_execute(bool construct, unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +template +static inline int unpack_execute(unpack_context* ctx, const char* data, size_t len, size_t* off) { assert(len >= *off); const unsigned char* p = (unsigned char*)data + *off; const unsigned char* const pe = (unsigned char*)data + len; - const void* n = p; + const void* n = NULL; unsigned int trail = ctx->trail; unsigned int cs = ctx->cs; @@ -92,7 +89,7 @@ static inline int unpack_execute(bool construct, unpack_context* ctx, const char */ unpack_user* user = &ctx->user; - PyObject* obj = NULL; + PyObject* obj; unpack_stack* c = NULL; int ret; @@ -122,7 +119,7 @@ static inline int unpack_execute(bool construct, unpack_context* ctx, const char goto _fixed_trail_again #define start_container(func, count_, ct_) \ - if(top >= MSGPACK_EMBED_STACK_SIZE) { ret = -3; goto _end; } \ + if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ if((count_) == 0) { obj = stack[top].obj; \ if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \ @@ -131,6 +128,27 @@ static inline int unpack_execute(bool construct, unpack_context* ctx, const char stack[top].size = count_; \ stack[top].count = 0; \ ++top; \ + /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \ + /*printf("stack push %d\n", top);*/ \ + /* FIXME \ + if(top >= stack_size) { \ + if(stack_size == MSGPACK_EMBED_STACK_SIZE) { \ + size_t csize = sizeof(unpack_stack) * MSGPACK_EMBED_STACK_SIZE; \ + size_t nsize = csize * 2; \ + unpack_stack* tmp = (unpack_stack*)malloc(nsize); \ + if(tmp == NULL) { goto _failed; } \ + memcpy(tmp, ctx->stack, csize); \ + ctx->stack = stack = tmp; \ + ctx->stack_size = stack_size = MSGPACK_EMBED_STACK_SIZE * 2; \ + } else { \ + size_t nsize = sizeof(unpack_stack) * ctx->stack_size * 2; \ + unpack_stack* tmp = (unpack_stack*)realloc(ctx->stack, nsize); \ + if(tmp == NULL) { goto _failed; } \ + ctx->stack = stack = tmp; \ + ctx->stack_size = stack_size = stack_size * 2; \ + } \ + } \ + */ \ goto _header_again #define NEXT_CS(p) ((unsigned int)*p & 0x1f) @@ -207,8 +225,7 @@ static inline int unpack_execute(bool construct, unpack_context* ctx, const char case 0xdf: // map 32 again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); default: - ret = -2; - goto _end; + goto _failed; } SWITCH_RANGE(0xa0, 0xbf) // FixRaw again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); @@ -218,8 +235,7 @@ static inline int unpack_execute(bool construct, unpack_context* ctx, const char start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); SWITCH_RANGE_DEFAULT - ret = -2; - goto _end; + goto _failed; SWITCH_RANGE_END // end CS_HEADER @@ -242,21 +258,17 @@ static inline int unpack_execute(bool construct, unpack_context* ctx, const char _msgpack_load32(uint32_t,n)+1, _ext_zero); case CS_FLOAT: { - double f; -#if PY_VERSION_HEX >= 0x030B00A7 - f = PyFloat_Unpack4((const char*)n, 0); -#else - f = _PyFloat_Unpack4((unsigned char*)n, 0); -#endif - push_fixed_value(_float, f); } + union { uint32_t i; float f; } mem; + mem.i = _msgpack_load32(uint32_t,n); + push_fixed_value(_float, mem.f); } case CS_DOUBLE: { - double f; -#if PY_VERSION_HEX >= 0x030B00A7 - f = PyFloat_Unpack8((const char*)n, 0); -#else - f = _PyFloat_Unpack8((unsigned char*)n, 0); + union { uint64_t i; double f; } mem; + mem.i = _msgpack_load64(uint64_t,n); +#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi + // https://github.com/msgpack/msgpack-perl/pull/1 + mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); #endif - push_fixed_value(_double, f); } + push_fixed_value(_double, mem.f); } case CS_UINT_8: push_fixed_value(_uint8, *(uint8_t*)n); case CS_UINT_16: @@ -385,7 +397,6 @@ _end: #undef construct_cb } -#undef NEXT_CS #undef SWITCH_RANGE_BEGIN #undef SWITCH_RANGE #undef SWITCH_RANGE_DEFAULT @@ -397,27 +408,68 @@ _end: #undef again_fixed_trail_if_zero #undef start_container -static int unpack_construct(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { - return unpack_execute(1, ctx, data, len, off); -} -static int unpack_skip(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { - return unpack_execute(0, ctx, data, len, off); +template +static inline int unpack_container_header(unpack_context* ctx, const char* data, size_t len, size_t* off) +{ + assert(len >= *off); + uint32_t size; + const unsigned char *const p = (unsigned char*)data + *off; + +#define inc_offset(inc) \ + if (len - *off < inc) \ + return 0; \ + *off += inc; + + switch (*p) { + case var_offset: + inc_offset(3); + size = _msgpack_load16(uint16_t, p + 1); + break; + case var_offset + 1: + inc_offset(5); + size = _msgpack_load32(uint32_t, p + 1); + break; +#ifdef USE_CASE_RANGE + case fixed_offset + 0x0 ... fixed_offset + 0xf: +#else + case fixed_offset + 0x0: + case fixed_offset + 0x1: + case fixed_offset + 0x2: + case fixed_offset + 0x3: + case fixed_offset + 0x4: + case fixed_offset + 0x5: + case fixed_offset + 0x6: + case fixed_offset + 0x7: + case fixed_offset + 0x8: + case fixed_offset + 0x9: + case fixed_offset + 0xa: + case fixed_offset + 0xb: + case fixed_offset + 0xc: + case fixed_offset + 0xd: + case fixed_offset + 0xe: + case fixed_offset + 0xf: +#endif + ++*off; + size = ((unsigned int)*p) & 0x0f; + break; + default: + PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); + return -1; + } + unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); + return 1; } -#define unpack_container_header read_array_header -#define fixed_offset 0x90 -#define var_offset 0xdc -#include "unpack_container_header.h" -#undef unpack_container_header -#undef fixed_offset -#undef var_offset +#undef SWITCH_RANGE_BEGIN +#undef SWITCH_RANGE +#undef SWITCH_RANGE_DEFAULT +#undef SWITCH_RANGE_END -#define unpack_container_header read_map_header -#define fixed_offset 0x80 -#define var_offset 0xde -#include "unpack_container_header.h" -#undef unpack_container_header -#undef fixed_offset -#undef var_offset +static const execute_fn unpack_construct = &unpack_execute; +static const execute_fn unpack_skip = &unpack_execute; +static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>; +static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>; + +#undef NEXT_CS /* vim: set ts=4 sw=4 sts=4 expandtab */ diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index c69d5a7..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,45 +0,0 @@ -[build-system] -requires = ["setuptools >= 78.1.1"] -build-backend = "setuptools.build_meta" - -[project] -name = "msgpack" -dynamic = ["version"] -license = "Apache-2.0" -authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}] -description = "MessagePack serializer" -readme = "README.md" -keywords = ["msgpack", "messagepack", "serializer", "serialization", "binary"] -requires-python = ">=3.10" -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Operating System :: OS Independent", - "Topic :: File Formats", - "Intended Audience :: Developers", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", -] - -[project.urls] -Homepage = "https://msgpack.org/" -Documentation = "https://msgpack-python.readthedocs.io/" -Repository = "https://github.com/msgpack/msgpack-python/" -Tracker = "https://github.com/msgpack/msgpack-python/issues" -Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" - -[tool.setuptools] -# Do not install C/C++/Cython source files -include-package-data = false - -[tool.setuptools.dynamic] -version = {attr = "msgpack.__version__"} - -[tool.ruff] -line-length = 100 -target-version = "py310" -lint.select = [ - "E", # pycodestyle - "F", # Pyflakes - "I", # isort - #"UP", pyupgrade -] diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 9e4643b..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -Cython==3.2.1 -setuptools==78.1.1 -build diff --git a/setup.py b/setup.py index 4029e9e..1055a61 100644 --- a/setup.py +++ b/setup.py @@ -1,32 +1,122 @@ #!/usr/bin/env python +# coding: utf-8 import os import sys +from glob import glob +from distutils.command.sdist import sdist +from setuptools import setup, Extension -from setuptools import Extension, setup +from distutils.command.build_ext import build_ext -PYPY = hasattr(sys, "pypy_version_info") +class NoCython(Exception): + pass + +try: + import Cython.Compiler.Main as cython_compiler + have_cython = True +except ImportError: + have_cython = False + + +def cythonize(src): + sys.stderr.write("cythonize: %r\n" % (src,)) + cython_compiler.compile([src], cplus=True, emit_linenums=True) + +def ensure_source(src): + pyx = os.path.splitext(src)[0] + '.pyx' + + if not os.path.exists(src): + if not have_cython: + raise NoCython + cythonize(pyx) + elif (os.path.exists(pyx) and + os.stat(src).st_mtime < os.stat(pyx).st_mtime and + have_cython): + cythonize(pyx) + return src + + +class BuildExt(build_ext): + def build_extension(self, ext): + try: + ext.sources = list(map(ensure_source, ext.sources)) + except NoCython: + print("WARNING") + print("Cython is required for building extension from checkout.") + print("Install Cython >= 0.16 or install msgpack from PyPI.") + print("Falling back to pure Python implementation.") + return + try: + return build_ext.build_extension(self, ext) + except Exception as e: + print("WARNING: Failed to compile extensiom modules.") + print("msgpack uses fallback pure python implementation.") + print(e) + + +exec(open('msgpack/_version.py').read()) + +version_str = '.'.join(str(x) for x in version[:3]) +if len(version) > 3 and version[3] != 'final': + version_str += version[3] + +# take care of extension modules. +if have_cython: + class Sdist(sdist): + def __init__(self, *args, **kwargs): + for src in glob('msgpack/*.pyx'): + cythonize(src) + sdist.__init__(self, *args, **kwargs) +else: + Sdist = sdist libraries = [] -macros = [] +if sys.platform == 'win32': + libraries.append('ws2_32') + +if sys.byteorder == 'big': + macros = [('__BIG_ENDIAN__', '1')] +else: + macros = [('__LITTLE_ENDIAN__', '1')] + ext_modules = [] - -if sys.platform == "win32": - libraries.append("ws2_32") - macros = [("__LITTLE_ENDIAN__", "1")] - -if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"): - ext_modules.append( - Extension( - "msgpack._cmsgpack", - sources=["msgpack/_cmsgpack.c"], - libraries=libraries, - include_dirs=["."], - define_macros=macros, - ) - ) +if not hasattr(sys, 'pypy_version_info'): + ext_modules.append(Extension('msgpack._packer', + sources=['msgpack/_packer.cpp'], + libraries=libraries, + include_dirs=['.'], + define_macros=macros, + )) + ext_modules.append(Extension('msgpack._unpacker', + sources=['msgpack/_unpacker.cpp'], + libraries=libraries, + include_dirs=['.'], + define_macros=macros, + )) del libraries, macros -setup( - ext_modules=ext_modules, - packages=["msgpack"], -) + +desc = 'MessagePack (de)serializer.' +f = open('README.rst') +long_desc = f.read() +f.close() +del f + +setup(name='msgpack-python', + author='INADA Naoki', + author_email='songofacandy@gmail.com', + version=version_str, + cmdclass={'build_ext': BuildExt, 'sdist': Sdist}, + ext_modules=ext_modules, + packages=['msgpack'], + description=desc, + long_description=long_desc, + url='http://msgpack.org/', + download_url='http://pypi.python.org/pypi/msgpack/', + classifiers=[ + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 3', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + ] + ) diff --git a/test/test_buffer.py b/test/test_buffer.py index ca09722..5a71f90 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,49 +1,20 @@ -from pytest import raises +#!/usr/bin/env python +# coding: utf-8 -from msgpack import Packer, packb, unpackb +from msgpack import packb, unpackb def test_unpack_buffer(): from array import array - - buf = array("b") - buf.frombytes(packb((b"foo", b"bar"))) + buf = array('b') + buf.fromstring(packb((b'foo', b'bar'))) obj = unpackb(buf, use_list=1) - assert [b"foo", b"bar"] == obj + assert [b'foo', b'bar'] == obj def test_unpack_bytearray(): - buf = bytearray(packb((b"foo", b"bar"))) + buf = bytearray(packb(('foo', 'bar'))) obj = unpackb(buf, use_list=1) - assert [b"foo", b"bar"] == obj + assert [b'foo', b'bar'] == obj expected_type = bytes - assert all(type(s) is expected_type for s in obj) - - -def test_unpack_memoryview(): - buf = bytearray(packb((b"foo", b"bar"))) - view = memoryview(buf) - obj = unpackb(view, use_list=1) - assert [b"foo", b"bar"] == obj - expected_type = bytes - assert all(type(s) is expected_type for s in obj) - - -def test_packer_getbuffer(): - packer = Packer(autoreset=False) - packer.pack_array_header(2) - packer.pack(42) - packer.pack("hello") - buffer = packer.getbuffer() - assert isinstance(buffer, memoryview) - assert bytes(buffer) == b"\x92*\xa5hello" - - if Packer.__module__ == "msgpack._cmsgpack": # only for Cython - # cython Packer supports buffer protocol directly - assert bytes(packer) == b"\x92*\xa5hello" - - with raises(BufferError): - packer.pack(42) - buffer.release() - packer.pack(42) - assert bytes(packer) == b"\x92*\xa5hello*" + assert all(type(s) == expected_type for s in obj) diff --git a/test/test_case.py b/test/test_case.py index c4c615e..5a4bb6c 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -1,136 +1,102 @@ #!/usr/bin/env python +# coding: utf-8 + from msgpack import packb, unpackb -def check(length, obj, use_bin_type=True): - v = packb(obj, use_bin_type=use_bin_type) - assert len(v) == length, f"{obj!r} length should be {length!r} but get {len(v)!r}" - assert unpackb(v, use_list=0, raw=not use_bin_type) == obj - +def check(length, obj): + v = packb(obj) + assert len(v) == length, \ + "%r length should be %r but get %r" % (obj, length, len(v)) + assert unpackb(v, use_list=0) == obj def test_1(): - for o in [ - None, - True, - False, - 0, - 1, - (1 << 6), - (1 << 7) - 1, - -1, - -((1 << 5) - 1), - -(1 << 5), - ]: + for o in [None, True, False, 0, 1, (1 << 6), (1 << 7) - 1, -1, + -((1<<5)-1), -(1<<5)]: check(1, o) - def test_2(): - for o in [1 << 7, (1 << 8) - 1, -((1 << 5) + 1), -(1 << 7)]: + for o in [1 << 7, (1 << 8) - 1, + -((1<<5)+1), -(1<<7) + ]: check(2, o) - def test_3(): - for o in [1 << 8, (1 << 16) - 1, -((1 << 7) + 1), -(1 << 15)]: + for o in [1 << 8, (1 << 16) - 1, + -((1<<7)+1), -(1<<15)]: check(3, o) - def test_5(): - for o in [1 << 16, (1 << 32) - 1, -((1 << 15) + 1), -(1 << 31)]: + for o in [1 << 16, (1 << 32) - 1, + -((1<<15)+1), -(1<<31)]: check(5, o) - def test_9(): - for o in [ - 1 << 32, - (1 << 64) - 1, - -((1 << 31) + 1), - -(1 << 63), - 1.0, - 0.1, - -0.1, - -1.0, - ]: + for o in [1 << 32, (1 << 64) - 1, + -((1<<31)+1), -(1<<63), + 1.0, 0.1, -0.1, -1.0]: check(9, o) def check_raw(overhead, num): - check(num + overhead, b" " * num, use_bin_type=False) - + check(num + overhead, b" " * num) def test_fixraw(): check_raw(1, 0) - check_raw(1, (1 << 5) - 1) - + check_raw(1, (1<<5) - 1) def test_raw16(): - check_raw(3, 1 << 5) - check_raw(3, (1 << 16) - 1) - + check_raw(3, 1<<5) + check_raw(3, (1<<16) - 1) def test_raw32(): - check_raw(5, 1 << 16) + check_raw(5, 1<<16) def check_array(overhead, num): check(num + overhead, (None,) * num) - def test_fixarray(): check_array(1, 0) check_array(1, (1 << 4) - 1) - def test_array16(): check_array(3, 1 << 4) - check_array(3, (1 << 16) - 1) - + check_array(3, (1<<16)-1) def test_array32(): - check_array(5, (1 << 16)) + check_array(5, (1<<16)) def match(obj, buf): assert packb(obj) == buf - assert unpackb(buf, use_list=0, strict_map_key=False) == obj - + assert unpackb(buf, use_list=0) == obj def test_match(): cases = [ - (None, b"\xc0"), - (False, b"\xc2"), - (True, b"\xc3"), - (0, b"\x00"), - (127, b"\x7f"), - (128, b"\xcc\x80"), - (256, b"\xcd\x01\x00"), - (-1, b"\xff"), - (-33, b"\xd0\xdf"), - (-129, b"\xd1\xff\x7f"), - ({1: 1}, b"\x81\x01\x01"), + (None, b'\xc0'), + (False, b'\xc2'), + (True, b'\xc3'), + (0, b'\x00'), + (127, b'\x7f'), + (128, b'\xcc\x80'), + (256, b'\xcd\x01\x00'), + (-1, b'\xff'), + (-33, b'\xd0\xdf'), + (-129, b'\xd1\xff\x7f'), + ({1:1}, b'\x81\x01\x01'), (1.0, b"\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"), - ((), b"\x90"), - ( - tuple(range(15)), - b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e", - ), - ( - tuple(range(16)), - b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", - ), - ({}, b"\x80"), - ( - {x: x for x in range(15)}, - b"\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e", - ), - ( - {x: x for x in range(16)}, - b"\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f", - ), - ] + ((), b'\x90'), + (tuple(range(15)),b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e"), + (tuple(range(16)),b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"), + ({}, b'\x80'), + (dict([(x,x) for x in range(15)]), b'\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e'), + (dict([(x,x) for x in range(16)]), b'\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f'), + ] for v, p in cases: match(v, p) - def test_unicode(): - assert unpackb(packb("foobar"), use_list=1) == "foobar" + assert unpackb(packb('foobar'), use_list=1) == b'foobar' + diff --git a/test/test_except.py b/test/test_except.py index b77ac80..361d4ea 100644 --- a/test/test_except.py +++ b/test/test_except.py @@ -1,10 +1,10 @@ #!/usr/bin/env python - -import datetime +# coding: utf-8 from pytest import raises +from msgpack import packb, unpackb -from msgpack import FormatError, OutOfData, StackError, Unpacker, packb, unpackb +import datetime class DummyException(Exception): @@ -19,45 +19,13 @@ def test_raise_on_find_unsupported_value(): def test_raise_from_object_hook(): def hook(obj): raise DummyException - raises(DummyException, unpackb, packb({}), object_hook=hook) - raises(DummyException, unpackb, packb({"fizz": "buzz"}), object_hook=hook) - raises(DummyException, unpackb, packb({"fizz": "buzz"}), object_pairs_hook=hook) - raises(DummyException, unpackb, packb({"fizz": {"buzz": "spam"}}), object_hook=hook) - raises( - DummyException, - unpackb, - packb({"fizz": {"buzz": "spam"}}), - object_pairs_hook=hook, - ) + raises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_hook=hook) + raises(DummyException, unpackb, packb({'fizz': 'buzz'}), object_pairs_hook=hook) + raises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_hook=hook) + raises(DummyException, unpackb, packb({'fizz': {'buzz': 'spam'}}), object_pairs_hook=hook) def test_invalidvalue(): - incomplete = b"\xd9\x97#DL_" # raw8 - length=0x97 with raises(ValueError): - unpackb(incomplete) - - with raises(OutOfData): - unpacker = Unpacker() - unpacker.feed(incomplete) - unpacker.unpack() - - with raises(FormatError): - unpackb(b"\xc1") # (undefined tag) - - with raises(FormatError): - unpackb(b"\x91\xc1") # fixarray(len=1) [ (undefined tag) ] - - with raises(StackError): - unpackb(b"\x91" * 3000) # nested fixarray(len=1) - - -def test_strict_map_key(): - valid = {"unicode": 1, b"bytes": 2} - packed = packb(valid, use_bin_type=True) - assert valid == unpackb(packed, raw=False, strict_map_key=True) - - invalid = {42: 1} - packed = packb(invalid, use_bin_type=True) - with raises(ValueError): - unpackb(packed, raw=False, strict_map_key=True) + unpackb(b'\xd9\x97#DL_') diff --git a/test/test_extension.py b/test/test_extension.py index aaf0fd9..2f85ce3 100644 --- a/test/test_extension.py +++ b/test/test_extension.py @@ -1,5 +1,5 @@ +from __future__ import print_function import array - import msgpack from msgpack import ExtType @@ -9,70 +9,49 @@ def test_pack_ext_type(): packer = msgpack.Packer() packer.pack_ext_type(0x42, s) return packer.bytes() - - assert p(b"A") == b"\xd4\x42A" # fixext 1 - assert p(b"AB") == b"\xd5\x42AB" # fixext 2 - assert p(b"ABCD") == b"\xd6\x42ABCD" # fixext 4 - assert p(b"ABCDEFGH") == b"\xd7\x42ABCDEFGH" # fixext 8 - assert p(b"A" * 16) == b"\xd8\x42" + b"A" * 16 # fixext 16 - assert p(b"ABC") == b"\xc7\x03\x42ABC" # ext 8 - assert p(b"A" * 0x0123) == b"\xc8\x01\x23\x42" + b"A" * 0x0123 # ext 16 - assert p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 # ext 32 + assert p(b'A') == b'\xd4\x42A' # fixext 1 + assert p(b'AB') == b'\xd5\x42AB' # fixext 2 + assert p(b'ABCD') == b'\xd6\x42ABCD' # fixext 4 + assert p(b'ABCDEFGH') == b'\xd7\x42ABCDEFGH' # fixext 8 + assert p(b'A'*16) == b'\xd8\x42' + b'A'*16 # fixext 16 + assert p(b'ABC') == b'\xc7\x03\x42ABC' # ext 8 + assert p(b'A'*0x0123) == b'\xc8\x01\x23\x42' + b'A'*0x0123 # ext 16 + assert p(b'A'*0x00012345) == b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345 # ext 32 def test_unpack_ext_type(): def check(b, expected): assert msgpack.unpackb(b) == expected - check(b"\xd4\x42A", ExtType(0x42, b"A")) # fixext 1 - check(b"\xd5\x42AB", ExtType(0x42, b"AB")) # fixext 2 - check(b"\xd6\x42ABCD", ExtType(0x42, b"ABCD")) # fixext 4 - check(b"\xd7\x42ABCDEFGH", ExtType(0x42, b"ABCDEFGH")) # fixext 8 - check(b"\xd8\x42" + b"A" * 16, ExtType(0x42, b"A" * 16)) # fixext 16 - check(b"\xc7\x03\x42ABC", ExtType(0x42, b"ABC")) # ext 8 - check(b"\xc8\x01\x23\x42" + b"A" * 0x0123, ExtType(0x42, b"A" * 0x0123)) # ext 16 - check( - b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345, - ExtType(0x42, b"A" * 0x00012345), - ) # ext 32 + check(b'\xd4\x42A', ExtType(0x42, b'A')) # fixext 1 + check(b'\xd5\x42AB', ExtType(0x42, b'AB')) # fixext 2 + check(b'\xd6\x42ABCD', ExtType(0x42, b'ABCD')) # fixext 4 + check(b'\xd7\x42ABCDEFGH', ExtType(0x42, b'ABCDEFGH')) # fixext 8 + check(b'\xd8\x42' + b'A'*16, ExtType(0x42, b'A'*16)) # fixext 16 + check(b'\xc7\x03\x42ABC', ExtType(0x42, b'ABC')) # ext 8 + check(b'\xc8\x01\x23\x42' + b'A'*0x0123, + ExtType(0x42, b'A'*0x0123)) # ext 16 + check(b'\xc9\x00\x01\x23\x45\x42' + b'A'*0x00012345, + ExtType(0x42, b'A'*0x00012345)) # ext 32 def test_extension_type(): def default(obj): - print("default called", obj) + print('default called', obj) if isinstance(obj, array.array): - typecode = 123 # application specific typecode - try: - data = obj.tobytes() - except AttributeError: - data = obj.tostring() + typecode = 123 # application specific typecode + data = obj.tostring() return ExtType(typecode, data) - raise TypeError(f"Unknown type object {obj!r}") + raise TypeError("Unknwon type object %r" % (obj,)) def ext_hook(code, data): - print("ext_hook called", code, data) + print('ext_hook called', code, data) assert code == 123 - obj = array.array("d") - obj.frombytes(data) + obj = array.array('d') + obj.fromstring(data) return obj - obj = [42, b"hello", array.array("d", [1.1, 2.2, 3.3])] + obj = [42, b'hello', array.array('d', [1.1, 2.2, 3.3])] s = msgpack.packb(obj, default=default) obj2 = msgpack.unpackb(s, ext_hook=ext_hook) assert obj == obj2 - - -def test_overriding_hooks(): - def default(obj): - if isinstance(obj, int): - return {"__type__": "long", "__data__": str(obj)} - else: - return obj - - obj = {"testval": 1823746192837461928374619} - refobj = {"testval": default(obj["testval"])} - refout = msgpack.packb(refobj) - assert isinstance(refout, (str, bytes)) - testout = msgpack.packb(obj, default=default) - - assert refout == testout diff --git a/test/test_format.py b/test/test_format.py index c06c87d..5fec0c3 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -1,88 +1,70 @@ #!/usr/bin/env python +# coding: utf-8 from msgpack import unpackb - -def check(src, should, use_list=0, raw=True): - assert unpackb(src, use_list=use_list, raw=raw, strict_map_key=False) == should - +def check(src, should, use_list=0): + assert unpackb(src, use_list=use_list) == should def testSimpleValue(): - check(b"\x93\xc0\xc2\xc3", (None, False, True)) - + check(b"\x93\xc0\xc2\xc3", + (None, False, True,)) def testFixnum(): - check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", ((0, 64, 127), (-32, -16, -1))) - + check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", + ((0,64,127,), (-32,-16,-1,),) + ) def testFixArray(): - check(b"\x92\x90\x91\x91\xc0", ((), ((None,),))) - + check(b"\x92\x90\x91\x91\xc0", + ((),((None,),),), + ) def testFixRaw(): - check(b"\x94\xa0\xa1a\xa2bc\xa3def", (b"", b"a", b"bc", b"def")) - + check(b"\x94\xa0\xa1a\xa2bc\xa3def", + (b"", b"a", b"bc", b"def",), + ) def testFixMap(): - check(b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}}) - + check( + b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", + {False: {None: None}, True:{None:{}}}, + ) def testUnsignedInt(): check( - b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" - b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" - b"\xce\xff\xff\xff\xff", - (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295), - ) - + b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" + b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" + b"\xce\xff\xff\xff\xff", + (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295,), + ) def testSignedInt(): - check( - b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" - b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" - b"\xd2\xff\xff\xff\xff", - (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1), - ) - + check(b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" + b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" + b"\xd2\xff\xff\xff\xff", + (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1,)) def testRaw(): - check( - b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" + check(b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", - (b"", b"a", b"ab", b"", b"a", b"ab"), - ) - check( - b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" - b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", - ("", "a", "ab", "", "a", "ab"), - raw=False, - ) - + (b"", b"a", b"ab", b"", b"a", b"ab")) def testArray(): - check( - b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" + check(b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02" b"\xc2\xc3", - ((), (None,), (False, True), (), (None,), (False, True)), - ) - + ((), (None,), (False,True), (), (None,), (False,True)) + ) def testMap(): check( b"\x96" - b"\xde\x00\x00" - b"\xde\x00\x01\xc0\xc2" - b"\xde\x00\x02\xc0\xc2\xc3\xc2" - b"\xdf\x00\x00\x00\x00" - b"\xdf\x00\x00\x00\x01\xc0\xc2" - b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", - ( - {}, - {None: False}, - {True: False, None: False}, - {}, - {None: False}, - {True: False, None: False}, - ), - ) + b"\xde\x00\x00" + b"\xde\x00\x01\xc0\xc2" + b"\xde\x00\x02\xc0\xc2\xc3\xc2" + b"\xdf\x00\x00\x00\x00" + b"\xdf\x00\x00\x00\x01\xc0\xc2" + b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", + ({}, {None: False}, {True: False, None: False}, {}, + {None: False}, {True: False, None: False})) diff --git a/test/test_limits.py b/test/test_limits.py index 9b92b4d..3c1cf2a 100644 --- a/test/test_limits.py +++ b/test/test_limits.py @@ -1,60 +1,53 @@ #!/usr/bin/env python +# coding: utf-8 +from __future__ import absolute_import, division, print_function, unicode_literals import pytest -from msgpack import ( - ExtType, - Packer, - PackOverflowError, - PackValueError, - Unpacker, - UnpackValueError, - packb, - unpackb, -) +from msgpack import packb, unpackb, Packer, Unpacker, ExtType def test_integer(): - x = -(2**63) + x = -(2 ** 63) assert unpackb(packb(x)) == x - with pytest.raises(PackOverflowError): - packb(x - 1) + with pytest.raises((OverflowError, ValueError)): + packb(x-1) - x = 2**64 - 1 + x = 2 ** 64 - 1 assert unpackb(packb(x)) == x - with pytest.raises(PackOverflowError): - packb(x + 1) + with pytest.raises((OverflowError, ValueError)): + packb(x+1) def test_array_header(): packer = Packer() - packer.pack_array_header(2**32 - 1) - with pytest.raises(PackValueError): + packer.pack_array_header(2**32-1) + with pytest.raises((OverflowError, ValueError)): packer.pack_array_header(2**32) def test_map_header(): packer = Packer() - packer.pack_map_header(2**32 - 1) - with pytest.raises(PackValueError): + packer.pack_map_header(2**32-1) + with pytest.raises((OverflowError, ValueError)): packer.pack_array_header(2**32) def test_max_str_len(): - d = "x" * 3 + d = 'x' * 3 packed = packb(d) - unpacker = Unpacker(max_str_len=3, raw=False) + unpacker = Unpacker(max_str_len=3, encoding='utf-8') unpacker.feed(packed) assert unpacker.unpack() == d - unpacker = Unpacker(max_str_len=2, raw=False) - with pytest.raises(UnpackValueError): + unpacker = Unpacker(max_str_len=2, encoding='utf-8') + with pytest.raises(ValueError): unpacker.feed(packed) unpacker.unpack() def test_max_bin_len(): - d = b"x" * 3 + d = b'x' * 3 packed = packb(d, use_bin_type=True) unpacker = Unpacker(max_bin_len=3) @@ -62,13 +55,13 @@ def test_max_bin_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_bin_len=2) - with pytest.raises(UnpackValueError): + with pytest.raises(ValueError): unpacker.feed(packed) unpacker.unpack() def test_max_array_len(): - d = [1, 2, 3] + d = [1,2,3] packed = packb(d) unpacker = Unpacker(max_array_len=3) @@ -76,7 +69,7 @@ def test_max_array_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_array_len=2) - with pytest.raises(UnpackValueError): + with pytest.raises(ValueError): unpacker.feed(packed) unpacker.unpack() @@ -85,12 +78,12 @@ def test_max_map_len(): d = {1: 2, 3: 4, 5: 6} packed = packb(d) - unpacker = Unpacker(max_map_len=3, strict_map_key=False) + unpacker = Unpacker(max_map_len=3) unpacker.feed(packed) assert unpacker.unpack() == d - unpacker = Unpacker(max_map_len=2, strict_map_key=False) - with pytest.raises(UnpackValueError): + unpacker = Unpacker(max_map_len=2) + with pytest.raises(ValueError): unpacker.feed(packed) unpacker.unpack() @@ -104,15 +97,16 @@ def test_max_ext_len(): assert unpacker.unpack() == d unpacker = Unpacker(max_ext_len=2) - with pytest.raises(UnpackValueError): + with pytest.raises(ValueError): unpacker.feed(packed) unpacker.unpack() + # PyPy fails following tests because of constant folding? # https://bugs.pypy.org/issue1721 -# @pytest.mark.skipif(True, reason="Requires very large memory.") -# def test_binary(): +#@pytest.mark.skipif(True, reason="Requires very large memory.") +#def test_binary(): # x = b'x' * (2**32 - 1) # assert unpackb(packb(x)) == x # del x @@ -121,8 +115,8 @@ def test_max_ext_len(): # packb(x) # # -# @pytest.mark.skipif(True, reason="Requires very large memory.") -# def test_string(): +#@pytest.mark.skipif(True, reason="Requires very large memory.") +#def test_string(): # x = 'x' * (2**32 - 1) # assert unpackb(packb(x)) == x # x += 'y' @@ -130,36 +124,10 @@ def test_max_ext_len(): # packb(x) # # -# @pytest.mark.skipif(True, reason="Requires very large memory.") -# def test_array(): +#@pytest.mark.skipif(True, reason="Requires very large memory.") +#def test_array(): # x = [0] * (2**32 - 1) # assert unpackb(packb(x)) == x # x.append(0) # with pytest.raises(ValueError): # packb(x) - - -# auto max len - - -def test_auto_max_array_len(): - packed = b"\xde\x00\x06zz" - with pytest.raises(UnpackValueError): - unpackb(packed, raw=False) - - unpacker = Unpacker(max_buffer_size=5, raw=False) - unpacker.feed(packed) - with pytest.raises(UnpackValueError): - unpacker.unpack() - - -def test_auto_max_map_len(): - # len(packed) == 6 -> max_map_len == 3 - packed = b"\xde\x00\x04zzz" - with pytest.raises(UnpackValueError): - unpackb(packed, raw=False) - - unpacker = Unpacker(max_buffer_size=6, raw=False) - unpacker.feed(packed) - with pytest.raises(UnpackValueError): - unpacker.unpack() diff --git a/test/test_memoryview.py b/test/test_memoryview.py deleted file mode 100644 index 0a2a6f5..0000000 --- a/test/test_memoryview.py +++ /dev/null @@ -1,99 +0,0 @@ -#!/usr/bin/env python - -from array import array - -from msgpack import packb, unpackb - - -def make_array(f, data): - a = array(f) - a.frombytes(data) - return a - - -def _runtest(format, nbytes, expected_header, expected_prefix, use_bin_type): - # create a new array - original_array = array(format) - original_array.fromlist([255] * (nbytes // original_array.itemsize)) - original_data = original_array.tobytes() - view = memoryview(original_array) - - # pack, unpack, and reconstruct array - packed = packb(view, use_bin_type=use_bin_type) - unpacked = unpackb(packed, raw=(not use_bin_type)) - reconstructed_array = make_array(format, unpacked) - - # check that we got the right amount of data - assert len(original_data) == nbytes - # check packed header - assert packed[:1] == expected_header - # check packed length prefix, if any - assert packed[1 : 1 + len(expected_prefix)] == expected_prefix - # check packed data - assert packed[1 + len(expected_prefix) :] == original_data - # check array unpacked correctly - assert original_array == reconstructed_array - - -def test_fixstr_from_byte(): - _runtest("B", 1, b"\xa1", b"", False) - _runtest("B", 31, b"\xbf", b"", False) - - -def test_fixstr_from_float(): - _runtest("f", 4, b"\xa4", b"", False) - _runtest("f", 28, b"\xbc", b"", False) - - -def test_str16_from_byte(): - _runtest("B", 2**8, b"\xda", b"\x01\x00", False) - _runtest("B", 2**16 - 1, b"\xda", b"\xff\xff", False) - - -def test_str16_from_float(): - _runtest("f", 2**8, b"\xda", b"\x01\x00", False) - _runtest("f", 2**16 - 4, b"\xda", b"\xff\xfc", False) - - -def test_str32_from_byte(): - _runtest("B", 2**16, b"\xdb", b"\x00\x01\x00\x00", False) - - -def test_str32_from_float(): - _runtest("f", 2**16, b"\xdb", b"\x00\x01\x00\x00", False) - - -def test_bin8_from_byte(): - _runtest("B", 1, b"\xc4", b"\x01", True) - _runtest("B", 2**8 - 1, b"\xc4", b"\xff", True) - - -def test_bin8_from_float(): - _runtest("f", 4, b"\xc4", b"\x04", True) - _runtest("f", 2**8 - 4, b"\xc4", b"\xfc", True) - - -def test_bin16_from_byte(): - _runtest("B", 2**8, b"\xc5", b"\x01\x00", True) - _runtest("B", 2**16 - 1, b"\xc5", b"\xff\xff", True) - - -def test_bin16_from_float(): - _runtest("f", 2**8, b"\xc5", b"\x01\x00", True) - _runtest("f", 2**16 - 4, b"\xc5", b"\xff\xfc", True) - - -def test_bin32_from_byte(): - _runtest("B", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) - - -def test_bin32_from_float(): - _runtest("f", 2**16, b"\xc6", b"\x00\x01\x00\x00", True) - - -def test_multidim_memoryview(): - # See https://github.com/msgpack/msgpack-python/issues/526 - view = memoryview(b"\00" * 6) - data = view.cast(view.format, (3, 2)) - packed = packb(data) - assert packed == b"\xc4\x06\x00\x00\x00\x00\x00\x00" diff --git a/test/test_newspec.py b/test/test_newspec.py index 9e2f9be..ab05029 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -1,90 +1,88 @@ -from msgpack import ExtType, packb, unpackb +# coding: utf-8 + +from msgpack import packb, unpackb, ExtType def test_str8(): - header = b"\xd9" - data = b"x" * 32 + header = b'\xd9' + data = b'x' * 32 b = packb(data.decode(), use_bin_type=True) assert len(b) == len(data) + 2 - assert b[0:2] == header + b"\x20" - assert b[2:] == data - assert unpackb(b, raw=True) == data - assert unpackb(b, raw=False) == data.decode() - - data = b"x" * 255 - b = packb(data.decode(), use_bin_type=True) - assert len(b) == len(data) + 2 - assert b[0:2] == header + b"\xff" - assert b[2:] == data - assert unpackb(b, raw=True) == data - assert unpackb(b, raw=False) == data.decode() - - -def test_bin8(): - header = b"\xc4" - data = b"" - b = packb(data, use_bin_type=True) - assert len(b) == len(data) + 2 - assert b[0:2] == header + b"\x00" + assert b[0:2] == header + b'\x20' assert b[2:] == data assert unpackb(b) == data - data = b"x" * 255 + data = b'x' * 255 + b = packb(data.decode(), use_bin_type=True) + assert len(b) == len(data) + 2 + assert b[0:2] == header + b'\xff' + assert b[2:] == data + assert unpackb(b) == data + + +def test_bin8(): + header = b'\xc4' + data = b'' b = packb(data, use_bin_type=True) assert len(b) == len(data) + 2 - assert b[0:2] == header + b"\xff" + assert b[0:2] == header + b'\x00' + assert b[2:] == data + assert unpackb(b) == data + + data = b'x' * 255 + b = packb(data, use_bin_type=True) + assert len(b) == len(data) + 2 + assert b[0:2] == header + b'\xff' assert b[2:] == data assert unpackb(b) == data def test_bin16(): - header = b"\xc5" - data = b"x" * 256 + header = b'\xc5' + data = b'x' * 256 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 3 assert b[0:1] == header - assert b[1:3] == b"\x01\x00" + assert b[1:3] == b'\x01\x00' assert b[3:] == data assert unpackb(b) == data - data = b"x" * 65535 + data = b'x' * 65535 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 3 assert b[0:1] == header - assert b[1:3] == b"\xff\xff" + assert b[1:3] == b'\xff\xff' assert b[3:] == data assert unpackb(b) == data def test_bin32(): - header = b"\xc6" - data = b"x" * 65536 + header = b'\xc6' + data = b'x' * 65536 b = packb(data, use_bin_type=True) assert len(b) == len(data) + 5 assert b[0:1] == header - assert b[1:5] == b"\x00\x01\x00\x00" + assert b[1:5] == b'\x00\x01\x00\x00' assert b[5:] == data assert unpackb(b) == data - def test_ext(): def check(ext, packed): assert packb(ext) == packed assert unpackb(packed) == ext - - check(ExtType(0x42, b"Z"), b"\xd4\x42Z") # fixext 1 - check(ExtType(0x42, b"ZZ"), b"\xd5\x42ZZ") # fixext 2 - check(ExtType(0x42, b"Z" * 4), b"\xd6\x42" + b"Z" * 4) # fixext 4 - check(ExtType(0x42, b"Z" * 8), b"\xd7\x42" + b"Z" * 8) # fixext 8 - check(ExtType(0x42, b"Z" * 16), b"\xd8\x42" + b"Z" * 16) # fixext 16 + check(ExtType(0x42, b'Z'), b'\xd4\x42Z') # fixext 1 + check(ExtType(0x42, b'ZZ'), b'\xd5\x42ZZ') # fixext 2 + check(ExtType(0x42, b'Z'*4), b'\xd6\x42' + b'Z'*4) # fixext 4 + check(ExtType(0x42, b'Z'*8), b'\xd7\x42' + b'Z'*8) # fixext 8 + check(ExtType(0x42, b'Z'*16), b'\xd8\x42' + b'Z'*16) # fixext 16 # ext 8 - check(ExtType(0x42, b""), b"\xc7\x00\x42") - check(ExtType(0x42, b"Z" * 255), b"\xc7\xff\x42" + b"Z" * 255) + check(ExtType(0x42, b''), b'\xc7\x00\x42') + check(ExtType(0x42, b'Z'*255), b'\xc7\xff\x42' + b'Z'*255) # ext 16 - check(ExtType(0x42, b"Z" * 256), b"\xc8\x01\x00\x42" + b"Z" * 256) - check(ExtType(0x42, b"Z" * 0xFFFF), b"\xc8\xff\xff\x42" + b"Z" * 0xFFFF) + check(ExtType(0x42, b'Z'*256), b'\xc8\x01\x00\x42' + b'Z'*256) + check(ExtType(0x42, b'Z'*0xffff), b'\xc8\xff\xff\x42' + b'Z'*0xffff) # ext 32 - check(ExtType(0x42, b"Z" * 0x10000), b"\xc9\x00\x01\x00\x00\x42" + b"Z" * 0x10000) + check(ExtType(0x42, b'Z'*0x10000), b'\xc9\x00\x01\x00\x00\x42' + b'Z'*0x10000) # needs large memory - # check(ExtType(0x42, b'Z'*0xffffffff), + #check(ExtType(0x42, b'Z'*0xffffffff), # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff) diff --git a/test/test_obj.py b/test/test_obj.py index 23be06d..390c1b6 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -1,82 +1,67 @@ #!/usr/bin/env python +# coding: utf-8 from pytest import raises - from msgpack import packb, unpackb - def _decode_complex(obj): - if b"__complex__" in obj: - return complex(obj[b"real"], obj[b"imag"]) + if b'__complex__' in obj: + return complex(obj[b'real'], obj[b'imag']) return obj - def _encode_complex(obj): if isinstance(obj, complex): - return {b"__complex__": True, b"real": 1, b"imag": 2} + return {b'__complex__': True, b'real': 1, b'imag': 2} return obj - def test_encode_hook(): - packed = packb([3, 1 + 2j], default=_encode_complex) + packed = packb([3, 1+2j], default=_encode_complex) unpacked = unpackb(packed, use_list=1) - assert unpacked[1] == {b"__complex__": True, b"real": 1, b"imag": 2} - + assert unpacked[1] == {b'__complex__': True, b'real': 1, b'imag': 2} def test_decode_hook(): - packed = packb([3, {b"__complex__": True, b"real": 1, b"imag": 2}]) + packed = packb([3, {b'__complex__': True, b'real': 1, b'imag': 2}]) unpacked = unpackb(packed, object_hook=_decode_complex, use_list=1) - assert unpacked[1] == 1 + 2j - + assert unpacked[1] == 1+2j def test_decode_pairs_hook(): packed = packb([3, {1: 2, 3: 4}]) prod_sum = 1 * 2 + 3 * 4 - unpacked = unpackb( - packed, - object_pairs_hook=lambda lst: sum(k * v for k, v in lst), - use_list=1, - strict_map_key=False, - ) + unpacked = unpackb(packed, object_pairs_hook=lambda l: sum(k * v for k, v in l), use_list=1) assert unpacked[1] == prod_sum - def test_only_one_obj_hook(): with raises(TypeError): - unpackb(b"", object_hook=lambda x: x, object_pairs_hook=lambda x: x) - + unpackb(b'', object_hook=lambda x: x, object_pairs_hook=lambda x: x) def test_bad_hook(): with raises(TypeError): - packed = packb([3, 1 + 2j], default=lambda o: o) - unpackb(packed, use_list=1) - + packed = packb([3, 1+2j], default=lambda o: o) + unpacked = unpackb(packed, use_list=1) def _arr_to_str(arr): - return "".join(str(c) for c in arr) - + return ''.join(str(c) for c in arr) def test_array_hook(): - packed = packb([1, 2, 3]) + packed = packb([1,2,3]) unpacked = unpackb(packed, list_hook=_arr_to_str, use_list=1) - assert unpacked == "123" + assert unpacked == '123' class DecodeError(Exception): pass - def bad_complex_decoder(o): raise DecodeError("Ooops!") def test_an_exception_in_objecthook1(): with raises(DecodeError): - packed = packb({1: {"__complex__": True, "real": 1, "imag": 2}}) - unpackb(packed, object_hook=bad_complex_decoder, strict_map_key=False) + packed = packb({1: {'__complex__': True, 'real': 1, 'imag': 2}}) + unpackb(packed, object_hook=bad_complex_decoder) def test_an_exception_in_objecthook2(): with raises(DecodeError): - packed = packb({1: [{"__complex__": True, "real": 1, "imag": 2}]}) - unpackb(packed, list_hook=bad_complex_decoder, use_list=1, strict_map_key=False) + packed = packb({1: [{'__complex__': True, 'real': 1, 'imag': 2}]}) + unpackb(packed, list_hook=bad_complex_decoder, use_list=1) diff --git a/test/test_pack.py b/test/test_pack.py index 374d154..762ccf5 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -1,110 +1,90 @@ #!/usr/bin/env python +# coding: utf-8 +from __future__ import absolute_import, division, print_function, unicode_literals import struct -from collections import OrderedDict +from pytest import raises, xfail + +from msgpack import packb, unpackb, Unpacker, Packer + from io import BytesIO -import pytest - -from msgpack import Packer, Unpacker, packb, unpackb - - def check(data, use_list=False): - re = unpackb(packb(data), use_list=use_list, strict_map_key=False) + re = unpackb(packb(data), use_list=use_list) assert re == data - def testPack(): test_data = [ - 0, - 1, - 127, - 128, - 255, - 256, - 65535, - 65536, - 4294967295, - 4294967296, - -1, - -32, - -33, - -128, - -129, - -32768, - -32769, - -4294967296, - -4294967297, - 1.0, - b"", - b"a", - b"a" * 31, - b"a" * 32, - None, - True, - False, - (), - ((),), - ((), None), + 0, 1, 127, 128, 255, 256, 65535, 65536, 4294967295, 4294967296, + -1, -32, -33, -128, -129, -32768, -32769, -4294967296, -4294967297, + 1.0, + b"", b"a", b"a"*31, b"a"*32, + None, True, False, + (), ((),), ((), None,), {None: 0}, - (1 << 23), - ] + (1<<23), + ] for td in test_data: check(td) - def testPackUnicode(): test_data = ["", "abcd", ["defgh"], "Русский текст"] for td in test_data: - re = unpackb(packb(td), use_list=1, raw=False) + re = unpackb(packb(td, encoding='utf-8'), use_list=1, encoding='utf-8') assert re == td - packer = Packer() + packer = Packer(encoding='utf-8') data = packer.pack(td) - re = Unpacker(BytesIO(data), raw=False, use_list=1).unpack() + re = Unpacker(BytesIO(data), encoding=str('utf-8'), use_list=1).unpack() assert re == td +def testPackUTF32(): + try: + test_data = [ + "", + "abcd", + ["defgh"], + "Русский текст", + ] + for td in test_data: + re = unpackb(packb(td, encoding='utf-32'), use_list=1, encoding='utf-32') + assert re == td + except LookupError as e: + xfail(e) def testPackBytes(): - test_data = [b"", b"abcd", (b"defgh",)] + test_data = [ + b"", b"abcd", (b"defgh",), + ] for td in test_data: check(td) - -def testPackByteArrays(): - test_data = [bytearray(b""), bytearray(b"abcd"), (bytearray(b"defgh"),)] - for td in test_data: - check(td) - - def testIgnoreUnicodeErrors(): - re = unpackb(packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore") + re = unpackb(packb(b'abc\xeddef'), encoding='utf-8', unicode_errors='ignore', use_list=1) assert re == "abcdef" - def testStrictUnicodeUnpack(): - packed = packb(b"abc\xeddef", use_bin_type=False) - with pytest.raises(UnicodeDecodeError): - unpackb(packed, raw=False, use_list=1) + with raises(UnicodeDecodeError): + unpackb(packb(b'abc\xeddef'), encoding='utf-8', use_list=1) +def testStrictUnicodePack(): + with raises(UnicodeEncodeError): + packb("abc\xeddef", encoding='ascii', unicode_errors='strict') def testIgnoreErrorsPack(): - re = unpackb( - packb("abc\udc80\udcffdef", use_bin_type=True, unicode_errors="ignore"), - raw=False, - use_list=1, - ) + re = unpackb(packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), encoding='utf-8', use_list=1) assert re == "abcdef" +def testNoEncoding(): + with raises(TypeError): + packb("abc", encoding=None) def testDecodeBinary(): - re = unpackb(packb(b"abc"), use_list=1) + re = unpackb(packb(b"abc"), encoding=None, use_list=1) assert re == b"abc" - def testPackFloat(): - assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(">f", 1.0) - assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(">d", 1.0) - + assert packb(1.0, use_single_float=True) == b'\xca' + struct.pack(str('>f'), 1.0) + assert packb(1.0, use_single_float=False) == b'\xcb' + struct.pack(str('>d'), 1.0) def testArraySize(sizes=[0, 5, 50, 1000]): bio = BytesIO() @@ -119,7 +99,6 @@ def testArraySize(sizes=[0, 5, 50, 1000]): for size in sizes: assert unpacker.unpack() == list(range(size)) - def test_manualreset(sizes=[0, 5, 50, 1000]): packer = Packer(autoreset=False) for size in sizes: @@ -133,8 +112,7 @@ def test_manualreset(sizes=[0, 5, 50, 1000]): assert unpacker.unpack() == list(range(size)) packer.reset() - assert packer.bytes() == b"" - + assert packer.bytes() == b'' def testMapSize(sizes=[0, 5, 50, 1000]): bio = BytesIO() @@ -142,40 +120,42 @@ def testMapSize(sizes=[0, 5, 50, 1000]): for size in sizes: bio.write(packer.pack_map_header(size)) for i in range(size): - bio.write(packer.pack(i)) # key - bio.write(packer.pack(i * 2)) # value + bio.write(packer.pack(i)) # key + bio.write(packer.pack(i * 2)) # value bio.seek(0) - unpacker = Unpacker(bio, strict_map_key=False) + unpacker = Unpacker(bio) for size in sizes: - assert unpacker.unpack() == {i: i * 2 for i in range(size)} + assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) +class odict(dict): + '''Reimplement OrderedDict to run test on Python 2.6''' + def __init__(self, seq): + self._seq = seq + dict.__init__(self, seq) + + def items(self): + return self._seq[:] + + def iteritems(self): + return iter(self._seq) + + def keys(self): + return [x[0] for x in self._seq] + def test_odict(): - seq = [(b"one", 1), (b"two", 2), (b"three", 3), (b"four", 4)] - od = OrderedDict(seq) + seq = [(b'one', 1), (b'two', 2), (b'three', 3), (b'four', 4)] + od = odict(seq) assert unpackb(packb(od), use_list=1) == dict(seq) - def pair_hook(seq): return list(seq) - assert unpackb(packb(od), object_pairs_hook=pair_hook, use_list=1) == seq def test_pairlist(): - pairlist = [(b"a", 1), (2, b"b"), (b"foo", b"bar")] + pairlist = [(b'a', 1), (2, b'b'), (b'foo', b'bar')] packer = Packer() packed = packer.pack_map_pairs(pairlist) - unpacked = unpackb(packed, object_pairs_hook=list, strict_map_key=False) + unpacked = unpackb(packed, object_pairs_hook=list) assert pairlist == unpacked - - -def test_get_buffer(): - packer = Packer(autoreset=0, use_bin_type=True) - packer.pack([1, 2]) - strm = BytesIO() - strm.write(packer.getbuffer()) - written = strm.getvalue() - - expected = packb([1, 2], use_bin_type=True) - assert written == expected diff --git a/test/test_read_size.py b/test/test_read_size.py index 0f6c1b5..4e6c2b9 100644 --- a/test/test_read_size.py +++ b/test/test_read_size.py @@ -1,72 +1,66 @@ """Test Unpacker's read_array_header and read_map_header methods""" - -from msgpack import OutOfData, Unpacker, packb - +from msgpack import packb, Unpacker, OutOfData UnexpectedTypeException = ValueError - def test_read_array_header(): unpacker = Unpacker() - unpacker.feed(packb(["a", "b", "c"])) + unpacker.feed(packb(['a', 'b', 'c'])) assert unpacker.read_array_header() == 3 - assert unpacker.unpack() == "a" - assert unpacker.unpack() == "b" - assert unpacker.unpack() == "c" + assert unpacker.unpack() == b'a' + assert unpacker.unpack() == b'b' + assert unpacker.unpack() == b'c' try: unpacker.unpack() - assert 0, "should raise exception" + assert 0, 'should raise exception' except OutOfData: - assert 1, "okay" + assert 1, 'okay' def test_read_map_header(): unpacker = Unpacker() - unpacker.feed(packb({"a": "A"})) + unpacker.feed(packb({'a': 'A'})) assert unpacker.read_map_header() == 1 - assert unpacker.unpack() == "a" - assert unpacker.unpack() == "A" + assert unpacker.unpack() == B'a' + assert unpacker.unpack() == B'A' try: unpacker.unpack() - assert 0, "should raise exception" + assert 0, 'should raise exception' except OutOfData: - assert 1, "okay" - + assert 1, 'okay' def test_incorrect_type_array(): unpacker = Unpacker() unpacker.feed(packb(1)) try: unpacker.read_array_header() - assert 0, "should raise exception" + assert 0, 'should raise exception' except UnexpectedTypeException: - assert 1, "okay" - + assert 1, 'okay' def test_incorrect_type_map(): unpacker = Unpacker() unpacker.feed(packb(1)) try: unpacker.read_map_header() - assert 0, "should raise exception" + assert 0, 'should raise exception' except UnexpectedTypeException: - assert 1, "okay" - + assert 1, 'okay' def test_correct_type_nested_array(): unpacker = Unpacker() - unpacker.feed(packb({"a": ["b", "c", "d"]})) + unpacker.feed(packb({'a': ['b', 'c', 'd']})) try: unpacker.read_array_header() - assert 0, "should raise exception" + assert 0, 'should raise exception' except UnexpectedTypeException: - assert 1, "okay" - + assert 1, 'okay' def test_incorrect_type_nested_map(): unpacker = Unpacker() - unpacker.feed(packb([{"a": "b"}])) + unpacker.feed(packb([{'a': 'b'}])) try: unpacker.read_map_header() - assert 0, "should raise exception" + assert 0, 'should raise exception' except UnexpectedTypeException: - assert 1, "okay" + assert 1, 'okay' + diff --git a/test/test_seq.py b/test/test_seq.py index 8dee462..fed9ff4 100644 --- a/test/test_seq.py +++ b/test/test_seq.py @@ -1,14 +1,14 @@ -# ruff: noqa: E501 -# ignore line length limit for long comments -import io +#!/usr/bin/env python +# coding: utf-8 +import io import msgpack + binarydata = bytes(bytearray(range(256))) - def gen_binary_data(idx): - return binarydata[: idx % 300] + return binarydata[:idx % 300] def test_exceeding_unpacker_read_size(): @@ -18,10 +18,10 @@ def test_exceeding_unpacker_read_size(): NUMBER_OF_STRINGS = 6 read_size = 16 - # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop): - # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev) - # 40 ok for read_size=1024, while 50 introduces errors - # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev): + # 5 ok for read_size=16, while 6 glibc detected *** python: double free or corruption (fasttop): + # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: double free or corruption (!prev) + # 40 ok for read_size=1024, while 50 introduces errors + # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** python: double free or corruption (!prev): for idx in range(NUMBER_OF_STRINGS): data = gen_binary_data(idx) @@ -34,7 +34,7 @@ def test_exceeding_unpacker_read_size(): read_count = 0 for idx, o in enumerate(unpacker): - assert isinstance(o, bytes) + assert type(o) == bytes assert o == gen_binary_data(idx) read_count += 1 diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py index 0f895d7..45f4cc7 100644 --- a/test/test_sequnpack.py +++ b/test/test_sequnpack.py @@ -1,148 +1,98 @@ #!/usr/bin/env python +# coding: utf-8 + import io - -from pytest import raises - -from msgpack import BufferFull, Unpacker, pack, packb +from msgpack import Unpacker, BufferFull from msgpack.exceptions import OutOfData +from pytest import raises def test_partialdata(): unpacker = Unpacker() - unpacker.feed(b"\xa5") - with raises(StopIteration): - next(iter(unpacker)) - unpacker.feed(b"h") - with raises(StopIteration): - next(iter(unpacker)) - unpacker.feed(b"a") - with raises(StopIteration): - next(iter(unpacker)) - unpacker.feed(b"l") - with raises(StopIteration): - next(iter(unpacker)) - unpacker.feed(b"l") - with raises(StopIteration): - next(iter(unpacker)) - unpacker.feed(b"o") - assert next(iter(unpacker)) == "hallo" - + unpacker.feed(b'\xa5') + with raises(StopIteration): next(iter(unpacker)) + unpacker.feed(b'h') + with raises(StopIteration): next(iter(unpacker)) + unpacker.feed(b'a') + with raises(StopIteration): next(iter(unpacker)) + unpacker.feed(b'l') + with raises(StopIteration): next(iter(unpacker)) + unpacker.feed(b'l') + with raises(StopIteration): next(iter(unpacker)) + unpacker.feed(b'o') + assert next(iter(unpacker)) == b'hallo' def test_foobar(): unpacker = Unpacker(read_size=3, use_list=1) - unpacker.feed(b"foobar") - assert unpacker.unpack() == ord(b"f") - assert unpacker.unpack() == ord(b"o") - assert unpacker.unpack() == ord(b"o") - assert unpacker.unpack() == ord(b"b") - assert unpacker.unpack() == ord(b"a") - assert unpacker.unpack() == ord(b"r") + unpacker.feed(b'foobar') + assert unpacker.unpack() == ord(b'f') + assert unpacker.unpack() == ord(b'o') + assert unpacker.unpack() == ord(b'o') + assert unpacker.unpack() == ord(b'b') + assert unpacker.unpack() == ord(b'a') + assert unpacker.unpack() == ord(b'r') with raises(OutOfData): unpacker.unpack() - unpacker.feed(b"foo") - unpacker.feed(b"bar") + unpacker.feed(b'foo') + unpacker.feed(b'bar') k = 0 - for o, e in zip(unpacker, "foobarbaz"): + for o, e in zip(unpacker, 'foobarbaz'): assert o == ord(e) k += 1 - assert k == len(b"foobar") - + assert k == len(b'foobar') def test_foobar_skip(): unpacker = Unpacker(read_size=3, use_list=1) - unpacker.feed(b"foobar") - assert unpacker.unpack() == ord(b"f") + unpacker.feed(b'foobar') + assert unpacker.unpack() == ord(b'f') unpacker.skip() - assert unpacker.unpack() == ord(b"o") + assert unpacker.unpack() == ord(b'o') unpacker.skip() - assert unpacker.unpack() == ord(b"a") + assert unpacker.unpack() == ord(b'a') unpacker.skip() with raises(OutOfData): unpacker.unpack() - def test_maxbuffersize(): with raises(ValueError): Unpacker(read_size=5, max_buffer_size=3) unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) - unpacker.feed(b"fo") + unpacker.feed(b'fo') with raises(BufferFull): - unpacker.feed(b"ob") - unpacker.feed(b"o") - assert ord("f") == next(unpacker) - unpacker.feed(b"b") - assert ord("o") == next(unpacker) - assert ord("o") == next(unpacker) - assert ord("b") == next(unpacker) - - -def test_maxbuffersize_file(): - buff = io.BytesIO(packb(b"a" * 10) + packb([b"a" * 20] * 2)) - unpacker = Unpacker(buff, read_size=1, max_buffer_size=19, max_bin_len=20) - assert unpacker.unpack() == b"a" * 10 - # assert unpacker.unpack() == [b"a" * 20]*2 - with raises(BufferFull): - print(unpacker.unpack()) + unpacker.feed(b'ob') + unpacker.feed(b'o') + assert ord('f') == next(unpacker) + unpacker.feed(b'b') + assert ord('o') == next(unpacker) + assert ord('o') == next(unpacker) + assert ord('b') == next(unpacker) def test_readbytes(): unpacker = Unpacker(read_size=3) - unpacker.feed(b"foobar") - assert unpacker.unpack() == ord(b"f") - assert unpacker.read_bytes(3) == b"oob" - assert unpacker.unpack() == ord(b"a") - assert unpacker.unpack() == ord(b"r") + unpacker.feed(b'foobar') + assert unpacker.unpack() == ord(b'f') + assert unpacker.read_bytes(3) == b'oob' + assert unpacker.unpack() == ord(b'a') + assert unpacker.unpack() == ord(b'r') # Test buffer refill - unpacker = Unpacker(io.BytesIO(b"foobar"), read_size=3) - assert unpacker.unpack() == ord(b"f") - assert unpacker.read_bytes(3) == b"oob" - assert unpacker.unpack() == ord(b"a") - assert unpacker.unpack() == ord(b"r") - - # Issue 352 - u = Unpacker() - u.feed(b"x") - assert bytes(u.read_bytes(1)) == b"x" - with raises(StopIteration): - next(u) - u.feed(b"\1") - assert next(u) == 1 - + unpacker = Unpacker(io.BytesIO(b'foobar'), read_size=3) + assert unpacker.unpack() == ord(b'f') + assert unpacker.read_bytes(3) == b'oob' + assert unpacker.unpack() == ord(b'a') + assert unpacker.unpack() == ord(b'r') def test_issue124(): unpacker = Unpacker() - unpacker.feed(b"\xa1?\xa1!") - assert tuple(unpacker) == ("?", "!") + unpacker.feed(b'\xa1?\xa1!') + assert tuple(unpacker) == (b'?', b'!') assert tuple(unpacker) == () unpacker.feed(b"\xa1?\xa1") - assert tuple(unpacker) == ("?",) + assert tuple(unpacker) == (b'?',) assert tuple(unpacker) == () unpacker.feed(b"!") - assert tuple(unpacker) == ("!",) + assert tuple(unpacker) == (b'!',) assert tuple(unpacker) == () - - -def test_unpack_tell(): - stream = io.BytesIO() - messages = [2**i - 1 for i in range(65)] - messages += [-(2**i) for i in range(1, 64)] - messages += [ - b"hello", - b"hello" * 1000, - list(range(20)), - {i: bytes(i) * i for i in range(10)}, - {i: bytes(i) * i for i in range(32)}, - ] - offsets = [] - for m in messages: - pack(m, stream) - offsets.append(stream.tell()) - stream.seek(0) - unpacker = Unpacker(stream, strict_map_key=False) - for m, o in zip(messages, offsets): - m2 = next(unpacker) - assert m == m2 - assert o == unpacker.tell() diff --git a/test/test_stricttype.py b/test/test_stricttype.py deleted file mode 100644 index 72776a2..0000000 --- a/test/test_stricttype.py +++ /dev/null @@ -1,59 +0,0 @@ -from collections import namedtuple - -from msgpack import ExtType, packb, unpackb - - -def test_namedtuple(): - T = namedtuple("T", "foo bar") - - def default(o): - if isinstance(o, T): - return dict(o._asdict()) - raise TypeError(f"Unsupported type {type(o)}") - - packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default) - unpacked = unpackb(packed, raw=False) - assert unpacked == {"foo": 1, "bar": 42} - - -def test_tuple(): - t = ("one", 2, b"three", (4,)) - - def default(o): - if isinstance(o, tuple): - return {"__type__": "tuple", "value": list(o)} - raise TypeError(f"Unsupported type {type(o)}") - - def convert(o): - if o.get("__type__") == "tuple": - return tuple(o["value"]) - return o - - data = packb(t, strict_types=True, use_bin_type=True, default=default) - expected = unpackb(data, raw=False, object_hook=convert) - - assert expected == t - - -def test_tuple_ext(): - t = ("one", 2, b"three", (4,)) - - MSGPACK_EXT_TYPE_TUPLE = 0 - - def default(o): - if isinstance(o, tuple): - # Convert to list and pack - payload = packb(list(o), strict_types=True, use_bin_type=True, default=default) - return ExtType(MSGPACK_EXT_TYPE_TUPLE, payload) - raise TypeError(repr(o)) - - def convert(code, payload): - if code == MSGPACK_EXT_TYPE_TUPLE: - # Unpack and convert to tuple - return tuple(unpackb(payload, raw=False, ext_hook=convert)) - raise ValueError(f"Unknown Ext code {code}") - - data = packb(t, strict_types=True, use_bin_type=True, default=default) - expected = unpackb(data, raw=False, ext_hook=convert) - - assert expected == t diff --git a/test/test_subtype.py b/test/test_subtype.py index a911578..6807508 100644 --- a/test/test_subtype.py +++ b/test/test_subtype.py @@ -1,24 +1,19 @@ #!/usr/bin/env python +# coding: utf-8 +from msgpack import packb, unpackb from collections import namedtuple -from msgpack import packb - - class MyList(list): pass - class MyDict(dict): pass - class MyTuple(tuple): pass - -MyNamedTuple = namedtuple("MyNamedTuple", "x y") - +MyNamedTuple = namedtuple('MyNamedTuple', 'x y') def test_types(): assert packb(MyDict()) == packb(dict()) diff --git a/test/test_timestamp.py b/test/test_timestamp.py deleted file mode 100644 index 831141a..0000000 --- a/test/test_timestamp.py +++ /dev/null @@ -1,171 +0,0 @@ -import datetime - -import pytest - -import msgpack -from msgpack.ext import Timestamp - - -def test_timestamp(): - # timestamp32 - ts = Timestamp(2**32 - 1) - assert ts.to_bytes() == b"\xff\xff\xff\xff" - packed = msgpack.packb(ts) - assert packed == b"\xd6\xff" + ts.to_bytes() - unpacked = msgpack.unpackb(packed) - assert ts == unpacked - assert ts.seconds == 2**32 - 1 and ts.nanoseconds == 0 - - # timestamp64 - ts = Timestamp(2**34 - 1, 999999999) - assert ts.to_bytes() == b"\xee\x6b\x27\xff\xff\xff\xff\xff" - packed = msgpack.packb(ts) - assert packed == b"\xd7\xff" + ts.to_bytes() - unpacked = msgpack.unpackb(packed) - assert ts == unpacked - assert ts.seconds == 2**34 - 1 and ts.nanoseconds == 999999999 - - # timestamp96 - ts = Timestamp(2**63 - 1, 999999999) - assert ts.to_bytes() == b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff" - packed = msgpack.packb(ts) - assert packed == b"\xc7\x0c\xff" + ts.to_bytes() - unpacked = msgpack.unpackb(packed) - assert ts == unpacked - assert ts.seconds == 2**63 - 1 and ts.nanoseconds == 999999999 - - # negative fractional - ts = Timestamp.from_unix(-2.3) # s: -3, ns: 700000000 - assert ts.seconds == -3 and ts.nanoseconds == 700000000 - assert ts.to_bytes() == b"\x29\xb9\x27\x00\xff\xff\xff\xff\xff\xff\xff\xfd" - packed = msgpack.packb(ts) - assert packed == b"\xc7\x0c\xff" + ts.to_bytes() - unpacked = msgpack.unpackb(packed) - assert ts == unpacked - - -def test_unpack_timestamp(): - # timestamp 32 - assert msgpack.unpackb(b"\xd6\xff\x00\x00\x00\x00") == Timestamp(0) - - # timestamp 64 - assert msgpack.unpackb(b"\xd7\xff" + b"\x00" * 8) == Timestamp(0) - with pytest.raises(ValueError): - msgpack.unpackb(b"\xd7\xff" + b"\xff" * 8) - - # timestamp 96 - assert msgpack.unpackb(b"\xc7\x0c\xff" + b"\x00" * 12) == Timestamp(0) - with pytest.raises(ValueError): - msgpack.unpackb(b"\xc7\x0c\xff" + b"\xff" * 12) == Timestamp(0) - - # Undefined - with pytest.raises(ValueError): - msgpack.unpackb(b"\xd4\xff\x00") # fixext 1 - with pytest.raises(ValueError): - msgpack.unpackb(b"\xd5\xff\x00\x00") # fixext 2 - with pytest.raises(ValueError): - msgpack.unpackb(b"\xc7\x00\xff") # ext8 (len=0) - with pytest.raises(ValueError): - msgpack.unpackb(b"\xc7\x03\xff\0\0\0") # ext8 (len=3) - with pytest.raises(ValueError): - msgpack.unpackb(b"\xc7\x05\xff\0\0\0\0\0") # ext8 (len=5) - - -def test_timestamp_from(): - t = Timestamp(42, 14000) - assert Timestamp.from_unix(42.000014) == t - assert Timestamp.from_unix_nano(42000014000) == t - - -def test_timestamp_to(): - t = Timestamp(42, 14000) - assert t.to_unix() == 42.000014 - assert t.to_unix_nano() == 42000014000 - - -def test_timestamp_datetime(): - t = Timestamp(42, 14) - utc = datetime.timezone.utc - assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) - - ts = datetime.datetime(2024, 4, 16, 8, 43, 9, 420317, tzinfo=utc) - ts2 = datetime.datetime(2024, 4, 16, 8, 43, 9, 420318, tzinfo=utc) - - assert ( - Timestamp.from_datetime(ts2).nanoseconds - Timestamp.from_datetime(ts).nanoseconds == 1000 - ) - - ts3 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4256) - ts4 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4257) - assert ( - Timestamp.from_datetime(ts4).nanoseconds - Timestamp.from_datetime(ts3).nanoseconds == 1000 - ) - - assert Timestamp.from_datetime(ts).to_datetime() == ts - - -def test_unpack_datetime(): - t = Timestamp(42, 14) - utc = datetime.timezone.utc - packed = msgpack.packb(t) - unpacked = msgpack.unpackb(packed, timestamp=3) - assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc) - - -def test_pack_unpack_before_epoch(): - utc = datetime.timezone.utc - t_in = datetime.datetime(1960, 1, 1, tzinfo=utc) - packed = msgpack.packb(t_in, datetime=True) - unpacked = msgpack.unpackb(packed, timestamp=3) - assert unpacked == t_in - - -def test_pack_datetime(): - t = Timestamp(42, 14000) - dt = t.to_datetime() - utc = datetime.timezone.utc - assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) - - packed = msgpack.packb(dt, datetime=True) - packed2 = msgpack.packb(t) - assert packed == packed2 - - unpacked = msgpack.unpackb(packed) - print(packed, unpacked) - assert unpacked == t - - unpacked = msgpack.unpackb(packed, timestamp=3) - assert unpacked == dt - - x = [] - packed = msgpack.packb(dt, datetime=False, default=x.append) - assert x - assert x[0] == dt - assert msgpack.unpackb(packed) is None - - -def test_issue451(): - # https://github.com/msgpack/msgpack-python/issues/451 - utc = datetime.timezone.utc - dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=utc) - packed = msgpack.packb(dt, datetime=True) - assert packed == b"\xd6\xff\xf4\x86eL" - - unpacked = msgpack.unpackb(packed, timestamp=3) - assert dt == unpacked - - -def test_pack_datetime_without_tzinfo(): - dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) - with pytest.raises(ValueError, match="where tzinfo=None"): - packed = msgpack.packb(dt, datetime=True) - - dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14) - packed = msgpack.packb(dt, datetime=True, default=lambda x: None) - assert packed == msgpack.packb(None) - - utc = datetime.timezone.utc - dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc) - packed = msgpack.packb(dt, datetime=True) - unpacked = msgpack.unpackb(packed, timestamp=3) - assert unpacked == dt diff --git a/test/test_unpack.py b/test/test_unpack.py index b17c3c5..c0d711c 100644 --- a/test/test_unpack.py +++ b/test/test_unpack.py @@ -1,13 +1,11 @@ -import sys from io import BytesIO - -from pytest import mark, raises - -from msgpack import ExtType, OutOfData, Unpacker, packb +import sys +from msgpack import Unpacker, packb, OutOfData, ExtType +from pytest import raises, mark def test_unpack_array_header_from_file(): - f = BytesIO(packb([1, 2, 3, 4])) + f = BytesIO(packb([1,2,3,4])) unpacker = Unpacker(f) assert unpacker.read_array_header() == 4 assert unpacker.unpack() == 1 @@ -18,10 +16,8 @@ def test_unpack_array_header_from_file(): unpacker.unpack() -@mark.skipif( - "not hasattr(sys, 'getrefcount') == True", - reason="sys.getrefcount() is needed to pass this test", -) +@mark.skipif("not hasattr(sys, 'getrefcount') == True", + reason='sys.getrefcount() is needed to pass this test') def test_unpacker_hook_refcnt(): result = [] @@ -47,9 +43,12 @@ def test_unpacker_hook_refcnt(): def test_unpacker_ext_hook(): + class MyUnpacker(Unpacker): + def __init__(self): - super().__init__(ext_hook=self._hook, raw=False) + super(MyUnpacker, self).__init__(ext_hook=self._hook, + encoding='utf-8') def _hook(self, code, data): if code == 1: @@ -58,32 +57,15 @@ def test_unpacker_ext_hook(): return ExtType(code, data) unpacker = MyUnpacker() - unpacker.feed(packb({"a": 1})) - assert unpacker.unpack() == {"a": 1} - unpacker.feed(packb({"a": ExtType(1, b"123")})) - assert unpacker.unpack() == {"a": 123} - unpacker.feed(packb({"a": ExtType(2, b"321")})) - assert unpacker.unpack() == {"a": ExtType(2, b"321")} + unpacker.feed(packb({'a': 1}, encoding='utf-8')) + assert unpacker.unpack() == {'a': 1} + unpacker.feed(packb({'a': ExtType(1, b'123')}, encoding='utf-8')) + assert unpacker.unpack() == {'a': 123} + unpacker.feed(packb({'a': ExtType(2, b'321')}, encoding='utf-8')) + assert unpacker.unpack() == {'a': ExtType(2, b'321')} -def test_unpacker_tell(): - objects = 1, 2, "abc", "def", "ghi" - packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" - positions = 1, 2, 6, 10, 14 - unpacker = Unpacker(BytesIO(packed)) - for obj, unp, pos in zip(objects, unpacker, positions): - assert obj == unp - assert pos == unpacker.tell() - - -def test_unpacker_tell_read_bytes(): - objects = 1, "abc", "ghi" - packed = b"\x01\x02\xa3abc\xa3def\xa3ghi" - raw_data = b"\x02", b"\xa3def", b"" - lenghts = 1, 4, 999 - positions = 1, 6, 14 - unpacker = Unpacker(BytesIO(packed)) - for obj, unp, pos, n, raw in zip(objects, unpacker, positions, lenghts, raw_data): - assert obj == unp - assert pos == unpacker.tell() - assert unpacker.read_bytes(n) == raw +if __name__ == '__main__': + test_unpack_array_header_from_file() + test_unpacker_hook_refcnt() + test_unpacker_ext_hook() diff --git a/test/test_unpack_raw.py b/test/test_unpack_raw.py new file mode 100644 index 0000000..7002601 --- /dev/null +++ b/test/test_unpack_raw.py @@ -0,0 +1,29 @@ +"""Tests for cases where the user seeks to obtain packed msgpack objects""" + +import io +from msgpack import Unpacker, packb + + +def test_write_bytes(): + unpacker = Unpacker() + unpacker.feed(b'abc') + f = io.BytesIO() + assert unpacker.unpack(f.write) == ord('a') + assert f.getvalue() == b'a' + f = io.BytesIO() + assert unpacker.skip(f.write) is None + assert f.getvalue() == b'b' + f = io.BytesIO() + assert unpacker.skip() is None + assert f.getvalue() == b'' + + +def test_write_bytes_multi_buffer(): + long_val = (5) * 100 + expected = packb(long_val) + unpacker = Unpacker(io.BytesIO(expected), read_size=3, max_buffer_size=3) + + f = io.BytesIO() + unpacked = unpacker.unpack(f.write) + assert unpacked == long_val + assert f.getvalue() == expected diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..15feb51 --- /dev/null +++ b/tox.ini @@ -0,0 +1,39 @@ +[tox] +envlist = {py26,py27,py32,py33,py34}-{c,pure},{pypy,pypy3}-pure,py27-x86,py34-x86 + +[variants:pure] +setenv= + MSGPACK_PUREPYTHON=x + +[testenv] +deps= + pytest + +changedir=test +commands= + c,x86: python -c 'from msgpack import _packer, _unpacker' + c,x86: py.test + pure: py.test + +[testenv:py27-x86] +basepython=python2.7-x86 +deps= + pytest + +changedir=test +commands= + python -c 'import sys; print(hex(sys.maxsize))' + python -c 'from msgpack import _packer, _unpacker' + py.test + +[testenv:py34-x86] +basepython=python3.4-x86 +deps= + pytest + +changedir=test +commands= + python -c 'import sys; print(hex(sys.maxsize))' + python -c 'from msgpack import _packer, _unpacker' + py.test + diff --git a/upload_windows.bat b/upload_windows.bat new file mode 100644 index 0000000..5cd9a7c --- /dev/null +++ b/upload_windows.bat @@ -0,0 +1,4 @@ +c:\Python27\python setup.py bdist_egg bdist_wininst upload +c:\Python33\python setup.py bdist_egg bdist_wininst upload +c:\Python27_amd64\python setup.py bdist_egg bdist_wininst upload +c:\Python33_amd64\python setup.py bdist_egg bdist_wininst upload