diff --git a/.github/workflows/black.yaml b/.github/workflows/black.yaml new file mode 100644 index 0000000..eda8d07 --- /dev/null +++ b/.github/workflows/black.yaml @@ -0,0 +1,21 @@ +name: Black + +on: ["push", "pull_request"] + +jobs: + black: + runs-on: ubuntu-latest + steps: + - name: Setup Python + uses: actions/setup-python@v1 + with: + python-version: '3.x' + architecture: 'x64' + + - name: Checkout + uses: actions/checkout@v1 + + - name: Black Code Formatter + run: | + pip install black + black --diff --check msgpack/ test/ setup.py diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml deleted file mode 100644 index b696b92..0000000 --- a/.github/workflows/docs.yaml +++ /dev/null @@ -1,33 +0,0 @@ -name: docs - -on: ["push", "pull_request"] - -jobs: - docs: - # We want to run on external PRs, but not on our own internal PRs as they'll be run - # by the push to the branch. - if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository - - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Setup Python - uses: actions/setup-python@v5 - with: - python-version: '3.x' - cache: "pip" - cache-dependency-path: | - requirements.txt - docs/requirements.txt - - - name: Build - run: | - pip install -r requirements.txt - make cython - - - name: Sphinx Documentation Generator - run: | - pip install -r docs/requirements.txt - make docs diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml deleted file mode 100644 index 198cf7b..0000000 --- a/.github/workflows/lint.yaml +++ /dev/null @@ -1,22 +0,0 @@ -name: lint - -on: ["push", "pull_request"] - -jobs: - lint: - # We want to run on external PRs, but not on our own internal PRs as they'll be run - # by the push to the branch. 
- if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository - - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: ruff check - run: | - pipx run ruff check --diff msgpack/ test/ setup.py - - - name: ruff format - run: | - pipx run ruff format --diff msgpack/ test/ setup.py diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml new file mode 100644 index 0000000..f9707b1 --- /dev/null +++ b/.github/workflows/linux.yml @@ -0,0 +1,88 @@ +name: Build Linux Wheels +on: + push: + pull_request: + create: + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v1 + + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: 3.8 + + - name: Cythonize + shell: bash + run: | + pip install -U pip + pip -V + pip install -r requirements.txt + make cython + #python setup.py sdist + + - name: Build wheels + shell: bash + run: | + make linux-wheel + + - name: Install qemu-user-static for docker + shell: bash + run: | + docker run --rm --privileged multiarch/qemu-user-static --reset -p yes + + - name: Build arm64 wheels + shell: bash + run: | + make linux-arm64-wheel + + - name: Run test (3.8) + run: | + pip install pytest + pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse + pytest -v test + + + - name: Set up Python 3.9 + uses: actions/setup-python@v1 + with: + python-version: 3.9 + + - name: Run test (3.9) + run: | + pip install pytest + pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse + pytest -v test + + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: 3.7 + + - name: Run test (3.7) + run: | + pip install pytest + pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse + pytest -v test + + - name: Set up Python 3.6 + uses: actions/setup-python@v1 + with: + python-version: 3.6 + + - name: Run test (3.6) + run: | + pip install pytest + pip install -v msgpack --only-binary :all: --no-index -f dist/wheelhouse + pytest -v test + + + - name: Upload Wheels + uses: actions/upload-artifact@v1 + with: + name: linux-wheels + path: ./dist/wheelhouse/ diff --git a/.github/workflows/mac.yml b/.github/workflows/mac.yml new file mode 100644 index 0000000..78d944c --- /dev/null +++ b/.github/workflows/mac.yml @@ -0,0 +1,93 @@ +name: Build macOS Wheels +on: + push: + pull_request: + create: + +jobs: + build: + runs-on: macos-latest + + steps: + - name: Checkout + uses: actions/checkout@v1 + + - name: Set up Python 3.8 + uses: actions/setup-python@v1 + with: + python-version: "3.8" + + - name: Cythonize + run: | + pip install -U pip + pip install -r requirements.txt + make cython + + - name: Build wheel + run: | + pip install setuptools wheel + python setup.py bdist_wheel + + - name: Run test + run: | + pip install pytest + pip install -v msgpack --only-binary :all: -f dist/ --no-index + pytest -v test + + + - name: Set up Python 3.9 + uses: actions/setup-python@v1 + with: + python-version: "3.9" + + - name: Build wheel + run: | + pip install setuptools wheel + python setup.py bdist_wheel + + - name: Run test + run: | + pip install pytest + pip install -v msgpack --only-binary :all: -f dist/ --no-index + pytest -v test + + + - name: Set up Python 3.7 + uses: actions/setup-python@v1 + with: + python-version: "3.7" + + - name: Build wheel + run: | + pip install setuptools wheel + python setup.py bdist_wheel + + - name: Run test + run: | + pip install pytest + 
pip install -v msgpack --only-binary :all: -f dist/ --no-index + pytest -v test + + + - name: Set up Python 3.6 + uses: actions/setup-python@v1 + with: + python-version: "3.6" + + - name: Build wheel + run: | + pip install setuptools wheel + python setup.py bdist_wheel + + - name: Run test + run: | + pip install pytest + pip install -v msgpack --only-binary :all: -f dist/ --no-index + pytest -v test + + + - name: Upload Wheels + uses: actions/upload-artifact@v1 + with: + name: macos-wheels + path: ./dist/ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml deleted file mode 100644 index 6b1664a..0000000 --- a/.github/workflows/test.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: Run tests -on: - push: - branches: [main] - pull_request: - create: - -jobs: - test: - strategy: - matrix: - os: ["ubuntu-latest", "windows-latest", "windows-11-arm", "macos-latest"] - py: ["3.14", "3.14t", "3.13", "3.12", "3.11", "3.10"] - exclude: - - os: windows-11-arm - py: "3.10" - runs-on: ${{ matrix.os }} - name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} - - steps: - - name: Checkout - uses: actions/checkout@v5 - - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.py }} - allow-prereleases: true - cache: "pip" - - - name: Prepare - shell: bash - run: | - python -m pip install -r requirements.txt pytest - - - name: Build - shell: bash - run: | - make cython - pip install . - - - name: Test (C extension) - shell: bash - run: | - pytest -v test - - - name: Test (pure Python fallback) - shell: bash - run: | - MSGPACK_PUREPYTHON=1 pytest -v test - - - name: build packages - shell: bash - run: | - python -m build -nv - - - name: upload packages - uses: actions/upload-artifact@v4 - with: - name: dist-${{ matrix.os }}-${{ matrix.py }} - path: dist diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml deleted file mode 100644 index 531abbc..0000000 --- a/.github/workflows/wheel.yml +++ /dev/null @@ -1,88 +0,0 @@ -name: Build sdist and Wheels -on: - push: - branches: [main] - release: - types: - - published - workflow_dispatch: - -jobs: - build_wheels: - strategy: - matrix: - # macos-13 is for intel - os: ["ubuntu-24.04", "ubuntu-24.04-arm", "windows-latest", "windows-11-arm", "macos-13", "macos-latest"] - runs-on: ${{ matrix.os }} - name: Build wheels on ${{ matrix.os }} - - steps: - - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 - with: - python-version: "3.x" - cache: "pip" - - name: Cythonize - shell: bash - run: | - pip install -r requirements.txt - make cython - - - name: Build - uses: pypa/cibuildwheel@v3.3.0 - env: - CIBW_TEST_REQUIRES: "pytest" - CIBW_TEST_COMMAND: "pytest {package}/test" - CIBW_SKIP: "pp* cp38-* cp39-* cp310-win_arm64" - - - name: Build sdist - if: runner.os == 'Linux' && runner.arch == 'X64' - run: | - pip install build - python -m build -s -o wheelhouse - - - name: Upload Wheels to artifact - uses: actions/upload-artifact@v4 - with: - name: wheels-${{ matrix.os }} - path: wheelhouse - - # combine all wheels into one artifact - combine_wheels: - needs: [build_wheels] - runs-on: ubuntu-latest - steps: - - uses: actions/download-artifact@v4 - with: - # unpacks all CIBW artifacts into dist/ - pattern: wheels-* - path: dist - merge-multiple: true - - - name: Upload Wheels to artifact - uses: actions/upload-artifact@v4 - with: - name: wheels-all - path: dist - - # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml - upload_pypi: - needs: [build_wheels] - runs-on: 
ubuntu-latest - environment: pypi - permissions: - id-token: write - if: github.event_name == 'release' && github.event.action == 'published' - # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) - # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') - steps: - - uses: actions/download-artifact@v4 - with: - # unpacks all CIBW artifacts into dist/ - pattern: wheels-* - path: dist - merge-multiple: true - - - uses: pypa/gh-action-pypi-publish@release/v1 - #with: - # To test: repository-url: https://test.pypi.org/legacy/ diff --git a/.github/workflows/windows.yaml b/.github/workflows/windows.yaml new file mode 100644 index 0000000..139a5a6 --- /dev/null +++ b/.github/workflows/windows.yaml @@ -0,0 +1,84 @@ +name: Build and test windows wheels +on: + push: + branches: + - master + - test + pull_request: + create: + +jobs: + build: + runs-on: windows-latest + steps: + - name: Checkout + uses: actions/checkout@v1 + + - name: Cythonize + shell: bash + run: | + pip install -U Cython + make cython + #python setup.py sdist + + - name: Python 3.6 (amd64) + env: + PYTHON: "py -3.6-64" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.6 (x86) + env: + PYTHON: "py -3.6-32" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.7 (amd64) + env: + PYTHON: "py -3.7-64" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.7 (x86) + env: + PYTHON: "py -3.7-32" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.8 (amd64) + env: + PYTHON: "py -3.8-64" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.8 (x86) + env: + PYTHON: "py -3.8-32" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.9 (amd64) + env: + PYTHON: "py -3.9-64" + shell: bash + run: | + ci/runtests.sh + + - name: Python 3.9 (x86) + env: + PYTHON: "py -3.9-32" + shell: bash + run: | + ci/runtests.sh + + - name: Upload Wheels + uses: actions/upload-artifact@v1 + with: + name: win-wheels + path: ./dist diff --git a/.gitignore b/.gitignore index 341be63..800f1c2 100644 --- a/.gitignore +++ b/.gitignore @@ -2,13 +2,11 @@ MANIFEST build/* dist/* .tox -.python-version *.pyc *.pyo *.so *~ msgpack/__version__.py -msgpack/*.c msgpack/*.cpp *.egg-info /venv diff --git a/.readthedocs.yaml b/.readthedocs.yaml deleted file mode 100644 index 88d8718..0000000 --- a/.readthedocs.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# Read the Docs configuration file for Sphinx projects. -# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details. - -version: 2 - -build: - os: ubuntu-22.04 - tools: - python: "3.11" - apt_packages: - - build-essential - jobs: - pre_install: - - pip install -r requirements.txt - - make cython - -python: - install: - - method: pip - path: . - - requirements: docs/requirements.txt - -sphinx: - configuration: docs/conf.py diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..4974d26 --- /dev/null +++ b/.travis.yml @@ -0,0 +1,89 @@ +version: ~> 1.0 +dist: xenial +language: python +cache: pip +arch: + - arm64 + +python: + # Available Python (PyPy) can be listed by: + # + # $ aws s3 ls s3://travis-python-archives/binaries/ubuntu/16.04/x86_64/ + - "3.6" + - "3.7" + - "3.8" + - "3.9-dev" + +_pure: &pure + install: + - pip install -U pip + - pip install -U pytest pytest-cov codecov + - pip install . 
+ script: + - pytest --cov=msgpack -v test + +matrix: + include: + - name: 32bit build + arch: amd64 + language: python + services: + - docker + env: + - DOCKER_IMAGE=quay.io/pypa/manylinux1_i686 + install: + - pip install -U pip + - pip install -r requirements.txt + - make cython + - docker pull $DOCKER_IMAGE + script: + - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh + + - arch: arm64 + name: arm64 32bit build + language: python + services: + - docker + env: + - DOCKER_IMAGE=quay.io/pypa/manylinux2014_aarch64 + install: + - pip install -U pip + - pip install -r requirements.txt + - make cython + - docker pull $DOCKER_IMAGE + script: + - docker run --rm -v `pwd`:/io -w /io $DOCKER_IMAGE /io/docker/runtests.sh + + - name: "Python 2 (fallback)" + python: "2.7" + <<: *pure + + - name: "pypy2.7" + arch: amd64 + python: "pypy2.7-7.1.1" + <<: *pure + + - name: "pypy3" + arch: amd64 + python: "pypy3.6-7.1.1" + <<: *pure + +install: + - pip install -U pip + - pip install -U pytest pytest-cov codecov + - pip install -r requirements.txt # Cython + - make cython + - pip install -e . + +script: + - python -c 'import sys; print(hex(sys.maxsize))' + - python -c 'from msgpack import _cmsgpack' + - pytest --cov=msgpack -v test + - MSGPACK_PUREPYTHON=x pytest --cov=msgpack -v test + +after_success: + - if [ -f .coverage ]; then + codecov; + fi + +# vim: sw=2 ts=2 diff --git a/ChangeLog.rst b/ChangeLog.rst index beeab15..230cc30 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,124 +1,3 @@ -1.1.2 -===== - -Release Date: 2025-10-08 - -This release does not change source code. It updates only building wheels: - -* Update Cython to v3.1.4 -* Update cibuildwheel to v3.2.0 -* Drop Python 3.8 -* Add Python 3.14 -* Add windows-arm - -1.1.1 -===== - -Release Date: 2025-06-13 - -* No change from 1.1.1rc1. - -1.1.1rc1 -======== - -Release Date: 2025-06-06 - -* Update Cython to 3.1.1 and cibuildwheel to 2.23.3. - -1.1.0 -===== - -Release Date: 2024-09-10 - -* use ``PyLong_*`` instead of ``PyInt_*`` for compatibility with - future Cython. (#620) - -1.1.0rc2 -======== - -Release Date: 2024-08-19 - -* Update Cython to 3.0.11 for better Python 3.13 support. -* Update cibuildwheel to 2.20.0 to build Python 3.13 wheels. - -1.1.0rc1 -======== - -Release Date: 2024-05-07 - -* Update Cython to 3.0.10 to reduce C warnings and future support for Python 3.13. -* Stop using C++ mode in Cython to reduce compile error on some compilers. -* ``Packer()`` has ``buf_size`` option to specify initial size of - internal buffer to reduce reallocation. -* The default internal buffer size of ``Packer()`` is reduced from - 1MiB to 256KiB to optimize for common use cases. Use ``buf_size`` - if you are packing large data. -* ``Timestamp.to_datetime()`` and ``Timestamp.from_datetime()`` become - more accurate by avoiding floating point calculations. (#591) -* The Cython code for ``Unpacker`` has been slightly rewritten for maintainability. -* The fallback implementation of ``Packer()`` and ``Unpacker()`` now uses keyword-only - arguments to improve compatibility with the Cython implementation. - -1.0.8 -===== - -Release Date: 2024-03-01 - -* Update Cython to 3.0.8. This fixes memory leak when iterating - ``Unpacker`` object on Python 3.12. -* Do not include C/Cython files in binary wheels. - - -1.0.7 -===== - -Release Date: 2023-09-28 - -* Fix build error of extension module on Windows. (#567) -* ``setup.py`` doesn't skip build error of extension module. (#568) - - -1.0.6 -===== - -Release Date: 2023-09-21 - -.. 
note:: - v1.0.6 Wheels for Windows don't contain extension module. - Please upgrade to v1.0.7 or newer. - -* Add Python 3.12 wheels (#517) -* Remove Python 2.7, 3.6, and 3.7 support - - -1.0.5 -===== - -Release Date: 2023-03-08 - -* Use ``__BYTE_ORDER__`` instead of ``__BYTE_ORDER`` for portability. (#513, #514) -* Add Python 3.11 wheels (#517) -* fallback: Fix packing multidimensional memoryview (#527) - -1.0.4 -===== - -Release Date: 2022-06-03 - -* Support Python 3.11 (beta). -* Don't define `__*_ENDIAN__` macro on Unix. by @methane in https://github.com/msgpack/msgpack-python/pull/495 -* Use PyFloat_Pack8() on Python 3.11a7 by @vstinner in https://github.com/msgpack/msgpack-python/pull/499 -* Fix Unpacker max_buffer_length handling by @methane in https://github.com/msgpack/msgpack-python/pull/506 - -1.0.3 -===== - -Release Date: 2021-11-24 JST - -* Fix Docstring (#459) -* Fix error formatting (#463) -* Improve error message about strict_map_key (#485) - 1.0.2 ===== @@ -191,7 +70,7 @@ Important changes * unpacker: Default value of input limits are smaller than before to avoid DoS attack. If you need to handle large data, you need to specify limits manually. (#319) -* Unpacker doesn't wrap underlying ``ValueError`` (including ``UnicodeError``) into +* Unpacker doesn't wrap underlaying ``ValueError`` (including ``UnicodeError``) into ``UnpackValueError``. If you want to catch all exception during unpack, you need to use ``try ... except Exception`` with minimum try code block. (#323, #233) diff --git a/DEVELOP.md b/DEVELOP.md index 27adf8c..9c823c3 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -1,5 +1,13 @@ # Developer's note +## Wheels + +Wheels for macOS and Linux are built on Travis and AppVeyr, in +[methane/msgpack-wheels](https://github.com/methane/msgpack-wheels) repository. + +Wheels for Windows are built on Github Actions in this repository. + + ### Build ``` diff --git a/MANIFEST.in b/MANIFEST.in index 6317706..57d84a4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include setup.py include COPYING include README.md -recursive-include msgpack *.h *.c *.pyx +recursive-include msgpack *.h *.c *.pyx *.cpp recursive-include test *.py diff --git a/Makefile b/Makefile index 51f3e0e..05cca55 100644 --- a/Makefile +++ b/Makefile @@ -1,28 +1,14 @@ -PYTHON_SOURCES = msgpack test setup.py - .PHONY: all all: cython python setup.py build_ext -i -f -.PHONY: format -format: - ruff format $(PYTHON_SOURCES) - -.PHONY: lint -lint: - ruff check $(PYTHON_SOURCES) - -.PHONY: doc -doc: - cd docs && sphinx-build -n -v -W --keep-going -b html -d doctrees . 
html - -.PHONY: pyupgrade -pyupgrade: - @find $(PYTHON_SOURCES) -name '*.py' -type f -exec pyupgrade --py37-plus '{}' \; +.PHONY: black +black: + black msgpack/ test/ setup.py .PHONY: cython cython: - cython msgpack/_cmsgpack.pyx + cython --cplus msgpack/_cmsgpack.pyx .PHONY: test test: cython @@ -45,14 +31,13 @@ clean: .PHONY: update-docker update-docker: - docker pull quay.io/pypa/manylinux2014_i686 - docker pull quay.io/pypa/manylinux2014_x86_64 - docker pull quay.io/pypa/manylinux2014_aarch64 + docker pull quay.io/pypa/manylinux2010_i686 + docker pull quay.io/pypa/manylinux2010_x86_64 .PHONY: linux-wheel linux-wheel: - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_i686 bash docker/buildwheel.sh - docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2014_x86_64 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2010_i686 bash docker/buildwheel.sh + docker run --rm -v `pwd`:/project -w /project quay.io/pypa/manylinux2010_x86_64 bash docker/buildwheel.sh .PHONY: linux-arm64-wheel linux-arm64-wheel: diff --git a/README.md b/README.md index 1f06324..d8ce9ba 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,62 @@ # MessagePack for Python -[![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml) +[![Build Status](https://travis-ci.org/msgpack/msgpack-python.svg?branch=master)](https://travis-ci.org/msgpack/msgpack-python) [![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) -## What is this? +## What's this [MessagePack](https://msgpack.org/) is an efficient binary serialization format. It lets you exchange data among multiple languages like JSON. But it's faster and smaller. This package provides CPython bindings for reading and writing MessagePack data. + +## Very important notes for existing users + +### PyPI package name + +Package name on PyPI was changed from `msgpack-python` to `msgpack` from 0.5. + +When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before +`pip install -U msgpack`. + + +### Compatibility with the old format + +You can use `use_bin_type=False` option to pack `bytes` +object into raw type in the old msgpack spec, instead of bin type in new msgpack spec. + +You can unpack old msgpack format using `raw=True` option. +It unpacks str (raw) type in msgpack into Python bytes. + +See note below for detail. + + +### Major breaking changes in msgpack 1.0 + +* Python 2 + + * The extension module does not support Python 2 anymore. + The pure Python implementation (`msgpack.fallback`) is used for Python 2. + +* Packer + + * `use_bin_type=True` by default. bytes are encoded in bin type in msgpack. + **If you are still using Python 2, you must use unicode for all string types.** + You can use `use_bin_type=False` to encode into old msgpack format. + * `encoding` option is removed. UTF-8 is used always. + +* Unpacker + + * `raw=False` by default. It assumes str types are valid UTF-8 string + and decode them to Python str (unicode) object. + * `encoding` option is removed. You can use `raw=True` to support old format. + * Default value of `max_buffer_size` is changed from 0 to 100 MiB. + * Default value of `strict_map_key` is changed to True to avoid hashdos. 
+ You need to pass `strict_map_key=False` if you have data which contain map keys + which type is not bytes or str. + + ## Install ``` @@ -18,38 +65,57 @@ $ pip install msgpack ### Pure Python implementation -The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy. +The extension module in msgpack (`msgpack._cmsgpack`) does not support +Python 2 and PyPy. -But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy. +But msgpack provides a pure Python implementation (`msgpack.fallback`) +for PyPy and Python 2. + +Since the [pip](https://pip.pypa.io/) uses the pure Python implementation, +Python 2 support will not be dropped in the foreseeable future. ### Windows -If you can't use a binary distribution, you need to install Visual Studio -or the Windows SDK on Windows. -Without the extension, the pure Python implementation on CPython runs slowly. +When you can't use a binary distribution, you need to install Visual Studio +or Windows SDK on Windows. +Without extension, using pure Python implementation on CPython runs slowly. ## How to use +NOTE: In examples below, I use `raw=False` and `use_bin_type=True` for users +using msgpack < 1.0. These options are default from msgpack 1.0 so you can omit them. + + ### One-shot pack & unpack Use `packb` for packing and `unpackb` for unpacking. -msgpack provides `dumps` and `loads` as aliases for compatibility with +msgpack provides `dumps` and `loads` as an alias for compatibility with `json` and `pickle`. -`pack` and `dump` pack to a file-like object. -`unpack` and `load` unpack from a file-like object. +`pack` and `dump` packs to a file-like object. +`unpack` and `load` unpacks from a file-like object. ```pycon >>> import msgpack ->>> msgpack.packb([1, 2, 3]) +>>> msgpack.packb([1, 2, 3], use_bin_type=True) '\x93\x01\x02\x03' ->>> msgpack.unpackb(_) +>>> msgpack.unpackb(_, raw=False) [1, 2, 3] ``` -Read the docstring for options. +`unpack` unpacks msgpack's array to Python's list, but can also unpack to tuple: + +```pycon +>>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) +(1, 2, 3) +``` + +You should always specify the `use_list` keyword argument for backward compatibility. +See performance issues relating to `use_list option`_ below. + +Read the docstring for other options. ### Streaming unpacking @@ -63,17 +129,17 @@ from io import BytesIO buf = BytesIO() for i in range(100): - buf.write(msgpack.packb(i)) + buf.write(msgpack.packb(i, use_bin_type=True)) buf.seek(0) -unpacker = msgpack.Unpacker(buf) +unpacker = msgpack.Unpacker(buf, raw=False) for unpacked in unpacker: print(unpacked) ``` -### Packing/unpacking of custom data types +### Packing/unpacking of custom data type It is also possible to pack/unpack custom data types. Here is an example for `datetime.datetime`. @@ -98,17 +164,14 @@ def encode_datetime(obj): return obj -packed_dict = msgpack.packb(useful_dict, default=encode_datetime) -this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) +packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) +this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) ``` `Unpacker`'s `object_hook` callback receives a dict; the `object_pairs_hook` callback may instead be used to receive a list of key-value pairs. -NOTE: msgpack can encode datetime with tzinfo into standard ext type for now. -See `datetime` option in `Packer` docstring. 
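The note above refers to the `datetime` option of `Packer` without showing it in use. A minimal sketch of how that option pairs with the Unpacker's `timestamp=3` mode, using an illustrative value (these options come from the Packer/Unpacker docstrings, not from anything this diff introduces):

```python
import datetime

import msgpack

# An aware datetime can be packed as the standard Timestamp ext type
# by enabling the Packer's datetime option.
dt = datetime.datetime(2021, 6, 1, 12, 0, tzinfo=datetime.timezone.utc)
packed = msgpack.packb(dt, datetime=True)

# timestamp=3 asks the Unpacker to return timezone-aware datetimes (UTC).
assert msgpack.unpackb(packed, timestamp=3) == dt
```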
- ### Extended types @@ -130,8 +193,8 @@ It is also possible to pack/unpack custom data types using the **ext** type. ... return ExtType(code, data) ... >>> data = array.array('d', [1.2, 3.4]) ->>> packed = msgpack.packb(data, default=default) ->>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) +>>> packed = msgpack.packb(data, default=default, use_bin_type=True) +>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False) >>> data == unpacked True ``` @@ -140,8 +203,8 @@ True ### Advanced unpacking control As an alternative to iteration, `Unpacker` objects provide `unpack`, -`skip`, `read_array_header`, and `read_map_header` methods. The former two -read an entire message from the stream, respectively deserializing and returning +`skip`, `read_array_header` and `read_map_header` methods. The former two +read an entire message from the stream, respectively de-serialising and returning the result, or ignoring it. The latter two methods return the number of elements in the upcoming container, so that each element in an array, or key-value pair in a map, can be unpacked or skipped individually. @@ -149,7 +212,7 @@ in a map, can be unpacked or skipped individually. ## Notes -### String and binary types in the old MessagePack spec +### string and binary type Early versions of msgpack didn't distinguish string and binary types. The type for representing both string and binary types was named **raw**. @@ -159,15 +222,15 @@ and `raw=True` options. ```pycon >>> import msgpack ->>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=False), raw=True) +>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=False), raw=True) [b'spam', b'eggs'] ->>> msgpack.unpackb(msgpack.packb([b'spam', 'eggs'], use_bin_type=True), raw=False) +>>> msgpack.unpackb(msgpack.packb([b'spam', u'eggs'], use_bin_type=True), raw=False) [b'spam', 'eggs'] ``` ### ext type -To use the **ext** type, pass a `msgpack.ExtType` object to the packer. +To use the **ext** type, pass `msgpack.ExtType` object to packer. ```pycon >>> import msgpack @@ -181,62 +244,24 @@ You can use it with `default` and `ext_hook`. See below. ### Security -When unpacking data received from an unreliable source, msgpack provides +To unpacking data received from unreliable source, msgpack provides two security options. `max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size. -It is also used to limit preallocated list sizes. +It is used to limit the preallocated list size too. `strict_map_key` (default: `True`) limits the type of map keys to bytes and str. -While the MessagePack spec doesn't limit map key types, -there is a risk of a hash DoS. +While msgpack spec doesn't limit the types of the map keys, +there is a risk of the hashdos. If you need to support other types for map keys, use `strict_map_key=False`. ### Performance tips -CPython's GC starts when the number of allocated objects grows. -This means unpacking may trigger unnecessary GC. -You can use `gc.disable()` when unpacking a large message. +CPython's GC starts when growing allocated object. +This means unpacking may cause useless GC. +You can use `gc.disable()` when unpacking large message. -A list is the default sequence type in Python. -However, a tuple is lighter than a list. +List is the default sequence type of Python. +But tuple is lighter than list. You can use `use_list=False` while unpacking when performance is important. 
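A minimal sketch combining the two performance tips above; the payload and its size are illustrative only:

```python
import gc

import msgpack

payload = msgpack.packb(list(range(10_000)))

# Disabling the GC while unpacking a large message avoids collections
# triggered by the many temporary objects; use_list=False returns tuples,
# which are cheaper to allocate than lists.
gc.disable()
try:
    data = msgpack.unpackb(payload, use_list=False)
finally:
    gc.enable()

assert isinstance(data, tuple) and data[:3] == (0, 1, 2)
```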
- - -## Major breaking changes in the history - -### msgpack 0.5 - -The package name on PyPI was changed from `msgpack-python` to `msgpack` in 0.5. - -When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before -`pip install -U msgpack`. - - -### msgpack 1.0 - -* Python 2 support - - * The extension module no longer supports Python 2. - The pure Python implementation (`msgpack.fallback`) is used for Python 2. - - * msgpack 1.0.6 drops official support of Python 2.7, as pip and - GitHub Action "setup-python" no longer supports Python 2.7. - -* Packer - - * Packer uses `use_bin_type=True` by default. - Bytes are encoded in the bin type in MessagePack. - * The `encoding` option is removed. UTF-8 is always used. - -* Unpacker - - * Unpacker uses `raw=False` by default. It assumes str values are valid UTF-8 strings - and decodes them to Python str (Unicode) objects. - * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str). - * The default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attacks. - You need to pass `max_buffer_size=0` if you have large but safe data. - * The default value of `strict_map_key` is changed to True to avoid hash DoS. - You need to pass `strict_map_key=False` if you have data that contain map keys - whose type is neither bytes nor str. diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index 75f0c54..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,5 +0,0 @@ -## Security contact information - -To report a security vulnerability, please use the -[Tidelift security contact](https://tidelift.com/security). -Tidelift will coordinate the fix and disclosure. \ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml new file mode 100644 index 0000000..f338e17 --- /dev/null +++ b/appveyor.yml @@ -0,0 +1,50 @@ +environment: + matrix: + # For Python versions available on Appveyor, see + # http://www.appveyor.com/docs/installed-software#python + - PYTHON: "C:\\Python36" + +install: + # We need wheel installed to build wheels + - "%PYTHON%\\python.exe -m pip install -U pip" + - "%PYTHON%\\python.exe -m pip install -U cython" + - "%PYTHON%\\Scripts\\cython --cplus msgpack/_cmsgpack.pyx" + +build: off + +test_script: + # Put your test command here. + # Note that you must use the environment variable %PYTHON% to refer to + # the interpreter you're using - Appveyor does not do anything special + # to put the Python version you want to use on PATH. + - set PYTHON="C:\\Python27" + - ci\\runtests.bat + - set PYTHON="C:\\Python27-x64" + - ci\\runtests.bat + - set PYTHON="C:\\Python36" + - ci\\runtests.bat + - set PYTHON="C:\\Python36-x64" + - ci\\runtests.bat + - set PYTHON="C:\\Python37" + - ci\\runtests.bat + - set PYTHON="C:\\Python37-x64" + - ci\\runtests.bat + - set PYTHON="C:\\Python38" + - ci\\runtests.bat + - set PYTHON="C:\\Python38-x64" + - ci\\runtests.bat + +after_test: + # This step builds your wheels. + # Again, you need to use %PYTHON% to get the correct interpreter + +artifacts: + # bdist_wheel puts your built wheel in the dist directory + - path: dist\*.whl + +#on_success: +# You can use this step to upload your artifacts to a public website. +# See Appveyor's documentation for more details. Or you can simply +# access your wheels from the Appveyor "artifacts" tab for your build. 
+ +# vim: set shiftwidth=2 diff --git a/ci/runtests.bat b/ci/runtests.bat new file mode 100644 index 0000000..4ae2f70 --- /dev/null +++ b/ci/runtests.bat @@ -0,0 +1,9 @@ +%PYTHON%\python.exe -m pip install -U pip wheel pytest +%PYTHON%\python.exe setup.py build_ext -i +%PYTHON%\python.exe setup.py install +%PYTHON%\python.exe -c "import sys; print(hex(sys.maxsize))" +%PYTHON%\python.exe -c "from msgpack import _cmsgpack" +%PYTHON%\python.exe setup.py bdist_wheel +%PYTHON%\python.exe -m pytest -v test +SET EL=%ERRORLEVEL% +exit /b %EL% diff --git a/ci/runtests.sh b/ci/runtests.sh new file mode 100644 index 0000000..5d87f69 --- /dev/null +++ b/ci/runtests.sh @@ -0,0 +1,8 @@ +#!/bin/bash +set -ex +${PYTHON} -VV +${PYTHON} -m pip install setuptools wheel pytest +${PYTHON} setup.py build_ext -if +${PYTHON} -c "from msgpack import _cmsgpack" +${PYTHON} setup.py bdist_wheel +${PYTHON} -m pytest -v test diff --git a/docker/buildwheel.sh b/docker/buildwheel.sh index ff34139..89a2570 100644 --- a/docker/buildwheel.sh +++ b/docker/buildwheel.sh @@ -7,12 +7,10 @@ set -e -x ARCH=`uname -p` echo "arch=$ARCH" -ls /opt/python - for V in "${PYTHON_VERSIONS[@]}"; do PYBIN=/opt/python/$V/bin rm -rf build/ # Avoid lib build by narrow Python is used by wide python - $PYBIN/python -m build -w + $PYBIN/python setup.py bdist_wheel done cd dist diff --git a/docker/shared.env b/docker/shared.env index 80274ac..3601a07 100644 --- a/docker/shared.env +++ b/docker/shared.env @@ -1,7 +1,7 @@ PYTHON_VERSIONS=( - cp310-cp310 cp39-cp39 cp38-cp38 cp37-cp37m cp36-cp36m + cp35-cp35m ) diff --git a/docs/_static/README.txt b/docs/_static/README.txt deleted file mode 100644 index 1c70594..0000000 --- a/docs/_static/README.txt +++ /dev/null @@ -1 +0,0 @@ -Sphinx will copy the contents of docs/_static/ directory to the build location. diff --git a/docs/api.rst b/docs/api.rst index f5dfbbd..93827e1 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -5,19 +5,19 @@ API reference .. autofunction:: pack -``dump()`` is an alias for :func:`pack` +:func:`dump` is alias for :func:`pack` .. autofunction:: packb -``dumps()`` is an alias for :func:`packb` +:func:`dumps` is alias for :func:`packb` .. autofunction:: unpack -``load()`` is an alias for :func:`unpack` +:func:`load` is alias for :func:`unpack` .. autofunction:: unpackb -``loads()`` is an alias for :func:`unpackb` +:func:`loads` is alias for :func:`unpackb` .. autoclass:: Packer :members: diff --git a/docs/conf.py b/docs/conf.py index 28116cd..6b432be 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- +# # msgpack documentation build configuration file, created by # sphinx-quickstart on Sun Feb 24 14:20:50 2013. # @@ -9,12 +11,12 @@ # All configuration values have a default; values that are commented out # serve to show the default. +import sys, os + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -# import os -# import sys -# sys.path.insert(0, os.path.abspath('..')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- @@ -38,8 +40,8 @@ source_suffix = ".rst" master_doc = "index" # General information about the project. 
-project = "msgpack" -copyright = "Inada Naoki" +project = u"msgpack" +copyright = u"Inada Naoki" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -89,7 +91,7 @@ pygments_style = "sphinx" # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = "sphinx_rtd_theme" +html_theme = "sphinxdoc" # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the @@ -179,7 +181,7 @@ latex_elements = { # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ("index", "msgpack.tex", "msgpack Documentation", "Author", "manual"), + ("index", "msgpack.tex", u"msgpack Documentation", u"Author", "manual"), ] # The name of an image file (relative to this directory) to place at the top of @@ -207,7 +209,7 @@ latex_documents = [ # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [("index", "msgpack", "msgpack Documentation", ["Author"], 1)] +man_pages = [("index", "msgpack", u"msgpack Documentation", [u"Author"], 1)] # If true, show URL addresses after external links. # man_show_urls = False @@ -222,8 +224,8 @@ texinfo_documents = [ ( "index", "msgpack", - "msgpack Documentation", - "Author", + u"msgpack Documentation", + u"Author", "msgpack", "One line description of project.", "Miscellaneous", @@ -243,10 +245,10 @@ texinfo_documents = [ # -- Options for Epub output --------------------------------------------------- # Bibliographic Dublin Core info. -epub_title = "msgpack" -epub_author = "Author" -epub_publisher = "Author" -epub_copyright = "2013, Author" +epub_title = u"msgpack" +epub_author = u"Author" +epub_publisher = u"Author" +epub_copyright = u"2013, Author" # The language of the text. It defaults to the language option # or en if the language is not set. 
diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 26002de..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -sphinx~=7.3.7 -sphinx-rtd-theme~=2.0.0 diff --git a/msgpack/__init__.py b/msgpack/__init__.py index f3266b7..d6705e2 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,20 +1,19 @@ -# ruff: noqa: F401 -import os - -from .exceptions import * # noqa: F403 +# coding: utf-8 +from ._version import version +from .exceptions import * from .ext import ExtType, Timestamp -version = (1, 1, 2) -__version__ = "1.1.2" +import os +import sys -if os.environ.get("MSGPACK_PUREPYTHON"): - from .fallback import Packer, Unpacker, unpackb +if os.environ.get("MSGPACK_PUREPYTHON") or sys.version_info[0] == 2: + from .fallback import Packer, unpackb, Unpacker else: try: - from ._cmsgpack import Packer, Unpacker, unpackb + from ._cmsgpack import Packer, unpackb, Unpacker except ImportError: - from .fallback import Packer, Unpacker, unpackb + from .fallback import Packer, unpackb, Unpacker def pack(o, stream, **kwargs): diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx index 9680b31..1faaac3 100644 --- a/msgpack/_cmsgpack.pyx +++ b/msgpack/_cmsgpack.pyx @@ -1,6 +1,5 @@ +# coding: utf-8 #cython: embedsignature=True, c_string_encoding=ascii, language_level=3 -#cython: freethreading_compatible = True -import cython from cpython.datetime cimport import_datetime, datetime_new import_datetime() diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 94d1462..e6cd2c7 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -1,3 +1,5 @@ +# coding: utf-8 + from cpython cimport * from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact from cpython.datetime cimport ( @@ -14,6 +16,8 @@ from .ext import ExtType, Timestamp cdef extern from "Python.h": int PyMemoryView_Check(object obj) + char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL + cdef extern from "pack.h": struct msgpack_packer: @@ -22,21 +26,26 @@ cdef extern from "pack.h": size_t buf_size bint use_bin_type - int msgpack_pack_nil(msgpack_packer* pk) except -1 - int msgpack_pack_true(msgpack_packer* pk) except -1 - int msgpack_pack_false(msgpack_packer* pk) except -1 - int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1 - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1 - int msgpack_pack_float(msgpack_packer* pk, float d) except -1 - int msgpack_pack_double(msgpack_packer* pk, double d) except -1 - int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1 - int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1 - int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1 + int msgpack_pack_int(msgpack_packer* pk, int d) + int msgpack_pack_nil(msgpack_packer* pk) + int msgpack_pack_true(msgpack_packer* pk) + int msgpack_pack_false(msgpack_packer* pk) + int msgpack_pack_long(msgpack_packer* pk, long d) + int msgpack_pack_long_long(msgpack_packer* pk, long long d) + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) + int msgpack_pack_float(msgpack_packer* pk, float d) + int msgpack_pack_double(msgpack_packer* pk, double d) + 
int msgpack_pack_array(msgpack_packer* pk, size_t l) + int msgpack_pack_map(msgpack_packer* pk, size_t l) + int msgpack_pack_raw(msgpack_packer* pk, size_t l) + int msgpack_pack_bin(msgpack_packer* pk, size_t l) + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) + int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds); + int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) +cdef extern from "buff_converter.h": + object buff_to_buff(char *, Py_ssize_t) cdef int DEFAULT_RECURSE_LIMIT=511 cdef long long ITEM_LIMIT = (2**32)-1 @@ -50,7 +59,7 @@ cdef inline int PyBytesLike_CheckExact(object o): return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o) -cdef class Packer: +cdef class Packer(object): """ MessagePack Packer @@ -62,8 +71,7 @@ cdef class Packer: Packer's constructor has some keyword arguments: - :param default: - When specified, it should be callable. + :param callable default: Convert user type to builtin type that Packer supports. See also simplejson's document. @@ -90,48 +98,32 @@ cdef class Packer: If set to true, datetime with tzinfo is packed into Timestamp type. Note that the tzinfo is stripped in the timestamp. You can get UTC datetime with `timestamp=3` option of the Unpacker. + (Python 2 is not supported). :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. - - :param int buf_size: - The size of the internal buffer. (default: 256*1024) - Useful if serialisation size can be correctly estimated, - avoid unnecessary reallocations. """ cdef msgpack_packer pk cdef object _default cdef object _berrors cdef const char *unicode_errors - cdef size_t exports # number of exported buffers cdef bint strict_types cdef bint use_float cdef bint autoreset cdef bint datetime - def __cinit__(self, buf_size=256*1024, **_kwargs): + def __cinit__(self): + cdef int buf_size = 1024*1024 self.pk.buf = PyMem_Malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.pk.buf_size = buf_size self.pk.length = 0 - self.exports = 0 - def __dealloc__(self): - PyMem_Free(self.pk.buf) - self.pk.buf = NULL - assert self.exports == 0 - - cdef _check_exports(self): - if self.exports > 0: - raise BufferError("Existing exports of data: Packer cannot be changed") - - @cython.critical_section def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, - bint strict_types=False, bint datetime=False, unicode_errors=None, - buf_size=256*1024): + bint strict_types=False, bint datetime=False, unicode_errors=None): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset @@ -148,130 +140,157 @@ cdef class Packer: else: self.unicode_errors = self._berrors - # returns -2 when default should(o) be called - cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1: + def __dealloc__(self): + PyMem_Free(self.pk.buf) + self.pk.buf = NULL + + cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: cdef long long llval cdef unsigned long long ullval cdef unsigned long ulval - cdef const char* rawval - cdef Py_ssize_t L - cdef Py_buffer view - cdef bint strict = self.strict_types - - if o is None: - msgpack_pack_nil(&self.pk) - elif o is True: - msgpack_pack_true(&self.pk) - elif o is False: - 
msgpack_pack_false(&self.pk) - elif PyLong_CheckExact(o) if strict else PyLong_Check(o): - try: - if o > 0: - ullval = o - msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - msgpack_pack_long_long(&self.pk, llval) - except OverflowError as oe: - if will_default: - return -2 - else: - raise OverflowError("Integer value out of range") - elif PyFloat_CheckExact(o) if strict else PyFloat_Check(o): - if self.use_float: - msgpack_pack_float(&self.pk, o) - else: - msgpack_pack_double(&self.pk, o) - elif PyBytesLike_CheckExact(o) if strict else PyBytesLike_Check(o): - L = Py_SIZE(o) - if L > ITEM_LIMIT: - PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) - rawval = o - msgpack_pack_bin(&self.pk, L) - msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_CheckExact(o) if strict else PyUnicode_Check(o): - if self.unicode_errors == NULL: - rawval = PyUnicode_AsUTF8AndSize(o, &L) - if L >ITEM_LIMIT: - raise ValueError("unicode string is too large") - else: - o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) - L = Py_SIZE(o) - if L > ITEM_LIMIT: - raise ValueError("unicode string is too large") - rawval = o - msgpack_pack_raw(&self.pk, L) - msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyDict_CheckExact(o) if strict else PyDict_Check(o): - L = len(o) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") - msgpack_pack_map(&self.pk, L) - for k, v in o.items(): - self._pack(k, nest_limit) - self._pack(v, nest_limit) - elif type(o) is ExtType if strict else isinstance(o, ExtType): - # This should be before Tuple because ExtType is namedtuple. - rawval = o.data - L = len(o.data) - if L > ITEM_LIMIT: - raise ValueError("EXT data is too large") - msgpack_pack_ext(&self.pk, o.code, L) - msgpack_pack_raw_body(&self.pk, rawval, L) - elif type(o) is Timestamp: - llval = o.seconds - ulval = o.nanoseconds - msgpack_pack_timestamp(&self.pk, llval, ulval) - elif PyList_CheckExact(o) if strict else (PyTuple_Check(o) or PyList_Check(o)): - L = Py_SIZE(o) - if L > ITEM_LIMIT: - raise ValueError("list is too large") - msgpack_pack_array(&self.pk, L) - for v in o: - self._pack(v, nest_limit) - elif PyMemoryView_Check(o): - PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) - L = view.len - if L > ITEM_LIMIT: - PyBuffer_Release(&view); - raise ValueError("memoryview is too large") - try: - msgpack_pack_bin(&self.pk, L) - msgpack_pack_raw_body(&self.pk, view.buf, L) - finally: - PyBuffer_Release(&view); - elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: - delta = o - epoch - if not PyDelta_CheckExact(delta): - raise ValueError("failed to calculate delta") - llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) - ulval = timedelta_microseconds(delta) * 1000 - msgpack_pack_timestamp(&self.pk, llval, ulval) - elif will_default: - return -2 - elif self.datetime and PyDateTime_CheckExact(o): - # this should be later than will_default - PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) - else: - PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) - - cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + cdef long longval + cdef float fval + cdef double dval + cdef char* rawval cdef int ret + cdef dict d + cdef Py_ssize_t L + cdef int default_used = 0 + cdef bint strict_types = self.strict_types + cdef Py_buffer view + if nest_limit < 0: raise ValueError("recursion limit exceeded.") - nest_limit -= 1 - if 
self._default is not None: - ret = self._pack_inner(o, 1, nest_limit) - if ret == -2: - o = self._default(o) - else: - return ret - return self._pack_inner(o, 0, nest_limit) - @cython.critical_section - def pack(self, object obj): + while True: + if o is None: + ret = msgpack_pack_nil(&self.pk) + elif o is True: + ret = msgpack_pack_true(&self.pk) + elif o is False: + ret = msgpack_pack_false(&self.pk) + elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): + # PyInt_Check(long) is True for Python 3. + # So we should test long before int. + try: + if o > 0: + ullval = o + ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) + else: + llval = o + ret = msgpack_pack_long_long(&self.pk, llval) + except OverflowError as oe: + if not default_used and self._default is not None: + o = self._default(o) + default_used = True + continue + else: + raise OverflowError("Integer value out of range") + elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o): + longval = o + ret = msgpack_pack_long(&self.pk, longval) + elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): + if self.use_float: + fval = o + ret = msgpack_pack_float(&self.pk, fval) + else: + dval = o + ret = msgpack_pack_double(&self.pk, dval) + elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) + rawval = o + ret = msgpack_pack_bin(&self.pk, L) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): + if self.unicode_errors == NULL: + ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); + if ret == -2: + raise ValueError("unicode string is too large") + else: + o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("unicode string is too large") + ret = msgpack_pack_raw(&self.pk, L) + if ret == 0: + rawval = o + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyDict_CheckExact(o): + d = o + L = len(d) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + ret = msgpack_pack_map(&self.pk, L) + if ret == 0: + for k, v in d.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif not strict_types and PyDict_Check(o): + L = len(o) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + ret = msgpack_pack_map(&self.pk, L) + if ret == 0: + for k, v in o.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif type(o) is ExtType if strict_types else isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. 
+ longval = o.code + rawval = o.data + L = len(o.data) + if L > ITEM_LIMIT: + raise ValueError("EXT data is too large") + ret = msgpack_pack_ext(&self.pk, longval, L) + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif type(o) is Timestamp: + llval = o.seconds + ulval = o.nanoseconds + ret = msgpack_pack_timestamp(&self.pk, llval, ulval) + elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("list is too large") + ret = msgpack_pack_array(&self.pk, L) + if ret == 0: + for v in o: + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif PyMemoryView_Check(o): + if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: + raise ValueError("could not get buffer for memoryview") + L = view.len + if L > ITEM_LIMIT: + PyBuffer_Release(&view); + raise ValueError("memoryview is too large") + ret = msgpack_pack_bin(&self.pk, L) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, view.buf, L) + PyBuffer_Release(&view); + elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: + delta = o - epoch + if not PyDelta_CheckExact(delta): + raise ValueError("failed to calculate delta") + llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) + ulval = timedelta_microseconds(delta) * 1000 + ret = msgpack_pack_timestamp(&self.pk, llval, ulval) + elif not default_used and self._default: + o = self._default(o) + default_used = 1 + continue + else: + PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) + return ret + + cpdef pack(self, object obj): cdef int ret - self._check_exports() try: ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) except: @@ -284,37 +303,36 @@ cdef class Packer: self.pk.length = 0 return buf - @cython.critical_section def pack_ext_type(self, typecode, data): - self._check_exports() - if len(data) > ITEM_LIMIT: - raise ValueError("ext data too large") msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) - @cython.critical_section def pack_array_header(self, long long size): - self._check_exports() if size > ITEM_LIMIT: - raise ValueError("array too large") - msgpack_pack_array(&self.pk, size) + raise ValueError + cdef int ret = msgpack_pack_array(&self.pk, size) + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf - @cython.critical_section def pack_map_header(self, long long size): - self._check_exports() if size > ITEM_LIMIT: - raise ValueError("map too learge") - msgpack_pack_map(&self.pk, size) + raise ValueError + cdef int ret = msgpack_pack_map(&self.pk, size) + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf - @cython.critical_section def pack_map_pairs(self, object pairs): """ Pack *pairs* as msgpack map type. @@ -322,43 +340,33 @@ cdef class Packer: *pairs* should be a sequence of pairs. (`len(pairs)` and `for k, v in pairs:` should be supported.) 
""" - self._check_exports() - size = len(pairs) - if size > ITEM_LIMIT: - raise ValueError("map too large") - msgpack_pack_map(&self.pk, size) - for k, v in pairs: - self._pack(k) - self._pack(v) + cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) + if ret == 0: + for k, v in pairs: + ret = self._pack(k) + if ret != 0: break + ret = self._pack(v) + if ret != 0: break + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf - @cython.critical_section def reset(self): """Reset internal buffer. This method is useful only when autoreset=False. """ - self._check_exports() self.pk.length = 0 - @cython.critical_section def bytes(self): """Return internal buffer contents as bytes object""" return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) def getbuffer(self): - """Return memoryview of internal buffer. - - Note: Packer now supports buffer protocol. You can use memoryview(packer). - """ - return memoryview(self) - - def __getbuffer__(self, Py_buffer *buffer, int flags): - PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags) - self.exports += 1 - - def __releasebuffer__(self, Py_buffer *buffer): - self.exports -= 1 + """Return view of internal buffer.""" + return buff_to_buff(self.pk.buf, self.pk.length) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index f0cf96d..e4f3f1e 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,12 +1,15 @@ +# coding: utf-8 + from cpython cimport * cdef extern from "Python.h": ctypedef struct PyObject + cdef int PyObject_AsReadBuffer(object o, const void** buff, Py_ssize_t* buf_len) except -1 object PyMemoryView_GetContiguous(object obj, int buffertype, char order) from libc.stdlib cimport * from libc.string cimport * from libc.limits cimport * -from libc.stdint cimport uint64_t +ctypedef unsigned long long uint64_t from .exceptions import ( BufferFull, @@ -33,7 +36,7 @@ cdef extern from "unpack.h": PyObject* timestamp_t PyObject *giga; PyObject *utc; - const char *unicode_errors + char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len Py_ssize_t max_array_len @@ -208,77 +211,50 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, raise ValueError("Unpack failed: error = %d" % (ret,)) -cdef class Unpacker: - """Streaming unpacker. +cdef class Unpacker(object): + """ + MessagePack Packer - Arguments: + Usage:: - :param file_like: - File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and `.feed()` is not usable. + packer = Packer() + astream.write(packer.pack(a)) + astream.write(packer.pack(b)) - :param int read_size: - Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) + Packer's constructor has some keyword arguments: - :param bool use_list: - If true, unpack msgpack array to Python list. - Otherwise, unpack to Python tuple. (default: True) + :param callable default: + Convert user type to builtin type that Packer supports. + See also simplejson's document. - :param bool raw: - If true, unpack msgpack raw to Python bytes. - Otherwise, unpack to Python str by decoding with UTF-8 encoding (default). + :param bool use_single_float: + Use single precision float type for float. (default: False) - :param int timestamp: - Control how timestamp type is unpacked: + :param bool autoreset: + Reset buffer after each pack and return its content as `bytes`. (default: True). 
+ If set this to false, use `bytes()` to get content and `.reset()` to clear buffer. - 0 - Timestamp - 1 - float (Seconds from the EPOCH) - 2 - int (Nanoseconds from the EPOCH) - 3 - datetime.datetime (UTC). + :param bool use_bin_type: + Use bin type introduced in msgpack spec 2.0 for bytes. + It also enables str8 type for unicode. (default: True) - :param bool strict_map_key: - If true (default), only str or bytes are accepted for map (dict) keys. + :param bool strict_types: + If set to true, types will be checked to be exact. Derived classes + from serializable types will not be serialized and will be + treated as unsupported type and forwarded to default. + Additionally tuples will not be serialized as lists. + This is useful when trying to implement accurate serialization + for python types. - :param object_hook: - When specified, it should be callable. - Unpacker calls it with a dict argument after unpacking msgpack map. - (See also simplejson) - - :param object_pairs_hook: - When specified, it should be callable. - Unpacker calls it with a list of key-value pairs after unpacking msgpack map. - (See also simplejson) + :param bool datetime: + If set to true, datetime with tzinfo is packed into Timestamp type. + Note that the tzinfo is stripped in the timestamp. + You can get UTC datetime with `timestamp=3` option of the Unpacker. + (Python 2 is not supported). :param str unicode_errors: - The error handler for decoding unicode. (default: 'strict') - This option should be used only when you have msgpack data which - contains invalid UTF-8 string. - - :param int max_buffer_size: - Limits size of data waiting unpacked. 0 means 2**32-1. - The default value is 100*1024*1024 (100MiB). - Raises `BufferFull` exception when it is insufficient. - You should set this parameter when unpacking data from untrusted source. - - :param int max_str_len: - Deprecated, use *max_buffer_size* instead. - Limits max length of str. (default: max_buffer_size) - - :param int max_bin_len: - Deprecated, use *max_buffer_size* instead. - Limits max length of bin. (default: max_buffer_size) - - :param int max_array_len: - Limits max length of array. - (default: max_buffer_size) - - :param int max_map_len: - Limits max length of map. - (default: max_buffer_size//2) - - :param int max_ext_len: - Deprecated, use *max_buffer_size* instead. - Limits max size of ext type. (default: max_buffer_size) + The error handler for encoding unicode. (default: 'strict') + DO NOT USE THIS!! This option is kept for very specific usage. 
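As a quick illustration of the autoreset/bytes()/reset() contract the restored docstring describes (a minimal sketch against the public msgpack API, not lines of this patch; it assumes the built package is importable)::

    import msgpack

    packer = msgpack.Packer(autoreset=False)
    packer.pack([1, 2, 3])          # output accumulates in the internal buffer
    packer.pack("spam")
    buf = packer.bytes()            # whole buffer content as bytes
    packer.reset()                  # clear the buffer for reuse

    unpacker = msgpack.Unpacker()
    unpacker.feed(buf)
    assert list(unpacker) == [[1, 2, 3], "spam"]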
Example of streaming deserialize from file-like object:: @@ -322,7 +298,6 @@ cdef class Unpacker: PyMem_Free(self.buf) self.buf = NULL - @cython.critical_section def __init__(self, file_like=None, *, Py_ssize_t read_size=0, bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, @@ -383,7 +358,6 @@ cdef class Unpacker: max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) - @cython.critical_section def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff @@ -440,30 +414,34 @@ cdef class Unpacker: self.buf_size = buf_size self.buf_tail = tail + _buf_len - cdef int read_from_file(self) except -1: - cdef Py_ssize_t remains = self.max_buffer_size - (self.buf_tail - self.buf_head) - if remains <= 0: - raise BufferFull - - next_bytes = self.file_like_read(min(self.read_size, remains)) + cdef read_from_file(self): + next_bytes = self.file_like_read( + min(self.read_size, + self.max_buffer_size - (self.buf_tail - self.buf_head) + )) if next_bytes: self.append_buffer(PyBytes_AsString(next_bytes), PyBytes_Size(next_bytes)) else: self.file_like = None - return 0 cdef object _unpack(self, execute_fn execute, bint iter=0): cdef int ret cdef object obj cdef Py_ssize_t prev_head + if self.buf_head >= self.buf_tail and self.file_like is not None: + self.read_from_file() + while 1: prev_head = self.buf_head - if prev_head < self.buf_tail: - ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - self.stream_offset += self.buf_head - prev_head - else: - ret = 0 + if prev_head >= self.buf_tail: + if iter: + raise StopIteration("No more data to unpack.") + else: + raise OutOfData("No more data to unpack.") + + ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) + self.stream_offset += self.buf_head - prev_head if ret == 1: obj = unpack_data(&self.ctx) @@ -484,7 +462,6 @@ cdef class Unpacker: else: raise ValueError("Unpack failed: error = %d" % (ret,)) - @cython.critical_section def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" cdef Py_ssize_t nread @@ -497,7 +474,6 @@ cdef class Unpacker: self.stream_offset += nread return ret - @cython.critical_section def unpack(self): """Unpack one object @@ -505,7 +481,6 @@ cdef class Unpacker: """ return self._unpack(unpack_construct) - @cython.critical_section def skip(self): """Read and ignore one object, returning None @@ -513,7 +488,6 @@ cdef class Unpacker: """ return self._unpack(unpack_skip) - @cython.critical_section def read_array_header(self): """assuming the next object is an array, return its size n, such that the next n unpack() calls will iterate over its contents. @@ -522,7 +496,6 @@ cdef class Unpacker: """ return self._unpack(read_array_header) - @cython.critical_section def read_map_header(self): """assuming the next object is a map, return its size n, such that the next n * 2 unpack() calls will iterate over its key-value pairs. 
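The read_array_header()/read_map_header() contract documented above is easiest to picture with a short sketch (illustration only; it relies on the default raw=False and use_list=True settings)::

    import msgpack

    unpacker = msgpack.Unpacker()
    unpacker.feed(msgpack.packb({"a": 1, "b": 2}))
    n = unpacker.read_map_header()      # n == 2
    pairs = {}
    for _ in range(n):                  # n * 2 unpack() calls follow
        key = unpacker.unpack()
        pairs[key] = unpacker.unpack()
    assert pairs == {"a": 1, "b": 2}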
@@ -531,7 +504,6 @@ cdef class Unpacker: """ return self._unpack(read_map_header) - @cython.critical_section def tell(self): """Returns the current position of the Unpacker in bytes, i.e., the number of bytes that were read from the input, also the starting @@ -542,7 +514,6 @@ cdef class Unpacker: def __iter__(self): return self - @cython.critical_section def __next__(self): return self._unpack(unpack_construct, 1) diff --git a/msgpack/_version.py b/msgpack/_version.py new file mode 100644 index 0000000..1c83c8e --- /dev/null +++ b/msgpack/_version.py @@ -0,0 +1 @@ +version = (1, 0, 2) diff --git a/msgpack/buff_converter.h b/msgpack/buff_converter.h new file mode 100644 index 0000000..86b4196 --- /dev/null +++ b/msgpack/buff_converter.h @@ -0,0 +1,8 @@ +#include "Python.h" + +/* cython does not support this preprocessor check => write it in raw C */ +static PyObject * +buff_to_buff(char *buff, Py_ssize_t size) +{ + return PyMemoryView_FromMemory(buff, size, PyBUF_READ); +} diff --git a/msgpack/ext.py b/msgpack/ext.py index 9694819..4eb9dd6 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -1,6 +1,21 @@ -import datetime -import struct +# coding: utf-8 from collections import namedtuple +import datetime +import sys +import struct + + +PY2 = sys.version_info[0] == 2 + +if PY2: + int_types = (int, long) + _utc = None +else: + int_types = int + try: + _utc = datetime.timezone.utc + except AttributeError: + _utc = datetime.timezone(datetime.timedelta(0)) class ExtType(namedtuple("ExtType", "code data")): @@ -13,15 +28,14 @@ class ExtType(namedtuple("ExtType", "code data")): raise TypeError("data must be bytes") if not 0 <= code <= 127: raise ValueError("code must be 0~127") - return super().__new__(cls, code, data) + return super(ExtType, cls).__new__(cls, code, data) -class Timestamp: +class Timestamp(object): """Timestamp represents the Timestamp extension type in msgpack. - When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. - When using pure-Python msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and - unpack `Timestamp`. + When built with Cython, msgpack uses C methods to pack and unpack `Timestamp`. When using pure-Python + msgpack, :func:`to_bytes` and :func:`from_bytes` are used to pack and unpack `Timestamp`. This class is immutable: Do not override seconds and nanoseconds. """ @@ -39,25 +53,31 @@ class Timestamp: Number of nanoseconds to add to `seconds` to get fractional time. Maximum is 999_999_999. Default is 0. - Note: Negative times (before the UNIX epoch) are represented as neg. seconds + pos. ns. + Note: Negative times (before the UNIX epoch) are represented as negative seconds + positive ns. """ - if not isinstance(seconds, int): - raise TypeError("seconds must be an integer") - if not isinstance(nanoseconds, int): + if not isinstance(seconds, int_types): + raise TypeError("seconds must be an interger") + if not isinstance(nanoseconds, int_types): raise TypeError("nanoseconds must be an integer") - if not (0 <= nanoseconds < 10**9): - raise ValueError("nanoseconds must be a non-negative integer less than 999999999.") + if not (0 <= nanoseconds < 10 ** 9): + raise ValueError( + "nanoseconds must be a non-negative integer less than 999999999." 
+ ) self.seconds = seconds self.nanoseconds = nanoseconds def __repr__(self): """String representation of Timestamp.""" - return f"Timestamp(seconds={self.seconds}, nanoseconds={self.nanoseconds})" + return "Timestamp(seconds={0}, nanoseconds={1})".format( + self.seconds, self.nanoseconds + ) def __eq__(self, other): """Check for equality with another Timestamp object""" if type(other) is self.__class__: - return self.seconds == other.seconds and self.nanoseconds == other.nanoseconds + return ( + self.seconds == other.seconds and self.nanoseconds == other.nanoseconds + ) return False def __ne__(self, other): @@ -120,10 +140,10 @@ class Timestamp: """Create a Timestamp from posix timestamp in seconds. :param unix_float: Posix timestamp in seconds. - :type unix_float: int or float + :type unix_float: int or float. """ seconds = int(unix_sec // 1) - nanoseconds = int((unix_sec % 1) * 10**9) + nanoseconds = int((unix_sec % 1) * 10 ** 9) return Timestamp(seconds, nanoseconds) def to_unix(self): @@ -141,7 +161,7 @@ class Timestamp: :param int unix_ns: Posix timestamp in nanoseconds. :rtype: Timestamp """ - return Timestamp(*divmod(unix_ns, 10**9)) + return Timestamp(*divmod(unix_ns, 10 ** 9)) def to_unix_nano(self): """Get the timestamp as a unixtime in nanoseconds. @@ -149,22 +169,25 @@ class Timestamp: :returns: posix timestamp in nanoseconds :rtype: int """ - return self.seconds * 10**9 + self.nanoseconds + return self.seconds * 10 ** 9 + self.nanoseconds def to_datetime(self): """Get the timestamp as a UTC datetime. - :rtype: `datetime.datetime` + Python 2 is not supported. + + :rtype: datetime. """ - utc = datetime.timezone.utc - return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( - seconds=self.seconds, microseconds=self.nanoseconds // 1000 + return datetime.datetime.fromtimestamp(0, _utc) + datetime.timedelta( + seconds=self.to_unix() ) @staticmethod def from_datetime(dt): """Create a Timestamp from datetime with tzinfo. + Python 2 is not supported. + :rtype: Timestamp """ - return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000) + return Timestamp.from_unix(dt.timestamp()) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index b02e47c..0bfa94e 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,22 +1,63 @@ """Fallback pure Python implementation of msgpack""" -import struct -import sys from datetime import datetime as _DateTime +import sys +import struct + + +PY2 = sys.version_info[0] == 2 +if PY2: + int_types = (int, long) + + def dict_iteritems(d): + return d.iteritems() + + +else: + int_types = int + unicode = str + xrange = range + + def dict_iteritems(d): + return d.items() + + +if sys.version_info < (3, 5): + # Ugly hack... + RecursionError = RuntimeError + + def _is_recursionerror(e): + return ( + len(e.args) == 1 + and isinstance(e.args[0], str) + and e.args[0].startswith("maximum recursion depth exceeded") + ) + + +else: + + def _is_recursionerror(e): + return True + if hasattr(sys, "pypy_version_info"): + # StringIO is slow on PyPy, StringIO is faster. However: PyPy's own + # StringBuilder is fastest. 
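For the Timestamp helpers restored in msgpack/ext.py above, a small round-trip sketch (illustration only; the literal values are arbitrary)::

    from msgpack.ext import Timestamp

    t = Timestamp.from_unix(1234567890.5)
    assert (t.seconds, t.nanoseconds) == (1234567890, 500000000)
    assert t.to_unix_nano() == 1234567890500000000
    assert Timestamp.from_unix_nano(t.to_unix_nano()) == t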
from __pypy__ import newlist_hint - from __pypy__.builders import BytesBuilder - _USING_STRINGBUILDER = True + try: + from __pypy__.builders import BytesBuilder as StringBuilder + except ImportError: + from __pypy__.builders import StringBuilder + USING_STRINGBUILDER = True - class BytesIO: + class StringIO(object): def __init__(self, s=b""): if s: - self.builder = BytesBuilder(len(s)) + self.builder = StringBuilder(len(s)) self.builder.append(s) else: - self.builder = BytesBuilder() + self.builder = StringBuilder() def write(self, s): if isinstance(s, memoryview): @@ -28,18 +69,19 @@ if hasattr(sys, "pypy_version_info"): def getvalue(self): return self.builder.build() + else: - from io import BytesIO + USING_STRINGBUILDER = False + from io import BytesIO as StringIO - _USING_STRINGBUILDER = False - - def newlist_hint(size): - return [] + newlist_hint = lambda size: [] -from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError +from .exceptions import BufferFull, OutOfData, ExtraData, FormatError, StackError + from .ext import ExtType, Timestamp + EX_SKIP = 0 EX_CONSTRUCT = 1 EX_READ_ARRAY_HEADER = 2 @@ -87,54 +129,34 @@ def unpackb(packed, **kwargs): ret = unpacker._unpack() except OutOfData: raise ValueError("Unpack failed: incomplete input") - except RecursionError: - raise StackError + except RecursionError as e: + if _is_recursionerror(e): + raise StackError + raise if unpacker._got_extradata(): raise ExtraData(ret, unpacker._get_extradata()) return ret -_NO_FORMAT_USED = "" -_MSGPACK_HEADERS = { - 0xC4: (1, _NO_FORMAT_USED, TYPE_BIN), - 0xC5: (2, ">H", TYPE_BIN), - 0xC6: (4, ">I", TYPE_BIN), - 0xC7: (2, "Bb", TYPE_EXT), - 0xC8: (3, ">Hb", TYPE_EXT), - 0xC9: (5, ">Ib", TYPE_EXT), - 0xCA: (4, ">f"), - 0xCB: (8, ">d"), - 0xCC: (1, _NO_FORMAT_USED), - 0xCD: (2, ">H"), - 0xCE: (4, ">I"), - 0xCF: (8, ">Q"), - 0xD0: (1, "b"), - 0xD1: (2, ">h"), - 0xD2: (4, ">i"), - 0xD3: (8, ">q"), - 0xD4: (1, "b1s", TYPE_EXT), - 0xD5: (2, "b2s", TYPE_EXT), - 0xD6: (4, "b4s", TYPE_EXT), - 0xD7: (8, "b8s", TYPE_EXT), - 0xD8: (16, "b16s", TYPE_EXT), - 0xD9: (1, _NO_FORMAT_USED, TYPE_RAW), - 0xDA: (2, ">H", TYPE_RAW), - 0xDB: (4, ">I", TYPE_RAW), - 0xDC: (2, ">H", TYPE_ARRAY), - 0xDD: (4, ">I", TYPE_ARRAY), - 0xDE: (2, ">H", TYPE_MAP), - 0xDF: (4, ">I", TYPE_MAP), -} +if sys.version_info < (2, 7, 6): + + def _unpack_from(f, b, o=0): + """Explicit type cast for legacy struct.unpack_from""" + return struct.unpack_from(f, bytes(b), o) -class Unpacker: +else: + _unpack_from = struct.unpack_from + + +class Unpacker(object): """Streaming unpacker. Arguments: :param file_like: File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and `.feed()` is not usable. + If specified, unpacker reads serialized data from it and :meth:`feed()` is not usable. :param int read_size: Used as `file_like.read(read_size)`. (default: `min(16*1024, max_buffer_size)`) @@ -153,17 +175,17 @@ class Unpacker: 0 - Timestamp 1 - float (Seconds from the EPOCH) 2 - int (Nanoseconds from the EPOCH) - 3 - datetime.datetime (UTC). + 3 - datetime.datetime (UTC). Python 2 is not supported. :param bool strict_map_key: If true (default), only str or bytes are accepted for map (dict) keys. - :param object_hook: + :param callable object_hook: When specified, it should be callable. Unpacker calls it with a dict argument after unpacking msgpack map. (See also simplejson) - :param object_pairs_hook: + :param callable object_pairs_hook: When specified, it should be callable. 
Unpacker calls it with a list of key-value pairs after unpacking msgpack map. (See also simplejson) @@ -207,7 +229,7 @@ class Unpacker: Example of streaming deserialize from socket:: - unpacker = Unpacker() + unpacker = Unpacker(max_buffer_size) while True: buf = sock.recv(1024**2) if not buf: @@ -226,7 +248,6 @@ class Unpacker: def __init__( self, file_like=None, - *, read_size=0, use_list=True, raw=False, @@ -270,7 +291,7 @@ class Unpacker: self._buf_checkpoint = 0 if not max_buffer_size: - max_buffer_size = 2**31 - 1 + max_buffer_size = 2 ** 31 - 1 if max_str_len == -1: max_str_len = max_buffer_size if max_bin_len == -1: @@ -311,7 +332,9 @@ class Unpacker: if object_pairs_hook is not None and not callable(object_pairs_hook): raise TypeError("`object_pairs_hook` is not callable") if object_hook is not None and object_pairs_hook is not None: - raise TypeError("object_pairs_hook and object_hook are mutually exclusive") + raise TypeError( + "object_pairs_hook and object_hook are mutually " "exclusive" + ) if not callable(ext_hook): raise TypeError("`ext_hook` is not callable") @@ -329,10 +352,9 @@ class Unpacker: # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython self._buffer.extend(view) - view.release() def _consume(self): - """Gets rid of the used parts of the buffer.""" + """ Gets rid of the used parts of the buffer. """ self._stream_offset += self._buff_i - self._buf_checkpoint self._buf_checkpoint = self._buff_i @@ -374,8 +396,6 @@ class Unpacker: # Read from file remain_bytes = -remain_bytes - if remain_bytes + len(self._buffer) > self._max_buffer_size: - raise BufferFull while remain_bytes > 0: to_read_bytes = max(self._read_size, remain_bytes) read_data = self.file_like.read(to_read_bytes) @@ -389,7 +409,7 @@ class Unpacker: self._buff_i = 0 # rollback raise OutOfData - def _read_header(self): + def _read_header(self, execute=EX_CONSTRUCT): typ = TYPE_IMMEDIATE n = 0 obj = None @@ -404,89 +424,205 @@ class Unpacker: n = b & 0b00011111 typ = TYPE_RAW if n > self._max_str_len: - raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) elif b & 0b11110000 == 0b10010000: n = b & 0b00001111 typ = TYPE_ARRAY if n > self._max_array_len: - raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") + raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) elif b & 0b11110000 == 0b10000000: n = b & 0b00001111 typ = TYPE_MAP if n > self._max_map_len: - raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") + raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) elif b == 0xC0: obj = None elif b == 0xC2: obj = False elif b == 0xC3: obj = True - elif 0xC4 <= b <= 0xC6: - size, fmt, typ = _MSGPACK_HEADERS[b] - self._reserve(size) - if len(fmt) > 0: - n = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] - else: - n = self._buffer[self._buff_i] - self._buff_i += size + elif b == 0xC4: + typ = TYPE_BIN + self._reserve(1) + n = self._buffer[self._buff_i] + self._buff_i += 1 if n > self._max_bin_len: - raise ValueError(f"{n} exceeds max_bin_len({self._max_bin_len})") + raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) obj = self._read(n) - elif 0xC7 <= b <= 0xC9: - size, fmt, typ = _MSGPACK_HEADERS[b] - self._reserve(size) - L, n = struct.unpack_from(fmt, self._buffer, self._buff_i) - self._buff_i += size + elif b == 0xC5: + typ = TYPE_BIN + self._reserve(2) + n = _unpack_from(">H", 
self._buffer, self._buff_i)[0] + self._buff_i += 2 + if n > self._max_bin_len: + raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + obj = self._read(n) + elif b == 0xC6: + typ = TYPE_BIN + self._reserve(4) + n = _unpack_from(">I", self._buffer, self._buff_i)[0] + self._buff_i += 4 + if n > self._max_bin_len: + raise ValueError("%s exceeds max_bin_len(%s)" % (n, self._max_bin_len)) + obj = self._read(n) + elif b == 0xC7: # ext 8 + typ = TYPE_EXT + self._reserve(2) + L, n = _unpack_from("Bb", self._buffer, self._buff_i) + self._buff_i += 2 if L > self._max_ext_len: - raise ValueError(f"{L} exceeds max_ext_len({self._max_ext_len})") + raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) obj = self._read(L) - elif 0xCA <= b <= 0xD3: - size, fmt = _MSGPACK_HEADERS[b] - self._reserve(size) - if len(fmt) > 0: - obj = struct.unpack_from(fmt, self._buffer, self._buff_i)[0] - else: - obj = self._buffer[self._buff_i] - self._buff_i += size - elif 0xD4 <= b <= 0xD8: - size, fmt, typ = _MSGPACK_HEADERS[b] - if self._max_ext_len < size: - raise ValueError(f"{size} exceeds max_ext_len({self._max_ext_len})") - self._reserve(size + 1) - n, obj = struct.unpack_from(fmt, self._buffer, self._buff_i) - self._buff_i += size + 1 - elif 0xD9 <= b <= 0xDB: - size, fmt, typ = _MSGPACK_HEADERS[b] - self._reserve(size) - if len(fmt) > 0: - (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) - else: - n = self._buffer[self._buff_i] - self._buff_i += size + elif b == 0xC8: # ext 16 + typ = TYPE_EXT + self._reserve(3) + L, n = _unpack_from(">Hb", self._buffer, self._buff_i) + self._buff_i += 3 + if L > self._max_ext_len: + raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + obj = self._read(L) + elif b == 0xC9: # ext 32 + typ = TYPE_EXT + self._reserve(5) + L, n = _unpack_from(">Ib", self._buffer, self._buff_i) + self._buff_i += 5 + if L > self._max_ext_len: + raise ValueError("%s exceeds max_ext_len(%s)" % (L, self._max_ext_len)) + obj = self._read(L) + elif b == 0xCA: + self._reserve(4) + obj = _unpack_from(">f", self._buffer, self._buff_i)[0] + self._buff_i += 4 + elif b == 0xCB: + self._reserve(8) + obj = _unpack_from(">d", self._buffer, self._buff_i)[0] + self._buff_i += 8 + elif b == 0xCC: + self._reserve(1) + obj = self._buffer[self._buff_i] + self._buff_i += 1 + elif b == 0xCD: + self._reserve(2) + obj = _unpack_from(">H", self._buffer, self._buff_i)[0] + self._buff_i += 2 + elif b == 0xCE: + self._reserve(4) + obj = _unpack_from(">I", self._buffer, self._buff_i)[0] + self._buff_i += 4 + elif b == 0xCF: + self._reserve(8) + obj = _unpack_from(">Q", self._buffer, self._buff_i)[0] + self._buff_i += 8 + elif b == 0xD0: + self._reserve(1) + obj = _unpack_from("b", self._buffer, self._buff_i)[0] + self._buff_i += 1 + elif b == 0xD1: + self._reserve(2) + obj = _unpack_from(">h", self._buffer, self._buff_i)[0] + self._buff_i += 2 + elif b == 0xD2: + self._reserve(4) + obj = _unpack_from(">i", self._buffer, self._buff_i)[0] + self._buff_i += 4 + elif b == 0xD3: + self._reserve(8) + obj = _unpack_from(">q", self._buffer, self._buff_i)[0] + self._buff_i += 8 + elif b == 0xD4: # fixext 1 + typ = TYPE_EXT + if self._max_ext_len < 1: + raise ValueError("%s exceeds max_ext_len(%s)" % (1, self._max_ext_len)) + self._reserve(2) + n, obj = _unpack_from("b1s", self._buffer, self._buff_i) + self._buff_i += 2 + elif b == 0xD5: # fixext 2 + typ = TYPE_EXT + if self._max_ext_len < 2: + raise ValueError("%s exceeds max_ext_len(%s)" % (2, self._max_ext_len)) + 
self._reserve(3) + n, obj = _unpack_from("b2s", self._buffer, self._buff_i) + self._buff_i += 3 + elif b == 0xD6: # fixext 4 + typ = TYPE_EXT + if self._max_ext_len < 4: + raise ValueError("%s exceeds max_ext_len(%s)" % (4, self._max_ext_len)) + self._reserve(5) + n, obj = _unpack_from("b4s", self._buffer, self._buff_i) + self._buff_i += 5 + elif b == 0xD7: # fixext 8 + typ = TYPE_EXT + if self._max_ext_len < 8: + raise ValueError("%s exceeds max_ext_len(%s)" % (8, self._max_ext_len)) + self._reserve(9) + n, obj = _unpack_from("b8s", self._buffer, self._buff_i) + self._buff_i += 9 + elif b == 0xD8: # fixext 16 + typ = TYPE_EXT + if self._max_ext_len < 16: + raise ValueError("%s exceeds max_ext_len(%s)" % (16, self._max_ext_len)) + self._reserve(17) + n, obj = _unpack_from("b16s", self._buffer, self._buff_i) + self._buff_i += 17 + elif b == 0xD9: + typ = TYPE_RAW + self._reserve(1) + n = self._buffer[self._buff_i] + self._buff_i += 1 if n > self._max_str_len: - raise ValueError(f"{n} exceeds max_str_len({self._max_str_len})") + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) obj = self._read(n) - elif 0xDC <= b <= 0xDD: - size, fmt, typ = _MSGPACK_HEADERS[b] - self._reserve(size) - (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) - self._buff_i += size + elif b == 0xDA: + typ = TYPE_RAW + self._reserve(2) + (n,) = _unpack_from(">H", self._buffer, self._buff_i) + self._buff_i += 2 + if n > self._max_str_len: + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + obj = self._read(n) + elif b == 0xDB: + typ = TYPE_RAW + self._reserve(4) + (n,) = _unpack_from(">I", self._buffer, self._buff_i) + self._buff_i += 4 + if n > self._max_str_len: + raise ValueError("%s exceeds max_str_len(%s)", n, self._max_str_len) + obj = self._read(n) + elif b == 0xDC: + typ = TYPE_ARRAY + self._reserve(2) + (n,) = _unpack_from(">H", self._buffer, self._buff_i) + self._buff_i += 2 if n > self._max_array_len: - raise ValueError(f"{n} exceeds max_array_len({self._max_array_len})") - elif 0xDE <= b <= 0xDF: - size, fmt, typ = _MSGPACK_HEADERS[b] - self._reserve(size) - (n,) = struct.unpack_from(fmt, self._buffer, self._buff_i) - self._buff_i += size + raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + elif b == 0xDD: + typ = TYPE_ARRAY + self._reserve(4) + (n,) = _unpack_from(">I", self._buffer, self._buff_i) + self._buff_i += 4 + if n > self._max_array_len: + raise ValueError("%s exceeds max_array_len(%s)", n, self._max_array_len) + elif b == 0xDE: + self._reserve(2) + (n,) = _unpack_from(">H", self._buffer, self._buff_i) + self._buff_i += 2 if n > self._max_map_len: - raise ValueError(f"{n} exceeds max_map_len({self._max_map_len})") + raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + typ = TYPE_MAP + elif b == 0xDF: + self._reserve(4) + (n,) = _unpack_from(">I", self._buffer, self._buff_i) + self._buff_i += 4 + if n > self._max_map_len: + raise ValueError("%s exceeds max_map_len(%s)", n, self._max_map_len) + typ = TYPE_MAP else: raise FormatError("Unknown header: 0x%x" % b) return typ, n, obj def _unpack(self, execute=EX_CONSTRUCT): - typ, n, obj = self._read_header() + typ, n, obj = self._read_header(execute) if execute == EX_READ_ARRAY_HEADER: if typ != TYPE_ARRAY: @@ -499,12 +635,12 @@ class Unpacker: # TODO should we eliminate the recursion? 
if typ == TYPE_ARRAY: if execute == EX_SKIP: - for i in range(n): + for i in xrange(n): # TODO check whether we need to call `list_hook` self._unpack(EX_SKIP) return ret = newlist_hint(n) - for i in range(n): + for i in xrange(n): ret.append(self._unpack(EX_CONSTRUCT)) if self._list_hook is not None: ret = self._list_hook(ret) @@ -512,22 +648,25 @@ class Unpacker: return ret if self._use_list else tuple(ret) if typ == TYPE_MAP: if execute == EX_SKIP: - for i in range(n): + for i in xrange(n): # TODO check whether we need to call hooks self._unpack(EX_SKIP) self._unpack(EX_SKIP) return if self._object_pairs_hook is not None: ret = self._object_pairs_hook( - (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) for _ in range(n) + (self._unpack(EX_CONSTRUCT), self._unpack(EX_CONSTRUCT)) + for _ in xrange(n) ) else: ret = {} - for _ in range(n): + for _ in xrange(n): key = self._unpack(EX_CONSTRUCT) - if self._strict_map_key and type(key) not in (str, bytes): - raise ValueError("%s is not allowed for map key" % str(type(key))) - if isinstance(key, str): + if self._strict_map_key and type(key) not in (unicode, bytes): + raise ValueError( + "%s is not allowed for map key" % str(type(key)) + ) + if not PY2 and type(key) is str: key = sys.intern(key) ret[key] = self._unpack(EX_CONSTRUCT) if self._object_hook is not None: @@ -601,7 +740,7 @@ class Unpacker: return self._stream_offset -class Packer: +class Packer(object): """ MessagePack Packer @@ -613,8 +752,7 @@ class Packer: Packer's constructor has some keyword arguments: - :param default: - When specified, it should be callable. + :param callable default: Convert user type to builtin type that Packer supports. See also simplejson's document. @@ -641,18 +779,38 @@ class Packer: If set to true, datetime with tzinfo is packed into Timestamp type. Note that the tzinfo is stripped in the timestamp. You can get UTC datetime with `timestamp=3` option of the Unpacker. + (Python 2 is not supported). :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. - :param int buf_size: - Internal buffer size. This option is used only for C implementation. + Example of streaming deserialize from file-like object:: + + unpacker = Unpacker(file_like) + for o in unpacker: + process(o) + + Example of streaming deserialize from socket:: + + unpacker = Unpacker() + while True: + buf = sock.recv(1024**2) + if not buf: + break + unpacker.feed(buf) + for o in unpacker: + process(o) + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``OutOfData`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. 
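The `default` hook described above converts otherwise unsupported objects into something the packer understands; a sketch of typical usage (the `encode_set` helper and the "__set__" marker key are illustrative choices, not part of the library)::

    import msgpack

    def encode_set(obj):
        if isinstance(obj, set):
            return {"__set__": sorted(obj)}
        raise TypeError("cannot serialize %r" % (obj,))

    packed = msgpack.packb({"ids": {3, 1, 2}}, default=encode_set)
    assert msgpack.unpackb(packed) == {"ids": {"__set__": [1, 2, 3]}}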
""" def __init__( self, - *, default=None, use_single_float=False, autoreset=True, @@ -660,17 +818,19 @@ class Packer: strict_types=False, datetime=False, unicode_errors=None, - buf_size=None, ): self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type - self._buffer = BytesIO() + self._buffer = StringIO() + if PY2 and datetime: + raise ValueError("datetime is not supported in Python 2") self._datetime = bool(datetime) self._unicode_errors = unicode_errors or "strict" - if default is not None and not callable(default): - raise TypeError("default must be callable") + if default is not None: + if not callable(default): + raise TypeError("default must be callable") self._default = default def _pack( @@ -695,7 +855,7 @@ class Packer: if obj: return self._buffer.write(b"\xc3") return self._buffer.write(b"\xc2") - if check(obj, int): + if check(obj, int_types): if 0 <= obj < 0x80: return self._buffer.write(struct.pack("B", obj)) if -0x20 <= obj < 0: @@ -723,20 +883,20 @@ class Packer: raise OverflowError("Integer value out of range") if check(obj, (bytes, bytearray)): n = len(obj) - if n >= 2**32: + if n >= 2 ** 32: raise ValueError("%s is too large" % type(obj).__name__) self._pack_bin_header(n) return self._buffer.write(obj) - if check(obj, str): + if check(obj, unicode): obj = obj.encode("utf-8", self._unicode_errors) n = len(obj) - if n >= 2**32: + if n >= 2 ** 32: raise ValueError("String is too large") self._pack_raw_header(n) return self._buffer.write(obj) if check(obj, memoryview): - n = obj.nbytes - if n >= 2**32: + n = len(obj) * obj.itemsize + if n >= 2 ** 32: raise ValueError("Memoryview is too large") self._pack_bin_header(n) return self._buffer.write(obj) @@ -776,11 +936,13 @@ class Packer: if check(obj, list_types): n = len(obj) self._pack_array_header(n) - for i in range(n): + for i in xrange(n): self._pack(obj[i], nest_limit - 1) return if check(obj, dict): - return self._pack_map_pairs(len(obj), obj.items(), nest_limit - 1) + return self._pack_map_pairs( + len(obj), dict_iteritems(obj), nest_limit - 1 + ) if self._datetime and check(obj, _DateTime) and obj.tzinfo is not None: obj = Timestamp.from_datetime(obj) @@ -791,46 +953,42 @@ class Packer: obj = self._default(obj) default_used = 1 continue - - if self._datetime and check(obj, _DateTime): - raise ValueError(f"Cannot serialize {obj!r} where tzinfo=None") - - raise TypeError(f"Cannot serialize {obj!r}") + raise TypeError("Cannot serialize %r" % (obj,)) def pack(self, obj): try: self._pack(obj) except: - self._buffer = BytesIO() # force reset + self._buffer = StringIO() # force reset raise if self._autoreset: ret = self._buffer.getvalue() - self._buffer = BytesIO() + self._buffer = StringIO() return ret def pack_map_pairs(self, pairs): self._pack_map_pairs(len(pairs), pairs) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = BytesIO() + self._buffer = StringIO() return ret def pack_array_header(self, n): - if n >= 2**32: + if n >= 2 ** 32: raise ValueError self._pack_array_header(n) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = BytesIO() + self._buffer = StringIO() return ret def pack_map_header(self, n): - if n >= 2**32: + if n >= 2 ** 32: raise ValueError self._pack_map_header(n) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = BytesIO() + self._buffer = StringIO() return ret def pack_ext_type(self, typecode, data): @@ -882,7 +1040,7 @@ class Packer: def _pack_map_pairs(self, n, pairs, 
nest_limit=DEFAULT_RECURSE_LIMIT): self._pack_map_header(n) - for k, v in pairs: + for (k, v) in pairs: self._pack(k, nest_limit - 1) self._pack(v, nest_limit - 1) @@ -919,11 +1077,11 @@ class Packer: This method is useful only when autoreset=False. """ - self._buffer = BytesIO() + self._buffer = StringIO() def getbuffer(self): """Return view of internal buffer.""" - if _USING_STRINGBUILDER: + if USING_STRINGBUILDER or PY2: return memoryview(self.bytes()) else: return self._buffer.getbuffer() diff --git a/msgpack/pack.h b/msgpack/pack.h index edf3a3f..4f3ce1d 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -21,12 +21,15 @@ #include "sysdep.h" #include #include -#include #ifdef __cplusplus extern "C" { #endif +#ifdef _MSC_VER +#define inline __inline +#endif + typedef struct msgpack_packer { char *buf; size_t length; @@ -64,6 +67,53 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ #include "pack_template.h" +// return -2 when o is too long +static inline int +msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit) +{ +#if PY_MAJOR_VERSION >= 3 + assert(PyUnicode_Check(o)); + + Py_ssize_t len; + const char* buf = PyUnicode_AsUTF8AndSize(o, &len); + if (buf == NULL) + return -1; + + if (len > limit) { + return -2; + } + + int ret = msgpack_pack_raw(pk, len); + if (ret) return ret; + + return msgpack_pack_raw_body(pk, buf, len); +#else + PyObject *bytes; + Py_ssize_t len; + int ret; + + // py2 + bytes = PyUnicode_AsUTF8String(o); + if (bytes == NULL) + return -1; + + len = PyString_GET_SIZE(bytes); + if (len > limit) { + Py_DECREF(bytes); + return -2; + } + + ret = msgpack_pack_raw(pk, len); + if (ret) { + Py_DECREF(bytes); + return -1; + } + ret = msgpack_pack_raw_body(pk, PyString_AS_STRING(bytes), len); + Py_DECREF(bytes); + return ret; +#endif +} + #ifdef __cplusplus } #endif diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index b8959f0..0e940b8 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -37,6 +37,18 @@ * Integer */ +#define msgpack_pack_real_uint8(x, d) \ +do { \ + if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ +} while(0) + #define msgpack_pack_real_uint16(x, d) \ do { \ if(d < (1<<7)) { \ @@ -111,6 +123,18 @@ do { \ } \ } while(0) +#define msgpack_pack_real_int8(x, d) \ +do { \ + if(d < -(1<<5)) { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ + } \ +} while(0) + #define msgpack_pack_real_int16(x, d) \ do { \ if(d < -(1<<5)) { \ @@ -240,6 +264,49 @@ do { \ } while(0) +static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d) +{ + msgpack_pack_real_uint8(x, d); +} + +static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d) +{ + msgpack_pack_real_uint16(x, d); +} + +static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d) +{ + msgpack_pack_real_uint32(x, d); +} + +static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d) +{ + msgpack_pack_real_uint64(x, d); +} + +static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d) +{ + msgpack_pack_real_int8(x, d); +} + +static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d) +{ + msgpack_pack_real_int16(x, d); +} + +static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d) +{ 
+ msgpack_pack_real_int32(x, d); +} + +static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d) +{ + msgpack_pack_real_int64(x, d); +} + + +//#ifdef msgpack_pack_inline_func_cint + static inline int msgpack_pack_short(msgpack_packer* x, short d) { #if defined(SIZEOF_SHORT) @@ -305,37 +372,192 @@ if(sizeof(int) == 2) { static inline int msgpack_pack_long(msgpack_packer* x, long d) { #if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 4 +#if SIZEOF_LONG == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_LONG == 4 msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #elif defined(LONG_MAX) -#if LONG_MAX == 0x7fffffffL +#if LONG_MAX == 0x7fffL + msgpack_pack_real_int16(x, d); +#elif LONG_MAX == 0x7fffffffL msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #else - if (sizeof(long) == 4) { - msgpack_pack_real_int32(x, d); - } else { - msgpack_pack_real_int64(x, d); - } +if(sizeof(long) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(long) == 4) { + msgpack_pack_real_int32(x, d); +} else { + msgpack_pack_real_int64(x, d); +} #endif } static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) { +#if defined(SIZEOF_LONG_LONG) +#if SIZEOF_LONG_LONG == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_LONG_LONG == 4 + msgpack_pack_real_int32(x, d); +#else msgpack_pack_real_int64(x, d); +#endif + +#elif defined(LLONG_MAX) +#if LLONG_MAX == 0x7fffL + msgpack_pack_real_int16(x, d); +#elif LLONG_MAX == 0x7fffffffL + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#else +if(sizeof(long long) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(long long) == 4) { + msgpack_pack_real_int32(x, d); +} else { + msgpack_pack_real_int64(x, d); +} +#endif +} + +static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d) +{ +#if defined(SIZEOF_SHORT) +#if SIZEOF_SHORT == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_SHORT == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(USHRT_MAX) +#if USHRT_MAX == 0xffffU + msgpack_pack_real_uint16(x, d); +#elif USHRT_MAX == 0xffffffffU + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned short) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned short) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif +} + +static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d) +{ +#if defined(SIZEOF_INT) +#if SIZEOF_INT == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_INT == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(UINT_MAX) +#if UINT_MAX == 0xffffU + msgpack_pack_real_uint16(x, d); +#elif UINT_MAX == 0xffffffffU + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned int) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned int) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif +} + +static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d) +{ +#if defined(SIZEOF_LONG) +#if SIZEOF_LONG == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_LONG == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(ULONG_MAX) +#if ULONG_MAX == 0xffffUL + msgpack_pack_real_uint16(x, d); +#elif ULONG_MAX == 0xffffffffUL + 
msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned long) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned long) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif } static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d) { +#if defined(SIZEOF_LONG_LONG) +#if SIZEOF_LONG_LONG == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_LONG_LONG == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(ULLONG_MAX) +#if ULLONG_MAX == 0xffffUL + msgpack_pack_real_uint16(x, d); +#elif ULLONG_MAX == 0xffffffffUL + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned long long) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned long long) == 4) { + msgpack_pack_real_uint32(x, d); +} else { msgpack_pack_real_uint64(x, d); } +#endif +} + +//#undef msgpack_pack_inline_func_cint +//#endif + /* @@ -346,12 +568,7 @@ static inline int msgpack_pack_float(msgpack_packer* x, float d) { unsigned char buf[5]; buf[0] = 0xca; - -#if PY_VERSION_HEX >= 0x030B00A7 - PyFloat_Pack4(d, (char *)&buf[1], 0); -#else _PyFloat_Pack4(d, &buf[1], 0); -#endif msgpack_pack_append_buffer(x, buf, 5); } @@ -359,11 +576,7 @@ static inline int msgpack_pack_double(msgpack_packer* x, double d) { unsigned char buf[9]; buf[0] = 0xcb; -#if PY_VERSION_HEX >= 0x030B00A7 - PyFloat_Pack8(d, (char *)&buf[1], 0); -#else _PyFloat_Pack8(d, &buf[1], 0); -#endif msgpack_pack_append_buffer(x, buf, 9); } @@ -588,9 +801,11 @@ static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uin #undef TAKE8_32 #undef TAKE8_64 +#undef msgpack_pack_real_uint8 #undef msgpack_pack_real_uint16 #undef msgpack_pack_real_uint32 #undef msgpack_pack_real_uint64 +#undef msgpack_pack_real_int8 #undef msgpack_pack_real_int16 #undef msgpack_pack_real_int32 #undef msgpack_pack_real_int64 diff --git a/msgpack/sysdep.h b/msgpack/sysdep.h index 7067300..ed9c1bc 100644 --- a/msgpack/sysdep.h +++ b/msgpack/sysdep.h @@ -61,14 +61,14 @@ typedef unsigned int _msgpack_atomic_counter_t; #endif #endif -#else /* _WIN32 */ -#include /* ntohs, ntohl */ +#else +#include /* __BYTE_ORDER */ #endif #if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#if __BYTE_ORDER == __LITTLE_ENDIAN #define __LITTLE_ENDIAN__ -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#elif __BYTE_ORDER == __BIG_ENDIAN #define __BIG_ENDIAN__ #elif _WIN32 #define __LITTLE_ENDIAN__ @@ -95,7 +95,7 @@ typedef unsigned int _msgpack_atomic_counter_t; #ifdef _WIN32 # if defined(ntohl) # define _msgpack_be32(x) ntohl(x) -# elif defined(_byteswap_ulong) || defined(_MSC_VER) +# elif defined(_byteswap_ulong) || (defined(_MSC_VER) && _MSC_VER >= 1400) # define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) # else # define _msgpack_be32(x) \ @@ -108,7 +108,7 @@ typedef unsigned int _msgpack_atomic_counter_t; # define _msgpack_be32(x) ntohl(x) #endif -#if defined(_byteswap_uint64) || defined(_MSC_VER) +#if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400) # define _msgpack_be64(x) (_byteswap_uint64(x)) #elif defined(bswap_64) # define _msgpack_be64(x) bswap_64(x) diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 58a2f4f..34212bc 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -47,7 +47,7 @@ static inline msgpack_unpack_object 
unpack_callback_root(unpack_user* u) static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromLong((long)d); + PyObject *p = PyInt_FromLong((long)d); if (!p) return -1; *o = p; @@ -61,7 +61,7 @@ static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpac static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromSize_t((size_t)d); + PyObject *p = PyInt_FromSize_t((size_t)d); if (!p) return -1; *o = p; @@ -74,7 +74,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp if (d > LONG_MAX) { p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); } else { - p = PyLong_FromLong((long)d); + p = PyInt_FromLong((long)d); } if (!p) return -1; @@ -84,7 +84,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromLong(d); + PyObject *p = PyInt_FromLong(d); if (!p) return -1; *o = p; @@ -107,7 +107,7 @@ static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpac if (d > LONG_MAX || d < LONG_MIN) { p = PyLong_FromLongLong((PY_LONG_LONG)d); } else { - p = PyLong_FromLong((long)d); + p = PyInt_FromLong((long)d); } *o = p; return 0; @@ -193,7 +193,7 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_un static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) { if (u->strict_map_key && !PyUnicode_CheckExact(k) && !PyBytes_CheckExact(k)) { - PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key when strict_map_key=True", Py_TYPE(k)->tp_name); + PyErr_Format(PyExc_ValueError, "%.100s is not allowed for map key", Py_TYPE(k)->tp_name); return -1; } if (PyUnicode_CheckExact(k)) { diff --git a/msgpack/unpack_container_header.h b/msgpack/unpack_container_header.h deleted file mode 100644 index c14a3c2..0000000 --- a/msgpack/unpack_container_header.h +++ /dev/null @@ -1,51 +0,0 @@ -static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) -{ - assert(len >= *off); - uint32_t size; - const unsigned char *const p = (unsigned char*)data + *off; - -#define inc_offset(inc) \ - if (len - *off < inc) \ - return 0; \ - *off += inc; - - switch (*p) { - case var_offset: - inc_offset(3); - size = _msgpack_load16(uint16_t, p + 1); - break; - case var_offset + 1: - inc_offset(5); - size = _msgpack_load32(uint32_t, p + 1); - break; -#ifdef USE_CASE_RANGE - case fixed_offset + 0x0 ... 
fixed_offset + 0xf: -#else - case fixed_offset + 0x0: - case fixed_offset + 0x1: - case fixed_offset + 0x2: - case fixed_offset + 0x3: - case fixed_offset + 0x4: - case fixed_offset + 0x5: - case fixed_offset + 0x6: - case fixed_offset + 0x7: - case fixed_offset + 0x8: - case fixed_offset + 0x9: - case fixed_offset + 0xa: - case fixed_offset + 0xb: - case fixed_offset + 0xc: - case fixed_offset + 0xd: - case fixed_offset + 0xe: - case fixed_offset + 0xf: -#endif - ++*off; - size = ((unsigned int)*p) & 0x0f; - break; - default: - PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); - return -1; - } - unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); - return 1; -} - diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index cce29e7..9924b9c 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -75,7 +75,8 @@ static inline void unpack_clear(unpack_context *ctx) Py_CLEAR(ctx->stack[0].obj); } -static inline int unpack_execute(bool construct, unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +template +static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) { assert(len >= *off); @@ -242,20 +243,10 @@ static inline int unpack_execute(bool construct, unpack_context* ctx, const char _msgpack_load32(uint32_t,n)+1, _ext_zero); case CS_FLOAT: { - double f; -#if PY_VERSION_HEX >= 0x030B00A7 - f = PyFloat_Unpack4((const char*)n, 0); -#else - f = _PyFloat_Unpack4((unsigned char*)n, 0); -#endif + double f = _PyFloat_Unpack4((unsigned char*)n, 0); push_fixed_value(_float, f); } case CS_DOUBLE: { - double f; -#if PY_VERSION_HEX >= 0x030B00A7 - f = PyFloat_Unpack8((const char*)n, 0); -#else - f = _PyFloat_Unpack8((unsigned char*)n, 0); -#endif + double f = _PyFloat_Unpack8((unsigned char*)n, 0); push_fixed_value(_double, f); } case CS_UINT_8: push_fixed_value(_uint8, *(uint8_t*)n); @@ -385,7 +376,6 @@ _end: #undef construct_cb } -#undef NEXT_CS #undef SWITCH_RANGE_BEGIN #undef SWITCH_RANGE #undef SWITCH_RANGE_DEFAULT @@ -397,27 +387,68 @@ _end: #undef again_fixed_trail_if_zero #undef start_container -static int unpack_construct(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { - return unpack_execute(1, ctx, data, len, off); -} -static int unpack_skip(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { - return unpack_execute(0, ctx, data, len, off); +template +static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +{ + assert(len >= *off); + uint32_t size; + const unsigned char *const p = (unsigned char*)data + *off; + +#define inc_offset(inc) \ + if (len - *off < inc) \ + return 0; \ + *off += inc; + + switch (*p) { + case var_offset: + inc_offset(3); + size = _msgpack_load16(uint16_t, p + 1); + break; + case var_offset + 1: + inc_offset(5); + size = _msgpack_load32(uint32_t, p + 1); + break; +#ifdef USE_CASE_RANGE + case fixed_offset + 0x0 ... 
fixed_offset + 0xf: +#else + case fixed_offset + 0x0: + case fixed_offset + 0x1: + case fixed_offset + 0x2: + case fixed_offset + 0x3: + case fixed_offset + 0x4: + case fixed_offset + 0x5: + case fixed_offset + 0x6: + case fixed_offset + 0x7: + case fixed_offset + 0x8: + case fixed_offset + 0x9: + case fixed_offset + 0xa: + case fixed_offset + 0xb: + case fixed_offset + 0xc: + case fixed_offset + 0xd: + case fixed_offset + 0xe: + case fixed_offset + 0xf: +#endif + ++*off; + size = ((unsigned int)*p) & 0x0f; + break; + default: + PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); + return -1; + } + unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); + return 1; } -#define unpack_container_header read_array_header -#define fixed_offset 0x90 -#define var_offset 0xdc -#include "unpack_container_header.h" -#undef unpack_container_header -#undef fixed_offset -#undef var_offset +#undef SWITCH_RANGE_BEGIN +#undef SWITCH_RANGE +#undef SWITCH_RANGE_DEFAULT +#undef SWITCH_RANGE_END -#define unpack_container_header read_map_header -#define fixed_offset 0x80 -#define var_offset 0xde -#include "unpack_container_header.h" -#undef unpack_container_header -#undef fixed_offset -#undef var_offset +static const execute_fn unpack_construct = &unpack_execute; +static const execute_fn unpack_skip = &unpack_execute; +static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>; +static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>; + +#undef NEXT_CS /* vim: set ts=4 sw=4 sts=4 expandtab */ diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index c69d5a7..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,45 +0,0 @@ -[build-system] -requires = ["setuptools >= 78.1.1"] -build-backend = "setuptools.build_meta" - -[project] -name = "msgpack" -dynamic = ["version"] -license = "Apache-2.0" -authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}] -description = "MessagePack serializer" -readme = "README.md" -keywords = ["msgpack", "messagepack", "serializer", "serialization", "binary"] -requires-python = ">=3.10" -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Operating System :: OS Independent", - "Topic :: File Formats", - "Intended Audience :: Developers", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", -] - -[project.urls] -Homepage = "https://msgpack.org/" -Documentation = "https://msgpack-python.readthedocs.io/" -Repository = "https://github.com/msgpack/msgpack-python/" -Tracker = "https://github.com/msgpack/msgpack-python/issues" -Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst" - -[tool.setuptools] -# Do not install C/C++/Cython source files -include-package-data = false - -[tool.setuptools.dynamic] -version = {attr = "msgpack.__version__"} - -[tool.ruff] -line-length = 100 -target-version = "py310" -lint.select = [ - "E", # pycodestyle - "F", # Pyflakes - "I", # isort - #"UP", pyupgrade -] diff --git a/requirements.txt b/requirements.txt index 9e4643b..a2cce25 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1 @@ -Cython==3.2.1 -setuptools==78.1.1 -build +Cython~=0.29.13 diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 index 4029e9e..8e88750 --- a/setup.py +++ b/setup.py @@ -1,24 +1,98 @@ #!/usr/bin/env python +# coding: utf-8 +import io import os import sys +from glob import glob +from distutils.command.sdist import sdist +from setuptools import setup, 
Extension + +from distutils.command.build_ext import build_ext -from setuptools import Extension, setup PYPY = hasattr(sys, "pypy_version_info") +PY2 = sys.version_info[0] == 2 + + +class NoCython(Exception): + pass + + +try: + import Cython.Compiler.Main as cython_compiler + + have_cython = True +except ImportError: + have_cython = False + + +def cythonize(src): + sys.stderr.write("cythonize: %r\n" % (src,)) + cython_compiler.compile([src], cplus=True) + + +def ensure_source(src): + pyx = os.path.splitext(src)[0] + ".pyx" + + if not os.path.exists(src): + if not have_cython: + raise NoCython + cythonize(pyx) + elif ( + os.path.exists(pyx) + and os.stat(src).st_mtime < os.stat(pyx).st_mtime + and have_cython + ): + cythonize(pyx) + return src + + +class BuildExt(build_ext): + def build_extension(self, ext): + try: + ext.sources = list(map(ensure_source, ext.sources)) + except NoCython: + print("WARNING") + print("Cython is required for building extension from checkout.") + print("Install Cython >= 0.16 or install msgpack from PyPI.") + print("Falling back to pure Python implementation.") + return + try: + return build_ext.build_extension(self, ext) + except Exception as e: + print("WARNING: Failed to compile extension modules.") + print("msgpack uses fallback pure python implementation.") + print(e) + + +exec(open("msgpack/_version.py").read()) + +version_str = ".".join(str(x) for x in version[:3]) +if len(version) > 3 and version[3] != "final": + version_str += version[3] + +# Cython is required for sdist +class Sdist(sdist): + def __init__(self, *args, **kwargs): + cythonize("msgpack/_cmsgpack.pyx") + sdist.__init__(self, *args, **kwargs) + libraries = [] -macros = [] -ext_modules = [] - if sys.platform == "win32": libraries.append("ws2_32") + +if sys.byteorder == "big": + macros = [("__BIG_ENDIAN__", "1")] +else: macros = [("__LITTLE_ENDIAN__", "1")] -if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"): +ext_modules = [] +if not PYPY and not PY2: ext_modules.append( Extension( "msgpack._cmsgpack", - sources=["msgpack/_cmsgpack.c"], + sources=["msgpack/_cmsgpack.cpp"], libraries=libraries, include_dirs=["."], define_macros=macros, @@ -26,7 +100,42 @@ if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"): ) del libraries, macros + +desc = "MessagePack (de)serializer." 
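Because BuildExt above silently falls back to the pure-Python implementation when compilation fails, it can be worth checking which backend an installation actually uses (a sketch; the module names are the ones referenced elsewhere in this patch)::

    import msgpack

    # 'msgpack._cmsgpack' when the C extension built, 'msgpack.fallback' otherwise
    print(msgpack.Packer.__module__)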
+with io.open("README.md", encoding="utf-8") as f: + long_desc = f.read() +del f + setup( + name="msgpack", + author="Inada Naoki", + author_email="songofacandy@gmail.com", + version=version_str, + cmdclass={"build_ext": BuildExt, "sdist": Sdist}, ext_modules=ext_modules, packages=["msgpack"], + description=desc, + long_description=long_desc, + long_description_content_type="text/markdown", + url="https://msgpack.org/", + project_urls={ + "Documentation": "https://msgpack-python.readthedocs.io/", + "Source": "https://github.com/msgpack/msgpack-python", + "Tracker": "https://github.com/msgpack/msgpack-python/issues", + }, + license="Apache 2.0", + classifiers=[ + "Programming Language :: Python :: 2", + "Programming Language :: Python :: 2.7", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + ], ) diff --git a/test/test_buffer.py b/test/test_buffer.py index ca09722..62507cf 100644 --- a/test/test_buffer.py +++ b/test/test_buffer.py @@ -1,8 +1,12 @@ -from pytest import raises +#!/usr/bin/env python +# coding: utf-8 -from msgpack import Packer, packb, unpackb +import sys +import pytest +from msgpack import packb, unpackb +@pytest.mark.skipif(sys.version_info[0] == 2, reason="Python 2 is not supported") def test_unpack_buffer(): from array import array @@ -17,7 +21,7 @@ def test_unpack_bytearray(): obj = unpackb(buf, use_list=1) assert [b"foo", b"bar"] == obj expected_type = bytes - assert all(type(s) is expected_type for s in obj) + assert all(type(s) == expected_type for s in obj) def test_unpack_memoryview(): @@ -26,24 +30,4 @@ def test_unpack_memoryview(): obj = unpackb(view, use_list=1) assert [b"foo", b"bar"] == obj expected_type = bytes - assert all(type(s) is expected_type for s in obj) - - -def test_packer_getbuffer(): - packer = Packer(autoreset=False) - packer.pack_array_header(2) - packer.pack(42) - packer.pack("hello") - buffer = packer.getbuffer() - assert isinstance(buffer, memoryview) - assert bytes(buffer) == b"\x92*\xa5hello" - - if Packer.__module__ == "msgpack._cmsgpack": # only for Cython - # cython Packer supports buffer protocol directly - assert bytes(packer) == b"\x92*\xa5hello" - - with raises(BufferError): - packer.pack(42) - buffer.release() - packer.pack(42) - assert bytes(packer) == b"\x92*\xa5hello*" + assert all(type(s) == expected_type for s in obj) diff --git a/test/test_case.py b/test/test_case.py index c4c615e..a0a3c5a 100644 --- a/test/test_case.py +++ b/test/test_case.py @@ -1,10 +1,11 @@ #!/usr/bin/env python +# coding: utf-8 from msgpack import packb, unpackb def check(length, obj, use_bin_type=True): v = packb(obj, use_bin_type=use_bin_type) - assert len(v) == length, f"{obj!r} length should be {length!r} but get {len(v)!r}" + assert len(v) == length, "%r length should be %r but get %r" % (obj, length, len(v)) assert unpackb(v, use_list=0, raw=not use_bin_type) == obj @@ -119,11 +120,11 @@ def test_match(): ), ({}, b"\x80"), ( - {x: x for x in range(15)}, + dict([(x, x) for x in range(15)]), b"\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e", ), ( - {x: x for x in 
+            dict([(x, x) for x in range(16)]),
             b"\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e\x0f\x0f",
         ),
     ]
@@ -133,4 +134,4 @@
 def test_unicode():
-    assert unpackb(packb("foobar"), use_list=1) == "foobar"
+    assert unpackb(packb(u"foobar"), use_list=1) == u"foobar"
diff --git a/test/test_except.py b/test/test_except.py
index b77ac80..5544f2b 100644
--- a/test/test_except.py
+++ b/test/test_except.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python
-
-import datetime
+# coding: utf-8
 
 from pytest import raises
+from msgpack import packb, unpackb, Unpacker, FormatError, StackError, OutOfData
 
-from msgpack import FormatError, OutOfData, StackError, Unpacker, packb, unpackb
+import datetime
 
 
 class DummyException(Exception):
@@ -53,7 +53,7 @@ def test_invalidvalue():
 
 
 def test_strict_map_key():
-    valid = {"unicode": 1, b"bytes": 2}
+    valid = {u"unicode": 1, b"bytes": 2}
     packed = packb(valid, use_bin_type=True)
     assert valid == unpackb(packed, raw=False, strict_map_key=True)
diff --git a/test/test_extension.py b/test/test_extension.py
index aaf0fd9..6b36575 100644
--- a/test/test_extension.py
+++ b/test/test_extension.py
@@ -1,5 +1,5 @@
+from __future__ import print_function
 import array
-
 import msgpack
 from msgpack import ExtType
 
@@ -17,7 +17,9 @@ def test_pack_ext_type():
     assert p(b"A" * 16) == b"\xd8\x42" + b"A" * 16  # fixext 16
     assert p(b"ABC") == b"\xc7\x03\x42ABC"  # ext 8
     assert p(b"A" * 0x0123) == b"\xc8\x01\x23\x42" + b"A" * 0x0123  # ext 16
-    assert p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345  # ext 32
+    assert (
+        p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345
+    )  # ext 32
 
 
 def test_unpack_ext_type():
@@ -47,13 +49,16 @@ def test_extension_type():
             except AttributeError:
                 data = obj.tostring()
             return ExtType(typecode, data)
-        raise TypeError(f"Unknown type object {obj!r}")
+        raise TypeError("Unknown type object %r" % (obj,))
 
     def ext_hook(code, data):
         print("ext_hook called", code, data)
         assert code == 123
         obj = array.array("d")
-        obj.frombytes(data)
+        try:
+            obj.frombytes(data)
+        except AttributeError:  # PY2
+            obj.fromstring(data)
         return obj
 
     obj = [42, b"hello", array.array("d", [1.1, 2.2, 3.3])]
@@ -62,14 +67,20 @@ def test_extension_type():
     assert obj == obj2
 
 
+import sys
+
+if sys.version > "3":
+    long = int
+
+
 def test_overriding_hooks():
     def default(obj):
-        if isinstance(obj, int):
+        if isinstance(obj, long):
             return {"__type__": "long", "__data__": str(obj)}
         else:
            return obj
 
-    obj = {"testval": 1823746192837461928374619}
+    obj = {"testval": long(1823746192837461928374619)}
     refobj = {"testval": default(obj["testval"])}
     refout = msgpack.packb(refobj)
     assert isinstance(refout, (str, bytes))
diff --git a/test/test_format.py b/test/test_format.py
index c06c87d..fbbc3f9 100644
--- a/test/test_format.py
+++ b/test/test_format.py
@@ -1,4 +1,5 @@
 #!/usr/bin/env python
+# coding: utf-8
 
 from msgpack import unpackb
 
@@ -24,7 +25,9 @@ def testFixRaw():
 
 
 def testFixMap():
-    check(b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}})
+    check(
+        b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}}
+    )
 
 
 def testUnsignedInt():
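
A side note on the byte-level assertions in test_case.py and test_format.py above: the expected byte strings can be reproduced directly with packb/unpackb. A minimal standalone sketch (not part of the patch; the bytes are the standard MessagePack codes for nil, false and an empty fixmap):

import msgpack

# single-byte MessagePack codes
assert msgpack.packb(None) == b"\xc0"   # nil
assert msgpack.packb(False) == b"\xc2"  # false
assert msgpack.packb({}) == b"\x80"     # fixmap with zero entries

# and back again
assert msgpack.unpackb(b"\x80") == {}
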
diff --git a/test/test_limits.py b/test/test_limits.py
index 9b92b4d..65e6bcc 100644
--- a/test/test_limits.py
+++ b/test/test_limits.py
@@ -1,25 +1,27 @@
 #!/usr/bin/env python
+# coding: utf-8
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import pytest
 
 from msgpack import (
-    ExtType,
-    Packer,
-    PackOverflowError,
-    PackValueError,
-    Unpacker,
-    UnpackValueError,
     packb,
     unpackb,
+    Packer,
+    Unpacker,
+    ExtType,
+    PackOverflowError,
+    PackValueError,
+    UnpackValueError,
 )
 
 
 def test_integer():
-    x = -(2**63)
+    x = -(2 ** 63)
     assert unpackb(packb(x)) == x
     with pytest.raises(PackOverflowError):
         packb(x - 1)
 
-    x = 2**64 - 1
+    x = 2 ** 64 - 1
     assert unpackb(packb(x)) == x
     with pytest.raises(PackOverflowError):
         packb(x + 1)
@@ -27,16 +29,16 @@ def test_integer():
 
 def test_array_header():
     packer = Packer()
-    packer.pack_array_header(2**32 - 1)
+    packer.pack_array_header(2 ** 32 - 1)
     with pytest.raises(PackValueError):
-        packer.pack_array_header(2**32)
+        packer.pack_array_header(2 ** 32)
 
 
 def test_map_header():
     packer = Packer()
-    packer.pack_map_header(2**32 - 1)
+    packer.pack_map_header(2 ** 32 - 1)
     with pytest.raises(PackValueError):
-        packer.pack_array_header(2**32)
+        packer.pack_array_header(2 ** 32)
 
 
 def test_max_str_len():
diff --git a/test/test_memoryview.py b/test/test_memoryview.py
index 0a2a6f5..86b2c1f 100644
--- a/test/test_memoryview.py
+++ b/test/test_memoryview.py
@@ -1,8 +1,15 @@
 #!/usr/bin/env python
+# coding: utf-8
 
+import pytest
 from array import array
-
 from msgpack import packb, unpackb
+import sys
+
+
+pytestmark = pytest.mark.skipif(
+    sys.version_info[0] < 3, reason="Only Python 3 supports buffer protocol"
+)
 
 
 def make_array(f, data):
@@ -46,54 +53,46 @@ def test_fixstr_from_float():
 
 
 def test_str16_from_byte():
-    _runtest("B", 2**8, b"\xda", b"\x01\x00", False)
-    _runtest("B", 2**16 - 1, b"\xda", b"\xff\xff", False)
+    _runtest("B", 2 ** 8, b"\xda", b"\x01\x00", False)
+    _runtest("B", 2 ** 16 - 1, b"\xda", b"\xff\xff", False)
 
 
 def test_str16_from_float():
-    _runtest("f", 2**8, b"\xda", b"\x01\x00", False)
-    _runtest("f", 2**16 - 4, b"\xda", b"\xff\xfc", False)
+    _runtest("f", 2 ** 8, b"\xda", b"\x01\x00", False)
+    _runtest("f", 2 ** 16 - 4, b"\xda", b"\xff\xfc", False)
 
 
 def test_str32_from_byte():
-    _runtest("B", 2**16, b"\xdb", b"\x00\x01\x00\x00", False)
+    _runtest("B", 2 ** 16, b"\xdb", b"\x00\x01\x00\x00", False)
 
 
 def test_str32_from_float():
-    _runtest("f", 2**16, b"\xdb", b"\x00\x01\x00\x00", False)
+    _runtest("f", 2 ** 16, b"\xdb", b"\x00\x01\x00\x00", False)
 
 
 def test_bin8_from_byte():
     _runtest("B", 1, b"\xc4", b"\x01", True)
-    _runtest("B", 2**8 - 1, b"\xc4", b"\xff", True)
+    _runtest("B", 2 ** 8 - 1, b"\xc4", b"\xff", True)
 
 
 def test_bin8_from_float():
     _runtest("f", 4, b"\xc4", b"\x04", True)
-    _runtest("f", 2**8 - 4, b"\xc4", b"\xfc", True)
+    _runtest("f", 2 ** 8 - 4, b"\xc4", b"\xfc", True)
 
 
 def test_bin16_from_byte():
-    _runtest("B", 2**8, b"\xc5", b"\x01\x00", True)
-    _runtest("B", 2**16 - 1, b"\xc5", b"\xff\xff", True)
+    _runtest("B", 2 ** 8, b"\xc5", b"\x01\x00", True)
+    _runtest("B", 2 ** 16 - 1, b"\xc5", b"\xff\xff", True)
 
 
 def test_bin16_from_float():
-    _runtest("f", 2**8, b"\xc5", b"\x01\x00", True)
-    _runtest("f", 2**16 - 4, b"\xc5", b"\xff\xfc", True)
+    _runtest("f", 2 ** 8, b"\xc5", b"\x01\x00", True)
+    _runtest("f", 2 ** 16 - 4, b"\xc5", b"\xff\xfc", True)
 
 
 def test_bin32_from_byte():
-    _runtest("B", 2**16, b"\xc6", b"\x00\x01\x00\x00", True)
+    _runtest("B", 2 ** 16, b"\xc6", b"\x00\x01\x00\x00", True)
 
 
 def test_bin32_from_float():
-    _runtest("f", 2**16, b"\xc6", b"\x00\x01\x00\x00", True)
-
-
-def test_multidim_memoryview():
-    # See https://github.com/msgpack/msgpack-python/issues/526
-    view = memoryview(b"\00" * 6)
-    data = view.cast(view.format, (3, 2))
-    packed = packb(data)
-    assert packed == b"\xc4\x06\x00\x00\x00\x00\x00\x00"
+    _runtest("f", 2 ** 16, b"\xc6", b"\x00\x01\x00\x00", True)
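
The memoryview tests above depend on unpackb accepting any bytes-like object. A rough standalone sketch of that behavior, mirroring test_buffer.py rather than the _runtest helper:

import msgpack

buf = msgpack.packb((b"foo", b"bar"), use_bin_type=True)
# bytes, bytearray and memoryview inputs all unpack to the same object
assert msgpack.unpackb(buf, use_list=1) == [b"foo", b"bar"]
assert msgpack.unpackb(bytearray(buf), use_list=1) == [b"foo", b"bar"]
assert msgpack.unpackb(memoryview(buf), use_list=1) == [b"foo", b"bar"]
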
b"\xc4\x06\x00\x00\x00\x00\x00\x00" + _runtest("f", 2 ** 16, b"\xc6", b"\x00\x01\x00\x00", True) diff --git a/test/test_newspec.py b/test/test_newspec.py index 9e2f9be..b7da486 100644 --- a/test/test_newspec.py +++ b/test/test_newspec.py @@ -1,4 +1,6 @@ -from msgpack import ExtType, packb, unpackb +# coding: utf-8 + +from msgpack import packb, unpackb, ExtType def test_str8(): diff --git a/test/test_obj.py b/test/test_obj.py index 23be06d..86c557c 100644 --- a/test/test_obj.py +++ b/test/test_obj.py @@ -1,7 +1,7 @@ #!/usr/bin/env python +# coding: utf-8 from pytest import raises - from msgpack import packb, unpackb @@ -34,7 +34,7 @@ def test_decode_pairs_hook(): prod_sum = 1 * 2 + 3 * 4 unpacked = unpackb( packed, - object_pairs_hook=lambda lst: sum(k * v for k, v in lst), + object_pairs_hook=lambda l: sum(k * v for k, v in l), use_list=1, strict_map_key=False, ) @@ -49,7 +49,7 @@ def test_only_one_obj_hook(): def test_bad_hook(): with raises(TypeError): packed = packb([3, 1 + 2j], default=lambda o: o) - unpackb(packed, use_list=1) + unpacked = unpackb(packed, use_list=1) def _arr_to_str(arr): diff --git a/test/test_pack.py b/test/test_pack.py index 374d154..a51d84c 100644 --- a/test/test_pack.py +++ b/test/test_pack.py @@ -1,12 +1,16 @@ #!/usr/bin/env python +# coding: utf-8 +from __future__ import absolute_import, division, print_function, unicode_literals -import struct from collections import OrderedDict from io import BytesIO +import struct +import sys import pytest +from pytest import raises, xfail -from msgpack import Packer, Unpacker, packb, unpackb +from msgpack import packb, unpackb, Unpacker, Packer, pack def check(data, use_list=False): @@ -76,8 +80,13 @@ def testPackByteArrays(): check(td) +@pytest.mark.skipif( + sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates" +) def testIgnoreUnicodeErrors(): - re = unpackb(packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore") + re = unpackb( + packb(b"abc\xeddef", use_bin_type=False), raw=False, unicode_errors="ignore" + ) assert re == "abcdef" @@ -87,9 +96,12 @@ def testStrictUnicodeUnpack(): unpackb(packed, raw=False, use_list=1) +@pytest.mark.skipif( + sys.version_info < (3, 0), reason="Python 2 passes invalid surrogates" +) def testIgnoreErrorsPack(): re = unpackb( - packb("abc\udc80\udcffdef", use_bin_type=True, unicode_errors="ignore"), + packb("abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors="ignore"), raw=False, use_list=1, ) @@ -102,8 +114,8 @@ def testDecodeBinary(): def testPackFloat(): - assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(">f", 1.0) - assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(">d", 1.0) + assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(str(">f"), 1.0) + assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(str(">d"), 1.0) def testArraySize(sizes=[0, 5, 50, 1000]): @@ -148,7 +160,7 @@ def testMapSize(sizes=[0, 5, 50, 1000]): bio.seek(0) unpacker = Unpacker(bio, strict_map_key=False) for size in sizes: - assert unpacker.unpack() == {i: i * 2 for i in range(size)} + assert unpacker.unpack() == dict((i, i * 2) for i in range(size)) def test_odict(): diff --git a/test/test_read_size.py b/test/test_read_size.py index 0f6c1b5..33a7e7d 100644 --- a/test/test_read_size.py +++ b/test/test_read_size.py @@ -1,6 +1,5 @@ """Test Unpacker's read_array_header and read_map_header methods""" - -from msgpack import OutOfData, Unpacker, packb +from msgpack import packb, Unpacker, OutOfData 
 
 UnexpectedTypeException = ValueError
diff --git a/test/test_seq.py b/test/test_seq.py
index 8dee462..0d5d806 100644
--- a/test/test_seq.py
+++ b/test/test_seq.py
@@ -1,9 +1,10 @@
-# ruff: noqa: E501
-# ignore line length limit for long comments
-import io
+#!/usr/bin/env python
+# coding: utf-8
 
+import io
 import msgpack
 
+
 binarydata = bytes(bytearray(range(256)))
 
 
@@ -34,7 +35,7 @@ def test_exceeding_unpacker_read_size():
 
     read_count = 0
     for idx, o in enumerate(unpacker):
-        assert isinstance(o, bytes)
+        assert type(o) == bytes
         assert o == gen_binary_data(idx)
         read_count += 1
 
diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py
index 0f895d7..6293a45 100644
--- a/test/test_sequnpack.py
+++ b/test/test_sequnpack.py
@@ -1,10 +1,10 @@
 #!/usr/bin/env python
+# coding: utf-8
 
 import io
-
-from pytest import raises
-
-from msgpack import BufferFull, Unpacker, pack, packb
+from msgpack import Unpacker, BufferFull
+from msgpack import pack
 from msgpack.exceptions import OutOfData
+from pytest import raises
 
 
 def test_partialdata():
@@ -78,15 +78,6 @@ def test_maxbuffersize():
     assert ord("b") == next(unpacker)
 
 
-def test_maxbuffersize_file():
-    buff = io.BytesIO(packb(b"a" * 10) + packb([b"a" * 20] * 2))
-    unpacker = Unpacker(buff, read_size=1, max_buffer_size=19, max_bin_len=20)
-    assert unpacker.unpack() == b"a" * 10
-    # assert unpacker.unpack() == [b"a" * 20]*2
-    with raises(BufferFull):
-        print(unpacker.unpack())
-
-
 def test_readbytes():
     unpacker = Unpacker(read_size=3)
     unpacker.feed(b"foobar")
@@ -127,8 +118,8 @@ def test_issue124():
 
 def test_unpack_tell():
     stream = io.BytesIO()
-    messages = [2**i - 1 for i in range(65)]
-    messages += [-(2**i) for i in range(1, 64)]
+    messages = [2 ** i - 1 for i in range(65)]
+    messages += [-(2 ** i) for i in range(1, 64)]
     messages += [
         b"hello",
         b"hello" * 1000,
diff --git a/test/test_stricttype.py b/test/test_stricttype.py
index 72776a2..fe9ec6c 100644
--- a/test/test_stricttype.py
+++ b/test/test_stricttype.py
@@ -1,6 +1,7 @@
-from collections import namedtuple
+# coding: utf-8
 
-from msgpack import ExtType, packb, unpackb
+from collections import namedtuple
+from msgpack import packb, unpackb, ExtType
 
 
 def test_namedtuple():
@@ -9,7 +10,7 @@ def test_namedtuple():
     def default(o):
         if isinstance(o, T):
             return dict(o._asdict())
-        raise TypeError(f"Unsupported type {type(o)}")
+        raise TypeError("Unsupported type %s" % (type(o),))
 
     packed = packb(T(1, 42), strict_types=True, use_bin_type=True, default=default)
     unpacked = unpackb(packed, raw=False)
@@ -22,7 +23,7 @@ def test_tuple():
     def default(o):
         if isinstance(o, tuple):
             return {"__type__": "tuple", "value": list(o)}
-        raise TypeError(f"Unsupported type {type(o)}")
+        raise TypeError("Unsupported type %s" % (type(o),))
 
     def convert(o):
         if o.get("__type__") == "tuple":
@@ -43,7 +44,9 @@ def test_tuple_ext():
     def default(o):
         if isinstance(o, tuple):
             # Convert to list and pack
-            payload = packb(list(o), strict_types=True, use_bin_type=True, default=default)
+            payload = packb(
+                list(o), strict_types=True, use_bin_type=True, default=default
+            )
             return ExtType(MSGPACK_EXT_TYPE_TUPLE, payload)
         raise TypeError(repr(o))
 
@@ -51,7 +54,7 @@ def test_tuple_ext():
         if code == MSGPACK_EXT_TYPE_TUPLE:
             # Unpack and convert to tuple
             return tuple(unpackb(payload, raw=False, ext_hook=convert))
-        raise ValueError(f"Unknown Ext code {code}")
+        raise ValueError("Unknown Ext code {}".format(code))
 
     data = packb(t, strict_types=True, use_bin_type=True, default=default)
     expected = unpackb(data, raw=False, ext_hook=convert)
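
The default/ext_hook pair in test_stricttype.py above generalizes to other application types. A rough sketch, assuming an application-chosen ext type code (42 here is arbitrary, purely for illustration):

import msgpack
from msgpack import ExtType

MSGPACK_EXT_TYPE_SET = 42  # hypothetical application-level code


def default(o):
    if isinstance(o, set):
        # pack the members as a plain list inside the ext payload
        return ExtType(MSGPACK_EXT_TYPE_SET, msgpack.packb(sorted(o), use_bin_type=True))
    raise TypeError("Unknown type object %r" % (o,))


def ext_hook(code, data):
    if code == MSGPACK_EXT_TYPE_SET:
        return set(msgpack.unpackb(data, raw=False))
    return ExtType(code, data)


packed = msgpack.packb({1, 2, 3}, use_bin_type=True, default=default)
assert msgpack.unpackb(packed, raw=False, ext_hook=ext_hook) == {1, 2, 3}
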
diff --git a/test/test_subtype.py b/test/test_subtype.py
index a911578..d91d455 100644
--- a/test/test_subtype.py
+++ b/test/test_subtype.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python
+# coding: utf-8
 
+from msgpack import packb, unpackb
 from collections import namedtuple
 
-from msgpack import packb
-
 
 class MyList(list):
     pass
diff --git a/test/test_timestamp.py b/test/test_timestamp.py
index 831141a..6a29be7 100644
--- a/test/test_timestamp.py
+++ b/test/test_timestamp.py
@@ -1,38 +1,40 @@
-import datetime
-
 import pytest
-
+import sys
+import datetime
 import msgpack
 from msgpack.ext import Timestamp
 
+if sys.version_info[0] > 2:
+    from msgpack.ext import _utc
+
 
 def test_timestamp():
     # timestamp32
-    ts = Timestamp(2**32 - 1)
+    ts = Timestamp(2 ** 32 - 1)
     assert ts.to_bytes() == b"\xff\xff\xff\xff"
     packed = msgpack.packb(ts)
     assert packed == b"\xd6\xff" + ts.to_bytes()
     unpacked = msgpack.unpackb(packed)
     assert ts == unpacked
-    assert ts.seconds == 2**32 - 1 and ts.nanoseconds == 0
+    assert ts.seconds == 2 ** 32 - 1 and ts.nanoseconds == 0
 
     # timestamp64
-    ts = Timestamp(2**34 - 1, 999999999)
+    ts = Timestamp(2 ** 34 - 1, 999999999)
     assert ts.to_bytes() == b"\xee\x6b\x27\xff\xff\xff\xff\xff"
     packed = msgpack.packb(ts)
     assert packed == b"\xd7\xff" + ts.to_bytes()
     unpacked = msgpack.unpackb(packed)
     assert ts == unpacked
-    assert ts.seconds == 2**34 - 1 and ts.nanoseconds == 999999999
+    assert ts.seconds == 2 ** 34 - 1 and ts.nanoseconds == 999999999
 
     # timestamp96
-    ts = Timestamp(2**63 - 1, 999999999)
+    ts = Timestamp(2 ** 63 - 1, 999999999)
     assert ts.to_bytes() == b"\x3b\x9a\xc9\xff\x7f\xff\xff\xff\xff\xff\xff\xff"
     packed = msgpack.packb(ts)
     assert packed == b"\xc7\x0c\xff" + ts.to_bytes()
     unpacked = msgpack.unpackb(packed)
     assert ts == unpacked
-    assert ts.seconds == 2**63 - 1 and ts.nanoseconds == 999999999
+    assert ts.seconds == 2 ** 63 - 1 and ts.nanoseconds == 999999999
 
     # negative fractional
     ts = Timestamp.from_unix(-2.3)  # s: -3, ns: 700000000
@@ -83,48 +85,33 @@ def test_timestamp_to():
     assert t.to_unix_nano() == 42000014000
 
 
+@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
 def test_timestamp_datetime():
     t = Timestamp(42, 14)
-    utc = datetime.timezone.utc
-    assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc)
-
-    ts = datetime.datetime(2024, 4, 16, 8, 43, 9, 420317, tzinfo=utc)
-    ts2 = datetime.datetime(2024, 4, 16, 8, 43, 9, 420318, tzinfo=utc)
-
-    assert (
-        Timestamp.from_datetime(ts2).nanoseconds - Timestamp.from_datetime(ts).nanoseconds == 1000
-    )
-
-    ts3 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4256)
-    ts4 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4257)
-    assert (
-        Timestamp.from_datetime(ts4).nanoseconds - Timestamp.from_datetime(ts3).nanoseconds == 1000
-    )
-
-    assert Timestamp.from_datetime(ts).to_datetime() == ts
+    assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc)
 
 
+@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
 def test_unpack_datetime():
     t = Timestamp(42, 14)
-    utc = datetime.timezone.utc
     packed = msgpack.packb(t)
     unpacked = msgpack.unpackb(packed, timestamp=3)
-    assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc)
+    assert unpacked == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=_utc)
 
 
+@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
 def test_pack_unpack_before_epoch():
-    utc = datetime.timezone.utc
-    t_in = datetime.datetime(1960, 1, 1, tzinfo=utc)
+    t_in = datetime.datetime(1960, 1, 1, tzinfo=_utc)
     packed = msgpack.packb(t_in, datetime=True)
     unpacked = msgpack.unpackb(packed, timestamp=3)
     assert unpacked == t_in
 
 
+@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
 def test_pack_datetime():
     t = Timestamp(42, 14000)
     dt = t.to_datetime()
-    utc = datetime.timezone.utc
-    assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc)
+    assert dt == datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=_utc)
 
     packed = msgpack.packb(dt, datetime=True)
     packed2 = msgpack.packb(t)
@@ -144,28 +131,12 @@ def test_pack_datetime():
     assert msgpack.unpackb(packed) is None
 
 
+@pytest.mark.skipif(sys.version_info[0] == 2, reason="datetime support is PY3+ only")
 def test_issue451():
     # https://github.com/msgpack/msgpack-python/issues/451
-    utc = datetime.timezone.utc
-    dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=utc)
+    dt = datetime.datetime(2100, 1, 1, 1, 1, tzinfo=_utc)
     packed = msgpack.packb(dt, datetime=True)
     assert packed == b"\xd6\xff\xf4\x86eL"
 
     unpacked = msgpack.unpackb(packed, timestamp=3)
     assert dt == unpacked
-
-
-def test_pack_datetime_without_tzinfo():
-    dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14)
-    with pytest.raises(ValueError, match="where tzinfo=None"):
-        packed = msgpack.packb(dt, datetime=True)
-
-    dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14)
-    packed = msgpack.packb(dt, datetime=True, default=lambda x: None)
-    assert packed == msgpack.packb(None)
-
-    utc = datetime.timezone.utc
-    dt = datetime.datetime(1970, 1, 1, 0, 0, 42, 14, tzinfo=utc)
-    packed = msgpack.packb(dt, datetime=True)
-    unpacked = msgpack.unpackb(packed, timestamp=3)
-    assert unpacked == dt
diff --git a/test/test_unpack.py b/test/test_unpack.py
index b17c3c5..057b7bf 100644
--- a/test/test_unpack.py
+++ b/test/test_unpack.py
@@ -1,9 +1,12 @@
-import sys
 from io import BytesIO
+import sys
+from msgpack import Unpacker, packb, OutOfData, ExtType
+from pytest import raises, mark
 
-from pytest import mark, raises
-
-from msgpack import ExtType, OutOfData, Unpacker, packb
+try:
+    from itertools import izip as zip
+except ImportError:
+    pass
 
 
 def test_unpack_array_header_from_file():
@@ -49,7 +52,7 @@ def test_unpacker_hook_refcnt():
 def test_unpacker_ext_hook():
     class MyUnpacker(Unpacker):
         def __init__(self):
-            super().__init__(ext_hook=self._hook, raw=False)
+            super(MyUnpacker, self).__init__(ext_hook=self._hook, raw=False)
 
         def _hook(self, code, data):
             if code == 1:
@@ -67,7 +70,7 @@ def test_unpacker_ext_hook():
 
 
 def test_unpacker_tell():
-    objects = 1, 2, "abc", "def", "ghi"
+    objects = 1, 2, u"abc", u"def", u"ghi"
     packed = b"\x01\x02\xa3abc\xa3def\xa3ghi"
     positions = 1, 2, 6, 10, 14
     unpacker = Unpacker(BytesIO(packed))
@@ -77,7 +80,7 @@ def test_unpacker_tell():
 
 
 def test_unpacker_tell_read_bytes():
-    objects = 1, "abc", "ghi"
+    objects = 1, u"abc", u"ghi"
     packed = b"\x01\x02\xa3abc\xa3def\xa3ghi"
     raw_data = b"\x02", b"\xa3def", b""
     lenghts = 1, 4, 999
@@ -87,3 +90,10 @@ def test_unpacker_tell_read_bytes():
         assert obj == unp
         assert pos == unpacker.tell()
         assert unpacker.read_bytes(n) == raw
+
+
+if __name__ == "__main__":
+    test_unpack_array_header_from_file()
+    test_unpacker_hook_refcnt()
+    test_unpacker_ext_hook()
+    test_unpacker_tell()
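
The Unpacker tests above use the streaming API; the basic feed-and-iterate pattern looks roughly like this (standalone sketch, not part of the patch):

import msgpack

unpacker = msgpack.Unpacker(raw=False, use_list=1)
# feed() accepts arbitrary chunks; complete messages become available on iteration
unpacker.feed(msgpack.packb(1))
unpacker.feed(msgpack.packb(u"abc", use_bin_type=True))
assert list(unpacker) == [1, u"abc"]
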
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..607b182
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,43 @@
+[tox]
+envlist =
+    py27-pure,
+    {py35,py36,py37,py38}-{c,pure},
+    {pypy,pypy3}-pure,
+    py27-x86,
+    py34-x86,
+
+[variants:pure]
+setenv=
+    MSGPACK_PUREPYTHON=x
+
+[testenv]
+deps=
+    pytest
+
+changedir=test
+commands=
+    c,x86: python -c 'from msgpack import _cmsgpack'
+    c,x86: py.test
+    pure: py.test
+
+[testenv:py27-x86]
+basepython=python2.7-x86
+deps=
+    pytest
+
+changedir=test
+commands=
+    python -c 'import sys; print(hex(sys.maxsize))'
+    python -c 'from msgpack import _cmsgpack'
+    py.test
+
+[testenv:py34-x86]
+basepython=python3.4-x86
+deps=
+    pytest
+
+changedir=test
+commands=
+    python -c 'import sys; print(hex(sys.maxsize))'
+    python -c 'from msgpack import _cmsgpack'
+    py.test
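
For the c/pure split in the tox matrix above, a quick way to check which implementation an environment actually imported (assumption: in this version the pure-Python code lives in msgpack.fallback, while the Cython build is msgpack._cmsgpack, as the commands above import):

import msgpack

# "msgpack._cmsgpack" for the Cython build, "msgpack.fallback" for pure Python
print(msgpack.Packer.__module__)
print(msgpack.version)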