diff --git a/.github/workflows/lint.yaml b/.github/workflows/black.yaml similarity index 51% rename from .github/workflows/lint.yaml rename to .github/workflows/black.yaml index 198cf7b..1e28b7b 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/black.yaml @@ -1,22 +1,25 @@ -name: lint +name: Black on: ["push", "pull_request"] jobs: - lint: + black: # We want to run on external PRs, but not on our own internal PRs as they'll be run # by the push to the branch. if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest steps: + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: '3.x' + architecture: 'x64' + - name: Checkout - uses: actions/checkout@v4 + uses: actions/checkout@v3 - - name: ruff check + - name: Black Code Formatter run: | - pipx run ruff check --diff msgpack/ test/ setup.py - - - name: ruff format - run: | - pipx run ruff format --diff msgpack/ test/ setup.py + pip install black==22.3.0 + black -S --diff --check msgpack/ test/ setup.py diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index b696b92..a393c6b 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -10,24 +10,23 @@ jobs: runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v4 - - name: Setup Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v4 with: python-version: '3.x' - cache: "pip" - cache-dependency-path: | - requirements.txt - docs/requirements.txt + architecture: 'x64' + + - name: Checkout + uses: actions/checkout@v3 - name: Build + shell: bash run: | pip install -r requirements.txt make cython + pip install . - name: Sphinx Documentation Generator run: | - pip install -r docs/requirements.txt - make docs + pip install tox + tox -e sphinx diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6b1664a..76fcf27 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,33 +9,27 @@ jobs: test: strategy: matrix: - os: ["ubuntu-latest", "windows-latest", "windows-11-arm", "macos-latest"] - py: ["3.14", "3.14t", "3.13", "3.12", "3.11", "3.10"] - exclude: - - os: windows-11-arm - py: "3.10" + os: ["ubuntu-latest", "windows-latest", "macos-latest"] + py: ["3.12", "3.11", "3.10", "3.9", "3.8"] + runs-on: ${{ matrix.os }} name: Run test with Python ${{ matrix.py }} on ${{ matrix.os }} steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v6 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.py }} allow-prereleases: true cache: "pip" - - name: Prepare - shell: bash - run: | - python -m pip install -r requirements.txt pytest - - name: Build shell: bash run: | + pip install -r requirements.txt pytest make cython pip install . 
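The test job above builds the extension with `make cython` and `pip install .`; the following hunk re-runs the suite with `MSGPACK_PUREPYTHON=1` to exercise the pure-Python fallback. A minimal sketch of how that flag selects the implementation, assuming msgpack is installed and imported fresh (see `msgpack/__init__.py` later in this diff):

```python
# Sketch only: the MSGPACK_PUREPYTHON flag used by the test job must be set
# before msgpack is first imported, because msgpack/__init__.py reads it at
# import time to pick the implementation.
import os

os.environ["MSGPACK_PUREPYTHON"] = "1"

import msgpack

# With the flag set, Packer/Unpacker come from the pure-Python fallback
# instead of the compiled msgpack._cmsgpack extension.
print(msgpack.Packer.__module__)  # expected: msgpack.fallback
```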
@@ -48,14 +42,3 @@ jobs: shell: bash run: | MSGPACK_PUREPYTHON=1 pytest -v test - - - name: build packages - shell: bash - run: | - python -m build -nv - - - name: upload packages - uses: actions/upload-artifact@v4 - with: - name: dist-${{ matrix.os }}-${{ matrix.py }} - path: dist diff --git a/.github/workflows/wheel.yml b/.github/workflows/wheel.yml index be54e5e..c1eb2ed 100644 --- a/.github/workflows/wheel.yml +++ b/.github/workflows/wheel.yml @@ -1,88 +1,50 @@ -name: Build sdist and Wheels +name: Build Wheels on: push: branches: [main] - release: - types: - - published - workflow_dispatch: + create: jobs: build_wheels: strategy: matrix: - # macos-13 is for intel - os: ["ubuntu-24.04", "ubuntu-24.04-arm", "windows-latest", "windows-11-arm", "macos-13", "macos-latest"] + os: ["ubuntu-latest", "windows-latest", "macos-latest"] runs-on: ${{ matrix.os }} name: Build wheels on ${{ matrix.os }} steps: - - uses: actions/checkout@v5 - - uses: actions/setup-python@v6 + - name: Checkout + uses: actions/checkout@v3 + + - name: Set up QEMU + if: runner.os == 'Linux' + uses: docker/setup-qemu-action@v1 + with: + platforms: arm64 + + - name: Set up Python 3.x + uses: actions/setup-python@v4 with: python-version: "3.x" cache: "pip" - - name: Cythonize + + - name: Prepare shell: bash run: | pip install -r requirements.txt make cython - name: Build - uses: pypa/cibuildwheel@v3.2.0 + uses: pypa/cibuildwheel@v2.15.0 env: CIBW_TEST_REQUIRES: "pytest" CIBW_TEST_COMMAND: "pytest {package}/test" - CIBW_SKIP: "pp* cp38-* cp39-* cp310-win_arm64" - - - name: Build sdist - if: runner.os == 'Linux' && runner.arch == 'X64' - run: | - pip install build - python -m build -s -o wheelhouse + CIBW_ARCHS_LINUX: auto aarch64 + CIBW_ARCHS_MACOS: x86_64 universal2 arm64 + CIBW_SKIP: pp* - name: Upload Wheels to artifact - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v1 with: - name: wheels-${{ matrix.os }} + name: Wheels path: wheelhouse - - # combine all wheels into one artifact - combine_wheels: - needs: [build_wheels] - runs-on: ubuntu-latest - steps: - - uses: actions/download-artifact@v4 - with: - # unpacks all CIBW artifacts into dist/ - pattern: wheels-* - path: dist - merge-multiple: true - - - name: Upload Wheels to artifact - uses: actions/upload-artifact@v4 - with: - name: wheels-all - path: dist - - # https://github.com/pypa/cibuildwheel/blob/main/examples/github-deploy.yml - upload_pypi: - needs: [build_wheels] - runs-on: ubuntu-latest - environment: pypi - permissions: - id-token: write - if: github.event_name == 'release' && github.event.action == 'published' - # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) - # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') - steps: - - uses: actions/download-artifact@v4 - with: - # unpacks all CIBW artifacts into dist/ - pattern: wheels-* - path: dist - merge-multiple: true - - - uses: pypa/gh-action-pypi-publish@release/v1 - #with: - # To test: repository-url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore index 341be63..800f1c2 100644 --- a/.gitignore +++ b/.gitignore @@ -2,13 +2,11 @@ MANIFEST build/* dist/* .tox -.python-version *.pyc *.pyo *.so *~ msgpack/__version__.py -msgpack/*.c msgpack/*.cpp *.egg-info /venv diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 88d8718..7447895 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -18,7 +18,6 @@ python: install: - method: pip path: . 
- - requirements: docs/requirements.txt sphinx: configuration: docs/conf.py diff --git a/ChangeLog.rst b/ChangeLog.rst index beeab15..bf345dd 100644 --- a/ChangeLog.rst +++ b/ChangeLog.rst @@ -1,91 +1,7 @@ -1.1.2 -===== - -Release Date: 2025-10-08 - -This release does not change source code. It updates only building wheels: - -* Update Cython to v3.1.4 -* Update cibuildwheel to v3.2.0 -* Drop Python 3.8 -* Add Python 3.14 -* Add windows-arm - -1.1.1 -===== - -Release Date: 2025-06-13 - -* No change from 1.1.1rc1. - -1.1.1rc1 +1.0.6rc1 ======== -Release Date: 2025-06-06 - -* Update Cython to 3.1.1 and cibuildwheel to 2.23.3. - -1.1.0 -===== - -Release Date: 2024-09-10 - -* use ``PyLong_*`` instead of ``PyInt_*`` for compatibility with - future Cython. (#620) - -1.1.0rc2 -======== - -Release Date: 2024-08-19 - -* Update Cython to 3.0.11 for better Python 3.13 support. -* Update cibuildwheel to 2.20.0 to build Python 3.13 wheels. - -1.1.0rc1 -======== - -Release Date: 2024-05-07 - -* Update Cython to 3.0.10 to reduce C warnings and future support for Python 3.13. -* Stop using C++ mode in Cython to reduce compile error on some compilers. -* ``Packer()`` has ``buf_size`` option to specify initial size of - internal buffer to reduce reallocation. -* The default internal buffer size of ``Packer()`` is reduced from - 1MiB to 256KiB to optimize for common use cases. Use ``buf_size`` - if you are packing large data. -* ``Timestamp.to_datetime()`` and ``Timestamp.from_datetime()`` become - more accurate by avoiding floating point calculations. (#591) -* The Cython code for ``Unpacker`` has been slightly rewritten for maintainability. -* The fallback implementation of ``Packer()`` and ``Unpacker()`` now uses keyword-only - arguments to improve compatibility with the Cython implementation. - -1.0.8 -===== - -Release Date: 2024-03-01 - -* Update Cython to 3.0.8. This fixes memory leak when iterating - ``Unpacker`` object on Python 3.12. -* Do not include C/Cython files in binary wheels. - - -1.0.7 -===== - -Release Date: 2023-09-28 - -* Fix build error of extension module on Windows. (#567) -* ``setup.py`` doesn't skip build error of extension module. (#568) - - -1.0.6 -===== - -Release Date: 2023-09-21 - -.. note:: - v1.0.6 Wheels for Windows don't contain extension module. - Please upgrade to v1.0.7 or newer. +Release Date: 2023-09-13 * Add Python 3.12 wheels (#517) * Remove Python 2.7, 3.6, and 3.7 support @@ -191,7 +107,7 @@ Important changes * unpacker: Default value of input limits are smaller than before to avoid DoS attack. If you need to handle large data, you need to specify limits manually. (#319) -* Unpacker doesn't wrap underlying ``ValueError`` (including ``UnicodeError``) into +* Unpacker doesn't wrap underlaying ``ValueError`` (including ``UnicodeError``) into ``UnpackValueError``. If you want to catch all exception during unpack, you need to use ``try ... except Exception`` with minimum try code block. (#323, #233) diff --git a/DEVELOP.md b/DEVELOP.md index 27adf8c..9c823c3 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -1,5 +1,13 @@ # Developer's note +## Wheels + +Wheels for macOS and Linux are built on Travis and AppVeyr, in +[methane/msgpack-wheels](https://github.com/methane/msgpack-wheels) repository. + +Wheels for Windows are built on Github Actions in this repository. 
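Regarding the ChangeLog note above that `Unpacker` no longer wraps the underlying `ValueError` (including `UnicodeError`) into `UnpackValueError`: a minimal sketch of the recommended pattern, keeping the `try` block small and catching broadly. The `safe_unpackb` helper name is illustrative only:

```python
# Illustrative sketch: catch all exceptions raised during unpacking with a
# minimal try block, since ValueError/UnicodeError are no longer wrapped.
import msgpack


def safe_unpackb(packed: bytes):
    try:
        obj = msgpack.unpackb(packed, raw=False)
    except Exception as e:  # ValueError, UnicodeError, msgpack exceptions, ...
        raise RuntimeError(f"unpack failed: {e!r}") from e
    return obj


print(safe_unpackb(msgpack.packb({"key": "value"})))
```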
+ + ### Build ``` diff --git a/MANIFEST.in b/MANIFEST.in index 6317706..57d84a4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,5 @@ include setup.py include COPYING include README.md -recursive-include msgpack *.h *.c *.pyx +recursive-include msgpack *.h *.c *.pyx *.cpp recursive-include test *.py diff --git a/Makefile b/Makefile index 51f3e0e..e4f22da 100644 --- a/Makefile +++ b/Makefile @@ -4,17 +4,9 @@ PYTHON_SOURCES = msgpack test setup.py all: cython python setup.py build_ext -i -f -.PHONY: format -format: - ruff format $(PYTHON_SOURCES) - -.PHONY: lint -lint: - ruff check $(PYTHON_SOURCES) - -.PHONY: doc -doc: - cd docs && sphinx-build -n -v -W --keep-going -b html -d doctrees . html +.PHONY: black +black: + black $(PYTHON_SOURCES) .PHONY: pyupgrade pyupgrade: @@ -22,7 +14,7 @@ pyupgrade: .PHONY: cython cython: - cython msgpack/_cmsgpack.pyx + cython --cplus msgpack/_cmsgpack.pyx .PHONY: test test: cython diff --git a/README.md b/README.md index 1f06324..61f99e1 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,60 @@ [![Build Status](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml/badge.svg)](https://github.com/msgpack/msgpack-python/actions/workflows/wheel.yml) [![Documentation Status](https://readthedocs.org/projects/msgpack-python/badge/?version=latest)](https://msgpack-python.readthedocs.io/en/latest/?badge=latest) -## What is this? +## What's this [MessagePack](https://msgpack.org/) is an efficient binary serialization format. It lets you exchange data among multiple languages like JSON. But it's faster and smaller. This package provides CPython bindings for reading and writing MessagePack data. + +## Very important notes for existing users + +### PyPI package name + +Package name on PyPI was changed from `msgpack-python` to `msgpack` from 0.5. + +When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before +`pip install -U msgpack`. + + +### Compatibility with the old format + +You can use `use_bin_type=False` option to pack `bytes` +object into raw type in the old msgpack spec, instead of bin type in new msgpack spec. + +You can unpack old msgpack format using `raw=True` option. +It unpacks str (raw) type in msgpack into Python bytes. + +See note below for detail. + + +### Major breaking changes in msgpack 1.0 + +* Python 2 + + * The extension module does not support Python 2 anymore. + The pure Python implementation (`msgpack.fallback`) is used for Python 2. + +* Packer + + * `use_bin_type=True` by default. bytes are encoded in bin type in msgpack. + **If you are still using Python 2, you must use unicode for all string types.** + You can use `use_bin_type=False` to encode into old msgpack format. + * `encoding` option is removed. UTF-8 is used always. + +* Unpacker + + * `raw=False` by default. It assumes str types are valid UTF-8 string + and decode them to Python str (unicode) object. + * `encoding` option is removed. You can use `raw=True` to support old format. + * Default value of `max_buffer_size` is changed from 0 to 100 MiB. + * Default value of `strict_map_key` is changed to True to avoid hashdos. + You need to pass `strict_map_key=False` if you have data which contain map keys + which type is not bytes or str. + + ## Install ``` @@ -18,38 +65,55 @@ $ pip install msgpack ### Pure Python implementation -The extension module in msgpack (`msgpack._cmsgpack`) does not support PyPy. +The extension module in msgpack (`msgpack._cmsgpack`) does not support +Python 2 and PyPy. 
+ +But msgpack provides a pure Python implementation (`msgpack.fallback`) +for PyPy and Python 2. -But msgpack provides a pure Python implementation (`msgpack.fallback`) for PyPy. ### Windows -If you can't use a binary distribution, you need to install Visual Studio -or the Windows SDK on Windows. -Without the extension, the pure Python implementation on CPython runs slowly. +When you can't use a binary distribution, you need to install Visual Studio +or Windows SDK on Windows. +Without extension, using pure Python implementation on CPython runs slowly. ## How to use +NOTE: In examples below, I use `raw=False` and `use_bin_type=True` for users +using msgpack < 1.0. These options are default from msgpack 1.0 so you can omit them. + + ### One-shot pack & unpack Use `packb` for packing and `unpackb` for unpacking. -msgpack provides `dumps` and `loads` as aliases for compatibility with +msgpack provides `dumps` and `loads` as an alias for compatibility with `json` and `pickle`. -`pack` and `dump` pack to a file-like object. -`unpack` and `load` unpack from a file-like object. +`pack` and `dump` packs to a file-like object. +`unpack` and `load` unpacks from a file-like object. ```pycon >>> import msgpack ->>> msgpack.packb([1, 2, 3]) +>>> msgpack.packb([1, 2, 3], use_bin_type=True) '\x93\x01\x02\x03' ->>> msgpack.unpackb(_) +>>> msgpack.unpackb(_, raw=False) [1, 2, 3] ``` -Read the docstring for options. +`unpack` unpacks msgpack's array to Python's list, but can also unpack to tuple: + +```pycon +>>> msgpack.unpackb(b'\x93\x01\x02\x03', use_list=False, raw=False) +(1, 2, 3) +``` + +You should always specify the `use_list` keyword argument for backward compatibility. +See performance issues relating to `use_list option`_ below. + +Read the docstring for other options. ### Streaming unpacking @@ -63,17 +127,17 @@ from io import BytesIO buf = BytesIO() for i in range(100): - buf.write(msgpack.packb(i)) + buf.write(msgpack.packb(i, use_bin_type=True)) buf.seek(0) -unpacker = msgpack.Unpacker(buf) +unpacker = msgpack.Unpacker(buf, raw=False) for unpacked in unpacker: print(unpacked) ``` -### Packing/unpacking of custom data types +### Packing/unpacking of custom data type It is also possible to pack/unpack custom data types. Here is an example for `datetime.datetime`. @@ -98,17 +162,14 @@ def encode_datetime(obj): return obj -packed_dict = msgpack.packb(useful_dict, default=encode_datetime) -this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime) +packed_dict = msgpack.packb(useful_dict, default=encode_datetime, use_bin_type=True) +this_dict_again = msgpack.unpackb(packed_dict, object_hook=decode_datetime, raw=False) ``` `Unpacker`'s `object_hook` callback receives a dict; the `object_pairs_hook` callback may instead be used to receive a list of key-value pairs. -NOTE: msgpack can encode datetime with tzinfo into standard ext type for now. -See `datetime` option in `Packer` docstring. - ### Extended types @@ -130,8 +191,8 @@ It is also possible to pack/unpack custom data types using the **ext** type. ... return ExtType(code, data) ... 
>>> data = array.array('d', [1.2, 3.4]) ->>> packed = msgpack.packb(data, default=default) ->>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook) +>>> packed = msgpack.packb(data, default=default, use_bin_type=True) +>>> unpacked = msgpack.unpackb(packed, ext_hook=ext_hook, raw=False) >>> data == unpacked True ``` @@ -140,8 +201,8 @@ True ### Advanced unpacking control As an alternative to iteration, `Unpacker` objects provide `unpack`, -`skip`, `read_array_header`, and `read_map_header` methods. The former two -read an entire message from the stream, respectively deserializing and returning +`skip`, `read_array_header` and `read_map_header` methods. The former two +read an entire message from the stream, respectively de-serialising and returning the result, or ignoring it. The latter two methods return the number of elements in the upcoming container, so that each element in an array, or key-value pair in a map, can be unpacked or skipped individually. @@ -149,7 +210,7 @@ in a map, can be unpacked or skipped individually. ## Notes -### String and binary types in the old MessagePack spec +### string and binary type Early versions of msgpack didn't distinguish string and binary types. The type for representing both string and binary types was named **raw**. @@ -167,7 +228,7 @@ and `raw=True` options. ### ext type -To use the **ext** type, pass a `msgpack.ExtType` object to the packer. +To use the **ext** type, pass `msgpack.ExtType` object to packer. ```pycon >>> import msgpack @@ -181,62 +242,24 @@ You can use it with `default` and `ext_hook`. See below. ### Security -When unpacking data received from an unreliable source, msgpack provides +To unpacking data received from unreliable source, msgpack provides two security options. `max_buffer_size` (default: `100*1024*1024`) limits the internal buffer size. -It is also used to limit preallocated list sizes. +It is used to limit the preallocated list size too. `strict_map_key` (default: `True`) limits the type of map keys to bytes and str. -While the MessagePack spec doesn't limit map key types, -there is a risk of a hash DoS. +While msgpack spec doesn't limit the types of the map keys, +there is a risk of the hashdos. If you need to support other types for map keys, use `strict_map_key=False`. ### Performance tips -CPython's GC starts when the number of allocated objects grows. -This means unpacking may trigger unnecessary GC. -You can use `gc.disable()` when unpacking a large message. +CPython's GC starts when growing allocated object. +This means unpacking may cause useless GC. +You can use `gc.disable()` when unpacking large message. -A list is the default sequence type in Python. -However, a tuple is lighter than a list. +List is the default sequence type of Python. +But tuple is lighter than list. You can use `use_list=False` while unpacking when performance is important. - - -## Major breaking changes in the history - -### msgpack 0.5 - -The package name on PyPI was changed from `msgpack-python` to `msgpack` in 0.5. - -When upgrading from msgpack-0.4 or earlier, do `pip uninstall msgpack-python` before -`pip install -U msgpack`. - - -### msgpack 1.0 - -* Python 2 support - - * The extension module no longer supports Python 2. - The pure Python implementation (`msgpack.fallback`) is used for Python 2. - - * msgpack 1.0.6 drops official support of Python 2.7, as pip and - GitHub Action "setup-python" no longer supports Python 2.7. - -* Packer - - * Packer uses `use_bin_type=True` by default. 
- Bytes are encoded in the bin type in MessagePack. - * The `encoding` option is removed. UTF-8 is always used. - -* Unpacker - - * Unpacker uses `raw=False` by default. It assumes str values are valid UTF-8 strings - and decodes them to Python str (Unicode) objects. - * `encoding` option is removed. You can use `raw=True` to support old format (e.g. unpack into bytes, not str). - * The default value of `max_buffer_size` is changed from 0 to 100 MiB to avoid DoS attacks. - You need to pass `max_buffer_size=0` if you have large but safe data. - * The default value of `strict_map_key` is changed to True to avoid hash DoS. - You need to pass `strict_map_key=False` if you have data that contain map keys - whose type is neither bytes nor str. diff --git a/docs/conf.py b/docs/conf.py index 28116cd..6eb472a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- +# # msgpack documentation build configuration file, created by # sphinx-quickstart on Sun Feb 24 14:20:50 2013. # @@ -12,9 +14,9 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -# import os -# import sys -# sys.path.insert(0, os.path.abspath('..')) +#import os +#import sys +#sys.path.insert(0, os.path.abspath('..')) # -- General configuration ----------------------------------------------------- @@ -89,7 +91,7 @@ pygments_style = "sphinx" # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -html_theme = "sphinx_rtd_theme" +html_theme = "sphinxdoc" # Theme options are theme-specific and customize the look and feel of a theme # further. 
For a list of options available for each theme, see the diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 26002de..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -sphinx~=7.3.7 -sphinx-rtd-theme~=2.0.0 diff --git a/msgpack/__init__.py b/msgpack/__init__.py index f3266b7..2e20133 100644 --- a/msgpack/__init__.py +++ b/msgpack/__init__.py @@ -1,20 +1,20 @@ -# ruff: noqa: F401 -import os - -from .exceptions import * # noqa: F403 +from .exceptions import * from .ext import ExtType, Timestamp -version = (1, 1, 2) -__version__ = "1.1.2" +import os + + +version = (1, 0, 6, "rc", 1) +__version__ = "1.0.6rc1" if os.environ.get("MSGPACK_PUREPYTHON"): - from .fallback import Packer, Unpacker, unpackb + from .fallback import Packer, unpackb, Unpacker else: try: - from ._cmsgpack import Packer, Unpacker, unpackb + from ._cmsgpack import Packer, unpackb, Unpacker except ImportError: - from .fallback import Packer, Unpacker, unpackb + from .fallback import Packer, unpackb, Unpacker def pack(o, stream, **kwargs): diff --git a/msgpack/_cmsgpack.pyx b/msgpack/_cmsgpack.pyx index 9680b31..1faaac3 100644 --- a/msgpack/_cmsgpack.pyx +++ b/msgpack/_cmsgpack.pyx @@ -1,6 +1,5 @@ +# coding: utf-8 #cython: embedsignature=True, c_string_encoding=ascii, language_level=3 -#cython: freethreading_compatible = True -import cython from cpython.datetime cimport import_datetime, datetime_new import_datetime() diff --git a/msgpack/_packer.pyx b/msgpack/_packer.pyx index 94d1462..3c39867 100644 --- a/msgpack/_packer.pyx +++ b/msgpack/_packer.pyx @@ -1,3 +1,5 @@ +# coding: utf-8 + from cpython cimport * from cpython.bytearray cimport PyByteArray_Check, PyByteArray_CheckExact from cpython.datetime cimport ( @@ -14,6 +16,8 @@ from .ext import ExtType, Timestamp cdef extern from "Python.h": int PyMemoryView_Check(object obj) + char* PyUnicode_AsUTF8AndSize(object obj, Py_ssize_t *l) except NULL + cdef extern from "pack.h": struct msgpack_packer: @@ -22,21 +26,26 @@ cdef extern from "pack.h": size_t buf_size bint use_bin_type - int msgpack_pack_nil(msgpack_packer* pk) except -1 - int msgpack_pack_true(msgpack_packer* pk) except -1 - int msgpack_pack_false(msgpack_packer* pk) except -1 - int msgpack_pack_long_long(msgpack_packer* pk, long long d) except -1 - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) except -1 - int msgpack_pack_float(msgpack_packer* pk, float d) except -1 - int msgpack_pack_double(msgpack_packer* pk, double d) except -1 - int msgpack_pack_array(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_map(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_raw(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_bin(msgpack_packer* pk, size_t l) except -1 - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) except -1 - int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) except -1 - int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds) except -1 + int msgpack_pack_int(msgpack_packer* pk, int d) + int msgpack_pack_nil(msgpack_packer* pk) + int msgpack_pack_true(msgpack_packer* pk) + int msgpack_pack_false(msgpack_packer* pk) + int msgpack_pack_long(msgpack_packer* pk, long d) + int msgpack_pack_long_long(msgpack_packer* pk, long long d) + int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d) + int msgpack_pack_float(msgpack_packer* pk, float d) + int msgpack_pack_double(msgpack_packer* pk, double d) + int 
msgpack_pack_array(msgpack_packer* pk, size_t l) + int msgpack_pack_map(msgpack_packer* pk, size_t l) + int msgpack_pack_raw(msgpack_packer* pk, size_t l) + int msgpack_pack_bin(msgpack_packer* pk, size_t l) + int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) + int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) + int msgpack_pack_timestamp(msgpack_packer* x, long long seconds, unsigned long nanoseconds); + int msgpack_pack_unicode(msgpack_packer* pk, object o, long long limit) +cdef extern from "buff_converter.h": + object buff_to_buff(char *, Py_ssize_t) cdef int DEFAULT_RECURSE_LIMIT=511 cdef long long ITEM_LIMIT = (2**32)-1 @@ -50,7 +59,7 @@ cdef inline int PyBytesLike_CheckExact(object o): return PyBytes_CheckExact(o) or PyByteArray_CheckExact(o) -cdef class Packer: +cdef class Packer(object): """ MessagePack Packer @@ -94,44 +103,27 @@ cdef class Packer: :param str unicode_errors: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. - - :param int buf_size: - The size of the internal buffer. (default: 256*1024) - Useful if serialisation size can be correctly estimated, - avoid unnecessary reallocations. """ cdef msgpack_packer pk cdef object _default cdef object _berrors cdef const char *unicode_errors - cdef size_t exports # number of exported buffers cdef bint strict_types cdef bint use_float cdef bint autoreset cdef bint datetime - def __cinit__(self, buf_size=256*1024, **_kwargs): + def __cinit__(self): + cdef int buf_size = 1024*1024 self.pk.buf = PyMem_Malloc(buf_size) if self.pk.buf == NULL: raise MemoryError("Unable to allocate internal buffer.") self.pk.buf_size = buf_size self.pk.length = 0 - self.exports = 0 - def __dealloc__(self): - PyMem_Free(self.pk.buf) - self.pk.buf = NULL - assert self.exports == 0 - - cdef _check_exports(self): - if self.exports > 0: - raise BufferError("Existing exports of data: Packer cannot be changed") - - @cython.critical_section def __init__(self, *, default=None, bint use_single_float=False, bint autoreset=True, bint use_bin_type=True, - bint strict_types=False, bint datetime=False, unicode_errors=None, - buf_size=256*1024): + bint strict_types=False, bint datetime=False, unicode_errors=None): self.use_float = use_single_float self.strict_types = strict_types self.autoreset = autoreset @@ -148,130 +140,159 @@ cdef class Packer: else: self.unicode_errors = self._berrors - # returns -2 when default should(o) be called - cdef int _pack_inner(self, object o, bint will_default, int nest_limit) except -1: + def __dealloc__(self): + PyMem_Free(self.pk.buf) + self.pk.buf = NULL + + cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: cdef long long llval cdef unsigned long long ullval cdef unsigned long ulval - cdef const char* rawval - cdef Py_ssize_t L - cdef Py_buffer view - cdef bint strict = self.strict_types - - if o is None: - msgpack_pack_nil(&self.pk) - elif o is True: - msgpack_pack_true(&self.pk) - elif o is False: - msgpack_pack_false(&self.pk) - elif PyLong_CheckExact(o) if strict else PyLong_Check(o): - try: - if o > 0: - ullval = o - msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - msgpack_pack_long_long(&self.pk, llval) - except OverflowError as oe: - if will_default: - return -2 - else: - raise OverflowError("Integer value out of range") - elif PyFloat_CheckExact(o) if strict else PyFloat_Check(o): - if self.use_float: - msgpack_pack_float(&self.pk, o) - else: - 
msgpack_pack_double(&self.pk, o) - elif PyBytesLike_CheckExact(o) if strict else PyBytesLike_Check(o): - L = Py_SIZE(o) - if L > ITEM_LIMIT: - PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) - rawval = o - msgpack_pack_bin(&self.pk, L) - msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_CheckExact(o) if strict else PyUnicode_Check(o): - if self.unicode_errors == NULL: - rawval = PyUnicode_AsUTF8AndSize(o, &L) - if L >ITEM_LIMIT: - raise ValueError("unicode string is too large") - else: - o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) - L = Py_SIZE(o) - if L > ITEM_LIMIT: - raise ValueError("unicode string is too large") - rawval = o - msgpack_pack_raw(&self.pk, L) - msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyDict_CheckExact(o) if strict else PyDict_Check(o): - L = len(o) - if L > ITEM_LIMIT: - raise ValueError("dict is too large") - msgpack_pack_map(&self.pk, L) - for k, v in o.items(): - self._pack(k, nest_limit) - self._pack(v, nest_limit) - elif type(o) is ExtType if strict else isinstance(o, ExtType): - # This should be before Tuple because ExtType is namedtuple. - rawval = o.data - L = len(o.data) - if L > ITEM_LIMIT: - raise ValueError("EXT data is too large") - msgpack_pack_ext(&self.pk, o.code, L) - msgpack_pack_raw_body(&self.pk, rawval, L) - elif type(o) is Timestamp: - llval = o.seconds - ulval = o.nanoseconds - msgpack_pack_timestamp(&self.pk, llval, ulval) - elif PyList_CheckExact(o) if strict else (PyTuple_Check(o) or PyList_Check(o)): - L = Py_SIZE(o) - if L > ITEM_LIMIT: - raise ValueError("list is too large") - msgpack_pack_array(&self.pk, L) - for v in o: - self._pack(v, nest_limit) - elif PyMemoryView_Check(o): - PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) - L = view.len - if L > ITEM_LIMIT: - PyBuffer_Release(&view); - raise ValueError("memoryview is too large") - try: - msgpack_pack_bin(&self.pk, L) - msgpack_pack_raw_body(&self.pk, view.buf, L) - finally: - PyBuffer_Release(&view); - elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: - delta = o - epoch - if not PyDelta_CheckExact(delta): - raise ValueError("failed to calculate delta") - llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) - ulval = timedelta_microseconds(delta) * 1000 - msgpack_pack_timestamp(&self.pk, llval, ulval) - elif will_default: - return -2 - elif self.datetime and PyDateTime_CheckExact(o): - # this should be later than will_default - PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) - else: - PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) - - cdef int _pack(self, object o, int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: + cdef long longval + cdef float fval + cdef double dval + cdef char* rawval cdef int ret + cdef dict d + cdef Py_ssize_t L + cdef int default_used = 0 + cdef bint strict_types = self.strict_types + cdef Py_buffer view + if nest_limit < 0: raise ValueError("recursion limit exceeded.") - nest_limit -= 1 - if self._default is not None: - ret = self._pack_inner(o, 1, nest_limit) - if ret == -2: - o = self._default(o) - else: - return ret - return self._pack_inner(o, 0, nest_limit) - @cython.critical_section - def pack(self, object obj): + while True: + if o is None: + ret = msgpack_pack_nil(&self.pk) + elif o is True: + ret = msgpack_pack_true(&self.pk) + elif o is False: + ret = msgpack_pack_false(&self.pk) + elif PyLong_CheckExact(o) if strict_types else PyLong_Check(o): + # 
PyInt_Check(long) is True for Python 3. + # So we should test long before int. + try: + if o > 0: + ullval = o + ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) + else: + llval = o + ret = msgpack_pack_long_long(&self.pk, llval) + except OverflowError as oe: + if not default_used and self._default is not None: + o = self._default(o) + default_used = True + continue + else: + raise OverflowError("Integer value out of range") + elif PyInt_CheckExact(o) if strict_types else PyInt_Check(o): + longval = o + ret = msgpack_pack_long(&self.pk, longval) + elif PyFloat_CheckExact(o) if strict_types else PyFloat_Check(o): + if self.use_float: + fval = o + ret = msgpack_pack_float(&self.pk, fval) + else: + dval = o + ret = msgpack_pack_double(&self.pk, dval) + elif PyBytesLike_CheckExact(o) if strict_types else PyBytesLike_Check(o): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + PyErr_Format(ValueError, b"%.200s object is too large", Py_TYPE(o).tp_name) + rawval = o + ret = msgpack_pack_bin(&self.pk, L) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyUnicode_CheckExact(o) if strict_types else PyUnicode_Check(o): + if self.unicode_errors == NULL: + ret = msgpack_pack_unicode(&self.pk, o, ITEM_LIMIT); + if ret == -2: + raise ValueError("unicode string is too large") + else: + o = PyUnicode_AsEncodedString(o, NULL, self.unicode_errors) + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("unicode string is too large") + ret = msgpack_pack_raw(&self.pk, L) + if ret == 0: + rawval = o + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif PyDict_CheckExact(o): + d = o + L = len(d) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + ret = msgpack_pack_map(&self.pk, L) + if ret == 0: + for k, v in d.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif not strict_types and PyDict_Check(o): + L = len(o) + if L > ITEM_LIMIT: + raise ValueError("dict is too large") + ret = msgpack_pack_map(&self.pk, L) + if ret == 0: + for k, v in o.items(): + ret = self._pack(k, nest_limit-1) + if ret != 0: break + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif type(o) is ExtType if strict_types else isinstance(o, ExtType): + # This should be before Tuple because ExtType is namedtuple. 
+ longval = o.code + rawval = o.data + L = len(o.data) + if L > ITEM_LIMIT: + raise ValueError("EXT data is too large") + ret = msgpack_pack_ext(&self.pk, longval, L) + ret = msgpack_pack_raw_body(&self.pk, rawval, L) + elif type(o) is Timestamp: + llval = o.seconds + ulval = o.nanoseconds + ret = msgpack_pack_timestamp(&self.pk, llval, ulval) + elif PyList_CheckExact(o) if strict_types else (PyTuple_Check(o) or PyList_Check(o)): + L = Py_SIZE(o) + if L > ITEM_LIMIT: + raise ValueError("list is too large") + ret = msgpack_pack_array(&self.pk, L) + if ret == 0: + for v in o: + ret = self._pack(v, nest_limit-1) + if ret != 0: break + elif PyMemoryView_Check(o): + if PyObject_GetBuffer(o, &view, PyBUF_SIMPLE) != 0: + raise ValueError("could not get buffer for memoryview") + L = view.len + if L > ITEM_LIMIT: + PyBuffer_Release(&view); + raise ValueError("memoryview is too large") + ret = msgpack_pack_bin(&self.pk, L) + if ret == 0: + ret = msgpack_pack_raw_body(&self.pk, view.buf, L) + PyBuffer_Release(&view); + elif self.datetime and PyDateTime_CheckExact(o) and datetime_tzinfo(o) is not None: + delta = o - epoch + if not PyDelta_CheckExact(delta): + raise ValueError("failed to calculate delta") + llval = timedelta_days(delta) * (24*60*60) + timedelta_seconds(delta) + ulval = timedelta_microseconds(delta) * 1000 + ret = msgpack_pack_timestamp(&self.pk, llval, ulval) + elif not default_used and self._default: + o = self._default(o) + default_used = 1 + continue + elif self.datetime and PyDateTime_CheckExact(o): + PyErr_Format(ValueError, b"can not serialize '%.200s' object where tzinfo=None", Py_TYPE(o).tp_name) + else: + PyErr_Format(TypeError, b"can not serialize '%.200s' object", Py_TYPE(o).tp_name) + return ret + + cpdef pack(self, object obj): cdef int ret - self._check_exports() try: ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) except: @@ -284,37 +305,36 @@ cdef class Packer: self.pk.length = 0 return buf - @cython.critical_section def pack_ext_type(self, typecode, data): - self._check_exports() - if len(data) > ITEM_LIMIT: - raise ValueError("ext data too large") msgpack_pack_ext(&self.pk, typecode, len(data)) msgpack_pack_raw_body(&self.pk, data, len(data)) - @cython.critical_section def pack_array_header(self, long long size): - self._check_exports() if size > ITEM_LIMIT: - raise ValueError("array too large") - msgpack_pack_array(&self.pk, size) + raise ValueError + cdef int ret = msgpack_pack_array(&self.pk, size) + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf - @cython.critical_section def pack_map_header(self, long long size): - self._check_exports() if size > ITEM_LIMIT: - raise ValueError("map too learge") - msgpack_pack_map(&self.pk, size) + raise ValueError + cdef int ret = msgpack_pack_map(&self.pk, size) + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf - @cython.critical_section def pack_map_pairs(self, object pairs): """ Pack *pairs* as msgpack map type. @@ -322,43 +342,33 @@ cdef class Packer: *pairs* should be a sequence of pairs. (`len(pairs)` and `for k, v in pairs:` should be supported.) 
""" - self._check_exports() - size = len(pairs) - if size > ITEM_LIMIT: - raise ValueError("map too large") - msgpack_pack_map(&self.pk, size) - for k, v in pairs: - self._pack(k) - self._pack(v) + cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) + if ret == 0: + for k, v in pairs: + ret = self._pack(k) + if ret != 0: break + ret = self._pack(v) + if ret != 0: break + if ret == -1: + raise MemoryError + elif ret: # should not happen + raise TypeError if self.autoreset: buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) self.pk.length = 0 return buf - @cython.critical_section def reset(self): """Reset internal buffer. This method is useful only when autoreset=False. """ - self._check_exports() self.pk.length = 0 - @cython.critical_section def bytes(self): """Return internal buffer contents as bytes object""" return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) def getbuffer(self): - """Return memoryview of internal buffer. - - Note: Packer now supports buffer protocol. You can use memoryview(packer). - """ - return memoryview(self) - - def __getbuffer__(self, Py_buffer *buffer, int flags): - PyBuffer_FillInfo(buffer, self, self.pk.buf, self.pk.length, 1, flags) - self.exports += 1 - - def __releasebuffer__(self, Py_buffer *buffer): - self.exports -= 1 + """Return view of internal buffer.""" + return buff_to_buff(self.pk.buf, self.pk.length) diff --git a/msgpack/_unpacker.pyx b/msgpack/_unpacker.pyx index f0cf96d..56126f4 100644 --- a/msgpack/_unpacker.pyx +++ b/msgpack/_unpacker.pyx @@ -1,3 +1,5 @@ +# coding: utf-8 + from cpython cimport * cdef extern from "Python.h": ctypedef struct PyObject @@ -33,7 +35,7 @@ cdef extern from "unpack.h": PyObject* timestamp_t PyObject *giga; PyObject *utc; - const char *unicode_errors + char *unicode_errors Py_ssize_t max_str_len Py_ssize_t max_bin_len Py_ssize_t max_array_len @@ -208,7 +210,7 @@ def unpackb(object packed, *, object object_hook=None, object list_hook=None, raise ValueError("Unpack failed: error = %d" % (ret,)) -cdef class Unpacker: +cdef class Unpacker(object): """Streaming unpacker. Arguments: @@ -322,7 +324,6 @@ cdef class Unpacker: PyMem_Free(self.buf) self.buf = NULL - @cython.critical_section def __init__(self, file_like=None, *, Py_ssize_t read_size=0, bint use_list=True, bint raw=False, int timestamp=0, bint strict_map_key=True, object object_hook=None, object object_pairs_hook=None, object list_hook=None, @@ -383,7 +384,6 @@ cdef class Unpacker: max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) - @cython.critical_section def feed(self, object next_bytes): """Append `next_bytes` to internal buffer.""" cdef Py_buffer pybuff @@ -484,7 +484,6 @@ cdef class Unpacker: else: raise ValueError("Unpack failed: error = %d" % (ret,)) - @cython.critical_section def read_bytes(self, Py_ssize_t nbytes): """Read a specified number of raw bytes from the stream""" cdef Py_ssize_t nread @@ -497,7 +496,6 @@ cdef class Unpacker: self.stream_offset += nread return ret - @cython.critical_section def unpack(self): """Unpack one object @@ -505,7 +503,6 @@ cdef class Unpacker: """ return self._unpack(unpack_construct) - @cython.critical_section def skip(self): """Read and ignore one object, returning None @@ -513,7 +510,6 @@ cdef class Unpacker: """ return self._unpack(unpack_skip) - @cython.critical_section def read_array_header(self): """assuming the next object is an array, return its size n, such that the next n unpack() calls will iterate over its contents. 
@@ -522,7 +518,6 @@ cdef class Unpacker: """ return self._unpack(read_array_header) - @cython.critical_section def read_map_header(self): """assuming the next object is a map, return its size n, such that the next n * 2 unpack() calls will iterate over its key-value pairs. @@ -531,7 +526,6 @@ cdef class Unpacker: """ return self._unpack(read_map_header) - @cython.critical_section def tell(self): """Returns the current position of the Unpacker in bytes, i.e., the number of bytes that were read from the input, also the starting @@ -542,7 +536,6 @@ cdef class Unpacker: def __iter__(self): return self - @cython.critical_section def __next__(self): return self._unpack(unpack_construct, 1) diff --git a/msgpack/buff_converter.h b/msgpack/buff_converter.h new file mode 100644 index 0000000..86b4196 --- /dev/null +++ b/msgpack/buff_converter.h @@ -0,0 +1,8 @@ +#include "Python.h" + +/* cython does not support this preprocessor check => write it in raw C */ +static PyObject * +buff_to_buff(char *buff, Py_ssize_t size) +{ + return PyMemoryView_FromMemory(buff, size, PyBUF_READ); +} diff --git a/msgpack/ext.py b/msgpack/ext.py index 9694819..02c2c43 100644 --- a/msgpack/ext.py +++ b/msgpack/ext.py @@ -1,6 +1,6 @@ +from collections import namedtuple import datetime import struct -from collections import namedtuple class ExtType(namedtuple("ExtType", "code data")): @@ -157,9 +157,7 @@ class Timestamp: :rtype: `datetime.datetime` """ utc = datetime.timezone.utc - return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta( - seconds=self.seconds, microseconds=self.nanoseconds // 1000 - ) + return datetime.datetime.fromtimestamp(0, utc) + datetime.timedelta(seconds=self.to_unix()) @staticmethod def from_datetime(dt): @@ -167,4 +165,4 @@ class Timestamp: :rtype: Timestamp """ - return Timestamp(seconds=int(dt.timestamp()), nanoseconds=dt.microsecond * 1000) + return Timestamp.from_unix(dt.timestamp()) diff --git a/msgpack/fallback.py b/msgpack/fallback.py index b02e47c..a174162 100644 --- a/msgpack/fallback.py +++ b/msgpack/fallback.py @@ -1,22 +1,27 @@ """Fallback pure Python implementation of msgpack""" - -import struct -import sys from datetime import datetime as _DateTime +import sys +import struct + if hasattr(sys, "pypy_version_info"): + # StringIO is slow on PyPy, StringIO is faster. However: PyPy's own + # StringBuilder is fastest. 
from __pypy__ import newlist_hint - from __pypy__.builders import BytesBuilder - _USING_STRINGBUILDER = True + try: + from __pypy__.builders import BytesBuilder as StringBuilder + except ImportError: + from __pypy__.builders import StringBuilder + USING_STRINGBUILDER = True - class BytesIO: + class StringIO: def __init__(self, s=b""): if s: - self.builder = BytesBuilder(len(s)) + self.builder = StringBuilder(len(s)) self.builder.append(s) else: - self.builder = BytesBuilder() + self.builder = StringBuilder() def write(self, s): if isinstance(s, memoryview): @@ -29,17 +34,17 @@ if hasattr(sys, "pypy_version_info"): return self.builder.build() else: - from io import BytesIO + USING_STRINGBUILDER = False + from io import BytesIO as StringIO - _USING_STRINGBUILDER = False - - def newlist_hint(size): - return [] + newlist_hint = lambda size: [] -from .exceptions import BufferFull, ExtraData, FormatError, OutOfData, StackError +from .exceptions import BufferFull, OutOfData, ExtraData, FormatError, StackError + from .ext import ExtType, Timestamp + EX_SKIP = 0 EX_CONSTRUCT = 1 EX_READ_ARRAY_HEADER = 2 @@ -226,7 +231,6 @@ class Unpacker: def __init__( self, file_like=None, - *, read_size=0, use_list=True, raw=False, @@ -329,7 +333,6 @@ class Unpacker: # Use extend here: INPLACE_ADD += doesn't reliably typecast memoryview in jython self._buffer.extend(view) - view.release() def _consume(self): """Gets rid of the used parts of the buffer.""" @@ -646,13 +649,32 @@ class Packer: The error handler for encoding unicode. (default: 'strict') DO NOT USE THIS!! This option is kept for very specific usage. - :param int buf_size: - Internal buffer size. This option is used only for C implementation. + Example of streaming deserialize from file-like object:: + + unpacker = Unpacker(file_like) + for o in unpacker: + process(o) + + Example of streaming deserialize from socket:: + + unpacker = Unpacker() + while True: + buf = sock.recv(1024**2) + if not buf: + break + unpacker.feed(buf) + for o in unpacker: + process(o) + + Raises ``ExtraData`` when *packed* contains extra bytes. + Raises ``OutOfData`` when *packed* is incomplete. + Raises ``FormatError`` when *packed* is not valid msgpack. + Raises ``StackError`` when *packed* contains too nested. + Other exceptions can be raised during unpacking. 
""" def __init__( self, - *, default=None, use_single_float=False, autoreset=True, @@ -660,17 +682,17 @@ class Packer: strict_types=False, datetime=False, unicode_errors=None, - buf_size=None, ): self._strict_types = strict_types self._use_float = use_single_float self._autoreset = autoreset self._use_bin_type = use_bin_type - self._buffer = BytesIO() + self._buffer = StringIO() self._datetime = bool(datetime) self._unicode_errors = unicode_errors or "strict" - if default is not None and not callable(default): - raise TypeError("default must be callable") + if default is not None: + if not callable(default): + raise TypeError("default must be callable") self._default = default def _pack( @@ -801,18 +823,18 @@ class Packer: try: self._pack(obj) except: - self._buffer = BytesIO() # force reset + self._buffer = StringIO() # force reset raise if self._autoreset: ret = self._buffer.getvalue() - self._buffer = BytesIO() + self._buffer = StringIO() return ret def pack_map_pairs(self, pairs): self._pack_map_pairs(len(pairs), pairs) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = BytesIO() + self._buffer = StringIO() return ret def pack_array_header(self, n): @@ -821,7 +843,7 @@ class Packer: self._pack_array_header(n) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = BytesIO() + self._buffer = StringIO() return ret def pack_map_header(self, n): @@ -830,7 +852,7 @@ class Packer: self._pack_map_header(n) if self._autoreset: ret = self._buffer.getvalue() - self._buffer = BytesIO() + self._buffer = StringIO() return ret def pack_ext_type(self, typecode, data): @@ -919,11 +941,11 @@ class Packer: This method is useful only when autoreset=False. """ - self._buffer = BytesIO() + self._buffer = StringIO() def getbuffer(self): """Return view of internal buffer.""" - if _USING_STRINGBUILDER: + if USING_STRINGBUILDER: return memoryview(self.bytes()) else: return self._buffer.getbuffer() diff --git a/msgpack/pack.h b/msgpack/pack.h index edf3a3f..1e849ac 100644 --- a/msgpack/pack.h +++ b/msgpack/pack.h @@ -21,12 +21,15 @@ #include "sysdep.h" #include #include -#include #ifdef __cplusplus extern "C" { #endif +#ifdef _MSC_VER +#define inline __inline +#endif + typedef struct msgpack_packer { char *buf; size_t length; @@ -64,6 +67,27 @@ static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_ #include "pack_template.h" +// return -2 when o is too long +static inline int +msgpack_pack_unicode(msgpack_packer *pk, PyObject *o, long long limit) +{ + assert(PyUnicode_Check(o)); + + Py_ssize_t len; + const char* buf = PyUnicode_AsUTF8AndSize(o, &len); + if (buf == NULL) + return -1; + + if (len > limit) { + return -2; + } + + int ret = msgpack_pack_raw(pk, len); + if (ret) return ret; + + return msgpack_pack_raw_body(pk, buf, len); +} + #ifdef __cplusplus } #endif diff --git a/msgpack/pack_template.h b/msgpack/pack_template.h index b8959f0..7d479b6 100644 --- a/msgpack/pack_template.h +++ b/msgpack/pack_template.h @@ -37,6 +37,18 @@ * Integer */ +#define msgpack_pack_real_uint8(x, d) \ +do { \ + if(d < (1<<7)) { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ + } else { \ + /* unsigned 8 */ \ + unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } \ +} while(0) + #define msgpack_pack_real_uint16(x, d) \ do { \ if(d < (1<<7)) { \ @@ -111,6 +123,18 @@ do { \ } \ } while(0) +#define msgpack_pack_real_int8(x, d) \ +do { \ + if(d < -(1<<5)) { \ + /* signed 8 */ \ + unsigned char buf[2] = {0xd0, 
TAKE8_8(d)}; \ + msgpack_pack_append_buffer(x, buf, 2); \ + } else { \ + /* fixnum */ \ + msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ + } \ +} while(0) + #define msgpack_pack_real_int16(x, d) \ do { \ if(d < -(1<<5)) { \ @@ -240,6 +264,49 @@ do { \ } while(0) +static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d) +{ + msgpack_pack_real_uint8(x, d); +} + +static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d) +{ + msgpack_pack_real_uint16(x, d); +} + +static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d) +{ + msgpack_pack_real_uint32(x, d); +} + +static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d) +{ + msgpack_pack_real_uint64(x, d); +} + +static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d) +{ + msgpack_pack_real_int8(x, d); +} + +static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d) +{ + msgpack_pack_real_int16(x, d); +} + +static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d) +{ + msgpack_pack_real_int32(x, d); +} + +static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d) +{ + msgpack_pack_real_int64(x, d); +} + + +//#ifdef msgpack_pack_inline_func_cint + static inline int msgpack_pack_short(msgpack_packer* x, short d) { #if defined(SIZEOF_SHORT) @@ -305,37 +372,192 @@ if(sizeof(int) == 2) { static inline int msgpack_pack_long(msgpack_packer* x, long d) { #if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 4 +#if SIZEOF_LONG == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_LONG == 4 msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #elif defined(LONG_MAX) -#if LONG_MAX == 0x7fffffffL +#if LONG_MAX == 0x7fffL + msgpack_pack_real_int16(x, d); +#elif LONG_MAX == 0x7fffffffL msgpack_pack_real_int32(x, d); #else msgpack_pack_real_int64(x, d); #endif #else - if (sizeof(long) == 4) { - msgpack_pack_real_int32(x, d); - } else { - msgpack_pack_real_int64(x, d); - } +if(sizeof(long) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(long) == 4) { + msgpack_pack_real_int32(x, d); +} else { + msgpack_pack_real_int64(x, d); +} #endif } static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) { +#if defined(SIZEOF_LONG_LONG) +#if SIZEOF_LONG_LONG == 2 + msgpack_pack_real_int16(x, d); +#elif SIZEOF_LONG_LONG == 4 + msgpack_pack_real_int32(x, d); +#else msgpack_pack_real_int64(x, d); +#endif + +#elif defined(LLONG_MAX) +#if LLONG_MAX == 0x7fffL + msgpack_pack_real_int16(x, d); +#elif LLONG_MAX == 0x7fffffffL + msgpack_pack_real_int32(x, d); +#else + msgpack_pack_real_int64(x, d); +#endif + +#else +if(sizeof(long long) == 2) { + msgpack_pack_real_int16(x, d); +} else if(sizeof(long long) == 4) { + msgpack_pack_real_int32(x, d); +} else { + msgpack_pack_real_int64(x, d); +} +#endif +} + +static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d) +{ +#if defined(SIZEOF_SHORT) +#if SIZEOF_SHORT == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_SHORT == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(USHRT_MAX) +#if USHRT_MAX == 0xffffU + msgpack_pack_real_uint16(x, d); +#elif USHRT_MAX == 0xffffffffU + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned short) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned short) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif +} + +static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d) +{ 
+#if defined(SIZEOF_INT) +#if SIZEOF_INT == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_INT == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(UINT_MAX) +#if UINT_MAX == 0xffffU + msgpack_pack_real_uint16(x, d); +#elif UINT_MAX == 0xffffffffU + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned int) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned int) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif +} + +static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d) +{ +#if defined(SIZEOF_LONG) +#if SIZEOF_LONG == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_LONG == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(ULONG_MAX) +#if ULONG_MAX == 0xffffUL + msgpack_pack_real_uint16(x, d); +#elif ULONG_MAX == 0xffffffffUL + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned long) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned long) == 4) { + msgpack_pack_real_uint32(x, d); +} else { + msgpack_pack_real_uint64(x, d); +} +#endif } static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d) { +#if defined(SIZEOF_LONG_LONG) +#if SIZEOF_LONG_LONG == 2 + msgpack_pack_real_uint16(x, d); +#elif SIZEOF_LONG_LONG == 4 + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#elif defined(ULLONG_MAX) +#if ULLONG_MAX == 0xffffUL + msgpack_pack_real_uint16(x, d); +#elif ULLONG_MAX == 0xffffffffUL + msgpack_pack_real_uint32(x, d); +#else + msgpack_pack_real_uint64(x, d); +#endif + +#else +if(sizeof(unsigned long long) == 2) { + msgpack_pack_real_uint16(x, d); +} else if(sizeof(unsigned long long) == 4) { + msgpack_pack_real_uint32(x, d); +} else { msgpack_pack_real_uint64(x, d); } +#endif +} + +//#undef msgpack_pack_inline_func_cint +//#endif + /* @@ -588,9 +810,11 @@ static inline int msgpack_pack_timestamp(msgpack_packer* x, int64_t seconds, uin #undef TAKE8_32 #undef TAKE8_64 +#undef msgpack_pack_real_uint8 #undef msgpack_pack_real_uint16 #undef msgpack_pack_real_uint32 #undef msgpack_pack_real_uint64 +#undef msgpack_pack_real_int8 #undef msgpack_pack_real_int16 #undef msgpack_pack_real_int32 #undef msgpack_pack_real_int64 diff --git a/msgpack/unpack.h b/msgpack/unpack.h index 58a2f4f..23aa622 100644 --- a/msgpack/unpack.h +++ b/msgpack/unpack.h @@ -47,7 +47,7 @@ static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromLong((long)d); + PyObject *p = PyInt_FromLong((long)d); if (!p) return -1; *o = p; @@ -61,7 +61,7 @@ static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpac static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromSize_t((size_t)d); + PyObject *p = PyInt_FromSize_t((size_t)d); if (!p) return -1; *o = p; @@ -74,7 +74,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unp if (d > LONG_MAX) { p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); } else { - p = PyLong_FromLong((long)d); + p = PyInt_FromLong((long)d); } if (!p) return -1; @@ -84,7 +84,7 @@ static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, 
msgpack_unp static inline int unpack_callback_int32(unpack_user* u, int32_t d, msgpack_unpack_object* o) { - PyObject *p = PyLong_FromLong(d); + PyObject *p = PyInt_FromLong(d); if (!p) return -1; *o = p; @@ -107,7 +107,7 @@ static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpac if (d > LONG_MAX || d < LONG_MIN) { p = PyLong_FromLongLong((PY_LONG_LONG)d); } else { - p = PyLong_FromLong((long)d); + p = PyInt_FromLong((long)d); } *o = p; return 0; diff --git a/msgpack/unpack_container_header.h b/msgpack/unpack_container_header.h deleted file mode 100644 index c14a3c2..0000000 --- a/msgpack/unpack_container_header.h +++ /dev/null @@ -1,51 +0,0 @@ -static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) -{ - assert(len >= *off); - uint32_t size; - const unsigned char *const p = (unsigned char*)data + *off; - -#define inc_offset(inc) \ - if (len - *off < inc) \ - return 0; \ - *off += inc; - - switch (*p) { - case var_offset: - inc_offset(3); - size = _msgpack_load16(uint16_t, p + 1); - break; - case var_offset + 1: - inc_offset(5); - size = _msgpack_load32(uint32_t, p + 1); - break; -#ifdef USE_CASE_RANGE - case fixed_offset + 0x0 ... fixed_offset + 0xf: -#else - case fixed_offset + 0x0: - case fixed_offset + 0x1: - case fixed_offset + 0x2: - case fixed_offset + 0x3: - case fixed_offset + 0x4: - case fixed_offset + 0x5: - case fixed_offset + 0x6: - case fixed_offset + 0x7: - case fixed_offset + 0x8: - case fixed_offset + 0x9: - case fixed_offset + 0xa: - case fixed_offset + 0xb: - case fixed_offset + 0xc: - case fixed_offset + 0xd: - case fixed_offset + 0xe: - case fixed_offset + 0xf: -#endif - ++*off; - size = ((unsigned int)*p) & 0x0f; - break; - default: - PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); - return -1; - } - unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); - return 1; -} - diff --git a/msgpack/unpack_template.h b/msgpack/unpack_template.h index cce29e7..8b9fcc1 100644 --- a/msgpack/unpack_template.h +++ b/msgpack/unpack_template.h @@ -75,7 +75,8 @@ static inline void unpack_clear(unpack_context *ctx) Py_CLEAR(ctx->stack[0].obj); } -static inline int unpack_execute(bool construct, unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +template +static inline int unpack_execute(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) { assert(len >= *off); @@ -385,7 +386,6 @@ _end: #undef construct_cb } -#undef NEXT_CS #undef SWITCH_RANGE_BEGIN #undef SWITCH_RANGE #undef SWITCH_RANGE_DEFAULT @@ -397,27 +397,68 @@ _end: #undef again_fixed_trail_if_zero #undef start_container -static int unpack_construct(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { - return unpack_execute(1, ctx, data, len, off); -} -static int unpack_skip(unpack_context *ctx, const char *data, Py_ssize_t len, Py_ssize_t *off) { - return unpack_execute(0, ctx, data, len, off); +template +static inline int unpack_container_header(unpack_context* ctx, const char* data, Py_ssize_t len, Py_ssize_t* off) +{ + assert(len >= *off); + uint32_t size; + const unsigned char *const p = (unsigned char*)data + *off; + +#define inc_offset(inc) \ + if (len - *off < inc) \ + return 0; \ + *off += inc; + + switch (*p) { + case var_offset: + inc_offset(3); + size = _msgpack_load16(uint16_t, p + 1); + break; + case var_offset + 1: + inc_offset(5); + size = _msgpack_load32(uint32_t, p + 1); + break; +#ifdef USE_CASE_RANGE + case fixed_offset + 0x0 ... 
diff --git a/pyproject.toml b/pyproject.toml
index c69d5a7..a63009a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,23 +1,35 @@
 [build-system]
-requires = ["setuptools >= 78.1.1"]
+requires = [
+    # Also declared in requirements.txt, if updating here please also update
+    # there
+    "Cython~=3.0.0",
+    "setuptools >= 35.0.2",
+]
 build-backend = "setuptools.build_meta"
 
 [project]
 name = "msgpack"
 dynamic = ["version"]
-license = "Apache-2.0"
+license = {text="Apache 2.0"}
 authors = [{name="Inada Naoki", email="songofacandy@gmail.com"}]
 description = "MessagePack serializer"
 readme = "README.md"
 keywords = ["msgpack", "messagepack", "serializer", "serialization", "binary"]
-requires-python = ">=3.10"
+requires-python = ">=3.8"
 classifiers = [
     "Development Status :: 5 - Production/Stable",
     "Operating System :: OS Independent",
-    "Topic :: File Formats",
-    "Intended Audience :: Developers",
+    "Programming Language :: Python",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: Implementation :: CPython",
     "Programming Language :: Python :: Implementation :: PyPy",
+    "Intended Audience :: Developers",
+    "License :: OSI Approved :: Apache Software License",
 ]
 
 [project.urls]
@@ -27,19 +39,20 @@ Repository = "https://github.com/msgpack/msgpack-python/"
 Tracker = "https://github.com/msgpack/msgpack-python/issues"
 Changelog = "https://github.com/msgpack/msgpack-python/blob/main/ChangeLog.rst"
 
-[tool.setuptools]
-# Do not install C/C++/Cython source files
-include-package-data = false
-
 [tool.setuptools.dynamic]
 version = {attr = "msgpack.__version__"}
 
+[tool.black]
+line-length = 100
+target-version = ["py37"]
+skip_string_normalization = true
+
 [tool.ruff]
 line-length = 100
-target-version = "py310"
-lint.select = [
-    "E",  # pycodestyle
-    "F",  # Pyflakes
-    "I",  # isort
-    #"UP", pyupgrade
-]
+target-version = "py38"
+ignore = []
+
+[tool.ruff.per-file-ignores]
+"msgpack/__init__.py" = ["F401", "F403"]
+"msgpack/fallback.py" = ["E731"]
+"test/test_seq.py" = ["E501"]
diff --git a/requirements.txt b/requirements.txt
index 2f1c55b..e27df0f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,7 @@
-Cython==3.1.4
-setuptools==78.1.1
-build
+# Also declared in pyproject.toml, if updating here please also update there.
+Cython~=3.0.0
+
+# Tools required only for development. No need to add it to pyproject.toml file.
+black==23.3.0
+pytest==7.3.1
+pyupgrade==3.3.2
diff --git a/setup.py b/setup.py
old mode 100644
new mode 100755
index 4029e9e..7a34c8c
--- a/setup.py
+++ b/setup.py
@@ -1,24 +1,81 @@
 #!/usr/bin/env python
 import os
 import sys
+from setuptools import setup, Extension
+from setuptools.command.build_ext import build_ext
+from setuptools.command.sdist import sdist
 
-from setuptools import Extension, setup
 
 PYPY = hasattr(sys, "pypy_version_info")
+
+
+class NoCython(Exception):
+    pass
+
+
+try:
+    import Cython.Compiler.Main as cython_compiler
+
+    have_cython = True
+except ImportError:
+    have_cython = False
+
+
+def cythonize(src):
+    sys.stderr.write(f"cythonize: {src!r}\n")
+    cython_compiler.compile([src], cplus=True)
+
+
+def ensure_source(src):
+    pyx = os.path.splitext(src)[0] + ".pyx"
+
+    if not os.path.exists(src):
+        if not have_cython:
+            raise NoCython
+        cythonize(pyx)
+    elif os.path.exists(pyx) and os.stat(src).st_mtime < os.stat(pyx).st_mtime and have_cython:
+        cythonize(pyx)
+    return src
+
+
+class BuildExt(build_ext):
+    def build_extension(self, ext):
+        try:
+            ext.sources = list(map(ensure_source, ext.sources))
+        except NoCython:
+            print("WARNING")
+            print("Cython is required for building extension from checkout.")
+            print("Install Cython >= 0.16 or install msgpack from PyPI.")
+            print("Falling back to pure Python implementation.")
+            return
+        try:
+            return build_ext.build_extension(self, ext)
+        except Exception as e:
+            print("WARNING: Failed to compile extension modules.")
+            print("msgpack uses fallback pure python implementation.")
+            print(e)
+
+
+# Cython is required for sdist
+class Sdist(sdist):
+    def __init__(self, *args, **kwargs):
+        cythonize("msgpack/_cmsgpack.pyx")
+        sdist.__init__(self, *args, **kwargs)
+
+
 libraries = []
 macros = []
-ext_modules = []
 
 if sys.platform == "win32":
     libraries.append("ws2_32")
     macros = [("__LITTLE_ENDIAN__", "1")]
 
+ext_modules = []
 if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"):
     ext_modules.append(
         Extension(
             "msgpack._cmsgpack",
-            sources=["msgpack/_cmsgpack.c"],
+            sources=["msgpack/_cmsgpack.cpp"],
             libraries=libraries,
             include_dirs=["."],
             define_macros=macros,
@@ -26,7 +83,9 @@ if not PYPY and not os.environ.get("MSGPACK_PUREPYTHON"):
     )
 del libraries, macros
 
+
 setup(
+    cmdclass={"build_ext": BuildExt, "sdist": Sdist},
     ext_modules=ext_modules,
     packages=["msgpack"],
 )
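Note: with the setup.py shown above, a missing Cython or a failed compile is not fatal — BuildExt prints a warning and the package falls back to the pure-Python implementation, and setting MSGPACK_PUREPYTHON skips the extension entirely. Assuming the package layout used throughout this patch (accelerated classes in msgpack._cmsgpack, fallback code in msgpack/fallback.py), one quick way to see which variant an installed msgpack is running:

    import msgpack

    # "msgpack._cmsgpack" -> the C/C++ extension was built and imported;
    # "msgpack.fallback"  -> the pure-Python implementation is in use.
    print(msgpack.Packer.__module__)
    print(msgpack.Unpacker.__module__)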
diff --git a/test/test_buffer.py b/test/test_buffer.py
index ca09722..a3db339 100644
--- a/test/test_buffer.py
+++ b/test/test_buffer.py
@@ -1,6 +1,6 @@
-from pytest import raises
+#!/usr/bin/env python
 
-from msgpack import Packer, packb, unpackb
+from msgpack import packb, unpackb
 
 
 def test_unpack_buffer():
@@ -17,7 +17,7 @@ def test_unpack_bytearray():
     obj = unpackb(buf, use_list=1)
     assert [b"foo", b"bar"] == obj
     expected_type = bytes
-    assert all(type(s) is expected_type for s in obj)
+    assert all(type(s) == expected_type for s in obj)
 
 
 def test_unpack_memoryview():
@@ -26,24 +26,4 @@ def test_unpack_memoryview():
     obj = unpackb(view, use_list=1)
     assert [b"foo", b"bar"] == obj
     expected_type = bytes
-    assert all(type(s) is expected_type for s in obj)
-
-
-def test_packer_getbuffer():
-    packer = Packer(autoreset=False)
-    packer.pack_array_header(2)
-    packer.pack(42)
-    packer.pack("hello")
-    buffer = packer.getbuffer()
-    assert isinstance(buffer, memoryview)
-    assert bytes(buffer) == b"\x92*\xa5hello"
-
-    if Packer.__module__ == "msgpack._cmsgpack":  # only for Cython
-        # cython Packer supports buffer protocol directly
-        assert bytes(packer) == b"\x92*\xa5hello"
-
-        with raises(BufferError):
-            packer.pack(42)
-
-        buffer.release()
-        packer.pack(42)
-        assert bytes(packer) == b"\x92*\xa5hello*"
+    assert all(type(s) == expected_type for s in obj)
diff --git a/test/test_except.py b/test/test_except.py
index b77ac80..8c0a976 100644
--- a/test/test_except.py
+++ b/test/test_except.py
@@ -1,10 +1,9 @@
 #!/usr/bin/env python
-import datetime
-
 from pytest import raises
+from msgpack import packb, unpackb, Unpacker, FormatError, StackError, OutOfData
 
-from msgpack import FormatError, OutOfData, StackError, Unpacker, packb, unpackb
+import datetime
 
 
 class DummyException(Exception):
diff --git a/test/test_extension.py b/test/test_extension.py
index aaf0fd9..9e5e6aa 100644
--- a/test/test_extension.py
+++ b/test/test_extension.py
@@ -1,5 +1,4 @@
 import array
-
 import msgpack
 from msgpack import ExtType
diff --git a/test/test_limits.py b/test/test_limits.py
index 9b92b4d..533bc11 100644
--- a/test/test_limits.py
+++ b/test/test_limits.py
@@ -2,14 +2,14 @@
 import pytest
 
 from msgpack import (
-    ExtType,
-    Packer,
-    PackOverflowError,
-    PackValueError,
-    Unpacker,
-    UnpackValueError,
     packb,
     unpackb,
+    Packer,
+    Unpacker,
+    ExtType,
+    PackOverflowError,
+    PackValueError,
+    UnpackValueError,
 )
 
 
diff --git a/test/test_memoryview.py b/test/test_memoryview.py
index 0a2a6f5..dc319a6 100644
--- a/test/test_memoryview.py
+++ b/test/test_memoryview.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 
 from array import array
-
 from msgpack import packb, unpackb
 
 
@@ -96,4 +95,4 @@ def test_multidim_memoryview():
     view = memoryview(b"\00" * 6)
     data = view.cast(view.format, (3, 2))
     packed = packb(data)
-    assert packed == b"\xc4\x06\x00\x00\x00\x00\x00\x00"
+    assert packed == b'\xc4\x06\x00\x00\x00\x00\x00\x00'
diff --git a/test/test_newspec.py b/test/test_newspec.py
index 9e2f9be..a6f4251 100644
--- a/test/test_newspec.py
+++ b/test/test_newspec.py
@@ -1,4 +1,4 @@
-from msgpack import ExtType, packb, unpackb
+from msgpack import packb, unpackb, ExtType
 
 
 def test_str8():
diff --git a/test/test_obj.py b/test/test_obj.py
index 23be06d..f78bf42 100644
--- a/test/test_obj.py
+++ b/test/test_obj.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 
 from pytest import raises
-
 from msgpack import packb, unpackb
diff --git a/test/test_pack.py b/test/test_pack.py
index 374d154..4232537 100644
--- a/test/test_pack.py
+++ b/test/test_pack.py
@@ -1,12 +1,12 @@
 #!/usr/bin/env python
 
-import struct
 from collections import OrderedDict
 from io import BytesIO
+import struct
 
 import pytest
 
-from msgpack import Packer, Unpacker, packb, unpackb
+from msgpack import packb, unpackb, Unpacker, Packer
 
 
 def check(data, use_list=False):
@@ -89,7 +89,7 @@ def testStrictUnicodeUnpack():
 
 def testIgnoreErrorsPack():
     re = unpackb(
-        packb("abc\udc80\udcffdef", use_bin_type=True, unicode_errors="ignore"),
+        packb("abc\uDC80\uDCFFdef", use_bin_type=True, unicode_errors="ignore"),
         raw=False,
         use_list=1,
     )
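Note: the testIgnoreErrorsPack hunk only changes the case of the surrogate escapes (\udc80/\udcff versus \uDC80/\uDCFF — the same code points); the behavior under test is that unicode_errors="ignore" drops lone surrogates that cannot be encoded, so the round trip comes back without them:

    from msgpack import packb, unpackb

    packed = packb("abc\udc80\udcffdef", use_bin_type=True, unicode_errors="ignore")
    assert unpackb(packed, raw=False, use_list=1) == "abcdef"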
diff --git a/test/test_read_size.py b/test/test_read_size.py
index 0f6c1b5..33a7e7d 100644
--- a/test/test_read_size.py
+++ b/test/test_read_size.py
@@ -1,6 +1,5 @@
 """Test Unpacker's read_array_header and read_map_header methods"""
-
-from msgpack import OutOfData, Unpacker, packb
+from msgpack import packb, Unpacker, OutOfData
 
 UnexpectedTypeException = ValueError
diff --git a/test/test_seq.py b/test/test_seq.py
index 8dee462..16d9dde 100644
--- a/test/test_seq.py
+++ b/test/test_seq.py
@@ -1,9 +1,9 @@
-# ruff: noqa: E501
-# ignore line length limit for long comments
-import io
+#!/usr/bin/env python
 
+import io
 import msgpack
 
+
 binarydata = bytes(bytearray(range(256)))
diff --git a/test/test_sequnpack.py b/test/test_sequnpack.py
index 0f895d7..6b138aa 100644
--- a/test/test_sequnpack.py
+++ b/test/test_sequnpack.py
@@ -1,10 +1,9 @@
 #!/usr/bin/env python
 import io
-
-from pytest import raises
-
-from msgpack import BufferFull, Unpacker, pack, packb
+from msgpack import Unpacker, BufferFull
+from msgpack import pack, packb
 from msgpack.exceptions import OutOfData
+from pytest import raises
 
 
 def test_partialdata():
diff --git a/test/test_stricttype.py b/test/test_stricttype.py
index 72776a2..9ffaff2 100644
--- a/test/test_stricttype.py
+++ b/test/test_stricttype.py
@@ -1,6 +1,5 @@
 from collections import namedtuple
-
-from msgpack import ExtType, packb, unpackb
+from msgpack import packb, unpackb, ExtType
 
 
 def test_namedtuple():
diff --git a/test/test_subtype.py b/test/test_subtype.py
index a911578..0d1c41a 100644
--- a/test/test_subtype.py
+++ b/test/test_subtype.py
@@ -1,8 +1,7 @@
 #!/usr/bin/env python
 
-from collections import namedtuple
-
 from msgpack import packb
+from collections import namedtuple
 
 
 class MyList(list):
diff --git a/test/test_timestamp.py b/test/test_timestamp.py
index 831141a..db5cc57 100644
--- a/test/test_timestamp.py
+++ b/test/test_timestamp.py
@@ -1,7 +1,5 @@
-import datetime
-
 import pytest
-
+import datetime
 import msgpack
 from msgpack.ext import Timestamp
 
@@ -88,21 +86,6 @@ def test_timestamp_datetime():
     utc = datetime.timezone.utc
     assert t.to_datetime() == datetime.datetime(1970, 1, 1, 0, 0, 42, 0, tzinfo=utc)
 
-    ts = datetime.datetime(2024, 4, 16, 8, 43, 9, 420317, tzinfo=utc)
-    ts2 = datetime.datetime(2024, 4, 16, 8, 43, 9, 420318, tzinfo=utc)
-
-    assert (
-        Timestamp.from_datetime(ts2).nanoseconds - Timestamp.from_datetime(ts).nanoseconds == 1000
-    )
-
-    ts3 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4256)
-    ts4 = datetime.datetime(2024, 4, 16, 8, 43, 9, 4257)
-    assert (
-        Timestamp.from_datetime(ts4).nanoseconds - Timestamp.from_datetime(ts3).nanoseconds == 1000
-    )
-
-    assert Timestamp.from_datetime(ts).to_datetime() == ts
-
 
 def test_unpack_datetime():
     t = Timestamp(42, 14)
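Note: the assertions removed from test_timestamp.py cover Timestamp.from_datetime(): a one-microsecond difference between datetimes maps to exactly 1000 nanoseconds, and an aware datetime round-trips through to_datetime(). A condensed version of what those deleted lines checked:

    import datetime
    from msgpack.ext import Timestamp

    utc = datetime.timezone.utc
    ts = datetime.datetime(2024, 4, 16, 8, 43, 9, 420317, tzinfo=utc)
    ts2 = datetime.datetime(2024, 4, 16, 8, 43, 9, 420318, tzinfo=utc)

    # one microsecond of datetime resolution == 1000 nanoseconds of Timestamp resolution
    assert Timestamp.from_datetime(ts2).nanoseconds - Timestamp.from_datetime(ts).nanoseconds == 1000
    assert Timestamp.from_datetime(ts).to_datetime() == ts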
diff --git a/test/test_unpack.py b/test/test_unpack.py
index b17c3c5..bf3f960 100644
--- a/test/test_unpack.py
+++ b/test/test_unpack.py
@@ -1,9 +1,12 @@
-import sys
 from io import BytesIO
+import sys
+from msgpack import Unpacker, packb, OutOfData, ExtType
+from pytest import raises, mark
 
-from pytest import mark, raises
-
-from msgpack import ExtType, OutOfData, Unpacker, packb
+try:
+    from itertools import izip as zip
+except ImportError:
+    pass
 
 
 def test_unpack_array_header_from_file():
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..369eddc
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,38 @@
+[tox]
+envlist =
+    {py35,py36,py37,py38}-{c,pure},
+    {pypy,pypy3}-pure,
+    py34-x86,
+    sphinx,
+isolated_build = true
+
+[testenv]
+deps=
+    pytest
+
+changedir=test
+commands=
+    c,x86: python -c 'from msgpack import _cmsgpack'
+    c,x86: py.test
+    pure: py.test
+setenv=
+    pure: MSGPACK_PUREPYTHON=x
+
+[testenv:py34-x86]
+basepython=python3.4-x86
+deps=
+    pytest
+
+changedir=test
+commands=
+    python -c 'import sys; print(hex(sys.maxsize))'
+    python -c 'from msgpack import _cmsgpack'
+    py.test
+
+
+[testenv:sphinx]
+changedir = docs
+deps =
+    sphinx
+commands =
+    sphinx-build -n -v -W --keep-going -b html -d {envtmpdir}/doctrees . {envtmpdir}/html