cpython/Modules/_zstd/clinic/zdict.c.h
Emma Smith 3b4333583f
gh-132983: Introduce _zstd bindings module (GH-133027)
* Add _zstd module for https://peps.python.org/pep-0784/

This commit introduces the `_zstd` module, with bindings to libzstd from
the pyzstd project. It also includes the unix build system configuration.
Windows build system support will be integrated independently as it
depends on integration with cpython-source-deps.

* Add _zstd to modules

* Fix path for compression.zstd module

* Ignore _zstd module like _io

* Expand module state macros to improve code quality

Also removes module state references from the classes in the _zstd
module and instead uses PyType_GetModuleState()

* Remove backticks suggested in review

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>

* Use critical sections to lock object state

This should avoid races and deadlocks.

* Remove compress/decompress and mark module as not reliant on the GIL

The `compress`/`decompress` functions will be moved to Python code for simplicity.
C implementations can always be re-added in the future.

Also, mark _zstd as not requiring the GIL.

* Lift critical section to avoid clang warning

* Respond to comments by picnixz

* Call out pyzstd explicitly in license description

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>

* Use a much more robust implementation...

... for `get_zstd_state_from_type`

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>

* Use PyList_GetItemRef for thread safety purposes

* Use a macro for the minimum supported version

* remove const from primivite types

* Use PyMem_New in another spot

* Simplify error handling in _get_frame_size

* Another simplification of error handling in get_frame_info

* Rename _module_state to mod_state

* Rewrite comment explaining the context of the code

* Add link to pyzstd

* Add TODO about refactoring dict training code

* Use PyModule_AddObjectRef over PyModule_AddObject

PyModule_AddObject is soft-deprecated, so we should use PyModule_AddObjectRef

* Check result of OutputBufferGrow

* Simplify return logic in `add_constant_to_type`

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>

* Ignore return value of _zstd_clear()

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>

* Remove redundant comments

* Remove __reduce__ from ZstdDict

We should instead document that to pickle a dictionary a user should use
the `.dict_content` attribute.

* Use PyUnicode_FromFormat instead of a buffer

* Don't use C constants/types in error messages

* Make error messages easier to understand for Python users

* Lower minimum required version 1.4.0

* Use casts and make slot function signatures correct

* Be consistent with CPython on const usage

* Make else clauses in line with PEP 7

* Fix over-indented blocks in argument clinic

* Add critical section around ZSTD_DCtx_setParameter

* Add a TODO about refactoring critical sections

* Use Py_UNREACHABLE

* Move bytes operations out of Py_BEGIN_ALLOW_THREADS

* Add TODO about ensuring a lock is held

* Remove asserts that may not be correct

* Add TODO to make ZstdDict and others GC objects

* Make objects GC tracked

* Remove unused include

* Fix some memory issues

* Fix refleaks on module and in ZstdDict

* Update configure to check for ZDICT_finalizeDictionary

* Properly check version in configure

* exit(1) if check fails

* Use AC_RUN_IFELSE

* Use a define() to re-use version check

* Actually properly set _zstd module status based on version

---------

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
2025-05-04 01:29:55 +00:00

207 lines
7.7 KiB
C
Generated

/*[clinic input]
preserve
[clinic start generated code]*/
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
# include "pycore_gc.h" // PyGC_Head
# include "pycore_runtime.h" // _Py_ID()
#endif
#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION()
#include "pycore_modsupport.h" // _PyArg_UnpackKeywords()
PyDoc_STRVAR(_zstd_ZstdDict___init____doc__,
"ZstdDict(dict_content, is_raw=False)\n"
"--\n"
"\n"
"Represents a zstd dictionary, which can be used for compression/decompression.\n"
"\n"
" dict_content\n"
" A bytes-like object, dictionary\'s content.\n"
" is_raw\n"
" This parameter is for advanced user. True means dict_content\n"
" argument is a \"raw content\" dictionary, free of any format\n"
" restriction. False means dict_content argument is an ordinary\n"
" zstd dictionary, was created by zstd functions, follow a\n"
" specified format.\n"
"\n"
"It\'s thread-safe, and can be shared by multiple ZstdCompressor /\n"
"ZstdDecompressor objects.");
static int
_zstd_ZstdDict___init___impl(ZstdDict *self, PyObject *dict_content,
int is_raw);
static int
_zstd_ZstdDict___init__(PyObject *self, PyObject *args, PyObject *kwargs)
{
int return_value = -1;
#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE)
#define NUM_KEYWORDS 2
static struct {
PyGC_Head _this_is_not_used;
PyObject_VAR_HEAD
Py_hash_t ob_hash;
PyObject *ob_item[NUM_KEYWORDS];
} _kwtuple = {
.ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS)
.ob_hash = -1,
.ob_item = { &_Py_ID(dict_content), &_Py_ID(is_raw), },
};
#undef NUM_KEYWORDS
#define KWTUPLE (&_kwtuple.ob_base.ob_base)
#else // !Py_BUILD_CORE
# define KWTUPLE NULL
#endif // !Py_BUILD_CORE
static const char * const _keywords[] = {"dict_content", "is_raw", NULL};
static _PyArg_Parser _parser = {
.keywords = _keywords,
.fname = "ZstdDict",
.kwtuple = KWTUPLE,
};
#undef KWTUPLE
PyObject *argsbuf[2];
PyObject * const *fastargs;
Py_ssize_t nargs = PyTuple_GET_SIZE(args);
Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1;
PyObject *dict_content;
int is_raw = 0;
fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser,
/*minpos*/ 1, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf);
if (!fastargs) {
goto exit;
}
dict_content = fastargs[0];
if (!noptargs) {
goto skip_optional_pos;
}
is_raw = PyObject_IsTrue(fastargs[1]);
if (is_raw < 0) {
goto exit;
}
skip_optional_pos:
return_value = _zstd_ZstdDict___init___impl((ZstdDict *)self, dict_content, is_raw);
exit:
return return_value;
}
PyDoc_STRVAR(_zstd_ZstdDict_as_digested_dict__doc__,
"Load as a digested dictionary to compressor.\n"
"\n"
"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_digested_dict)\n"
"1. Some advanced compression parameters of compressor may be overridden\n"
" by parameters of digested dictionary.\n"
"2. ZstdDict has a digested dictionaries cache for each compression level.\n"
" It\'s faster when loading again a digested dictionary with the same\n"
" compression level.\n"
"3. No need to use this for decompression.");
#if defined(_zstd_ZstdDict_as_digested_dict_DOCSTR)
# undef _zstd_ZstdDict_as_digested_dict_DOCSTR
#endif
#define _zstd_ZstdDict_as_digested_dict_DOCSTR _zstd_ZstdDict_as_digested_dict__doc__
#if !defined(_zstd_ZstdDict_as_digested_dict_DOCSTR)
# define _zstd_ZstdDict_as_digested_dict_DOCSTR NULL
#endif
#if defined(_ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF)
# undef _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF
# define _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF {"as_digested_dict", (getter)_zstd_ZstdDict_as_digested_dict_get, (setter)_zstd_ZstdDict_as_digested_dict_set, _zstd_ZstdDict_as_digested_dict_DOCSTR},
#else
# define _ZSTD_ZSTDDICT_AS_DIGESTED_DICT_GETSETDEF {"as_digested_dict", (getter)_zstd_ZstdDict_as_digested_dict_get, NULL, _zstd_ZstdDict_as_digested_dict_DOCSTR},
#endif
static PyObject *
_zstd_ZstdDict_as_digested_dict_get_impl(ZstdDict *self);
static PyObject *
_zstd_ZstdDict_as_digested_dict_get(PyObject *self, void *Py_UNUSED(context))
{
PyObject *return_value = NULL;
Py_BEGIN_CRITICAL_SECTION(self);
return_value = _zstd_ZstdDict_as_digested_dict_get_impl((ZstdDict *)self);
Py_END_CRITICAL_SECTION();
return return_value;
}
PyDoc_STRVAR(_zstd_ZstdDict_as_undigested_dict__doc__,
"Load as an undigested dictionary to compressor.\n"
"\n"
"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_undigested_dict)\n"
"1. The advanced compression parameters of compressor will not be overridden.\n"
"2. Loading an undigested dictionary is costly. If load an undigested dictionary\n"
" multiple times, consider reusing a compressor object.\n"
"3. No need to use this for decompression.");
#if defined(_zstd_ZstdDict_as_undigested_dict_DOCSTR)
# undef _zstd_ZstdDict_as_undigested_dict_DOCSTR
#endif
#define _zstd_ZstdDict_as_undigested_dict_DOCSTR _zstd_ZstdDict_as_undigested_dict__doc__
#if !defined(_zstd_ZstdDict_as_undigested_dict_DOCSTR)
# define _zstd_ZstdDict_as_undigested_dict_DOCSTR NULL
#endif
#if defined(_ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF)
# undef _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF
# define _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF {"as_undigested_dict", (getter)_zstd_ZstdDict_as_undigested_dict_get, (setter)_zstd_ZstdDict_as_undigested_dict_set, _zstd_ZstdDict_as_undigested_dict_DOCSTR},
#else
# define _ZSTD_ZSTDDICT_AS_UNDIGESTED_DICT_GETSETDEF {"as_undigested_dict", (getter)_zstd_ZstdDict_as_undigested_dict_get, NULL, _zstd_ZstdDict_as_undigested_dict_DOCSTR},
#endif
static PyObject *
_zstd_ZstdDict_as_undigested_dict_get_impl(ZstdDict *self);
static PyObject *
_zstd_ZstdDict_as_undigested_dict_get(PyObject *self, void *Py_UNUSED(context))
{
PyObject *return_value = NULL;
Py_BEGIN_CRITICAL_SECTION(self);
return_value = _zstd_ZstdDict_as_undigested_dict_get_impl((ZstdDict *)self);
Py_END_CRITICAL_SECTION();
return return_value;
}
PyDoc_STRVAR(_zstd_ZstdDict_as_prefix__doc__,
"Load as a prefix to compressor/decompressor.\n"
"\n"
"Pass this attribute as zstd_dict argument: compress(dat, zstd_dict=zd.as_prefix)\n"
"1. Prefix is compatible with long distance matching, while dictionary is not.\n"
"2. It only works for the first frame, then the compressor/decompressor will\n"
" return to no prefix state.\n"
"3. When decompressing, must use the same prefix as when compressing.\"");
#if defined(_zstd_ZstdDict_as_prefix_DOCSTR)
# undef _zstd_ZstdDict_as_prefix_DOCSTR
#endif
#define _zstd_ZstdDict_as_prefix_DOCSTR _zstd_ZstdDict_as_prefix__doc__
#if !defined(_zstd_ZstdDict_as_prefix_DOCSTR)
# define _zstd_ZstdDict_as_prefix_DOCSTR NULL
#endif
#if defined(_ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF)
# undef _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF
# define _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF {"as_prefix", (getter)_zstd_ZstdDict_as_prefix_get, (setter)_zstd_ZstdDict_as_prefix_set, _zstd_ZstdDict_as_prefix_DOCSTR},
#else
# define _ZSTD_ZSTDDICT_AS_PREFIX_GETSETDEF {"as_prefix", (getter)_zstd_ZstdDict_as_prefix_get, NULL, _zstd_ZstdDict_as_prefix_DOCSTR},
#endif
static PyObject *
_zstd_ZstdDict_as_prefix_get_impl(ZstdDict *self);
static PyObject *
_zstd_ZstdDict_as_prefix_get(PyObject *self, void *Py_UNUSED(context))
{
PyObject *return_value = NULL;
Py_BEGIN_CRITICAL_SECTION(self);
return_value = _zstd_ZstdDict_as_prefix_get_impl((ZstdDict *)self);
Py_END_CRITICAL_SECTION();
return return_value;
}
/*[clinic end generated code: output=59257c053f74eda7 input=a9049054013a1b77]*/