mirror of
https://github.com/python/cpython.git
synced 2026-01-06 15:32:22 +00:00
GH-98363: Add itertools.batched() (GH-98364)
This commit is contained in:
parent
70732d8a4c
commit
de3ece769a
5 changed files with 370 additions and 39 deletions
|
|
@ -16,6 +16,7 @@ class itertools.groupby "groupbyobject *" "&groupby_type"
|
|||
class itertools._grouper "_grouperobject *" "&_grouper_type"
|
||||
class itertools.teedataobject "teedataobject *" "&teedataobject_type"
|
||||
class itertools._tee "teeobject *" "&tee_type"
|
||||
class itertools.batched "batchedobject *" "&batched_type"
|
||||
class itertools.cycle "cycleobject *" "&cycle_type"
|
||||
class itertools.dropwhile "dropwhileobject *" "&dropwhile_type"
|
||||
class itertools.takewhile "takewhileobject *" "&takewhile_type"
|
||||
|
|
@ -30,12 +31,13 @@ class itertools.filterfalse "filterfalseobject *" "&filterfalse_type"
|
|||
class itertools.count "countobject *" "&count_type"
|
||||
class itertools.pairwise "pairwiseobject *" "&pairwise_type"
|
||||
[clinic start generated code]*/
|
||||
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6498ed21fbe1bf94]*/
|
||||
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1168b274011ce21b]*/
|
||||
|
||||
static PyTypeObject groupby_type;
|
||||
static PyTypeObject _grouper_type;
|
||||
static PyTypeObject teedataobject_type;
|
||||
static PyTypeObject tee_type;
|
||||
static PyTypeObject batched_type;
|
||||
static PyTypeObject cycle_type;
|
||||
static PyTypeObject dropwhile_type;
|
||||
static PyTypeObject takewhile_type;
|
||||
|
|
@ -51,6 +53,171 @@ static PyTypeObject pairwise_type;
|
|||
|
||||
#include "clinic/itertoolsmodule.c.h"
|
||||
|
||||
/* batched object ************************************************************/
|
||||
|
||||
/* Note: The built-in zip() function includes a "strict" argument
|
||||
that is needed because that function can silently truncate data
|
||||
and there is no easy way for a user to detect that condition.
|
||||
The same reasoning does not apply to batched() which never drops
|
||||
data. Instead, it produces a shorter list which can be handled
|
||||
as the user sees fit.
|
||||
*/
|
||||
|
||||
/* Per-instance state of an itertools.batched iterator. */
typedef struct {
|
||||
PyObject_HEAD
|
||||
/* Iterator obtained from the input iterable in batched_new_impl();
   batched_next() resets it to NULL when no further batch can be produced. */
PyObject *it;
|
||||
/* Maximum number of items collected into one batch; batched_new_impl()
   rejects values below 1. */
Py_ssize_t batch_size;
|
||||
} batchedobject;
|
||||
|
||||
/*[clinic input]
|
||||
@classmethod
|
||||
itertools.batched.__new__ as batched_new
|
||||
iterable: object
|
||||
n: Py_ssize_t
|
||||
Batch data into lists of length n. The last batch may be shorter than n.
|
||||
|
||||
Loops over the input iterable and accumulates data into lists
|
||||
up to size n. The input is consumed lazily, just enough to
|
||||
fill a list. The result is yielded as soon as a batch is full
|
||||
or when the input iterable is exhausted.
|
||||
|
||||
>>> for batch in batched('ABCDEFG', 3):
|
||||
... print(batch)
|
||||
...
|
||||
['A', 'B', 'C']
|
||||
['D', 'E', 'F']
|
||||
['G']
|
||||
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n)
|
||||
/*[clinic end generated code: output=7ebc954d655371b6 input=f28fd12cb52365f0]*/
|
||||
{
|
||||
PyObject *it;
|
||||
batchedobject *bo;
|
||||
|
||||
if (n < 1) {
|
||||
/* We could define the n==0 case to return an empty iterator
|
||||
but that is add odds with the idea that batching should
|
||||
never throw-away input data.
|
||||
*/
|
||||
PyErr_SetString(PyExc_ValueError, "n must be at least one");
|
||||
return NULL;
|
||||
}
|
||||
it = PyObject_GetIter(iterable);
|
||||
if (it == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* create batchedobject structure */
|
||||
bo = (batchedobject *)type->tp_alloc(type, 0);
|
||||
if (bo == NULL) {
|
||||
Py_DECREF(it);
|
||||
return NULL;
|
||||
}
|
||||
bo->batch_size = n;
|
||||
bo->it = it;
|
||||
return (PyObject *)bo;
|
||||
}
|
||||
|
||||
static void
|
||||
batched_dealloc(batchedobject *bo)
|
||||
{
|
||||
PyObject_GC_UnTrack(bo);
|
||||
Py_XDECREF(bo->it);
|
||||
Py_TYPE(bo)->tp_free(bo);
|
||||
}
|
||||
|
||||
static int
|
||||
batched_traverse(batchedobject *bo, visitproc visit, void *arg)
|
||||
{
|
||||
if (bo->it != NULL) {
|
||||
Py_VISIT(bo->it);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
batched_next(batchedobject *bo)
|
||||
{
|
||||
Py_ssize_t i;
|
||||
PyObject *it = bo->it;
|
||||
PyObject *item;
|
||||
PyObject *result;
|
||||
|
||||
if (it == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
result = PyList_New(0);
|
||||
if (result == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
for (i=0 ; i < bo->batch_size ; i++) {
|
||||
item = PyIter_Next(it);
|
||||
if (item == NULL) {
|
||||
break;
|
||||
}
|
||||
if (PyList_Append(result, item) < 0) {
|
||||
Py_DECREF(item);
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
Py_DECREF(item);
|
||||
}
|
||||
if (PyList_GET_SIZE(result) > 0) {
|
||||
return result;
|
||||
}
|
||||
Py_CLEAR(bo->it);
|
||||
Py_DECREF(result);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static PyTypeObject batched_type = {
|
||||
PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
||||
"itertools.batched", /* tp_name */
|
||||
sizeof(batchedobject), /* tp_basicsize */
|
||||
0, /* tp_itemsize */
|
||||
/* methods */
|
||||
(destructor)batched_dealloc, /* tp_dealloc */
|
||||
0, /* tp_vectorcall_offset */
|
||||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_as_async */
|
||||
0, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
PyObject_GenericGetAttr, /* tp_getattro */
|
||||
0, /* tp_setattro */
|
||||
0, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
|
||||
Py_TPFLAGS_BASETYPE, /* tp_flags */
|
||||
batched_new__doc__, /* tp_doc */
|
||||
(traverseproc)batched_traverse, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
PyObject_SelfIter, /* tp_iter */
|
||||
(iternextfunc)batched_next, /* tp_iternext */
|
||||
0, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
PyType_GenericAlloc, /* tp_alloc */
|
||||
batched_new, /* tp_new */
|
||||
PyObject_GC_Del, /* tp_free */
|
||||
};
|
||||
|
||||
|
||||
/* pairwise object ***********************************************************/
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -4815,6 +4982,7 @@ repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\
|
|||
\n\
|
||||
Iterators terminating on the shortest input sequence:\n\
|
||||
accumulate(p[, func]) --> p0, p0+p1, p0+p1+p2\n\
|
||||
batched(p, n) --> [p0, p1, ..., p_n-1], [p_n, p_n+1, ..., p_2n-1], ...\n\
|
||||
chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ...\n\
|
||||
chain.from_iterable([p, q, ...]) --> p0, p1, ... plast, q0, q1, ...\n\
|
||||
compress(data, selectors) --> (d[0] if s[0]), (d[1] if s[1]), ...\n\
|
||||
|
|
@ -4841,6 +5009,7 @@ itertoolsmodule_exec(PyObject *m)
|
|||
{
|
||||
PyTypeObject *typelist[] = {
|
||||
&accumulate_type,
|
||||
&batched_type,
|
||||
&combinations_type,
|
||||
&cwr_type,
|
||||
&cycle_type,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue