GH-98363: Add itertools.batched() (GH-98364)

This commit is contained in:
Raymond Hettinger 2022-10-17 18:53:45 -05:00 committed by GitHub
parent 70732d8a4c
commit de3ece769a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 370 additions and 39 deletions

View file

@ -16,6 +16,7 @@ class itertools.groupby "groupbyobject *" "&groupby_type"
class itertools._grouper "_grouperobject *" "&_grouper_type"
class itertools.teedataobject "teedataobject *" "&teedataobject_type"
class itertools._tee "teeobject *" "&tee_type"
class itertools.batched "batchedobject *" "&batched_type"
class itertools.cycle "cycleobject *" "&cycle_type"
class itertools.dropwhile "dropwhileobject *" "&dropwhile_type"
class itertools.takewhile "takewhileobject *" "&takewhile_type"
@ -30,12 +31,13 @@ class itertools.filterfalse "filterfalseobject *" "&filterfalse_type"
class itertools.count "countobject *" "&count_type"
class itertools.pairwise "pairwiseobject *" "&pairwise_type"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6498ed21fbe1bf94]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=1168b274011ce21b]*/
static PyTypeObject groupby_type;
static PyTypeObject _grouper_type;
static PyTypeObject teedataobject_type;
static PyTypeObject tee_type;
static PyTypeObject batched_type;
static PyTypeObject cycle_type;
static PyTypeObject dropwhile_type;
static PyTypeObject takewhile_type;
@ -51,6 +53,171 @@ static PyTypeObject pairwise_type;
#include "clinic/itertoolsmodule.c.h"
/* batched object ************************************************************/
/* Note: The built-in zip() function includes a "strict" argument
that is needed because that function can silently truncate data
and there is no easy way for a user to detect that condition.
   The same reasoning does not apply to batched() which never drops
data. Instead, it produces a shorter list which can be handled
as the user sees fit.
*/
/* State for one itertools.batched() iterator instance. */
typedef struct {
    PyObject_HEAD
    PyObject *it;            /* iterator over the input iterable; cleared on exhaustion */
    Py_ssize_t batch_size;   /* maximum items per batch (n); validated to be >= 1 */
} batchedobject;
/*[clinic input]
@classmethod
itertools.batched.__new__ as batched_new
iterable: object
n: Py_ssize_t
Batch data into lists of length n. The last batch may be shorter than n.
Loops over the input iterable and accumulates data into lists
up to size n. The input is consumed lazily, just enough to
fill a list. The result is yielded as soon as a batch is full
or when the input iterable is exhausted.
>>> for batch in batched('ABCDEFG', 3):
... print(batch)
...
['A', 'B', 'C']
['D', 'E', 'F']
['G']
[clinic start generated code]*/
static PyObject *
batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n)
/*[clinic end generated code: output=7ebc954d655371b6 input=f28fd12cb52365f0]*/
{
    PyObject *it;
    batchedobject *bo;

    if (n < 1) {
        /* We could define the n==0 case to return an empty iterator
           but that is at odds with the idea that batching should
           never throw-away input data.
        */
        PyErr_SetString(PyExc_ValueError, "n must be at least one");
        return NULL;
    }
    /* Any non-iterable argument is rejected here with TypeError. */
    it = PyObject_GetIter(iterable);
    if (it == NULL) {
        return NULL;
    }

    /* create batchedobject structure */
    bo = (batchedobject *)type->tp_alloc(type, 0);
    if (bo == NULL) {
        /* tp_alloc failed; drop the iterator reference we just acquired. */
        Py_DECREF(it);
        return NULL;
    }
    bo->batch_size = n;
    bo->it = it;     /* bo takes ownership of the iterator reference */
    return (PyObject *)bo;
}
static void
batched_dealloc(batchedobject *bo)
{
    /* Untrack from the GC *before* tearing down the object so the
       collector never observes it in a partially-freed state. */
    PyObject_GC_UnTrack(bo);
    Py_XDECREF(bo->it);         /* it may already be NULL after exhaustion */
    Py_TYPE(bo)->tp_free(bo);
}
/* GC traversal: report the one PyObject reference this object owns.
   Py_VISIT() already expands to a NULL check, so the explicit
   `if (bo->it != NULL)` guard was redundant and is omitted, matching
   the convention used by the other tp_traverse handlers in this module. */
static int
batched_traverse(batchedobject *bo, visitproc visit, void *arg)
{
    Py_VISIT(bo->it);
    return 0;
}
/* tp_iternext: return the next batch as a list, or NULL when exhausted.
 *
 * Pulls up to batch_size items from the underlying iterator into a new
 * list.  A short (non-empty) final batch is returned as-is; an empty
 * pull means exhaustion, so the iterator is cleared and NULL is returned
 * with no exception set (normal StopIteration protocol).
 *
 * Bug fix: PyIter_Next() returns NULL both on exhaustion AND on error.
 * The original code treated every NULL as exhaustion, so an exception
 * raised partway through a batch was returned alongside a non-empty
 * partial list -- an invalid state (result with exception set).  We now
 * check PyErr_Occurred() and propagate the error instead.
 */
static PyObject *
batched_next(batchedobject *bo)
{
    Py_ssize_t i;
    PyObject *it = bo->it;
    PyObject *item;
    PyObject *result;

    if (it == NULL) {
        /* Already exhausted on a previous call. */
        return NULL;
    }
    result = PyList_New(0);
    if (result == NULL) {
        return NULL;
    }
    for (i = 0; i < bo->batch_size; i++) {
        item = PyIter_Next(it);
        if (item == NULL) {
            if (PyErr_Occurred()) {
                /* Underlying iterator raised; don't return a partial
                   batch with a live exception. */
                goto error;
            }
            break;   /* genuine exhaustion */
        }
        if (PyList_Append(result, item) < 0) {
            Py_DECREF(item);
            goto error;
        }
        Py_DECREF(item);
    }
    if (PyList_GET_SIZE(result) > 0) {
        return result;
    }
    /* Exhausted with nothing collected: release the iterator eagerly. */
    Py_CLEAR(bo->it);
    Py_DECREF(result);
    return NULL;

error:
    Py_CLEAR(bo->it);
    Py_DECREF(result);
    return NULL;
}
/* Type object for itertools.batched.  A GC-tracked, subclassable iterator:
   tp_iter returns self, tp_iternext yields successive batches. */
static PyTypeObject batched_type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "itertools.batched",            /* tp_name */
    sizeof(batchedobject),          /* tp_basicsize */
    0,                              /* tp_itemsize */
    /* methods */
    (destructor)batched_dealloc,    /* tp_dealloc */
    0,                              /* tp_vectorcall_offset */
    0,                              /* tp_getattr */
    0,                              /* tp_setattr */
    0,                              /* tp_as_async */
    0,                              /* tp_repr */
    0,                              /* tp_as_number */
    0,                              /* tp_as_sequence */
    0,                              /* tp_as_mapping */
    0,                              /* tp_hash */
    0,                              /* tp_call */
    0,                              /* tp_str */
    PyObject_GenericGetAttr,        /* tp_getattro */
    0,                              /* tp_setattro */
    0,                              /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
        Py_TPFLAGS_BASETYPE,        /* tp_flags */
    batched_new__doc__,             /* tp_doc -- generated by argument clinic */
    (traverseproc)batched_traverse, /* tp_traverse */
    0,                              /* tp_clear */
    0,                              /* tp_richcompare */
    0,                              /* tp_weaklistoffset */
    PyObject_SelfIter,              /* tp_iter -- iterator returns itself */
    (iternextfunc)batched_next,     /* tp_iternext */
    0,                              /* tp_methods */
    0,                              /* tp_members */
    0,                              /* tp_getset */
    0,                              /* tp_base */
    0,                              /* tp_dict */
    0,                              /* tp_descr_get */
    0,                              /* tp_descr_set */
    0,                              /* tp_dictoffset */
    0,                              /* tp_init */
    PyType_GenericAlloc,            /* tp_alloc */
    batched_new,                    /* tp_new -- clinic wrapper for batched_new_impl */
    PyObject_GC_Del,                /* tp_free */
};
/* pairwise object ***********************************************************/
typedef struct {
@ -4815,6 +4982,7 @@ repeat(elem [,n]) --> elem, elem, elem, ... endlessly or up to n times\n\
\n\
Iterators terminating on the shortest input sequence:\n\
accumulate(p[, func]) --> p0, p0+p1, p0+p1+p2\n\
batched(p, n) --> [p0, p1, ..., p_n-1], [p_n, p_n+1, ..., p_2n-1], ...\n\
chain(p, q, ...) --> p0, p1, ... plast, q0, q1, ...\n\
chain.from_iterable([p, q, ...]) --> p0, p1, ... plast, q0, q1, ...\n\
compress(data, selectors) --> (d[0] if s[0]), (d[1] if s[1]), ...\n\
@ -4841,6 +5009,7 @@ itertoolsmodule_exec(PyObject *m)
{
PyTypeObject *typelist[] = {
&accumulate_type,
&batched_type,
&combinations_type,
&cwr_type,
&cycle_type,