diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 0b5978505a9672..624d2430ac20d7 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -52,7 +52,7 @@ Iterator Arguments Results Iterator Arguments Results Example ============================ ============================ ================================================= ============================================================= :func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15`` -:func:`batched` p, n [p0, p1, ..., p_n-1], ... ``batched('ABCDEFG', n=3) --> ABC DEF G`` +:func:`batched` p, n (p0, p1, ..., p_n-1), ... ``batched('ABCDEFG', n=3) --> ABC DEF G`` :func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') --> A B C D E F`` :func:`chain.from_iterable` iterable p0, p1, ... plast, q0, q1, ... ``chain.from_iterable(['ABC', 'DEF']) --> A B C D E F`` :func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... ``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F`` @@ -166,11 +166,11 @@ loops that truncate the stream. .. function:: batched(iterable, n) - Batch data from the *iterable* into lists of length *n*. The last + Batch data from the *iterable* into tuples of length *n*. The last batch may be shorter than *n*. - Loops over the input iterable and accumulates data into lists up to - size *n*. The input is consumed lazily, just enough to fill a list. + Loops over the input iterable and accumulates data into tuples up to + size *n*. The input is consumed lazily, just enough to fill a batch. The result is yielded as soon as the batch is full or when the input iterable is exhausted: @@ -179,14 +179,14 @@ loops that truncate the stream. >>> flattened_data = ['roses', 'red', 'violets', 'blue', 'sugar', 'sweet'] >>> unflattened = list(batched(flattened_data, 2)) >>> unflattened - [['roses', 'red'], ['violets', 'blue'], ['sugar', 'sweet']] + [('roses', 'red'), ('violets', 'blue'), ('sugar', 'sweet')] >>> for batch in batched('ABCDEFG', 3): ... print(batch) ... - ['A', 'B', 'C'] - ['D', 'E', 'F'] - ['G'] + ('A', 'B', 'C') + ('D', 'E', 'F') + ('G',) Roughly equivalent to:: @@ -195,7 +195,7 @@ loops that truncate the stream. if n < 1: raise ValueError('n must be at least one') it = iter(iterable) - while (batch := list(islice(it, n))): + while (batch := tuple(islice(it, n))): yield batch .. versionadded:: 3.12 diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index a0a740fba8e8e3..962f3bf750e0e0 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -161,11 +161,11 @@ def test_accumulate(self): def test_batched(self): self.assertEqual(list(batched('ABCDEFG', 3)), - [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]) + [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]) self.assertEqual(list(batched('ABCDEFG', 2)), - [['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']]) + [('A', 'B'), ('C', 'D'), ('E', 'F'), ('G',)]) self.assertEqual(list(batched('ABCDEFG', 1)), - [['A'], ['B'], ['C'], ['D'], ['E'], ['F'], ['G']]) + [('A',), ('B',), ('C',), ('D',), ('E',), ('F',), ('G',)]) with self.assertRaises(TypeError): # Too few arguments list(batched('ABCDEFG')) @@ -188,8 +188,8 @@ def test_batched(self): with self.subTest(s=s, n=n, batches=batches): # Order is preserved and no data is lost self.assertEqual(''.join(chain(*batches)), s) - # Each batch is an exact list - self.assertTrue(all(type(batch) is list for batch in batches)) + # Each batch is an exact tuple + self.assertTrue(all(type(batch) is tuple for batch in batches)) # All but the last batch is of size n if batches: last_batch = batches.pop() @@ -1777,12 +1777,12 @@ class TestPurePythonRoughEquivalents(unittest.TestCase): def test_batched_recipe(self): def batched_recipe(iterable, n): - "Batch data into lists of length n. The last batch may be shorter." + "Batch data into tuples of length n. The last batch may be shorter." # batched('ABCDEFG', 3) --> ABC DEF G if n < 1: raise ValueError('n must be at least one') it = iter(iterable) - while (batch := list(islice(it, n))): + while (batch := tuple(islice(it, n))): yield batch for iterable, n in product( @@ -2055,7 +2055,7 @@ def test_accumulate(self): def test_batched(self): s = 'abcde' - r = [['a', 'b'], ['c', 'd'], ['e']] + r = [('a', 'b'), ('c', 'd'), ('e',)] n = 2 for g in (G, I, Ig, L, R): with self.subTest(g=g): diff --git a/Modules/clinic/itertoolsmodule.c.h b/Modules/clinic/itertoolsmodule.c.h index 17f9ebb249390f..287de524e91307 100644 --- a/Modules/clinic/itertoolsmodule.c.h +++ b/Modules/clinic/itertoolsmodule.c.h @@ -12,19 +12,19 @@ PyDoc_STRVAR(batched_new__doc__, "batched(iterable, n)\n" "--\n" "\n" -"Batch data into lists of length n. The last batch may be shorter than n.\n" +"Batch data into tuples of length n. The last batch may be shorter than n.\n" "\n" -"Loops over the input iterable and accumulates data into lists\n" +"Loops over the input iterable and accumulates data into tuples\n" "up to size n. The input is consumed lazily, just enough to\n" -"fill a list. The result is yielded as soon as a batch is full\n" +"fill a batch. The result is yielded as soon as a batch is full\n" "or when the input iterable is exhausted.\n" "\n" " >>> for batch in batched(\'ABCDEFG\', 3):\n" " ... print(batch)\n" " ...\n" -" [\'A\', \'B\', \'C\']\n" -" [\'D\', \'E\', \'F\']\n" -" [\'G\']"); +" (\'A\', \'B\', \'C\')\n" +" (\'D\', \'E\', \'F\')\n" +" (\'G\',)"); static PyObject * batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n); @@ -913,4 +913,4 @@ itertools_count(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=efea8cd1e647bd17 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0229ebd72962f130 input=a9049054013a1b77]*/ diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index e8b9bc76eec935..765fb57dddc409 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -56,11 +56,13 @@ static PyTypeObject pairwise_type; /* batched object ************************************************************/ /* Note: The built-in zip() function includes a "strict" argument - that is needed because that function can silently truncate data - and there is no easy way for a user to detect that condition. - The same reasoning does not apply to batched() which never drops - data. Instead, it produces a shorter list which can be handled - as the user sees fit. + that was needed because that function would silently truncate data, + and there was no easy way for a user to detect the data loss. + The same reasoning does not apply to batched() which never drops data. + Instead, batched() produces a shorter tuple which can be handled + as the user sees fit. If requested, it would be reasonable to add + "fillvalue" support which had demonstrated value in zip_longest(). + For now, the API is kept simple and clean. */ typedef struct { @@ -74,25 +76,25 @@ typedef struct { itertools.batched.__new__ as batched_new iterable: object n: Py_ssize_t -Batch data into lists of length n. The last batch may be shorter than n. +Batch data into tuples of length n. The last batch may be shorter than n. -Loops over the input iterable and accumulates data into lists +Loops over the input iterable and accumulates data into tuples up to size n. The input is consumed lazily, just enough to -fill a list. The result is yielded as soon as a batch is full +fill a batch. The result is yielded as soon as a batch is full or when the input iterable is exhausted. >>> for batch in batched('ABCDEFG', 3): ... print(batch) ... - ['A', 'B', 'C'] - ['D', 'E', 'F'] - ['G'] + ('A', 'B', 'C') + ('D', 'E', 'F') + ('G',) [clinic start generated code]*/ static PyObject * batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n) -/*[clinic end generated code: output=7ebc954d655371b6 input=f28fd12cb52365f0]*/ +/*[clinic end generated code: output=7ebc954d655371b6 input=ffd70726927c5129]*/ { PyObject *it; batchedobject *bo; @@ -150,12 +152,12 @@ batched_next(batchedobject *bo) if (it == NULL) { return NULL; } - result = PyList_New(n); + result = PyTuple_New(n); if (result == NULL) { return NULL; } iternextfunc iternext = *Py_TYPE(it)->tp_iternext; - PyObject **items = _PyList_ITEMS(result); + PyObject **items = _PyTuple_ITEMS(result); for (i=0 ; i < n ; i++) { item = iternext(it); if (item == NULL) {