From 62af1ed306b87f884bf6a4b8a974568cd52b68b1 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 8 Dec 2022 12:06:07 -0600 Subject: [PATCH 1/6] Have batched() return tuples instead of lists --- Doc/library/itertools.rst | 16 ++++++++-------- Lib/test/test_itertools.py | 16 ++++++++-------- Modules/itertoolsmodule.c | 8 ++++---- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 0b5978505a9672..998d72f1a90052 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -166,11 +166,11 @@ loops that truncate the stream. .. function:: batched(iterable, n) - Batch data from the *iterable* into lists of length *n*. The last + Batch data from the *iterable* into tuples of length *n*. The last batch may be shorter than *n*. - Loops over the input iterable and accumulates data into lists up to - size *n*. The input is consumed lazily, just enough to fill a list. + Loops over the input iterable and accumulates data into tuples up to + size *n*. The input is consumed lazily, just enough to fill a batch. The result is yielded as soon as the batch is full or when the input iterable is exhausted: @@ -179,14 +179,14 @@ loops that truncate the stream. >>> flattened_data = ['roses', 'red', 'violets', 'blue', 'sugar', 'sweet'] >>> unflattened = list(batched(flattened_data, 2)) >>> unflattened - [['roses', 'red'], ['violets', 'blue'], ['sugar', 'sweet']] + [('roses', 'red'), ('violets', 'blue'), ('sugar', 'sweet')] >>> for batch in batched('ABCDEFG', 3): ... print(batch) ... - ['A', 'B', 'C'] - ['D', 'E', 'F'] - ['G'] + ('A', 'B', 'C') + ('D', 'E', 'F') + ('G',) Roughly equivalent to:: @@ -195,7 +195,7 @@ loops that truncate the stream. if n < 1: raise ValueError('n must be at least one') it = iter(iterable) - while (batch := list(islice(it, n))): + while (batch := tuple(islice(it, n))): yield batch .. 
versionadded:: 3.12 diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py index a0a740fba8e8e3..962f3bf750e0e0 100644 --- a/Lib/test/test_itertools.py +++ b/Lib/test/test_itertools.py @@ -161,11 +161,11 @@ def test_accumulate(self): def test_batched(self): self.assertEqual(list(batched('ABCDEFG', 3)), - [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]) + [('A', 'B', 'C'), ('D', 'E', 'F'), ('G',)]) self.assertEqual(list(batched('ABCDEFG', 2)), - [['A', 'B'], ['C', 'D'], ['E', 'F'], ['G']]) + [('A', 'B'), ('C', 'D'), ('E', 'F'), ('G',)]) self.assertEqual(list(batched('ABCDEFG', 1)), - [['A'], ['B'], ['C'], ['D'], ['E'], ['F'], ['G']]) + [('A',), ('B',), ('C',), ('D',), ('E',), ('F',), ('G',)]) with self.assertRaises(TypeError): # Too few arguments list(batched('ABCDEFG')) @@ -188,8 +188,8 @@ def test_batched(self): with self.subTest(s=s, n=n, batches=batches): # Order is preserved and no data is lost self.assertEqual(''.join(chain(*batches)), s) - # Each batch is an exact list - self.assertTrue(all(type(batch) is list for batch in batches)) + # Each batch is an exact tuple + self.assertTrue(all(type(batch) is tuple for batch in batches)) # All but the last batch is of size n if batches: last_batch = batches.pop() @@ -1777,12 +1777,12 @@ class TestPurePythonRoughEquivalents(unittest.TestCase): def test_batched_recipe(self): def batched_recipe(iterable, n): - "Batch data into lists of length n. The last batch may be shorter." + "Batch data into tuples of length n. The last batch may be shorter." 
# batched('ABCDEFG', 3) --> ABC DEF G if n < 1: raise ValueError('n must be at least one') it = iter(iterable) - while (batch := list(islice(it, n))): + while (batch := tuple(islice(it, n))): yield batch for iterable, n in product( @@ -2055,7 +2055,7 @@ def test_accumulate(self): def test_batched(self): s = 'abcde' - r = [['a', 'b'], ['c', 'd'], ['e']] + r = [('a', 'b'), ('c', 'd'), ('e',)] n = 2 for g in (G, I, Ig, L, R): with self.subTest(g=g): diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index e8b9bc76eec935..e8475efcb465cf 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -74,9 +74,9 @@ typedef struct { itertools.batched.__new__ as batched_new iterable: object n: Py_ssize_t -Batch data into lists of length n. The last batch may be shorter than n. +Batch data into tuples of length n. The last batch may be shorter than n. -Loops over the input iterable and accumulates data into lists +Loops over the input iterable and accumulates data into tuples up to size n. The input is consumed lazily, just enough to fill a list. The result is yielded as soon as a batch is full or when the input iterable is exhausted. 
@@ -150,12 +150,12 @@ batched_next(batchedobject *bo) if (it == NULL) { return NULL; } - result = PyList_New(n); + result = PyTuple_New(n); if (result == NULL) { return NULL; } iternextfunc iternext = *Py_TYPE(it)->tp_iternext; - PyObject **items = _PyList_ITEMS(result); + PyObject **items = _PyTuple_ITEMS(result); for (i=0 ; i < n ; i++) { item = iternext(it); if (item == NULL) { From 8089281625b341380f1982977f1aed4f05e5cfb1 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 8 Dec 2022 12:10:02 -0600 Subject: [PATCH 2/6] Update clinic --- Modules/clinic/itertoolsmodule.c.h | 12 ++++++------ Modules/itertoolsmodule.c | 8 ++++---- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Modules/clinic/itertoolsmodule.c.h b/Modules/clinic/itertoolsmodule.c.h index 17f9ebb249390f..dfdd5ba54f919f 100644 --- a/Modules/clinic/itertoolsmodule.c.h +++ b/Modules/clinic/itertoolsmodule.c.h @@ -12,9 +12,9 @@ PyDoc_STRVAR(batched_new__doc__, "batched(iterable, n)\n" "--\n" "\n" -"Batch data into lists of length n. The last batch may be shorter than n.\n" +"Batch data into tuples of length n. The last batch may be shorter than n.\n" "\n" -"Loops over the input iterable and accumulates data into lists\n" +"Loops over the input iterable and accumulates data into tuples\n" "up to size n. The input is consumed lazily, just enough to\n" "fill a list. The result is yielded as soon as a batch is full\n" "or when the input iterable is exhausted.\n" @@ -22,9 +22,9 @@ PyDoc_STRVAR(batched_new__doc__, " >>> for batch in batched(\'ABCDEFG\', 3):\n" " ... 
print(batch)\n" " ...\n" -" [\'A\', \'B\', \'C\']\n" -" [\'D\', \'E\', \'F\']\n" -" [\'G\']"); +" (\'A\', \'B\', \'C\')\n" +" (\'D\', \'E\', \'F\')\n" +" (\'G\',)"); static PyObject * batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n); @@ -913,4 +913,4 @@ itertools_count(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=efea8cd1e647bd17 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c17b32bf690cbefd input=a9049054013a1b77]*/ diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index e8475efcb465cf..17eef5bf9e9397 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -84,15 +84,15 @@ or when the input iterable is exhausted. >>> for batch in batched('ABCDEFG', 3): ... print(batch) ... - ['A', 'B', 'C'] - ['D', 'E', 'F'] - ['G'] + ('A', 'B', 'C') + ('D', 'E', 'F') + ('G',) [clinic start generated code]*/ static PyObject * batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n) -/*[clinic end generated code: output=7ebc954d655371b6 input=f28fd12cb52365f0]*/ +/*[clinic end generated code: output=7ebc954d655371b6 input=ecf306e1654bf0a2]*/ { PyObject *it; batchedobject *bo; From 717e1e3d0025826aabdfd5982a06fbeb57676f52 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 8 Dec 2022 13:10:48 -0600 Subject: [PATCH 3/6] Comment and docstring tweaks --- Modules/clinic/itertoolsmodule.c.h | 4 ++-- Modules/itertoolsmodule.c | 17 ++++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Modules/clinic/itertoolsmodule.c.h b/Modules/clinic/itertoolsmodule.c.h index dfdd5ba54f919f..287de524e91307 100644 --- a/Modules/clinic/itertoolsmodule.c.h +++ b/Modules/clinic/itertoolsmodule.c.h @@ -16,7 +16,7 @@ PyDoc_STRVAR(batched_new__doc__, "\n" "Loops over the input iterable and accumulates data into tuples\n" "up to size n. The input is consumed lazily, just enough to\n" -"fill a list. 
The result is yielded as soon as a batch is full\n" +"fill a batch. The result is yielded as soon as a batch is full\n" "or when the input iterable is exhausted.\n" "\n" " >>> for batch in batched(\'ABCDEFG\', 3):\n" @@ -913,4 +913,4 @@ itertools_count(PyTypeObject *type, PyObject *args, PyObject *kwargs) exit: return return_value; } -/*[clinic end generated code: output=c17b32bf690cbefd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0229ebd72962f130 input=a9049054013a1b77]*/ diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 17eef5bf9e9397..7a9d8fffb97793 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -56,13 +56,16 @@ static PyTypeObject pairwise_type; /* batched object ************************************************************/ /* Note: The built-in zip() function includes a "strict" argument - that is needed because that function can silently truncate data - and there is no easy way for a user to detect that condition. - The same reasoning does not apply to batched() which never drops - data. Instead, it produces a shorter list which can be handled - as the user sees fit. + that was needed because that function would silently truncate data, + and there was no easy way for a user to detect that condition. + The same reasoning does not apply to batched() which never drops data. + Instead, batched() produces a shorter tuple which can be handled + as the user sees fit. If requested, it would be reasonable to add + "fillvalue" support which had demonstrated value in zip_longest(). + For now, the API is kept simple and clean. */ + typedef struct { PyObject_HEAD PyObject *it; @@ -78,7 +81,7 @@ Batch data into tuples of length n. The last batch may be shorter than n. Loops over the input iterable and accumulates data into tuples up to size n. The input is consumed lazily, just enough to -fill a list. The result is yielded as soon as a batch is full +fill a batch. 
The result is yielded as soon as a batch is full or when the input iterable is exhausted. >>> for batch in batched('ABCDEFG', 3): @@ -92,7 +95,7 @@ or when the input iterable is exhausted. static PyObject * batched_new_impl(PyTypeObject *type, PyObject *iterable, Py_ssize_t n) -/*[clinic end generated code: output=7ebc954d655371b6 input=ecf306e1654bf0a2]*/ +/*[clinic end generated code: output=7ebc954d655371b6 input=ffd70726927c5129]*/ { PyObject *it; batchedobject *bo; From cd04a337b2833b0554cf578aa172614380ba7d77 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 8 Dec 2022 13:11:49 -0600 Subject: [PATCH 4/6] Remove stray blank line before typedef --- Modules/itertoolsmodule.c | 1 - 1 file changed, 1 deletion(-) diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 7a9d8fffb97793..223c92fda29666 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -65,7 +65,6 @@ static PyTypeObject pairwise_type; For now, the API is kept simple and clean. */ - typedef struct { PyObject_HEAD PyObject *it; From daf4f164defbda0dceba3cbecb7ef62573179fbc Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 8 Dec 2022 13:53:17 -0600 Subject: [PATCH 5/6] Reword comment about zip() data loss --- Modules/itertoolsmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c index 223c92fda29666..765fb57dddc409 100644 --- a/Modules/itertoolsmodule.c +++ b/Modules/itertoolsmodule.c @@ -57,7 +57,7 @@ static PyTypeObject pairwise_type; /* Note: The built-in zip() function includes a "strict" argument that was needed because that function would silently truncate data, - and there was no easy way for a user to detect that condition. + and there was no easy way for a user to detect the data loss. The same reasoning does not apply to batched() which never drops data. Instead, batched() produces a shorter tuple which can be handled as the user sees fit. 
If requested, it would be reasonable to add From fdc644d08011f3777aa08c171efa6f42208d8e0b Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 8 Dec 2022 14:00:39 -0600 Subject: [PATCH 6/6] Update summary table at the top of the docs --- Doc/library/itertools.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst index 998d72f1a90052..624d2430ac20d7 100644 --- a/Doc/library/itertools.rst +++ b/Doc/library/itertools.rst @@ -52,7 +52,7 @@ Iterator Arguments Results Iterator Arguments Results Example ============================ ============================ ================================================= ============================================================= :func:`accumulate` p [,func] p0, p0+p1, p0+p1+p2, ... ``accumulate([1,2,3,4,5]) --> 1 3 6 10 15`` -:func:`batched` p, n [p0, p1, ..., p_n-1], ... ``batched('ABCDEFG', n=3) --> ABC DEF G`` +:func:`batched` p, n (p0, p1, ..., p_n-1), ... ``batched('ABCDEFG', n=3) --> ABC DEF G`` :func:`chain` p, q, ... p0, p1, ... plast, q0, q1, ... ``chain('ABC', 'DEF') --> A B C D E F`` :func:`chain.from_iterable` iterable p0, p1, ... plast, q0, q1, ... ``chain.from_iterable(['ABC', 'DEF']) --> A B C D E F`` :func:`compress` data, selectors (d[0] if s[0]), (d[1] if s[1]), ... ``compress('ABCDEF', [1,0,1,0,1,1]) --> A C E F``