Skip to content

Commit 868f8db

Browse files
committed
ENH: Implemented lazy iteration.
Fixes GH20783.
1 parent b2eec25 commit 868f8db

File tree

3 files changed

+15
-7
lines changed

3 files changed

+15
-7
lines changed

doc/source/whatsnew/v0.23.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ New features
1616
~~~~~~~~~~~~
1717

1818
- :meth:`Index.droplevel` is now also implemented for flat indexes, for compatibility with MultiIndex (:issue:`21115`)
19+
- Iterating over a :class:`Series` and using :meth:`DataFrame.itertuples` now create iterators without internally
20+
allocating lists of all elements (:issue:`20783`)
1921

2022

2123
.. _whatsnew_0231.deprecations:

pandas/core/base.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
import pandas.core.nanops as nanops
2424
import pandas._libs.lib as lib
2525
from pandas.compat.numpy import function as nv
26-
from pandas.compat import PYPY
26+
from pandas.compat import PYPY, map, range
2727
from pandas.util._decorators import (Appender, cache_readonly,
2828
deprecate_kwarg, Substitution)
2929

@@ -917,7 +917,13 @@ def __iter__(self):
917917
(for str, int, float) or a pandas scalar
918918
(for Timestamp/Timedelta/Interval/Period)
919919
"""
920-
return iter(self.tolist())
920+
# We are explicitly making element iterators.
921+
if is_datetimelike(self._values):
922+
return map(com._maybe_box_datetimelike, self._values)
923+
elif is_extension_array_dtype(self._values):
924+
return iter(self._values)
925+
else:
926+
return map(self._values.item, range(self._values.size))
921927

922928
@cache_readonly
923929
def hasnans(self):

pandas/core/frame.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -815,14 +815,14 @@ def itertuples(self, index=True, name="Pandas"):
815815
Pandas(Index='b', col1=2, col2=0.20000000000000001)
816816
817817
"""
818-
arrays = []
818+
iterators = []
819819
fields = []
820820
if index:
821-
arrays.append(self.index)
821+
iterators.append(self.index)
822822
fields.append("Index")
823823

824824
# use integer indexing because of possible duplicate column names
825-
arrays.extend(self.iloc[:, k] for k in range(len(self.columns)))
825+
iterators.extend(self.iloc[:, k] for k in range(len(self.columns)))
826826

827827
# Python 3 supports at most 255 arguments to a constructor, and
828828
# things get slow with this many fields in Python 2
@@ -832,12 +832,12 @@ def itertuples(self, index=True, name="Pandas"):
832832
itertuple = collections.namedtuple(name,
833833
fields + list(self.columns),
834834
rename=True)
835-
return map(itertuple._make, zip(*arrays))
835+
return map(itertuple._make, zip(*iterators))
836836
except Exception:
837837
pass
838838

839839
# fallback to regular tuples
840-
return zip(*arrays)
840+
return zip(*iterators)
841841

842842
items = iteritems
843843

0 commit comments

Comments
 (0)