diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b194f20c3c433..5af426d07de14 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -987,6 +987,7 @@ Other - Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`) - Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`) - Bug in :meth:`DataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`) +- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`) .. _whatsnew_1000.contributors: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d4676a998c948..b69199defbcc4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -38,6 +38,7 @@ from pandas._libs import algos as libalgos, lib from pandas._typing import Axes, Dtype, FilePathOrBuffer +from pandas.compat import PY37 from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import ( @@ -975,7 +976,8 @@ def itertuples(self, index=True, name="Pandas"): ----- The column names will be renamed to positional names if they are invalid Python identifiers, repeated, or start with an underscore. - With a large number of columns (>255), regular tuples are returned. + On Python versions < 3.7 regular tuples are returned for DataFrames + with a large number of columns (>254). 
Examples -------- @@ -1018,8 +1020,9 @@ def itertuples(self, index=True, name="Pandas"): # use integer indexing because of possible duplicate column names arrays.extend(self.iloc[:, k] for k in range(len(self.columns))) - # Python 3 supports at most 255 arguments to constructor - if name is not None and len(self.columns) + index < 256: + # Python versions before 3.7 support at most 255 arguments to constructors + can_return_named_tuples = PY37 or len(self.columns) + index < 255 + if name is not None and can_return_named_tuples: itertuple = collections.namedtuple(name, fields, rename=True) return map(itertuple._make, zip(*arrays)) diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index 91fb71c9de7a4..f6713d703e112 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -5,6 +5,8 @@ import numpy as np import pytest +from pandas.compat import PY37 + import pandas as pd from pandas import Categorical, DataFrame, Series, compat, date_range, timedelta_range import pandas.util.testing as tm @@ -261,8 +263,27 @@ def test_itertuples(self, float_frame): df3 = DataFrame({"f" + str(i): [i] for i in range(1024)}) # will raise SyntaxError if trying to create namedtuple tup3 = next(df3.itertuples()) - assert not hasattr(tup3, "_fields") assert isinstance(tup3, tuple) + if PY37: + assert hasattr(tup3, "_fields") + else: + assert not hasattr(tup3, "_fields") + + # GH 28282 + df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}]) + result_254_columns = next(df_254_columns.itertuples(index=False)) + assert isinstance(result_254_columns, tuple) + assert hasattr(result_254_columns, "_fields") + + df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}]) + result_255_columns = next(df_255_columns.itertuples(index=False)) + assert isinstance(result_255_columns, tuple) + + # DataFrames with >=255 columns will fall back to regular tuples on Python < 3.7 + if PY37: + assert hasattr(result_255_columns, 
"_fields") + else: + assert not hasattr(result_255_columns, "_fields") def test_sequence_like_with_categorical(self):