From e4b035ecc7b47ce237109bf3161d6574e46ccdd7 Mon Sep 17 00:00:00 2001
From: Simon Gibbons <simongibbons@gmail.com>
Date: Wed, 1 Jan 2020 11:42:06 +0000
Subject: [PATCH 1/2] BUG: Ensure df.itertuples() uses plain tuples correctly

Currently DataFrame.itertuples() has an off-by-one error
when it inspects whether or not it should return namedtuples
or plain tuples in its response.

This PR addresses that bug by correcting the condition
that is used when making the check.

Closes: #28282
---
 doc/source/whatsnew/v1.0.0.rst |  1 +
 pandas/core/frame.py           |  4 ++--
 pandas/tests/frame/test_api.py | 16 ++++++++++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
index b194f20c3c433..5af426d07de14 100755
--- a/doc/source/whatsnew/v1.0.0.rst
+++ b/doc/source/whatsnew/v1.0.0.rst
@@ -987,6 +987,7 @@ Other
 - Bug in :class:`Index` where a non-hashable name could be set without raising ``TypeError`` (:issue:`29069`)
 - Bug in :class:`DataFrame` constructor when passing a 2D ``ndarray`` and an extension dtype (:issue:`12513`)
 - Bug in :meth:`DaataFrame.to_csv` when supplied a series with a ``dtype="string"`` and a ``na_rep``, the ``na_rep`` was being truncated to 2 characters. (:issue:`29975`)
+- Bug where :meth:`DataFrame.itertuples` would incorrectly determine whether or not namedtuples could be used for dataframes of 255 columns (:issue:`28282`)
 
 .. _whatsnew_1000.contributors:
 
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d4676a998c948..bc6e1e7d50e12 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1018,8 +1018,8 @@ def itertuples(self, index=True, name="Pandas"):
         # use integer indexing because of possible duplicate column names
         arrays.extend(self.iloc[:, k] for k in range(len(self.columns)))
 
-        # Python 3 supports at most 255 arguments to constructor
-        if name is not None and len(self.columns) + index < 256:
+        # Python versions before 3.7 support at most 255 arguments to constructor
+        if name is not None and len(self.columns) + index < 255:
             itertuple = collections.namedtuple(name, fields, rename=True)
             return map(itertuple._make, zip(*arrays))
 
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
index 91fb71c9de7a4..b3f947e1a42d6 100644
--- a/pandas/tests/frame/test_api.py
+++ b/pandas/tests/frame/test_api.py
@@ -288,6 +288,22 @@ def test_sequence_like_with_categorical(self):
         for c, col in df.items():
             str(s)
 
+    def test_itertuples_fallback_to_regular_tuples(self):
+        # GH 28282
+
+        df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}])
+        result_254_columns = next(df_254_columns.itertuples(index=False))
+        assert isinstance(result_254_columns, tuple)
+        assert result_254_columns.foo_1 == "bar_1"
+
+        df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}])
+        result_255_columns = next(df_255_columns.itertuples(index=False))
+        assert isinstance(result_255_columns, tuple)
+
+        # Dataframes with >=255 columns will fallback to regular tuples
+        with pytest.raises(AttributeError):
+            result_255_columns.foo_1
+
     def test_len(self, float_frame):
         assert len(float_frame) == len(float_frame.index)
 

From 64b381dbd2a0401e154f7ca812e1efaa16986eef Mon Sep 17 00:00:00 2001
From: Simon Gibbons <simongibbons@gmail.com>
Date: Wed, 1 Jan 2020 22:23:31 +0000
Subject: [PATCH 2/2] Address comments.

1. Ensure we return named tuples in more cases (when using Python >=
   3.7)
2. Move test around to be with the itertuples test
3. Update docstring with the new behaviour.
---
 pandas/core/frame.py           |  9 +++++---
 pandas/tests/frame/test_api.py | 39 +++++++++++++++++++---------------
 2 files changed, 28 insertions(+), 20 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index bc6e1e7d50e12..b69199defbcc4 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -38,6 +38,7 @@
 
 from pandas._libs import algos as libalgos, lib
 from pandas._typing import Axes, Dtype, FilePathOrBuffer
+from pandas.compat import PY37
 from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import (
@@ -975,7 +976,8 @@ def itertuples(self, index=True, name="Pandas"):
         -----
         The column names will be renamed to positional names if they are
         invalid Python identifiers, repeated, or start with an underscore.
-        With a large number of columns (>255), regular tuples are returned.
+        On python versions < 3.7 regular tuples are returned for DataFrames
+        with a large number of columns (>254).
 
         Examples
         --------
@@ -1018,8 +1020,9 @@ def itertuples(self, index=True, name="Pandas"):
         # use integer indexing because of possible duplicate column names
         arrays.extend(self.iloc[:, k] for k in range(len(self.columns)))
 
-        # Python versions before 3.7 support at most 255 arguments to constructor
-        if name is not None and len(self.columns) + index < 255:
+        # Python versions before 3.7 support at most 255 arguments to constructors
+        can_return_named_tuples = PY37 or len(self.columns) + index < 255
+        if name is not None and can_return_named_tuples:
             itertuple = collections.namedtuple(name, fields, rename=True)
             return map(itertuple._make, zip(*arrays))
 
diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py
index b3f947e1a42d6..f6713d703e112 100644
--- a/pandas/tests/frame/test_api.py
+++ b/pandas/tests/frame/test_api.py
@@ -5,6 +5,8 @@
 import numpy as np
 import pytest
 
+from pandas.compat import PY37
+
 import pandas as pd
 from pandas import Categorical, DataFrame, Series, compat, date_range, timedelta_range
 import pandas.util.testing as tm
@@ -261,8 +263,27 @@ def test_itertuples(self, float_frame):
         df3 = DataFrame({"f" + str(i): [i] for i in range(1024)})
         # will raise SyntaxError if trying to create namedtuple
         tup3 = next(df3.itertuples())
-        assert not hasattr(tup3, "_fields")
         assert isinstance(tup3, tuple)
+        if PY37:
+            assert hasattr(tup3, "_fields")
+        else:
+            assert not hasattr(tup3, "_fields")
+
+        # GH 28282
+        df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}])
+        result_254_columns = next(df_254_columns.itertuples(index=False))
+        assert isinstance(result_254_columns, tuple)
+        assert hasattr(result_254_columns, "_fields")
+
+        df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}])
+        result_255_columns = next(df_255_columns.itertuples(index=False))
+        assert isinstance(result_255_columns, tuple)
+
+        # Dataframes with >=255 columns will fallback to regular tuples on python < 3.7
+        if PY37:
+            assert hasattr(result_255_columns, "_fields")
+        else:
+            assert not hasattr(result_255_columns, "_fields")
 
     def test_sequence_like_with_categorical(self):
 
@@ -288,22 +309,6 @@ def test_sequence_like_with_categorical(self):
         for c, col in df.items():
             str(s)
 
-    def test_itertuples_fallback_to_regular_tuples(self):
-        # GH 28282
-
-        df_254_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(254)}])
-        result_254_columns = next(df_254_columns.itertuples(index=False))
-        assert isinstance(result_254_columns, tuple)
-        assert result_254_columns.foo_1 == "bar_1"
-
-        df_255_columns = DataFrame([{f"foo_{i}": f"bar_{i}" for i in range(255)}])
-        result_255_columns = next(df_255_columns.itertuples(index=False))
-        assert isinstance(result_255_columns, tuple)
-
-        # Dataframes with >=255 columns will fallback to regular tuples
-        with pytest.raises(AttributeError):
-            result_255_columns.foo_1
-
     def test_len(self, float_frame):
         assert len(float_frame) == len(float_frame.index)