From 98f20e216c76d495a63541d03e445ff1b38cbaf9 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sun, 27 Dec 2020 15:24:08 -0500 Subject: [PATCH 1/4] BUG: ensure series/frame mode() keeps int index --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/algorithms.py | 6 ++++-- pandas/core/frame.py | 7 ++++++- pandas/tests/frame/methods/test_mode.py | 9 +++++++++ pandas/tests/test_algos.py | 2 +- 5 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 pandas/tests/frame/methods/test_mode.py diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c7573ee860744..a0b08f1afaeb7 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -202,6 +202,7 @@ Numeric ^^^^^^^ - Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`) - Bug in :meth:`DataFrame.select_dtypes` with ``include=np.number`` now retains numeric ``ExtensionDtype`` columns (:issue:`35340`) +- Bug in :meth:`DataFrame.mode` and :meth:`Series.mode` not keeping consistent integer :class:`Index` for empty input (:issue:`33321`) Conversion ^^^^^^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 2098392cf70a9..e8d650f448a91 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -927,7 +927,7 @@ def mode(values, dropna: bool = True) -> Series: ------- mode : Series """ - from pandas import Series + from pandas import Index, Series values = _ensure_arraylike(values) original = values @@ -954,7 +954,9 @@ def mode(values, dropna: bool = True) -> Series: warn(f"Unable to sort modes: {err}") result = _reconstruct_data(result, original.dtype, original) - return Series(result) + # Ensure index is type stable (should always use int index) + index = None if len(result) else Index([], dtype="int64") + return Series(result, index=index) def rank( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9097ac13192c9..0185a579180f7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9241,7 +9241,12 @@ def mode( def f(s): return s.mode(dropna=dropna) - return data.apply(f, axis=axis) + data = data.apply(f, axis=axis) + # Ensure index is type stable (should always use int index) + if data.empty: + data.index = Index([], dtype="int64") + + return data def quantile( self, diff --git a/pandas/tests/frame/methods/test_mode.py b/pandas/tests/frame/methods/test_mode.py new file mode 100644 index 0000000000000..a17926b002dc6 --- /dev/null +++ b/pandas/tests/frame/methods/test_mode.py @@ -0,0 +1,9 @@ +from pandas import DataFrame, Index +import pandas._testing as tm + + +def test_empty_df_mode(): + df = DataFrame([], columns=["a", "b"]) + result = df.mode() + expected = DataFrame([], columns=["a", "b"], index=Index([], dtype="int64")) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 8fcc241348f27..8454b5dfefb04 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -2253,7 +2253,7 @@ def test_int64_add_overflow(): class TestMode: def test_no_mode(self): - exp = Series([], dtype=np.float64) + exp = Series([], dtype=np.float64, index=Index([], dtype="int64")) tm.assert_series_equal(algos.mode([]), exp) def test_mode_single(self): From 21210fc109e7ac8a8151a803bb67b36beb043d48 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sun, 27 Dec 2020 16:38:39 -0500 Subject: [PATCH 2/4] Move test, fix type --- pandas/core/algorithms.py | 2 +- pandas/core/frame.py | 2 +- pandas/tests/frame/methods/test_mode.py | 9 --------- pandas/tests/frame/test_reductions.py | 6 ++++++ 4 files changed, 8 insertions(+), 11 deletions(-) delete mode 100644 pandas/tests/frame/methods/test_mode.py diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index e8d650f448a91..35c313c0d39b5 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -955,7 +955,7 @@ def mode(values, dropna: bool = True) -> Series: result = _reconstruct_data(result, original.dtype, original) # Ensure index is type stable (should always use int index) - index = None if len(result) else Index([], dtype="int64") + index = None if len(result) else Index([], dtype=int) return Series(result, index=index) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 0185a579180f7..fee0ce561a924 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9244,7 +9244,7 @@ def f(s): data = data.apply(f, axis=axis) # Ensure index is type stable (should always use int index) if data.empty: - data.index = Index([], dtype="int64") + data.index = Index([], dtype=int) return data diff --git a/pandas/tests/frame/methods/test_mode.py b/pandas/tests/frame/methods/test_mode.py deleted file mode 100644 index a17926b002dc6..0000000000000 --- a/pandas/tests/frame/methods/test_mode.py +++ /dev/null @@ -1,9 +0,0 @@ -from pandas import DataFrame, Index -import pandas._testing as tm - - -def test_empty_df_mode(): - df = DataFrame([], columns=["a", "b"]) - result = df.mode() - expected = DataFrame([], columns=["a", "b"], index=Index([], dtype="int64")) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index d33d91f2cefca..f6313fc806305 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -669,6 +669,12 @@ def test_mode_sortwarning(self): tm.assert_frame_equal(result, expected) + def test_mode_empty_df(self): + df = DataFrame([], columns=["a", "b"]) + result = df.mode() + expected = DataFrame([], columns=["a", "b"], index=Index([], dtype=int)) + tm.assert_frame_equal(result, expected) + def test_operators_timedelta64(self): df = DataFrame( { From 751a497b9134389fae75e24ddfeb8915ea8c1926 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Sun, 27 Dec 2020 16:40:50 -0500 Subject: [PATCH 3/4] One more --- pandas/tests/test_algos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 8454b5dfefb04..ebaf69e08ccff 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -2253,7 +2253,7 @@ def test_int64_add_overflow(): class TestMode: def test_no_mode(self): - exp = Series([], dtype=np.float64, index=Index([], dtype="int64")) + exp = Series([], dtype=np.float64, index=Index([], dtype=int)) tm.assert_series_equal(algos.mode([]), exp) def test_mode_single(self): From 0e598d11bc76bb115a2dbedd44d841f3183d0c7a Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Tue, 29 Dec 2020 13:12:54 -0500 Subject: [PATCH 4/4] Use ibase.default_index instead --- pandas/core/algorithms.py | 6 +++--- pandas/core/frame.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 35c313c0d39b5..5e74db41b8740 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -927,7 +927,8 @@ def mode(values, dropna: bool = True) -> Series: ------- mode : Series """ - from pandas import Index, Series + from pandas import Series + import pandas.core.indexes.base as ibase values = _ensure_arraylike(values) original = values @@ -955,8 +956,7 @@ def mode(values, dropna: bool = True) -> Series: result = _reconstruct_data(result, original.dtype, original) # Ensure index is type stable (should always use int index) - index = None if len(result) else Index([], dtype=int) - return Series(result, index=index) + return Series(result, index=ibase.default_index(len(result))) def rank( diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 50a8aa1f462b4..7f295a73a82e6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -9233,7 +9233,7 @@ def f(s): data = data.apply(f, axis=axis) # Ensure index is type stable (should always use int index) if data.empty: - data.index = Index([], dtype=int) + data.index = ibase.default_index(0) return data