diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 584375512e76c..6a8fa0e6963a4 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -297,6 +297,7 @@ Other Deprecations - Deprecated the ``warn`` parameter in :func:`infer_freq` (:issue:`45947`) - Deprecated allowing non-keyword arguments in :meth:`ExtensionArray.argsort` (:issue:`46134`) - Deprecated treating all-bool ``object``-dtype columns as bool-like in :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``, explicitly cast to bool instead (:issue:`46188`) +- Deprecated the default ``numeric_only=True`` in :meth:`DataFrame.quantile`; in a future version ``numeric_only`` will default to ``False``, so datetime/timedelta columns will be included in the result (:issue:`7308`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 27048b82e674c..393377d30ec74 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -119,6 +119,7 @@ is_integer_dtype, is_iterator, is_list_like, + is_numeric_dtype, is_object_dtype, is_scalar, is_sequence, @@ -10562,7 +10563,7 @@ def quantile( self, q=0.5, axis: Axis = 0, - numeric_only: bool = True, + numeric_only: bool | lib.NoDefault = no_default, interpolation: str = "linear", ): """ @@ -10632,6 +10633,17 @@ def quantile( """ validate_percentile(q) axis = self._get_axis_number(axis) + any_not_numeric = any(not is_numeric_dtype(x) for x in self.dtypes) + if numeric_only is no_default and any_not_numeric: + warnings.warn( + "In future versions of pandas, numeric_only will be set to " + "False by default, and the datetime/timedelta columns will " + "be considered in the results. 
To not consider these columns " + "specify numeric_only=True.", + FutureWarning, + stacklevel=find_stack_level(), + ) + numeric_only = True if not is_list_like(q): # BlockManager.quantile expects listlike, so we wrap and unwrap here diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 040b981c41593..20f190fcdfd4d 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -14,6 +14,28 @@ class TestDataFrameQuantile: + @pytest.mark.parametrize( + "non_num_col", + [ + pd.date_range("2014-01-01", periods=3, freq="m"), + ["a", "b", "c"], + [DataFrame, Series, Timestamp], + ], + ) + def test_numeric_only_default_false_warning(self, non_num_col): + # GH #7308 + df = DataFrame({"A": [1, 2, 3], "B": [2, 3, 4]}) + df["C"] = non_num_col + + expected = Series( + [2.0, 3.0], + index=["A", "B"], + name=0.5, + ) + with tm.assert_produces_warning(FutureWarning, match="numeric_only"): + result = df.quantile(0.5) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize( "df,expected", [ @@ -43,21 +65,21 @@ def test_quantile(self, datetime_frame): from numpy import percentile df = datetime_frame - q = df.quantile(0.1, axis=0) + q = df.quantile(0.1, axis=0, numeric_only=True) assert q["A"] == percentile(df["A"], 10) tm.assert_index_equal(q.index, df.columns) - q = df.quantile(0.9, axis=1) + q = df.quantile(0.9, axis=1, numeric_only=True) assert q["2000-01-17"] == percentile(df.loc["2000-01-17"], 90) tm.assert_index_equal(q.index, df.index) # test degenerate case - q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0) + q = DataFrame({"x": [], "y": []}).quantile(0.1, axis=0, numeric_only=True) assert np.isnan(q["x"]) and np.isnan(q["y"]) # non-numeric exclusion df = DataFrame({"col1": ["A", "A", "B", "B"], "col2": [1, 2, 3, 4]}) - rs = df.quantile(0.5) + rs = df.quantile(0.5, numeric_only=True) with tm.assert_produces_warning(FutureWarning, match="Select only 
valid"): xp = df.median().rename(0.5) tm.assert_series_equal(rs, xp) @@ -78,7 +100,7 @@ def test_quantile(self, datetime_frame): # so that we exclude non-numeric along the same axis # See GH #7312 df = DataFrame([[1, 2, 3], ["a", "b", 4]]) - result = df.quantile(0.5, axis=1) + result = df.quantile(0.5, axis=1, numeric_only=True) expected = Series([3.0, 4.0], index=[0, 1], name=0.5) tm.assert_series_equal(result, expected) @@ -107,7 +129,7 @@ def test_quantile_axis_mixed(self): "D": ["foo", "bar", "baz"], } ) - result = df.quantile(0.5, axis=1) + result = df.quantile(0.5, axis=1, numeric_only=True) expected = Series([1.5, 2.5, 3.5], name=0.5) tm.assert_series_equal(result, expected) @@ -206,7 +228,7 @@ def test_quantile_interpolation_datetime(self, datetime_frame): # interpolation = linear (default case) df = datetime_frame - q = df.quantile(0.1, axis=0, interpolation="linear") + q = df.quantile(0.1, axis=0, numeric_only=True, interpolation="linear") assert q["A"] == np.percentile(df["A"], 10) def test_quantile_interpolation_int(self, int_frame): @@ -249,7 +271,7 @@ def test_quantile_datetime(self): df = DataFrame({"a": pd.to_datetime(["2010", "2011"]), "b": [0, 5]}) # exclude datetime - result = df.quantile(0.5) + result = df.quantile(0.5, numeric_only=True) expected = Series([2.5], index=["b"]) # datetime @@ -285,11 +307,11 @@ def test_quantile_datetime(self): tm.assert_frame_equal(result, expected) # empty when numeric_only=True - result = df[["a", "c"]].quantile(0.5) + result = df[["a", "c"]].quantile(0.5, numeric_only=True) expected = Series([], index=[], dtype=np.float64, name=0.5) tm.assert_series_equal(result, expected) - result = df[["a", "c"]].quantile([0.5]) + result = df[["a", "c"]].quantile([0.5], numeric_only=True) expected = DataFrame(index=[0.5]) tm.assert_frame_equal(result, expected) @@ -567,12 +589,12 @@ def test_quantile_empty_no_columns(self): # GH#23925 _get_numeric_data may drop all columns df = DataFrame(pd.date_range("1/1/18", periods=5)) 
df.columns.name = "captain tightpants" - result = df.quantile(0.5) + result = df.quantile(0.5, numeric_only=True) expected = Series([], index=[], name=0.5, dtype=np.float64) expected.index.name = "captain tightpants" tm.assert_series_equal(result, expected) - result = df.quantile([0.5]) + result = df.quantile([0.5], numeric_only=True) expected = DataFrame([], index=[0.5], columns=[]) expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) @@ -763,7 +785,7 @@ def test_datelike_numeric_only(self, expected_data, expected_index, axis): "c": pd.to_datetime(["2011", "2012"]), } ) - result = df[["a", "c"]].quantile(0.5, axis=axis) + result = df[["a", "c"]].quantile(0.5, axis=axis, numeric_only=True) expected = Series( expected_data, name=0.5, index=Index(expected_index), dtype=np.float64 ) diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index cf92cd55a720e..431029c407afc 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -248,14 +248,26 @@ marks=not_implemented_mark, ), pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("quantile")), + ( + pd.DataFrame, + frame_data, + operator.methodcaller("quantile", numeric_only=True), + ), marks=not_implemented_mark, ), pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("quantile", q=[0.25, 0.75])), + ( + pd.DataFrame, + frame_data, + operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True), + ), ), pytest.param( - (pd.DataFrame, frame_data, operator.methodcaller("quantile")), + ( + pd.DataFrame, + frame_data, + operator.methodcaller("quantile", numeric_only=True), + ), marks=not_implemented_mark, ), (