pandas-dev · jreback · Feb 12, 2021 · Feb 5, 2021 · Feb 5, 2021 · Feb 5, 2021
diff --git a/pandas/core/array_algos/quantile.py b/pandas/core/array_algos/quantile.py
@@ -0,0 +1,76 @@
+from typing import Sequence, Union
+
+import numpy as np
+
+from pandas._libs import lib
+
+from pandas.core.dtypes.common import is_list_like
+
+from pandas.core.nanops import nanpercentile
+
+
+def quantile_with_mask(
+    values: np.ndarray,
+    mask: np.ndarray,
+    fill_value,
+    qs: Union[float, Sequence[float]],
+    interpolation: str,
+    axis: int,
+) -> np.ndarray:
+    """
+    Compute the quantiles of the given values for each quantile in `qs`.
+
+
+    Parameters
+    ----------
+    values : np.ndarray
+        For ExtensionArray, this is _values_for_factorize()[0]
+    mask : np.ndarray[bool]
+        mask = isna(values)
+        For ExtensionArray, this is computed before calling _values_for_factorize
+    fill_value : Scalar
+        The value to fill NA entries with
+        For ExtensionArray, this is _values_for_factorize()[1]
+    qs : a scalar or list of the quantiles to be computed
+    interpolation : str
+        Type of interpolation
+    axis : int
+        Axis along which to compute quantiles.
+
+    Notes
+    -----
+    Assumes values is already 2D.  For ExtensionArray this means np.atleast_2d
+    has been called on _values_for_factorize()[0]
+    """
+    is_empty = values.shape[axis] == 0
+    orig_scalar = not is_list_like(qs)
+    if orig_scalar:
+        # make list-like, unpack later
+        qs = [qs]
+
+    if is_empty:
+        # create the array of na_values
+        # 2D, shape (len(values), len(qs))
+        flat = np.array([fill_value] * len(qs))
+        result = np.repeat(flat, len(values)).reshape(len(values), len(qs))
+    else:
+        # callers build mask with np.asarray, needed for Sparse, see GH#24600
+        result = nanpercentile(
+            values,
+            np.array(qs) * 100,
+            axis=axis,
+            na_value=fill_value,
+            mask=mask,
+            ndim=values.ndim,
+            interpolation=interpolation,
+        )
+
+        result = np.array(result, copy=False)
+        result = result.T
+
+    if orig_scalar:
+        assert result.shape[-1] == 1, result.shape
+        result = result[..., 0]
+        result = lib.item_from_zerodim(result)
+
+    return result
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -425,7 +425,8 @@ def copy(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT:
         return new_obj
 
     def _values_for_factorize(self):
-        return self._ndarray, iNaT
+        # int64 instead of int ensures we have a "view" method
+        return self._ndarray, np.int64(iNaT)
 
     @classmethod
     def _from_factorized(

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -56,6 +56,7 @@
     putmask_smart,
     putmask_without_repeat,
 )
+from pandas.core.array_algos.quantile import quantile_with_mask
 from pandas.core.array_algos.replace import (
     compare_or_regex_search,
     replace_regex,
@@ -79,7 +80,6 @@
     is_scalar_indexer,
 )
 import pandas.core.missing as missing
-from pandas.core.nanops import nanpercentile
 
 if TYPE_CHECKING:
     from pandas import Index
@@ -1390,8 +1390,10 @@ def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
         Parameters
         ----------
         qs: a scalar or list of the quantiles to be computed
-        interpolation: type of interpolation, default 'linear'
-        axis: axis to compute, default 0
+        interpolation : str, default "linear"
+            Type of interpolation
+        axis : int, default 0
+            Axis along which to compute quantiles.
 
         Returns
         -------
@@ -1400,44 +1402,16 @@ def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
         # We should always have ndim == 2 because Series dispatches to DataFrame
         assert self.ndim == 2
 
-        values = self.get_values()
-
-        is_empty = values.shape[axis] == 0
-        orig_scalar = not is_list_like(qs)
-        if orig_scalar:
-            # make list-like, unpack later
-            qs = [qs]
-
-        if is_empty:
-            # create the array of na_values
-            # 2d len(values) * len(qs)
-            result = np.repeat(
-                np.array([self.fill_value] * len(qs)), len(values)
-            ).reshape(len(values), len(qs))
-        else:
-            # asarray needed for Sparse, see GH#24600
-            mask = np.asarray(isna(values))
-            result = nanpercentile(
-                values,
-                np.array(qs) * 100,
-                axis=axis,
-                na_value=self.fill_value,
-                mask=mask,
-                ndim=values.ndim,
-                interpolation=interpolation,
-            )
+        fill_value = self.fill_value
+        values = self.values
+        mask = np.asarray(isna(values))
 
-            result = np.array(result, copy=False)
-            result = result.T
+        result = quantile_with_mask(values, mask, fill_value, qs, interpolation, axis)
+        ndim = np.ndim(result)
 
-        if orig_scalar and not lib.is_scalar(result):
-            # result could be scalar in case with is_empty and self.ndim == 1
-            assert result.shape[-1] == 1, result.shape
-            result = result[..., 0]
-            result = lib.item_from_zerodim(result)
+        placement = np.arange(len(result))
 
-        ndim = np.ndim(result)
-        return make_block(result, placement=np.arange(len(result)), ndim=ndim)
+        return make_block(result, placement=placement, ndim=ndim)
 
     def _replace_coerce(
         self,
@@ -1866,6 +1840,36 @@ def _unstack(self, unstacker, fill_value, new_placement):
         ]
         return blocks, mask
 
+    def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
+        # asarray needed for Sparse, see GH#24600
+        mask = np.asarray(isna(self.values))
+        mask = np.atleast_2d(mask)
+
+        values, fill_value = self.values._values_for_factorize()
+
+        values = np.atleast_2d(values)
+
+        result = quantile_with_mask(values, mask, fill_value, qs, interpolation, axis)
+        ndim = np.ndim(result)
+
+        if not is_sparse(self.dtype):
+            # shape[0] should be 1 as long as EAs are 1D
+
+            if result.ndim == 1:
+                # i.e. qs was originally a scalar
+                assert result.shape == (1,), result.shape
+                result = type(self.values)._from_factorized(result, self.values)
+                placement = np.arange(len(result))
+
+            else:
+                assert result.shape == (1, len(qs)), result.shape
+                result = type(self.values)._from_factorized(result[0], self.values)
+                placement = [0]
+        else:
+            placement = np.arange(len(result))
+
+        return make_block(result, placement=placement, ndim=ndim)
+
 
 class HybridMixin:
     """
@@ -2184,19 +2188,6 @@ def fillna(
             value, limit=limit, inplace=inplace, downcast=downcast
         )
 
-    def quantile(self, qs, interpolation="linear", axis: int = 0) -> Block:
-        naive = self.values.view("M8[ns]")
-
-        # TODO(EA2D): kludge for 2D block with 1D values
-        naive = naive.reshape(self.shape)
-
-        blk = self.make_block(naive)
-        res_blk = blk.quantile(qs, interpolation=interpolation, axis=axis)
-
-        # TODO(EA2D): ravel is kludge for 2D block with 1D values, assumes column-like
-        aware = self._holder(res_blk.values.ravel(), dtype=self.dtype)
-        return self.make_block_same_class(aware, ndim=res_blk.ndim)
-
     def _check_ndim(self, values, ndim):
         """
         ndim inference and validation.

diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py
@@ -78,6 +78,85 @@ def test_quantile(self, datetime_frame):
         expected = Series([3.0, 4.0], index=[0, 1], name=0.5)
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize("as_dt64tz", [True, False])
+    def test_quantile_period(self, frame_or_series, as_dt64tz):
+        pi = pd.period_range("2016-01-01", periods=9, freq="D", name="A")
+        if as_dt64tz:
+            pi = pi.to_timestamp("S").tz_localize("US/Central")
+
+        obj = frame_or_series(pi)
+
+        qs = [0.5, 0, 1]
+        if frame_or_series is Series:
+            result = obj.quantile(qs)
+        else:
+            result = obj.quantile(qs, numeric_only=False)
+
+        expected = Series([pi[4], pi[0], pi[-1]], index=qs, name="A")
+        expected = frame_or_series(expected)
+
+        tm.assert_equal(result, expected)
+
+    # TODO: tests for axis=1?
+    # TODO: empty case?  might as well do dt64 and td64 here too
+    @pytest.mark.parametrize("as_dt64tz", [True, False])
+    def test_quantile_period_with_nat(self, frame_or_series, as_dt64tz):
+        pi = pd.period_range("2016-01-01", periods=9, freq="D", name="A")
+        if as_dt64tz:
+            pi = pi.to_timestamp("S").tz_localize("US/Central")
+
+        obj = frame_or_series(pi)
+
+        obj.iloc[0] = pd.NaT
+        obj.iloc[-1] = pd.NaT
+
+        qs = [0.5, 0, 1]
+        if frame_or_series is Series:
+            result = obj.quantile(qs)
+        else:
+            result = obj.quantile(qs, numeric_only=False)
+
+        expected = Series([pi[4], pi[1], pi[-2]], index=qs, name="A")
+        expected = frame_or_series(expected)
+        tm.assert_equal(result, expected)
+
+    @pytest.mark.parametrize("as_dt64tz", [True, False])
+    def test_quantile_period_all_nat(self, frame_or_series, as_dt64tz):
+        pi = pd.period_range("2016-01-01", periods=9, freq="D", name="A")
+        if as_dt64tz:
+            pi = pi.to_timestamp("S").tz_localize("US/Central")
+
+        obj = frame_or_series(pi)
+        obj.iloc[:] = pd.NaT
+
+        qs = [0.5, 0, 1]
+        if frame_or_series is Series:
+            result = obj.quantile(qs)
+        else:
+            result = obj.quantile(qs, numeric_only=False)
+
+        expected = Series([pd.NaT, pd.NaT, pd.NaT], dtype=pi.dtype, index=qs, name="A")
+        expected = frame_or_series(expected)
+        tm.assert_equal(result, expected)
+
+    def test_quantile_period_scalar(self, frame_or_series):
+        # scalar qs
+        pi = pd.period_range("2016-01-01", periods=9, freq="D", name="A")
+        obj = frame_or_series(pi)
+
+        qs = 0.5
+        if frame_or_series is Series:
+            result = obj.quantile(qs)
+        else:
+            result = obj.quantile(qs, numeric_only=False)
+
+        expected = Series({"A": pi[4]}, name=0.5)
+        if frame_or_series is Series:
+            expected = expected["A"]
+            assert result == expected
+        else:
+            tm.assert_series_equal(result, expected)
+
     def test_quantile_date_range(self):
         # GH 2460