CLN: remove _ndarray_values (pandas-dev#32768)

jbrockmendel · Mar 22, 2020 · a4aa9d6 · a4aa9d6
1 parent 3949cef
commit a4aa9d6
Show file tree

Hide file tree

Showing 18 changed files with 37 additions and 154 deletions.
diff --git a/doc/source/development/internals.rst b/doc/source/development/internals.rst
@@ -89,16 +89,10 @@ pandas extends NumPy's type system with custom types, like ``Categorical`` or
 datetimes with a timezone, so we have multiple notions of "values". For 1-D
 containers (``Index`` classes and ``Series``) we have the following convention:
 
-* ``cls._ndarray_values`` is *always* a NumPy ``ndarray``. Ideally,
-  ``_ndarray_values`` is cheap to compute. For example, for a ``Categorical``,
-  this returns the codes, not the array of objects.
 * ``cls._values`` refers is the "best possible" array. This could be an
-  ``ndarray``, ``ExtensionArray``, or in ``Index`` subclass (note: we're in the
-  process of removing the index subclasses here so that it's always an
-  ``ndarray`` or ``ExtensionArray``).
+  ``ndarray`` or ``ExtensionArray``.
 
-So, for example, ``Series[category]._values`` is a ``Categorical``, while
-``Series[category]._ndarray_values`` is the underlying codes.
+So, for example, ``Series[category]._values`` is a ``Categorical``.
 
 .. _ref-subclassing-pandas:
 

diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst
@@ -37,7 +37,6 @@ objects.
       api.extensions.ExtensionArray._from_factorized
       api.extensions.ExtensionArray._from_sequence
       api.extensions.ExtensionArray._from_sequence_of_strings
-      api.extensions.ExtensionArray._ndarray_values
       api.extensions.ExtensionArray._reduce
       api.extensions.ExtensionArray._values_for_argsort
       api.extensions.ExtensionArray._values_for_factorize

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -93,7 +93,6 @@ class ExtensionArray:
     _from_factorized
     _from_sequence
     _from_sequence_of_strings
-    _ndarray_values
     _reduce
     _values_for_argsort
     _values_for_factorize
@@ -1046,22 +1045,6 @@ def _concat_same_type(
     # of objects
     _can_hold_na = True
 
-    @property
-    def _ndarray_values(self) -> np.ndarray:
-        """
-        Internal pandas method for lossy conversion to a NumPy ndarray.
-
-        This method is not part of the pandas interface.
-
-        The expectation is that this is cheap to compute, and is primarily
-        used for interacting with our indexers.
-
-        Returns
-        -------
-        array : ndarray
-        """
-        return np.array(self)
-
     def _reduce(self, name, skipna=True, **kwargs):
         """
         Return a scalar result of performing the reduction operation.

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -451,10 +451,6 @@ def dtype(self) -> CategoricalDtype:
         """
         return self._dtype
 
-    @property
-    def _ndarray_values(self) -> np.ndarray:
-        return self.codes
-
     @property
     def _constructor(self) -> Type["Categorical"]:
         return Categorical
@@ -2567,12 +2563,7 @@ def _get_codes_for_values(values, categories):
     """
     dtype_equal = is_dtype_equal(values.dtype, categories.dtype)
 
-    if dtype_equal:
-        # To prevent erroneous dtype coercion in _get_data_algo, retrieve
-        # the underlying numpy array. gh-22702
-        values = getattr(values, "_ndarray_values", values)
-        categories = getattr(categories, "_ndarray_values", categories)
-    elif is_extension_array_dtype(categories.dtype) and is_object_dtype(values):
+    if is_extension_array_dtype(categories.dtype) and is_object_dtype(values):
         # Support inferring the correct extension dtype from an array of
         # scalar objects. e.g.
         # Categorical(array[Period, Period], categories=PeriodIndex(...))
@@ -2582,7 +2573,7 @@ def _get_codes_for_values(values, categories):
             # exception raised in _from_sequence
             values = ensure_object(values)
             categories = ensure_object(categories)
-    else:
+    elif not dtype_equal:
         values = ensure_object(values)
         categories = ensure_object(categories)
 

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -456,10 +456,6 @@ def asi8(self) -> np.ndarray:
         # do not cache or you'll create a memory leak
         return self._data.view("i8")
 
-    @property
-    def _ndarray_values(self):
-        return self._data
-
     # ----------------------------------------------------------------
     # Rendering Methods
 

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -478,18 +478,6 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
         data = self.to_numpy(dtype=dtype, **kwargs)
         return astype_nansafe(data, dtype, copy=False)
 
-    @property
-    def _ndarray_values(self) -> np.ndarray:
-        """
-        Internal pandas method for lossy conversion to a NumPy ndarray.
-
-        This method is not part of the pandas interface.
-
-        The expectation is that this is cheap to compute, and is primarily
-        used for interacting with our indexers.
-        """
-        return self._data
-
     def _values_for_factorize(self) -> Tuple[np.ndarray, float]:
         # TODO: https://github.com/pandas-dev/pandas/issues/30037
         # use masked algorithms, rather than object-dtype / np.nan.

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -855,23 +855,6 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs):
                 result[self.isna()] = na_value
         return result
 
-    @property
-    def _ndarray_values(self) -> np.ndarray:
-        """
-        The data as an ndarray, possibly losing information.
-
-        The expectation is that this is cheap to compute, and is primarily
-        used for interacting with our indexers.
-
-        - categorical -> codes
-        """
-        if is_extension_array_dtype(self):
-            return self.array._ndarray_values
-        # As a mixin, we depend on the mixing class having values.
-        # Special mixin syntax may be developed in the future:
-        # https://github.com/python/typing/issues/246
-        return self.values  # type: ignore
-
     @property
     def empty(self):
         return not self.size

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -464,8 +464,7 @@ def _simple_new(cls, values, name: Label = None):
         # _index_data is a (temporary?) fix to ensure that the direct data
         # manipulation we do in `_libs/reduction.pyx` continues to work.
         # We need access to the actual ndarray, since we're messing with
-        # data buffers and strides. We don't re-use `_ndarray_values`, since
-        # we actually set this value too.
+        # data buffers and strides.
         result._index_data = values
         result._name = name
         result._cache = {}
@@ -625,7 +624,8 @@ def ravel(self, order="C"):
         --------
         numpy.ndarray.ravel
         """
-        return self._ndarray_values.ravel(order=order)
+        values = self._get_engine_target()
+        return values.ravel(order=order)
 
     def view(self, cls=None):
 
@@ -3846,29 +3846,24 @@ def _values(self) -> Union[ExtensionArray, np.ndarray]:
         """
         The best array representation.
 
-        This is an ndarray or ExtensionArray. This differs from
-        ``_ndarray_values``, which always returns an ndarray.
+        This is an ndarray or ExtensionArray.
 
-        Both ``_values`` and ``_ndarray_values`` are consistent between
-        ``Series`` and ``Index`` (except for datetime64[ns], which returns
-        a DatetimeArray for _values on the Index, but ndarray[M8ns] on the
-        Series).
+        ``_values`` are consistent between``Series`` and ``Index``.
 
         It may differ from the public '.values' method.
 
-        index             | values          | _values       | _ndarray_values |
-        ----------------- | --------------- | ------------- | --------------- |
-        Index             | ndarray         | ndarray       | ndarray         |
-        CategoricalIndex  | Categorical     | Categorical   | ndarray[int]    |
-        DatetimeIndex     | ndarray[M8ns]   | DatetimeArray | ndarray[M8ns]   |
-        DatetimeIndex[tz] | ndarray[M8ns]   | DatetimeArray | ndarray[M8ns]   |
-        PeriodIndex       | ndarray[object] | PeriodArray   | ndarray[int]    |
-        IntervalIndex     | IntervalArray   | IntervalArray | ndarray[object] |
+        index             | values          | _values       |
+        ----------------- | --------------- | ------------- |
+        Index             | ndarray         | ndarray       |
+        CategoricalIndex  | Categorical     | Categorical   |
+        DatetimeIndex     | ndarray[M8ns]   | DatetimeArray |
+        DatetimeIndex[tz] | ndarray[M8ns]   | DatetimeArray |
+        PeriodIndex       | ndarray[object] | PeriodArray   |
+        IntervalIndex     | IntervalArray   | IntervalArray |
 
         See Also
         --------
         values
-        _ndarray_values
         """
         return self._data
 

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -179,7 +179,7 @@ def sort_values(self, return_indexer=False, ascending=True):
             sorted_index = self.take(_as)
             return sorted_index, _as
         else:
-            # NB: using asi8 instead of _ndarray_values matters in numpy 1.18
+            # NB: using asi8 instead of _data matters in numpy 1.18
             #  because the treatment of NaT has been changed to put NaT last
             #  instead of first.
             sorted_values = np.sort(self.asi8)

diff --git a/pandas/core/indexes/extension.py b/pandas/core/indexes/extension.py
@@ -228,10 +228,6 @@ def __iter__(self):
     def __array__(self, dtype=None) -> np.ndarray:
         return np.asarray(self._data, dtype=dtype)
 
-    @property
-    def _ndarray_values(self) -> np.ndarray:
-        return self._data._ndarray_values
-
     def _get_engine_target(self) -> np.ndarray:
         return self._data._values_for_argsort()
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -550,21 +550,17 @@ def _values(self):
         timedelta64 dtypes), while ``.array`` ensures to always return an
         ExtensionArray.
 
-        Differs from ``._ndarray_values``, as that ensures to always return a
-        numpy array (it will call ``_ndarray_values`` on the ExtensionArray, if
-        the Series was backed by an ExtensionArray).
-
         Overview:
 
-        dtype       | values        | _values       | array         | _ndarray_values |
-        ----------- | ------------- | ------------- | ------------- | --------------- |
-        Numeric     | ndarray       | ndarray       | PandasArray   | ndarray         |
-        Category    | Categorical   | Categorical   | Categorical   | ndarray[int]    |
-        dt64[ns]    | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns]   |
-        dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray | ndarray[M8ns]   |
-        td64[ns]    | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] | ndarray[m8ns]   |
-        Period      | ndarray[obj]  | PeriodArray   | PeriodArray   | ndarray[int]    |
-        Nullable    | EA            | EA            | EA            | ndarray         |
+        dtype       | values        | _values       | array         |
+        ----------- | ------------- | ------------- | ------------- |
+        Numeric     | ndarray       | ndarray       | PandasArray   |
+        Category    | Categorical   | Categorical   | Categorical   |
+        dt64[ns]    | ndarray[M8ns] | DatetimeArray | DatetimeArray |
+        dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
+        td64[ns]    | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] |
+        Period      | ndarray[obj]  | PeriodArray   | PeriodArray   |
+        Nullable    | EA            | EA            | EA            |
 
         """
         return self._data.internal_values()

diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py
@@ -220,34 +220,6 @@ def test_values_consistent(array, expected_type, dtype):
     tm.assert_equal(l_values, r_values)
 
 
-@pytest.mark.parametrize(
-    "array, expected",
-    [
-        (np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)),
-        (np.array(["0", "1"]), np.array(["0", "1"], dtype=object)),
-        (pd.Categorical(["a", "a"]), np.array([0, 0], dtype="int8")),
-        (
-            pd.DatetimeIndex(["2017-01-01T00:00:00"]),
-            np.array(["2017-01-01T00:00:00"], dtype="M8[ns]"),
-        ),
-        (
-            pd.DatetimeIndex(["2017-01-01T00:00:00"], tz="US/Eastern"),
-            np.array(["2017-01-01T05:00:00"], dtype="M8[ns]"),
-        ),
-        (pd.TimedeltaIndex([10 ** 10]), np.array([10 ** 10], dtype="m8[ns]")),
-        (
-            pd.PeriodIndex(["2017", "2018"], freq="D"),
-            np.array([17167, 17532], dtype=np.int64),
-        ),
-    ],
-)
-def test_ndarray_values(array, expected):
-    l_values = pd.Series(array)._ndarray_values
-    r_values = pd.Index(array)._ndarray_values
-    tm.assert_numpy_array_equal(l_values, r_values)
-    tm.assert_numpy_array_equal(l_values, expected)
-
-
 @pytest.mark.parametrize("arr", [np.array([1, 2, 3])])
 def test_numpy_array(arr):
     ser = pd.Series(arr)

diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -313,16 +313,11 @@ def test_ensure_copied_data(self, indices):
             result = result.tz_localize("UTC").tz_convert(indices.tz)
 
         tm.assert_index_equal(indices, result)
-        tm.assert_numpy_array_equal(
-            indices._ndarray_values, result._ndarray_values, check_same="copy"
-        )
 
         if isinstance(indices, PeriodIndex):
             # .values an object array of Period, thus copied
             result = index_type(ordinal=indices.asi8, copy=False, **init_kwargs)
-            tm.assert_numpy_array_equal(
-                indices._ndarray_values, result._ndarray_values, check_same="same"
-            )
+            tm.assert_numpy_array_equal(indices.asi8, result.asi8, check_same="same")
         elif isinstance(indices, IntervalIndex):
             # checked in test_interval.py
             pass
@@ -331,9 +326,6 @@ def test_ensure_copied_data(self, indices):
             tm.assert_numpy_array_equal(
                 indices.values, result.values, check_same="same"
             )
-            tm.assert_numpy_array_equal(
-                indices._ndarray_values, result._ndarray_values, check_same="same"
-            )
 
     def test_memory_usage(self, indices):
         indices._engine.clear_mapping()

diff --git a/pandas/tests/indexes/interval/test_constructors.py b/pandas/tests/indexes/interval/test_constructors.py
@@ -91,7 +91,7 @@ def test_constructor_nan(self, constructor, breaks, closed):
 
         assert result.closed == closed
         assert result.dtype.subtype == expected_subtype
-        tm.assert_numpy_array_equal(result._ndarray_values, expected_values)
+        tm.assert_numpy_array_equal(np.array(result), expected_values)
 
     @pytest.mark.parametrize(
         "breaks",
@@ -114,7 +114,7 @@ def test_constructor_empty(self, constructor, breaks, closed):
         assert result.empty
         assert result.closed == closed
         assert result.dtype.subtype == expected_subtype
-        tm.assert_numpy_array_equal(result._ndarray_values, expected_values)
+        tm.assert_numpy_array_equal(np.array(result), expected_values)
 
     @pytest.mark.parametrize(
         "breaks",

diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py
@@ -147,7 +147,7 @@ def test_ensure_copied_data(self, closed):
         )
 
         # by-definition make a copy
-        result = IntervalIndex(index._ndarray_values, copy=False)
+        result = IntervalIndex(np.array(index), copy=False)
         tm.assert_numpy_array_equal(
             index.left.values, result.left.values, check_same="copy"
         )

diff --git a/pandas/tests/indexes/period/test_constructors.py b/pandas/tests/indexes/period/test_constructors.py
@@ -147,9 +147,9 @@ def test_constructor_fromarraylike(self):
 
         msg = "freq not specified and cannot be inferred"
         with pytest.raises(ValueError, match=msg):
-            PeriodIndex(idx._ndarray_values)
+            PeriodIndex(idx.asi8)
         with pytest.raises(ValueError, match=msg):
-            PeriodIndex(list(idx._ndarray_values))
+            PeriodIndex(list(idx.asi8))
 
         msg = "'Period' object is not iterable"
         with pytest.raises(TypeError, match=msg):

diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py
@@ -161,23 +161,23 @@ def test_values(self):
         tm.assert_numpy_array_equal(idx.to_numpy(), exp)
 
         exp = np.array([], dtype=np.int64)
-        tm.assert_numpy_array_equal(idx._ndarray_values, exp)
+        tm.assert_numpy_array_equal(idx.asi8, exp)
 
         idx = PeriodIndex(["2011-01", NaT], freq="M")
 
         exp = np.array([Period("2011-01", freq="M"), NaT], dtype=object)
         tm.assert_numpy_array_equal(idx.values, exp)
         tm.assert_numpy_array_equal(idx.to_numpy(), exp)
         exp = np.array([492, -9223372036854775808], dtype=np.int64)
-        tm.assert_numpy_array_equal(idx._ndarray_values, exp)
+        tm.assert_numpy_array_equal(idx.asi8, exp)
 
         idx = PeriodIndex(["2011-01-01", NaT], freq="D")
 
         exp = np.array([Period("2011-01-01", freq="D"), NaT], dtype=object)
         tm.assert_numpy_array_equal(idx.values, exp)
         tm.assert_numpy_array_equal(idx.to_numpy(), exp)
         exp = np.array([14975, -9223372036854775808], dtype=np.int64)
-        tm.assert_numpy_array_equal(idx._ndarray_values, exp)
+        tm.assert_numpy_array_equal(idx.asi8, exp)
 
     def test_period_index_length(self):
         pi = period_range(freq="A", start="1/1/2001", end="12/1/2009")

diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
@@ -55,9 +55,7 @@ def test_ops(self, opname, obj):
         if not isinstance(obj, PeriodIndex):
             expected = getattr(obj.values, opname)()
         else:
-            expected = pd.Period(
-                ordinal=getattr(obj._ndarray_values, opname)(), freq=obj.freq
-            )
+            expected = pd.Period(ordinal=getattr(obj.asi8, opname)(), freq=obj.freq)
         try:
             assert result == expected
         except TypeError: