Fix renaming Series and Index (#14080)

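This PR fixes renaming of `Series` and `Index` objects by changing the default value of the `name` parameter in internal APIs from `None` to the `no_default` sentinel, so that an explicitly passed `name=None` is applied instead of being treated as "no name given".

Authors:
- GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
- Bradley Dice (https://github.com/bdice)
- Matthew Roeschke (https://github.com/mroeschke)

URL: #14080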
rapidsai · Sep 12, 2023 · c3bf705 · c3bf705
1 parent bc304a2
commit c3bf705
Show file tree

Hide file tree

Showing 6 changed files with 61 additions and 38 deletions.
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
@@ -28,6 +28,7 @@
 from cudf._lib.filling import sequence
 from cudf._lib.search import search_sorted
 from cudf._lib.types import size_type_dtype
+from cudf.api.extensions import no_default
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
     is_categorical_dtype,
@@ -95,7 +96,7 @@ def _lexsorted_equal_range(
     return lower_bound, upper_bound, sort_inds
 
 
-def _index_from_data(data: MutableMapping, name: Any = None):
+def _index_from_data(data: MutableMapping, name: Any = no_default):
     """Construct an index of the appropriate type from some data."""
 
     if len(data) == 0:
@@ -131,7 +132,7 @@ def _index_from_data(data: MutableMapping, name: Any = None):
 
 
 def _index_from_columns(
-    columns: List[cudf.core.column.ColumnBase], name: Any = None
+    columns: List[cudf.core.column.ColumnBase], name: Any = no_default
 ):
     """Construct an index from ``columns``, with levels named 0, 1, 2..."""
     return _index_from_data(dict(zip(range(len(columns)), columns)), name=name)
@@ -1032,10 +1033,10 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
     @classmethod
     @_cudf_nvtx_annotate
     def _from_data(
-        cls, data: MutableMapping, name: Any = None
+        cls, data: MutableMapping, name: Any = no_default
     ) -> GenericIndex:
         out = super()._from_data(data=data)
-        if name is not None:
+        if name is not no_default:
             out.name = name
         return out
 
@@ -3334,6 +3335,7 @@ def as_index(arbitrary, nan_as_null=None, **kwargs) -> BaseIndex:
         - DatetimeIndex for Datetime input.
         - GenericIndex for all other inputs.
     """
+
     kwargs = _setdefault_name(arbitrary, **kwargs)
     if isinstance(arbitrary, cudf.MultiIndex):
         return arbitrary

diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
@@ -605,10 +605,10 @@ def _from_data(
         cls,
         data: MutableMapping,
         index: Optional[BaseIndex] = None,
-        name: Any = None,
+        name: Any = no_default,
     ) -> Series:
         out = super()._from_data(data=data, index=index)
-        if name is not None:
+        if name is not no_default:
             out.name = name
         return out
 

diff --git a/python/cudf/cudf/testing/_utils.py b/python/cudf/cudf/testing/_utils.py
@@ -48,6 +48,33 @@
 OTHER_TYPES = sorted(list(dtypeutils.OTHER_TYPES))
 ALL_TYPES = sorted(list(dtypeutils.ALL_TYPES))
 
+SERIES_OR_INDEX_NAMES = [
+    None,
+    pd.NA,
+    cudf.NA,
+    np.nan,
+    float("NaN"),
+    "abc",
+    1,
+    pd.NaT,
+    np.datetime64("nat"),
+    np.timedelta64("NaT"),
+    np.timedelta64(10, "D"),
+    np.timedelta64(5, "D"),
+    np.datetime64("1970-01-01 00:00:00.000000001"),
+    np.datetime64("1970-01-01 00:00:00.000000002"),
+    pd.Timestamp(1),
+    pd.Timestamp(2),
+    pd.Timedelta(1),
+    pd.Timedelta(2),
+    Decimal("NaN"),
+    Decimal("1.2"),
+    np.int64(1),
+    np.int32(1),
+    np.float32(1),
+    pd.Timestamp(1),
+]
+
 
 def set_random_null_mask_inplace(series, null_probability=0.5, seed=None):
     """Randomly nullify elements in series with the provided probability."""

diff --git a/python/cudf/cudf/tests/test_binops.py b/python/cudf/cudf/tests/test_binops.py
@@ -150,32 +150,6 @@
     lambda x: cudf.Scalar(0) / x,
 ]
 
-_series_or_index_names = [
-    None,
-    pd.NA,
-    cudf.NA,
-    np.nan,
-    float("NaN"),
-    "abc",
-    1,
-    pd.NaT,
-    np.datetime64("nat"),
-    np.timedelta64("NaT"),
-    np.timedelta64(10, "D"),
-    np.timedelta64(5, "D"),
-    np.datetime64("1970-01-01 00:00:00.000000001"),
-    np.datetime64("1970-01-01 00:00:00.000000002"),
-    pd.Timestamp(1),
-    pd.Timestamp(2),
-    pd.Timedelta(1),
-    pd.Timedelta(2),
-    decimal.Decimal("NaN"),
-    decimal.Decimal("1.2"),
-    np.int64(1),
-    np.int32(1),
-    np.float32(1),
-    pd.Timestamp(1),
-]
 
 pytest_xfail = pytest.mark.xfail
 pytestmark = pytest.mark.spilling
@@ -3315,8 +3289,8 @@ def test_binop_index_series(op):
     utils.assert_eq(expected, actual)
 
 
-@pytest.mark.parametrize("name1", _series_or_index_names)
-@pytest.mark.parametrize("name2", _series_or_index_names)
+@pytest.mark.parametrize("name1", utils.SERIES_OR_INDEX_NAMES)
+@pytest.mark.parametrize("name2", utils.SERIES_OR_INDEX_NAMES)
 def test_binop_index_dt_td_series_with_names(name1, name2):
     gi = cudf.Index([1, 2, 3], dtype="datetime64[ns]", name=name1)
     gs = cudf.Series([10, 11, 12], dtype="timedelta64[ns]", name=name2)

diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py
@@ -24,6 +24,7 @@
     FLOAT_TYPES,
     NUMERIC_TYPES,
     OTHER_TYPES,
+    SERIES_OR_INDEX_NAMES,
     SIGNED_INTEGER_TYPES,
     SIGNED_TYPES,
     UNSIGNED_TYPES,
@@ -227,12 +228,16 @@ def test_pandas_as_index():
     )
 
 
-def test_index_rename():
-    pds = pd.Index([1, 2, 3], name="asdf")
+@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES)
+@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES)
+def test_index_rename(initial_name, name):
+    pds = pd.Index([1, 2, 3], name=initial_name)
     gds = as_index(pds)
 
-    expect = pds.rename("new_name")
-    got = gds.rename("new_name")
+    assert_eq(pds, gds)
+
+    expect = pds.rename(name)
+    got = gds.rename(name)
 
     assert_eq(expect, got)
     """

diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
@@ -16,6 +16,7 @@
 from cudf.core._compat import PANDAS_LT_140
 from cudf.testing._utils import (
     NUMERIC_TYPES,
+    SERIES_OR_INDEX_NAMES,
     TIMEDELTA_TYPES,
     _create_pandas_series,
     assert_eq,
@@ -2267,3 +2268,17 @@ def test_series_unique_pandas_compatibility():
         actual = gs.unique()
     expected = ps.unique()
     assert_eq(actual, expected)
+
+
+@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES)
+@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES)
+def test_series_rename(initial_name, name):
+    gsr = cudf.Series([1, 2, 3], name=initial_name)
+    psr = pd.Series([1, 2, 3], name=initial_name)
+
+    assert_eq(gsr, psr)
+
+    actual = gsr.rename(name)
+    expected = psr.rename(name)
+
+    assert_eq(actual, expected)