Skip to content

Commit

Permalink
Fix renaming Series and Index (#14080)
Browse files Browse the repository at this point in the history
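This PR fixes renaming of `Series` and `Index` objects by changing the default of the `name` parameter in internal APIs from `None` to the `no_default` sentinel, so that an explicitly passed `name=None` is applied instead of being treated as "no name given".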

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Matthew Roeschke (https://github.com/mroeschke)

URL: #14080
  • Loading branch information
galipremsagar authored Sep 12, 2023
1 parent bc304a2 commit c3bf705
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 38 deletions.
10 changes: 6 additions & 4 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from cudf._lib.filling import sequence
from cudf._lib.search import search_sorted
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import (
_is_non_decimal_numeric_dtype,
is_categorical_dtype,
Expand Down Expand Up @@ -95,7 +96,7 @@ def _lexsorted_equal_range(
return lower_bound, upper_bound, sort_inds


def _index_from_data(data: MutableMapping, name: Any = None):
def _index_from_data(data: MutableMapping, name: Any = no_default):
"""Construct an index of the appropriate type from some data."""

if len(data) == 0:
Expand Down Expand Up @@ -131,7 +132,7 @@ def _index_from_data(data: MutableMapping, name: Any = None):


def _index_from_columns(
columns: List[cudf.core.column.ColumnBase], name: Any = None
columns: List[cudf.core.column.ColumnBase], name: Any = no_default
):
"""Construct an index from ``columns``, with levels named 0, 1, 2..."""
return _index_from_data(dict(zip(range(len(columns)), columns)), name=name)
Expand Down Expand Up @@ -1032,10 +1033,10 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
@classmethod
@_cudf_nvtx_annotate
def _from_data(
cls, data: MutableMapping, name: Any = None
cls, data: MutableMapping, name: Any = no_default
) -> GenericIndex:
out = super()._from_data(data=data)
if name is not None:
if name is not no_default:
out.name = name
return out

Expand Down Expand Up @@ -3334,6 +3335,7 @@ def as_index(arbitrary, nan_as_null=None, **kwargs) -> BaseIndex:
- DatetimeIndex for Datetime input.
- GenericIndex for all other inputs.
"""

kwargs = _setdefault_name(arbitrary, **kwargs)
if isinstance(arbitrary, cudf.MultiIndex):
return arbitrary
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,10 +605,10 @@ def _from_data(
cls,
data: MutableMapping,
index: Optional[BaseIndex] = None,
name: Any = None,
name: Any = no_default,
) -> Series:
out = super()._from_data(data=data, index=index)
if name is not None:
if name is not no_default:
out.name = name
return out

Expand Down
27 changes: 27 additions & 0 deletions python/cudf/cudf/testing/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,33 @@
OTHER_TYPES = sorted(list(dtypeutils.OTHER_TYPES))
ALL_TYPES = sorted(list(dtypeutils.ALL_TYPES))

# Candidate ``name`` values used to parametrize Series/Index tests: ``None``,
# missing-value sentinels (NA / NaN / NaT variants), plain ``str`` and ``int``
# names, and NumPy, pandas and ``Decimal`` scalars.
# Each entry is listed once; a repeated entry only adds redundant test cases.
SERIES_OR_INDEX_NAMES = [
    None,
    pd.NA,
    cudf.NA,
    np.nan,
    float("NaN"),
    "abc",
    1,
    pd.NaT,
    np.datetime64("nat"),
    np.timedelta64("NaT"),
    np.timedelta64(10, "D"),
    np.timedelta64(5, "D"),
    np.datetime64("1970-01-01 00:00:00.000000001"),
    np.datetime64("1970-01-01 00:00:00.000000002"),
    pd.Timestamp(1),
    pd.Timestamp(2),
    pd.Timedelta(1),
    pd.Timedelta(2),
    Decimal("NaN"),
    Decimal("1.2"),
    np.int64(1),
    np.int32(1),
    np.float32(1),
]


def set_random_null_mask_inplace(series, null_probability=0.5, seed=None):
"""Randomly nullify elements in series with the provided probability."""
Expand Down
30 changes: 2 additions & 28 deletions python/cudf/cudf/tests/test_binops.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,32 +150,6 @@
lambda x: cudf.Scalar(0) / x,
]

_series_or_index_names = [
None,
pd.NA,
cudf.NA,
np.nan,
float("NaN"),
"abc",
1,
pd.NaT,
np.datetime64("nat"),
np.timedelta64("NaT"),
np.timedelta64(10, "D"),
np.timedelta64(5, "D"),
np.datetime64("1970-01-01 00:00:00.000000001"),
np.datetime64("1970-01-01 00:00:00.000000002"),
pd.Timestamp(1),
pd.Timestamp(2),
pd.Timedelta(1),
pd.Timedelta(2),
decimal.Decimal("NaN"),
decimal.Decimal("1.2"),
np.int64(1),
np.int32(1),
np.float32(1),
pd.Timestamp(1),
]

pytest_xfail = pytest.mark.xfail
pytestmark = pytest.mark.spilling
Expand Down Expand Up @@ -3315,8 +3289,8 @@ def test_binop_index_series(op):
utils.assert_eq(expected, actual)


@pytest.mark.parametrize("name1", _series_or_index_names)
@pytest.mark.parametrize("name2", _series_or_index_names)
@pytest.mark.parametrize("name1", utils.SERIES_OR_INDEX_NAMES)
@pytest.mark.parametrize("name2", utils.SERIES_OR_INDEX_NAMES)
def test_binop_index_dt_td_series_with_names(name1, name2):
gi = cudf.Index([1, 2, 3], dtype="datetime64[ns]", name=name1)
gs = cudf.Series([10, 11, 12], dtype="timedelta64[ns]", name=name2)
Expand Down
13 changes: 9 additions & 4 deletions python/cudf/cudf/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
FLOAT_TYPES,
NUMERIC_TYPES,
OTHER_TYPES,
SERIES_OR_INDEX_NAMES,
SIGNED_INTEGER_TYPES,
SIGNED_TYPES,
UNSIGNED_TYPES,
Expand Down Expand Up @@ -227,12 +228,16 @@ def test_pandas_as_index():
)


def test_index_rename():
pds = pd.Index([1, 2, 3], name="asdf")
@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES)
@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES)
def test_index_rename(initial_name, name):
pds = pd.Index([1, 2, 3], name=initial_name)
gds = as_index(pds)

expect = pds.rename("new_name")
got = gds.rename("new_name")
assert_eq(pds, gds)

expect = pds.rename(name)
got = gds.rename(name)

assert_eq(expect, got)
"""
Expand Down
15 changes: 15 additions & 0 deletions python/cudf/cudf/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from cudf.core._compat import PANDAS_LT_140
from cudf.testing._utils import (
NUMERIC_TYPES,
SERIES_OR_INDEX_NAMES,
TIMEDELTA_TYPES,
_create_pandas_series,
assert_eq,
Expand Down Expand Up @@ -2267,3 +2268,17 @@ def test_series_unique_pandas_compatibility():
actual = gs.unique()
expected = ps.unique()
assert_eq(actual, expected)


@pytest.mark.parametrize("initial_name", SERIES_OR_INDEX_NAMES)
@pytest.mark.parametrize("name", SERIES_OR_INDEX_NAMES)
def test_series_rename(initial_name, name):
    """Check that ``Series.rename`` gives matching cudf and pandas results.

    Both series are built with ``initial_name`` and then renamed to ``name``;
    the cudf and pandas objects are compared before and after the rename.
    """
    cudf_series = cudf.Series([1, 2, 3], name=initial_name)
    pandas_series = pd.Series([1, 2, 3], name=initial_name)

    # The freshly constructed series must already agree ...
    assert_eq(cudf_series, pandas_series)

    # ... and must still agree once both carry the new name.
    assert_eq(cudf_series.rename(name), pandas_series.rename(name))

0 comments on commit c3bf705

Please sign in to comment.