Skip to content

Commit

Permalink
Support Series.__setitem__ with key to a new row (rapidsai#7443)
Browse files (browse the repository at this point in the history)
Closes rapidsai#7290 

Supports assigning to a new row (specified by a new label) in a series.

Authors:
  - Michael Wang (@isVoid)

Approvers:
  - @brandon-b-miller
  - GALI PREM SAGAR (@galipremsagar)

URL: rapidsai#7443
  • Loading branch information
isVoid authored and hyperbolic2346 committed Mar 23, 2021
1 parent 2818928 commit 0e2736a
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 2 deletions.
30 changes: 28 additions & 2 deletions python/cudf/cudf/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,12 @@
from nvtx import annotate

import cudf
from cudf._lib.concat import concat_columns
from cudf._lib.scalar import _is_null_host_scalar
from cudf._typing import DataFrameOrSeries, ScalarLike
from cudf._typing import ColumnLike, DataFrameOrSeries, ScalarLike
from cudf.core.column.column import as_column
from cudf.utils.dtypes import (
find_common_type,
is_categorical_dtype,
is_column_like,
is_list_like,
Expand Down Expand Up @@ -142,7 +145,19 @@ def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]:
return self._sr.iloc[arg]

def __setitem__(self, key, value):
key = self._loc_to_iloc(key)
try:
key = self._loc_to_iloc(key)
except KeyError as e:
if (
is_scalar(key)
and not isinstance(self._sr.index, cudf.MultiIndex)
and is_scalar(value)
):
_append_new_row_inplace(self._sr.index._values, key)
_append_new_row_inplace(self._sr._column, value)
return
else:
raise e
if isinstance(value, (pd.Series, cudf.Series)):
value = cudf.Series(value)
value = value._align_to_index(self._sr.index, how="right")
Expand Down Expand Up @@ -481,3 +496,14 @@ def _normalize_dtypes(df):
for name, col in df._data.items():
df[name] = col.astype(normalized_dtype)
return df


def _append_new_row_inplace(col: ColumnLike, value: ScalarLike):
    """Grow `col` by one element, in place, using `value` as the new tail.

    Both the scalar and the existing column are cast to the dtype that
    ``find_common_type`` reports for ``type(value)`` and ``col.dtype``;
    the two pieces are then concatenated and the result is written back
    into `col` via ``_mimic_inplace``.
    """
    common_dtype = find_common_type([type(value), col.dtype])

    # Build the one-element tail first, then the re-typed existing data.
    new_tail = as_column(value, dtype=common_dtype)
    widened = col.astype(common_dtype)

    combined = concat_columns([widened, new_tail])
    col._mimic_inplace(combined, inplace=True)
25 changes: 25 additions & 0 deletions python/cudf/cudf/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,6 +1043,10 @@ def test_series_setitem_string(key, value):
[
("a", 4),
("b", 4),
("b", np.int8(8)),
("d", 4),
("d", np.int8(16)),
("d", np.float32(16)),
(["a", "b"], 4),
(["a", "b"], [4, 5]),
([True, False, True], 4),
Expand All @@ -1058,6 +1062,27 @@ def test_series_setitem_loc(key, value):
assert_eq(psr, gsr)


@pytest.mark.parametrize(
    "key, value",
    [
        (1, "d"),
        (2, "e"),
        (4, "f"),
        ([1, 3], "g"),
        ([1, 3], ["g", "h"]),
        ([True, False, True], "i"),
        ([False, False, False], "j"),
        ([True, False, True], ["k", "l"]),
    ],
)
def test_series_setitem_loc_numeric_index(key, value):
    """Check `.loc` assignment on a string series with an integer index.

    The same `key`/`value` assignment is applied to a pandas series and to
    its cudf copy, and the two results are compared with ``assert_eq``.
    The parameters include a label (4) that is absent from the index.
    """
    expected = pd.Series(["a", "b", "c"], index=[1, 2, 3])
    actual = cudf.from_pandas(expected)

    # Apply the identical assignment to both libraries, pandas first.
    expected.loc[key] = value
    actual.loc[key] = value

    assert_eq(expected, actual)


@pytest.mark.parametrize(
"key, value",
[
Expand Down

0 comments on commit 0e2736a

Please sign in to comment.