Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change return type of MultiIndex.codes #14123

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 22 additions & 18 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,12 @@
from cudf.core._compat import PANDAS_GE_150
from cudf.core.frame import Frame
from cudf.core.index import BaseIndex, _lexsorted_equal_range, as_index
from cudf.utils.utils import NotIterable, _cudf_nvtx_annotate, _is_same_name
from cudf.utils.utils import (
NotIterable,
_cudf_nvtx_annotate,
_external_only_api,
_is_same_name,
)


def _maybe_indices_to_slice(indices: cp.ndarray) -> Union[slice, cp.ndarray]:
Expand Down Expand Up @@ -560,7 +565,14 @@ def __repr__(self):
data_output = "\n".join(lines)
return output_prefix + data_output

@property
def _codes_frame(self):
    """Return the cached ``_codes`` object, computing it on first access.

    When ``self._codes`` is still ``None``, ``_compute_levels_and_codes()``
    is invoked once; ``self._codes`` is then returned as-is.
    """
    if self._codes is not None:
        # Already populated: hand back the cached object untouched.
        return self._codes
    self._compute_levels_and_codes()
    return self._codes

@property # type: ignore
@_external_only_api("Use ._codes_frame instead")
@_cudf_nvtx_annotate
def codes(self):
"""
Expand All @@ -570,26 +582,18 @@ def codes(self):
--------
>>> import cudf
>>> df = cudf.DataFrame({'a':[1, 2, 3], 'b':[10, 11, 12]})
>>> cudf.MultiIndex.from_frame(df)
MultiIndex([(1, 10),
(2, 11),
(3, 12)],
names=['a', 'b'])
>>> midx = cudf.MultiIndex.from_frame(df)
>>> midx
MultiIndex([(1, 10),
(2, 11),
(3, 12)],
names=['a', 'b'])
>>> midx.codes
a b
0 0 0
1 1 1
2 2 2
FrozenList([[0, 1, 2], [0, 1, 2]])
"""
if self._codes is None:
self._compute_levels_and_codes()
return self._codes
return pd.core.indexes.frozen.FrozenList(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just FYI, I am planning on changing this to a tuple instead of a FrozenList in pandas 3.0 (see pandas-dev/pandas#53582); I'm not sure whether there's any impact from changing to a tuple now.

col.values for col in self._codes_frame._columns
)

def get_slice_bound(self, label, side, kind=None):
raise NotImplementedError()
Expand Down Expand Up @@ -904,7 +908,7 @@ def _index_and_downcast(self, result, index, index_key):
result.names = index.names[size:]
index = MultiIndex(
levels=index.levels[size:],
codes=index.codes.iloc[:, size:],
codes=index._codes_frame.iloc[:, size:],
names=index.names[size:],
)

Expand Down Expand Up @@ -1028,8 +1032,8 @@ def __getitem__(self, index):
if flatten:
return result.to_pandas()[0]

if self._codes is not None:
result._codes = self._codes.take(index)
if self._codes_frame is not None:
result._codes = self._codes_frame.take(index)
if self._levels is not None:
result._levels = self._levels
return result
Expand Down Expand Up @@ -1713,8 +1717,8 @@ def memory_usage(self, deep=False):
if self.levels:
for level in self.levels:
usage += level.memory_usage(deep=deep)
if self.codes:
for col in self.codes._data.columns:
if self._codes_frame:
for col in self._codes_frame._data.columns:
usage += col.memory_usage
return usage

Expand Down
13 changes: 11 additions & 2 deletions python/cudf/cudf/tests/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,8 +752,8 @@ def test_multiindex_copy_sem(data, levels, codes, names):

for glv, plv in zip(gmi_copy.levels, pmi_copy.levels):
assert all(glv.values_host == plv.values)
for (_, gval), pval in zip(gmi.codes._data._data.items(), pmi.codes):
assert all(gval.values_host == pval.astype(np.int64))
for gval, pval in zip(gmi.codes, pmi.codes):
assert_eq(gval, pval)
assert_eq(gmi_copy.names, pmi_copy.names)

# Test same behavior when used on DataFrame
Expand Down Expand Up @@ -2009,3 +2009,12 @@ def test_multiindex_to_frame_allow_duplicates(
)

assert_eq(expected, actual)


def test_multiIndex_codes():
    """Check that ``MultiIndex.codes`` matches pandas level by level.

    Builds a two-level cudf MultiIndex from tuples, converts it to pandas,
    and compares each per-level code array from pandas against the
    corresponding one exposed by cudf.
    """
    midx = cudf.MultiIndex.from_tuples(
        [("a", "b"), ("a", "c"), ("b", "c")], names=["A", "Z"]
    )

    expected_codes = midx.to_pandas().codes
    actual_codes = midx.codes

    # zip() stops at the shorter input, so without this check the loop below
    # would pass vacuously if cudf returned fewer code arrays than pandas.
    assert len(expected_codes) == len(actual_codes)

    for p_array, g_array in zip(expected_codes, actual_codes):
        assert_eq(p_array, g_array)
Loading