Skip to content

COMPAT: Categorical Subclassing #13827

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 33 additions & 29 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,11 +328,16 @@ def __init__(self, values, categories=None, ordered=False,
self._categories = categories
self._codes = _coerce_indexer_dtype(codes, categories)

@property
def _constructor(self):
    # Class used to build new objects derived from this one (for example
    # the result of ``copy``, which calls ``self._constructor(...)``).
    # Subclasses override this property to return themselves so that such
    # results keep the subclass type instead of falling back to Categorical.
    return Categorical

def copy(self):
""" Copy constructor. """
return Categorical(values=self._codes.copy(),
categories=self.categories, ordered=self.ordered,
fastpath=True)
return self._constructor(values=self._codes.copy(),
categories=self.categories,
ordered=self.ordered,
fastpath=True)

def astype(self, dtype, copy=True):
"""
Expand Down Expand Up @@ -414,7 +419,7 @@ def from_array(cls, data, **kwargs):
Can be an Index or array-like. The categories are assumed to be
the unique values of `data`.
"""
return Categorical(data, **kwargs)
return cls(data, **kwargs)

@classmethod
def from_codes(cls, codes, categories, ordered=False, name=None):
Expand Down Expand Up @@ -458,8 +463,8 @@ def from_codes(cls, codes, categories, ordered=False, name=None):
raise ValueError("codes need to be between -1 and "
"len(categories)-1")

return Categorical(codes, categories=categories, ordered=ordered,
fastpath=True)
return cls(codes, categories=categories, ordered=ordered,
fastpath=True)

_codes = None

Expand Down Expand Up @@ -916,9 +921,9 @@ def map(self, mapper):
"""
new_categories = self.categories.map(mapper)
try:
return Categorical.from_codes(self._codes.copy(),
categories=new_categories,
ordered=self.ordered)
return self.from_codes(self._codes.copy(),
categories=new_categories,
ordered=self.ordered)
except ValueError:
return np.take(new_categories, self._codes)

Expand Down Expand Up @@ -968,8 +973,8 @@ def shift(self, periods):
else:
codes[periods:] = -1

return Categorical.from_codes(codes, categories=self.categories,
ordered=self.ordered)
return self.from_codes(codes, categories=self.categories,
ordered=self.ordered)

def __array__(self, dtype=None):
"""
Expand Down Expand Up @@ -1159,8 +1164,8 @@ def value_counts(self, dropna=True):
count = bincount(np.where(mask, code, ncat))
ix = np.append(ix, -1)

ix = Categorical(ix, categories=cat, ordered=obj.ordered,
fastpath=True)
ix = self._constructor(ix, categories=cat, ordered=obj.ordered,
fastpath=True)

return Series(count, index=CategoricalIndex(ix), dtype='int64')

Expand Down Expand Up @@ -1313,8 +1318,8 @@ def sort_values(self, inplace=False, ascending=True, na_position='last'):
self._codes = codes
return
else:
return Categorical(values=codes, categories=self.categories,
ordered=self.ordered, fastpath=True)
return self._constructor(values=codes, categories=self.categories,
ordered=self.ordered, fastpath=True)

def order(self, inplace=False, ascending=True, na_position='last'):
"""
Expand Down Expand Up @@ -1441,8 +1446,8 @@ def fillna(self, value=None, method=None, limit=None):
values = values.copy()
values[mask] = self.categories.get_loc(value)

return Categorical(values, categories=self.categories,
ordered=self.ordered, fastpath=True)
return self._constructor(values, categories=self.categories,
ordered=self.ordered, fastpath=True)

def take_nd(self, indexer, allow_fill=True, fill_value=None):
""" Take the codes by the indexer, fill with the fill_value.
Expand All @@ -1455,8 +1460,8 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None):
assert isnull(fill_value)

codes = take_1d(self._codes, indexer, allow_fill=True, fill_value=-1)
result = Categorical(codes, categories=self.categories,
ordered=self.ordered, fastpath=True)
result = self._constructor(codes, categories=self.categories,
ordered=self.ordered, fastpath=True)
return result

take = take_nd
Expand All @@ -1476,8 +1481,8 @@ def _slice(self, slicer):
slicer = slicer[1]

_codes = self._codes[slicer]
return Categorical(values=_codes, categories=self.categories,
ordered=self.ordered, fastpath=True)
return self._constructor(values=_codes, categories=self.categories,
ordered=self.ordered, fastpath=True)

def __len__(self):
"""The length of this Categorical."""
Expand Down Expand Up @@ -1588,10 +1593,9 @@ def __getitem__(self, key):
else:
return self.categories[i]
else:
return Categorical(values=self._codes[key],
categories=self.categories,
ordered=self.ordered,
fastpath=True)
return self._constructor(values=self._codes[key],
categories=self.categories,
ordered=self.ordered, fastpath=True)

def __setitem__(self, key, value):
""" Item assignment.
Expand Down Expand Up @@ -1742,8 +1746,8 @@ def mode(self):
import pandas.hashtable as htable
good = self._codes != -1
values = sorted(htable.mode_int64(_ensure_int64(self._codes[good])))
result = Categorical(values=values, categories=self.categories,
ordered=self.ordered, fastpath=True)
result = self._constructor(values=values, categories=self.categories,
ordered=self.ordered, fastpath=True)
return result

def unique(self):
Expand Down Expand Up @@ -1837,8 +1841,8 @@ def repeat(self, repeats, *args, **kwargs):
"""
nv.validate_repeat(args, kwargs)
codes = self._codes.repeat(repeats)
return Categorical(values=codes, categories=self.categories,
ordered=self.ordered, fastpath=True)
return self._constructor(values=codes, categories=self.categories,
ordered=self.ordered, fastpath=True)

# The Series.cat accessor

Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -4415,6 +4415,36 @@ def test_concat_categorical(self):
tm.assert_frame_equal(df_expected, df_concat)


class TestCategoricalSubclassing(tm.TestCase):
    """Check that a Categorical subclass survives construction and mapping.

    Every test builds a ``tm.SubclassedCategorical``, asserts that the
    result is still an instance of the subclass, and compares its contents
    with the equivalent plain ``Categorical``.
    """

    _multiprocess_can_split_ = True

    def test_constructor(self):
        # Direct construction must yield the subclass itself.
        sc = tm.SubclassedCategorical(['a', 'b', 'c'])
        self.assertIsInstance(sc, tm.SubclassedCategorical)
        tm.assert_categorical_equal(sc, Categorical(['a', 'b', 'c']))

    def test_from_array(self):
        # ``from_array`` is a classmethod building its result via ``cls``,
        # so calling it on the subclass must return the subclass.
        sc = tm.SubclassedCategorical.from_array(['a', 'b', 'c'])
        self.assertIsInstance(sc, tm.SubclassedCategorical)
        exp = Categorical.from_array(['a', 'b', 'c'])
        tm.assert_categorical_equal(sc, exp)

    def test_from_codes(self):
        # Previously this body lived under the name ``test_from_array``
        # even though it exercises ``from_codes``.
        sc = tm.SubclassedCategorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
        self.assertIsInstance(sc, tm.SubclassedCategorical)
        exp = Categorical.from_codes([1, 0, 2], ['a', 'b', 'c'])
        tm.assert_categorical_equal(sc, exp)

    def test_map(self):
        # ``map`` goes through ``self.from_codes``, which must keep the
        # subclass. (A second, identical ``test_map`` definition used to
        # shadow this one; it has been removed.)
        sc = tm.SubclassedCategorical(['a', 'b', 'c'])
        res = sc.map(lambda x: x.upper())
        self.assertIsInstance(res, tm.SubclassedCategorical)
        exp = Categorical(['A', 'B', 'C'])
        tm.assert_categorical_equal(res, exp)


if __name__ == '__main__':
import nose
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
Expand Down
9 changes: 8 additions & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@

from pandas.computation import expressions as expr

from pandas import (bdate_range, CategoricalIndex, DatetimeIndex,
from pandas import (bdate_range, CategoricalIndex, Categorical, DatetimeIndex,
TimedeltaIndex, PeriodIndex, RangeIndex, Index, MultiIndex,
Series, DataFrame, Panel, Panel4D)
from pandas.util.decorators import deprecate
Expand Down Expand Up @@ -2670,6 +2670,13 @@ def _constructor_sliced(self):
return SubclassedSparseSeries


class SubclassedCategorical(Categorical):
    # Minimal Categorical subclass used by the test suite to check that
    # operations hand back the subclass rather than a plain Categorical.

    @property
    def _constructor(self):
        # Name this subclass explicitly so that code building results via
        # ``self._constructor(...)`` produces SubclassedCategorical objects.
        return SubclassedCategorical


@contextmanager
def patch(ob, attr, value):
"""Temporarily patch an attribute of an object.
Expand Down