Skip to content

Commit 7818486

Browse files
BUG: Have object dtype for empty Categorical.categories (#17249)
* BUG: Have object dtype for empty Categorical ctor Previously we had a `Float64Index`, which is inconsistent with, e.g., the regular Index constructor. * TST: Update tests in multi for new return Previously these worked around the return type by wrapping list-likes in `np.array` and relying on that to cast to float. These workarounds are no longer necessary. * TST: Update union_categorical tests This relied on `NaN` being a float and empty being a float. Not a necessary test anymore. * TST: set object dtype
1 parent 34c4ffd commit 7818486

File tree

6 files changed

+25
-16
lines changed

6 files changed

+25
-16
lines changed

doc/source/whatsnew/v0.21.0.txt

+3
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,9 @@ Numeric
385385
Categorical
386386
^^^^^^^^^^^
387387
- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`)
388+
- Bug in the categorical constructor with empty values and categories causing
389+
the ``.categories`` to be an empty ``Float64Index`` rather than an empty
390+
``Index`` with object dtype (:issue:`17248`)
388391

389392

390393
Other

pandas/core/categorical.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -290,7 +290,10 @@ def __init__(self, values, categories=None, ordered=False, fastpath=False):
290290
# On list with NaNs, int values will be converted to float. Use
291291
# "object" dtype to prevent this. In the end objects will be
292292
# cast to int/... in the category assignment step.
293-
dtype = 'object' if isna(values).any() else None
293+
if len(values) == 0 or isna(values).any():
294+
dtype = 'object'
295+
else:
296+
dtype = None
294297
values = _sanitize_array(values, None, dtype=dtype)
295298

296299
if categories is None:

pandas/tests/indexes/test_multi.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -776,7 +776,7 @@ def test_from_arrays_empty(self):
776776
arrays = [[]] * N
777777
names = list('ABC')[:N]
778778
result = MultiIndex.from_arrays(arrays=arrays, names=names)
779-
expected = MultiIndex(levels=[np.array([])] * N, labels=[[]] * N,
779+
expected = MultiIndex(levels=[[]] * N, labels=[[]] * N,
780780
names=names)
781781
tm.assert_index_equal(result, expected)
782782

@@ -829,7 +829,7 @@ def test_from_product_empty(self):
829829

830830
# 1 level
831831
result = MultiIndex.from_product([[]], names=['A'])
832-
expected = pd.Float64Index([], name='A')
832+
expected = pd.Index([], name='A')
833833
tm.assert_index_equal(result, expected)
834834

835835
# 2 levels
@@ -838,7 +838,7 @@ def test_from_product_empty(self):
838838
names = ['A', 'B']
839839
for first, second in zip(l1, l2):
840840
result = MultiIndex.from_product([first, second], names=names)
841-
expected = MultiIndex(levels=[np.array(first), np.array(second)],
841+
expected = MultiIndex(levels=[first, second],
842842
labels=[[], []], names=names)
843843
tm.assert_index_equal(result, expected)
844844

@@ -847,8 +847,7 @@ def test_from_product_empty(self):
847847
for N in range(4):
848848
lvl2 = lrange(N)
849849
result = MultiIndex.from_product([[], lvl2, []], names=names)
850-
expected = MultiIndex(levels=[np.array(A)
851-
for A in [[], lvl2, []]],
850+
expected = MultiIndex(levels=[[], lvl2, []],
852851
labels=[[], [], []], names=names)
853852
tm.assert_index_equal(result, expected)
854853

pandas/tests/reshape/test_concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -680,7 +680,7 @@ def test_concat_categorical_empty(self):
680680
tm.assert_series_equal(s1.append(s2, ignore_index=True), s2)
681681

682682
s1 = pd.Series([], dtype='category')
683-
s2 = pd.Series([])
683+
s2 = pd.Series([], dtype='object')
684684

685685
# different dtype => not-category
686686
tm.assert_series_equal(pd.concat([s1, s2], ignore_index=True), s2)

pandas/tests/reshape/test_union_categoricals.py

+3-9
Original file line numberDiff line numberDiff line change
@@ -107,17 +107,11 @@ def test_union_categoricals_empty(self):
107107
exp = Categorical([])
108108
tm.assert_categorical_equal(res, exp)
109109

110-
res = union_categoricals([pd.Categorical([]),
111-
pd.Categorical([1.0])])
112-
exp = Categorical([1.0])
110+
res = union_categoricals([Categorical([]),
111+
Categorical(['1'])])
112+
exp = Categorical(['1'])
113113
tm.assert_categorical_equal(res, exp)
114114

115-
# to make dtype equal
116-
nanc = pd.Categorical(np.array([np.nan], dtype=np.float64))
117-
res = union_categoricals([nanc,
118-
pd.Categorical([])])
119-
tm.assert_categorical_equal(res, nanc)
120-
121115
def test_union_categorical_same_category(self):
122116
# check fastpath
123117
c1 = Categorical([1, 2, 3, 4], categories=[1, 2, 3, 4])

pandas/tests/test_categorical.py

+10
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,16 @@ def test_setitem_listlike(self):
112112
result = c.codes[np.array([100000]).astype(np.int64)]
113113
tm.assert_numpy_array_equal(result, np.array([5], dtype='int8'))
114114

115+
def test_constructor_empty(self):
116+
# GH 17248
117+
c = Categorical([])
118+
expected = Index([])
119+
tm.assert_index_equal(c.categories, expected)
120+
121+
c = Categorical([], categories=[1, 2, 3])
122+
expected = pd.Int64Index([1, 2, 3])
123+
tm.assert_index_equal(c.categories, expected)
124+
115125
def test_constructor_unsortable(self):
116126

117127
# it works!

0 commit comments

Comments
 (0)