Skip to content

Commit 9ebbe1b

Browse files
another-green authored and jreback committed
BUG: fix categorical comparison with missing values (#26504 ) (#26514)
1 parent 4ec92eb commit 9ebbe1b

File tree

3 files changed

+41
-6
lines changed

3 files changed

+41
-6
lines changed

doc/source/whatsnew/v0.25.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,7 @@ Categorical
414414
^^^^^^^^^^^
415415

416416
- Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`)
417-
-
417+
- Fixed bug in comparison of an ordered :class:`Categorical` containing missing values with a scalar, which sometimes incorrectly resulted in ``True`` for the missing values (:issue:`26504`)
418418
-
419419

420420
Datetimelike

pandas/core/arrays/categorical.py

+9-4
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,23 @@ def f(self, other):
8989
else:
9090
other_codes = other._codes
9191

92-
na_mask = (self._codes == -1) | (other_codes == -1)
92+
mask = (self._codes == -1) | (other_codes == -1)
9393
f = getattr(self._codes, op)
9494
ret = f(other_codes)
95-
if na_mask.any():
95+
if mask.any():
9696
# In other series, this leads to False, so do that here too
97-
ret[na_mask] = False
97+
ret[mask] = False
9898
return ret
9999

100100
if is_scalar(other):
101101
if other in self.categories:
102102
i = self.categories.get_loc(other)
103-
return getattr(self._codes, op)(i)
103+
ret = getattr(self._codes, op)(i)
104+
105+
# check for NaN in self
106+
mask = (self._codes == -1)
107+
ret[mask] = False
108+
return ret
104109
else:
105110
if op == '__eq__':
106111
return np.repeat(False, len(self))

pandas/tests/arrays/categorical/test_operators.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import operator
2+
import warnings
23

34
import numpy as np
45
import pytest
@@ -17,7 +18,6 @@ def test_categories_none_comparisons(self):
1718
tm.assert_categorical_equal(factor, self.factor)
1819

1920
def test_comparisons(self):
20-
2121
result = self.factor[self.factor == 'a']
2222
expected = self.factor[np.asarray(self.factor) == 'a']
2323
tm.assert_categorical_equal(result, expected)
@@ -186,6 +186,36 @@ def test_comparison_with_unknown_scalars(self):
186186
tm.assert_numpy_array_equal(cat != 4,
187187
np.array([True, True, True]))
188188

189+
def test_comparison_of_ordered_categorical_with_nan_to_scalar(
190+
self, compare_operators_no_eq_ne):
191+
# https://github.com/pandas-dev/pandas/issues/26504
192+
# BUG: fix ordered categorical comparison with missing values (#26504)
193+
# comparing missing values in an ordered Categorical with a scalar
194+
# from its categories should evaluate to False
195+
196+
cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
197+
scalar = 2
198+
with warnings.catch_warnings():
199+
warnings.simplefilter("ignore", RuntimeWarning)
200+
expected = getattr(np.array(cat),
201+
compare_operators_no_eq_ne)(scalar)
202+
actual = getattr(cat, compare_operators_no_eq_ne)(scalar)
203+
tm.assert_numpy_array_equal(actual, expected)
204+
205+
def test_comparison_of_ordered_categorical_with_nan_to_listlike(
206+
self, compare_operators_no_eq_ne):
207+
# https://github.com/pandas-dev/pandas/issues/26504
208+
# comparing missing values in an ordered Categorical with a list-like
209+
# should evaluate to False
210+
211+
cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
212+
other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True)
213+
with warnings.catch_warnings():
214+
warnings.simplefilter("ignore", RuntimeWarning)
215+
expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2)
216+
actual = getattr(cat, compare_operators_no_eq_ne)(other)
217+
tm.assert_numpy_array_equal(actual, expected)
218+
189219
@pytest.mark.parametrize('data,reverse,base', [
190220
(list("abc"), list("cba"), list("bbb")),
191221
([1, 2, 3], [3, 2, 1], [2, 2, 2])]

0 commit comments

Comments
 (0)