From 70ba727a73daf1b5ed1d7b33c0f1e7c3326ae2a2 Mon Sep 17 00:00:00 2001 From: aviolov Date: Fri, 7 Jul 2017 23:05:04 +0200 Subject: [PATCH 1/4] BUG: GH16639 fix Series.isin for Categoricals --- pandas/core/algorithms.py | 25 +++++++++++++------------ pandas/tests/test_categorical.py | 11 +++++++++++ 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index d74c5e66ea1a9..ffb03f52ffbdb 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -3,14 +3,18 @@ intended for public consumption """ from __future__ import division + from warnings import warn, catch_warnings -import numpy as np from pandas import compat, _np_version_under1p8 +from pandas.compat import string_types +from pandas.compat.numpy import _np_version_under1p10 +from pandas.core import common as com + +import numpy as np +from pandas._libs import algos, lib, hashtable as htable +from pandas._libs.tslib import iNaT from pandas.core.dtypes.cast import maybe_promote -from pandas.core.dtypes.generic import ( - ABCSeries, ABCIndex, - ABCIndexClass, ABCCategorical) from pandas.core.dtypes.common import ( is_unsigned_integer_dtype, is_signed_integer_dtype, is_integer_dtype, is_complex_dtype, @@ -26,19 +30,15 @@ _ensure_platform_int, _ensure_object, _ensure_float64, _ensure_uint64, _ensure_int64) -from pandas.compat.numpy import _np_version_under1p10 +from pandas.core.dtypes.generic import ( + ABCSeries, ABCIndex, + ABCIndexClass, ABCCategorical) from pandas.core.dtypes.missing import isnull -from pandas.core import common as com -from pandas.compat import string_types -from pandas._libs import algos, lib, hashtable as htable -from pandas._libs.tslib import iNaT - # --------------- # # dtype access # # --------------- # - def _ensure_data(values, dtype=None): """ routine to ensure that our data is of the correct @@ -113,7 +113,8 @@ def _ensure_data(values, dtype=None): return values.asi8, dtype, 'int64' - elif 
is_categorical_dtype(values) or is_categorical_dtype(dtype): + elif (is_categorical_dtype(values) and ++ (is_categorical_dtype(dtype) or dtype is None)): values = getattr(values, 'values', values) values = values.codes dtype = 'category' diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 667b26c24c662..24bdae9336d48 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -442,6 +442,17 @@ def f(): if hasattr(np.random, "choice"): codes = np.random.choice([0, 1], 5, p=[0.9, 0.1]) pd.Categorical.from_codes(codes, categories=["train", "test"]) + + # Regression test https://github.com/pandas-dev/pandas/issues/16639 + vals = np.array([0, 1, 2, 0]); + cats = ['a', 'b', 'c']; + + D = pd.DataFrame({'id': pd.Series(pd.Categorical(1).from_codes(vals, cats))}); + T = pd.DataFrame({'id': pd.Series(pd.Categorical(1).from_codes(np.array([0, 1]), cats))}); + + select_ids = D['id'].isin(T['id']); + + assert( np.all(select_ids == np.array([True, True, False, True]) ) ) def test_validate_ordered(self): # see gh-14058 From 0c5cc74f64cede3ea6a643fdb516a070e78f75e7 Mon Sep 17 00:00:00 2001 From: aviolov Date: Fri, 7 Jul 2017 23:17:58 +0200 Subject: [PATCH 2/4] BUG: GH16639 fix Series.isin for Categoricals --- doc/source/whatsnew/v0.21.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d5cc3d6ddca8e..20daa19a7eded 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -188,7 +188,7 @@ Numeric Categorical ^^^^^^^^^^^ - +- Bug in ``Series.isin()`` when called for categoricals (:issue`16639`) Other ^^^^^ From 503bc2bcbce7e397eac360698e3c53695746538e Mon Sep 17 00:00:00 2001 From: aviolov Date: Sat, 8 Jul 2017 00:03:16 +0200 Subject: [PATCH 3/4] GH16639: stylistic fixes, move test to test_algos from test_categoricals, fix import order --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/core/algorithms.py 
| 21 ++++++++++----------- pandas/tests/test_algos.py | 14 ++++++++++++-- pandas/tests/test_categorical.py | 11 ----------- 4 files changed, 23 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 20daa19a7eded..c6046aac831b1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -188,7 +188,7 @@ Numeric Categorical ^^^^^^^^^^^ -- Bug in ``Series.isin()`` when called for categoricals (:issue`16639`) +- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`) Other ^^^^^ diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index ffb03f52ffbdb..1c50fefa24eec 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -3,18 +3,14 @@ intended for public consumption """ from __future__ import division - from warnings import warn, catch_warnings +import numpy as np from pandas import compat, _np_version_under1p8 -from pandas.compat import string_types -from pandas.compat.numpy import _np_version_under1p10 -from pandas.core import common as com - -import numpy as np -from pandas._libs import algos, lib, hashtable as htable -from pandas._libs.tslib import iNaT from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.generic import ( + ABCSeries, ABCIndex, + ABCIndexClass, ABCCategorical) from pandas.core.dtypes.common import ( is_unsigned_integer_dtype, is_signed_integer_dtype, is_integer_dtype, is_complex_dtype, @@ -30,11 +26,14 @@ _ensure_platform_int, _ensure_object, _ensure_float64, _ensure_uint64, _ensure_int64) -from pandas.core.dtypes.generic import ( - ABCSeries, ABCIndex, - ABCIndexClass, ABCCategorical) +from pandas.compat.numpy import _np_version_under1p10 from pandas.core.dtypes.missing import isnull +from pandas.core import common as com +from pandas.compat import string_types +from pandas._libs import algos, lib, hashtable as htable +from pandas._libs.tslib import iNaT + # --------------- # # dtype
access # diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 063dcea5c76d6..3d725a03122e2 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -438,7 +438,8 @@ def test_categorical(self): result = pd.unique(ci) tm.assert_index_equal(result, expected) - + + def test_datetime64tz_aware(self): # GH 15939 @@ -586,7 +587,16 @@ def test_large(self): expected[1] = True tm.assert_numpy_array_equal(result, expected) - + def test_categorical_from_codes(self): + # GH 16639 + vals = np.array([0, 1, 2, 0]) + cats = ['a', 'b', 'c'] + Sd = pd.Series(pd.Categorical(1).from_codes(vals, cats)) + St = pd.Series(pd.Categorical(1).from_codes(np.array([0, 1]), cats)) + expected = np.array([True, True, False, True]) + result = algos.isin(Sd,St) + tm.assert_numpy_array_equal(expected, result) + class TestValueCounts(object): def test_value_counts(self): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 24bdae9336d48..14f30d7ab80a0 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -443,17 +443,6 @@ def f(): codes = np.random.choice([0, 1], 5, p=[0.9, 0.1]) pd.Categorical.from_codes(codes, categories=["train", "test"]) - # Regression test https://github.com/pandas-dev/pandas/issues/16639 - vals = np.array([0, 1, 2, 0]); - cats = ['a', 'b', 'c']; - - D = pd.DataFrame({'id': pd.Series(pd.Categorical(1).from_codes(vals, cats))}); - T = pd.DataFrame({'id': pd.Series(pd.Categorical(1).from_codes(np.array([0, 1]), cats))}); - - select_ids = D['id'].isin(T['id']); - - assert( np.all(select_ids == np.array([True, True, False, True]) ) ) - def test_validate_ordered(self): # see gh-14058 exp_msg = "'ordered' must either be 'True' or 'False'" From 8e19c9830454a716128149f0edf854267380b23c Mon Sep 17 00:00:00 2001 From: aviolov Date: Sat, 8 Jul 2017 21:25:50 +0200 Subject: [PATCH 4/4] GH16639: lint fixes --- pandas/core/algorithms.py | 2 +- pandas/tests/test_algos.py | 8 ++++---- 
pandas/tests/test_categorical.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 1c50fefa24eec..b490bf787a037 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -113,7 +113,7 @@ def _ensure_data(values, dtype=None): return values.asi8, dtype, 'int64' elif (is_categorical_dtype(values) and -+ (is_categorical_dtype(dtype) or dtype is None)): + (is_categorical_dtype(dtype) or dtype is None)): values = getattr(values, 'values', values) values = values.codes dtype = 'category' diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 3d725a03122e2..9504d2a9426f0 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -438,8 +438,7 @@ def test_categorical(self): result = pd.unique(ci) tm.assert_index_equal(result, expected) - - + def test_datetime64tz_aware(self): # GH 15939 @@ -594,9 +593,10 @@ def test_categorical_from_codes(self): Sd = pd.Series(pd.Categorical(1).from_codes(vals, cats)) St = pd.Series(pd.Categorical(1).from_codes(np.array([0, 1]), cats)) expected = np.array([True, True, False, True]) - result = algos.isin(Sd,St) + result = algos.isin(Sd, St) tm.assert_numpy_array_equal(expected, result) - + + class TestValueCounts(object): def test_value_counts(self): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 14f30d7ab80a0..667b26c24c662 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -442,7 +442,7 @@ def f(): if hasattr(np.random, "choice"): codes = np.random.choice([0, 1], 5, p=[0.9, 0.1]) pd.Categorical.from_codes(codes, categories=["train", "test"]) - + def test_validate_ordered(self): # see gh-14058 exp_msg = "'ordered' must either be 'True' or 'False'"