From 9e4e9289bd5e9694e2aaaf312d0fffe277e2159f Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 12 Jan 2021 14:21:04 -0800 Subject: [PATCH 1/3] BUG: setting categorical values into object dtype DataFrame --- pandas/core/internals/blocks.py | 7 ++++- pandas/tests/indexing/test_iloc.py | 44 ++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 9eb4bdc5dbae3..33e1bb960c75e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -965,7 +965,12 @@ def setitem(self, indexer, value): # GH25495 - If the current dtype is not categorical, # we need to create a new categorical block values[indexer] = value - return self.make_block(Categorical(self.values, dtype=arr_value.dtype)) + if values.ndim == 2: + if values.shape[-1] != 1: + # shouldn't get here (at least until 2D EAs) + raise NotImplementedError + values = values[:, 0] + return self.make_block(Categorical(values, dtype=arr_value.dtype)) elif exact_match and is_ea_value: # GH#32395 if we're going to replace the values entirely, just diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 7dcb30efb8184..1668123e782ff 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -65,6 +65,50 @@ def test_iloc_getitem_list_int(self): class TestiLocBaseIndependent: """Tests Independent Of Base Class""" + @pytest.mark.parametrize( + "key", + [ + slice(None), + slice(3), + range(3), + [0, 1, 2], + Index(range(3)), + np.asarray([0, 1, 2]), + ], + ) + @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) + def test_iloc_setitem_fullcol_categorical(self, indexer, key): + frame = DataFrame({0: range(3)}, dtype=object) + + cat = Categorical(["alpha", "beta", "gamma"]) + expected = DataFrame({0: cat}) + # NB: pending GH#38896, the expected likely should become + # expected= DataFrame({"A": cat.astype(object)}) + # and should remain 
a view on the original values + + assert frame._mgr.blocks[0]._can_hold_element(cat) + + df = frame.copy() + orig_vals = df.values + indexer(df)[key, 0] = cat + + overwrite = not isinstance(key, slice) + + tm.assert_frame_equal(df, expected) + + # TODO: this inconsistency is likely undesired GH#39986 + if overwrite: + # check that we overwrote underlying + tm.assert_numpy_array_equal(orig_vals, df.values) + + # but we don't have a view on orig_vals + orig_vals[0, 0] = 19 + assert df.iloc[0, 0] != 19 + + # check we don't have a view on cat (may be undesired GH#39986) + df.iloc[0, 0] = "gamma" + assert cat[0] != "gamma" + @pytest.mark.parametrize("box", [pd_array, Series]) def test_iloc_setitem_ea_inplace(self, frame_or_series, box): # GH#38952 Case with not setting a full column From ec534a2b5ec9851adef63f8c96e81e6570009116 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 12 Jan 2021 14:22:20 -0800 Subject: [PATCH 2/3] comment --- pandas/core/internals/blocks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 33e1bb960c75e..65b1c30d0cfa3 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -966,6 +966,7 @@ def setitem(self, indexer, value): # we need to create a new categorical block values[indexer] = value if values.ndim == 2: + # TODO(EA2D): special case not needed with 2D EAs if values.shape[-1] != 1: # shouldn't get here (at least until 2D EAs) raise NotImplementedError From 0ca7d78166f72599704b4c08bd1f11e3d35f8c4f Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 14 Jan 2021 07:44:10 -0800 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index db2e2ba3a2e1e..06df3d019eb2f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -195,6 +195,7 @@ Categorical - Bug where constructing a :class:`Categorical`
from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`) - Bug in constructing a :class:`DataFrame` from an ``ndarray`` and a :class:`CategoricalDtype` (:issue:`38857`) - Bug in :meth:`DataFrame.reindex` was throwing ``IndexError`` when new index contained duplicates and old index was :class:`CategoricalIndex` (:issue:`38906`) +- Bug in setting categorical values into an object-dtype column in a :class:`DataFrame` (:issue:`39136`) Datetimelike ^^^^^^^^^^^^