pandas-dev · jreback · Jul 18, 2015 · Jul 3, 2015 · jreback · Jul 5, 2015
diff --git a/doc/source/whatsnew/v0.17.0.txt b/doc/source/whatsnew/v0.17.0.txt
@@ -128,6 +128,7 @@ Bug Fixes
 
 
 - Bug in ``test_categorical`` on big-endian builds (:issue:`10425`)
+- Bug in ``Series.shift`` and ``DataFrame.shift`` not supporting categorical data (:issue:`9416`)
 - Bug in ``Series.map`` using categorical ``Series`` raises ``AttributeError`` (:issue:`10324`)
 - Bug in ``MultiIndex.get_level_values`` including ``Categorical`` raises ``AttributeError`` (:issue:`10460`)
 

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -820,6 +820,35 @@ def shape(self):
 
         return tuple([len(self._codes)])
 
+    def shift(self, periods):
+        """
+        Shift Categorical by desired number of periods.
+
+        Parameters
+        ----------
+        periods : int
+            Number of periods to move, can be positive or negative
+
+        Returns
+        -------
+        shifted : Categorical
+        """
+        # since categoricals always have ndim == 1, an axis parameter
+        # doesn't make any sense here.
+        codes = self.codes
+        if codes.ndim > 1:
+            raise NotImplementedError("Categorical with ndim > 1.")
+        if np.prod(codes.shape) and (periods != 0):  # skip empty / no-op
+            codes = np.roll(codes, com._ensure_platform_int(periods), axis=0)
+            if periods > 0:
+                codes[:periods] = -1  # overwrite values rolled in from the end
+            else:
+                codes[periods:] = -1  # overwrite values rolled in from the start
+
+        return Categorical.from_codes(codes,
+                                      categories=self.categories,
+                                      ordered=self.ordered)
+
     def __array__(self, dtype=None):
         """
         The numpy array interface.

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1709,6 +1709,10 @@ def interpolate(self, method='pad', axis=0, inplace=False,
                                                                limit=limit),
                                           placement=self.mgr_locs)
 
+    def shift(self, periods, axis=0):  # axis is accepted but not used here
+        return self.make_block_same_class(values=self.values.shift(periods),
+                                          placement=self.mgr_locs)
+
     def take_nd(self, indexer, axis=0, new_mgr_locs=None, fill_tuple=None):
         """
         Take values according to indexer and return them as a block.bb

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
@@ -1080,6 +1080,26 @@ def test_set_item_nan(self):
         exp = np.array([0,1,3,2])
         self.assert_numpy_array_equal(cat.codes, exp)
 
+    def test_shift(self):
+        # GH 9416
+        cat = pd.Categorical(['a', 'b', 'c', 'd', 'a'])
+
+        # shift forward: NaN enters at the front, last value drops off
+        sp1 = cat.shift(1)
+        xp1 = pd.Categorical([np.nan, 'a', 'b', 'c', 'd'])
+        self.assert_categorical_equal(sp1, xp1)
+        self.assert_categorical_equal(cat[:-1], sp1[1:])
+
+        # shift back by two: NaN fills the last two slots
+        sn2 = cat.shift(-2)
+        xp2 = pd.Categorical(['c', 'd', 'a', np.nan, np.nan],
+                categories=['a', 'b', 'c', 'd'])
+        self.assert_categorical_equal(sn2, xp2)
+        self.assert_categorical_equal(cat[2:], sn2[:-2])
+
+        # shift by zero leaves the categorical unchanged
+        self.assert_categorical_equal(cat, cat.shift(0))
+
     def test_nbytes(self):
         cat = pd.Categorical([1,2,3])
         exp = cat._codes.nbytes + cat._categories.values.nbytes

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -10360,6 +10360,15 @@ def test_shift_bool(self):
                        columns=['high', 'low'])
         assert_frame_equal(rs, xp)
 
+    def test_shift_categorical(self):
+        # GH 9416: DataFrame.shift must equal shifting each column alone
+        s1 = pd.Series(['a', 'b', 'c'], dtype='category')
+        s2 = pd.Series(['A', 'B', 'C'], dtype='category')
+        df = DataFrame({'one': s1, 'two': s2})
+        rs = df.shift(1)
+        xp = DataFrame({'one': s1.shift(1), 'two': s2.shift(1)})
+        assert_frame_equal(rs, xp)
+
     def test_shift_empty(self):
         # Regression test for #8019
         df = DataFrame({'foo': []})

diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -35,6 +35,7 @@
 from pandas.util.testing import (assert_series_equal,
                                  assert_almost_equal,
                                  assert_frame_equal,
+                                 assert_index_equal,
                                  ensure_clean)
 import pandas.util.testing as tm
 
@@ -5260,6 +5261,25 @@ def test_shift_int(self):
         expected = ts.astype(float).shift(1)
         assert_series_equal(shifted, expected)
 
+    def test_shift_categorical(self):
+        # GH 9416: shifting a categorical Series keeps index and categories
+        s = pd.Series(['a', 'b', 'c', 'd'], dtype='category')
+
+        assert_series_equal(s.iloc[:-1], s.shift(1).shift(-1).valid())
+
+        sp1 = s.shift(1)
+        assert_index_equal(s.index, sp1.index)
+        self.assertTrue(np.all(sp1.values.codes[:1] == -1))  # vacated slot
+        self.assertTrue(np.all(s.values.codes[:-1] == sp1.values.codes[1:]))
+
+        sn2 = s.shift(-2)
+        assert_index_equal(s.index, sn2.index)
+        self.assertTrue(np.all(sn2.values.codes[-2:] == -1))  # vacated slots
+        self.assertTrue(np.all(s.values.codes[2:] == sn2.values.codes[:-2]))
+
+        assert_index_equal(s.values.categories, sp1.values.categories)
+        assert_index_equal(s.values.categories, sn2.values.categories)
+
     def test_truncate(self):
         offset = datetools.bday