
BUG: CategoricalIndex.searchsorted doesn't return a scalar if input was scalar #21019

Closed
wants to merge 6 commits into from
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.23.1.txt
@@ -88,7 +88,8 @@ Indexing
- Bug in :meth:`MultiIndex.set_names` where error raised for a ``MultiIndex`` with ``nlevels == 1`` (:issue:`21149`)
- Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, :issue:`21253`)
- Bug in :meth:`MultiIndex.sort_index` which was not guaranteed to sort correctly with ``level=1``; this was also causing data misalignment in particular :meth:`DataFrame.stack` operations (:issue:`20994`, :issue:`20945`, :issue:`21052`)
-
- Bug in :func:`CategoricalIndex.searchsorted` where the method did not return a scalar when the input value was a scalar (:issue:`21019`)
Contributor:
can you move to 0.23.2

- Bug in :class:`CategoricalIndex` where slicing beyond the range of the data raised a ``KeyError`` (:issue:`21019`)

I/O
^^^
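For reference, a minimal reproduction of the two entries above (a sketch based on the issue report; the "before" behaviour is the reported bug, the "after" behaviour is what this change is meant to produce):

>>> import pandas as pd
>>> ci = pd.CategoricalIndex(list('aabcde'), ordered=True)
>>> ci.searchsorted('c')   # before the fix: array([3]); after: 3
>>> df = pd.DataFrame({'A': range(3)},
...                   index=pd.CategoricalIndex(list('aab'),
...                                             categories=list('abcde'),
...                                             ordered=True, name='B'))
>>> df.loc['a':'d']        # before the fix: KeyError; after: the full frame
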
2 changes: 2 additions & 0 deletions pandas/core/arrays/categorical.py
@@ -1342,6 +1342,8 @@ def searchsorted(self, value, side='left', sorter=None):

if -1 in values_as_codes:
raise ValueError("Value(s) to be inserted must be in categories.")
if is_scalar(value):
Contributor:
would rather do this in pandas/core/base.py/searchsorted

use is_scalar rather than a numpy function
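
(is_scalar here refers to the pandas helper, importable as pandas.api.types.is_scalar; a quick sketch of what it treats as scalar:)

>>> import numpy as np
>>> from pandas.api.types import is_scalar
>>> is_scalar('apple'), is_scalar(1), is_scalar(np.int8(1))
(True, True, True)
>>> is_scalar(['apple']), is_scalar(np.array(1))
(False, False)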

Member (Author):
  • the issue is rather with the helper function _get_codes_for_values which always returns an array. I didn't want to change it there since the way it is written right now only works for array like objects. In base.py we're already calling searchsorted directly on the numpy array, i.e. it obeys the in/output shape
  • I'm using is_scalar here, is this wrong? Are you referring to the np.asscalar? I couldn't find a suitable pandas function for that (other than ~ values[0])
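
(A small illustration of the point above, not the pandas internals themselves: numpy's searchsorted already mirrors the shape of its input, so the scalar-ness is only lost once the value has been converted to an array of codes.)

>>> import numpy as np
>>> arr = np.array([0, 0, 1, 2])
>>> arr.searchsorted(1)      # scalar in -> scalar out
2
>>> arr.searchsorted([1])    # array in -> array out
array([2])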

Contributor:
ok I c, change here is ok
don't use np.asscalar, rather use .item()

Contributor:
As I found out in #21699, numpy.searchsorted doesn't like Python ints, but needs numpy ints to achieve its speed.

>>> n = 1_000_000
>>> c = pd.Categorical(list('a' * n + 'b' * n + 'c' * n), ordered=True)
>>> %timeit c.codes.searchsorted(1)  # python int
7 ms ± 24.7 µs per loop
>>> c.codes.dtype
int8
>>> %timeit c.codes.searchsorted(np.int8(1))
2.46 µs ± 82.4 ns per loop

So the scalar version should be values_as_codes = values_as_codes[0] to avoid speed loss.
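
(For comparison, what the two unwrapping options actually return; a quick sketch, exact timings aside:)

>>> import numpy as np
>>> codes = np.array([2], dtype=np.int8)
>>> type(codes[0])        # keeps the codes' numpy dtype
<class 'numpy.int8'>
>>> type(codes.item())    # plain Python int
<class 'int'>

So indexing with [0] hands searchsorted a value that already matches the codes' dtype, which is presumably what avoids the slowdown shown in the timings above, while .item() produces a Python int again.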

values_as_codes = values_as_codes.item()

return self.codes.searchsorted(values_as_codes, side=side,
sorter=sorter)
3 changes: 2 additions & 1 deletion pandas/core/indexes/category.py
@@ -432,13 +432,14 @@ def get_loc(self, key, method=None):
>>> monotonic_index.get_loc('b')
slice(1, 3, None)

>>> non_monotonic_index = p.dCategoricalIndex(list('abcb'))
>>> non_monotonic_index = pd.CategoricalIndex(list('abcb'))
>>> non_monotonic_index.get_loc('b')
array([False, True, False, True], dtype=bool)
"""
codes = self.categories.get_loc(key)
if (codes == -1):
raise KeyError(key)

return self._engine.get_loc(codes)

def get_value(self, series, key):
6 changes: 3 additions & 3 deletions pandas/tests/categorical/test_analytics.py
@@ -86,9 +86,9 @@ def test_searchsorted(self):
# Searching for single item argument, side='left' (default)
res_cat = c1.searchsorted('apple')
res_ser = s1.searchsorted('apple')
exp = np.array([2], dtype=np.intp)
tm.assert_numpy_array_equal(res_cat, exp)
tm.assert_numpy_array_equal(res_ser, exp)
exp = np.intp(2)
assert res_cat == exp
assert res_ser == exp

# Searching for single item array, side='left' (default)
res_cat = c1.searchsorted(['bread'])
79 changes: 72 additions & 7 deletions pandas/tests/indexing/test_categorical.py
@@ -627,15 +627,80 @@ def test_reindexing(self):
lambda: self.df2.reindex(['a'], limit=2))

def test_loc_slice(self):
# slicing
# not implemented ATM
# GH9748
df = DataFrame(
{"A": range(0, 6)},
index=CategoricalIndex(list("aabcde"), name="B"),
)

# slice on an unordered categorical using in-sample, connected edges
result = df.loc["b":"d"]
expected = df.iloc[2:5]
assert_frame_equal(result, expected)

pytest.raises(TypeError, lambda: self.df.loc[1:5])
# Slice the entire dataframe
result = df.loc["a":"e"]
assert_frame_equal(result, df)
result_iloc = df.iloc[0:6]
assert_frame_equal(result_iloc, result)

# check if the result is identical to an ordinary index
df_non_cat_index = df.copy()
df_non_cat_index.index = df_non_cat_index.index.astype(str)
result = df.loc["a":"e"]
result_non_cat = df_non_cat_index.loc["a": "e"]
result.index = result.index.astype(str)
assert_frame_equal(result_non_cat, result)

@pytest.mark.parametrize(
"content",
[list("aab"), list("bbc"), list('bbc')],
ids=["right_edge", "left_edge", "both_edges"],
)
def test_loc_beyond_edge_slicing(self, content):
"""
This test ensures that no `KeyError` is raised if trying to slice
beyond the edges of known, ordered categories.

see GH21019
"""
# This dataframe might be a slice of a larger categorical
# (i.e. more categories are known than there are in the column)

ordered_df = DataFrame(
{"A": range(0, 3)},
index=CategoricalIndex(
content, categories=list("abcde"), name="B", ordered=True
),
)

# Although the edge is not within the slice, this should fall back
# to searchsorted slicing since the category is known and the index
# is ordered. Since we're selecting a value larger/lower than the
# right/left edge we should get the original slice again.
result = ordered_df.loc["a": "d"]
assert_frame_equal(result, ordered_df)

Contributor:

can you also test the left edge as well

# Ensure that index based slicing gives the same result
result_iloc = ordered_df.iloc[0:4]
assert_frame_equal(result, result_iloc)


# result = df.loc[1:5]
# expected = df.iloc[[1,2,3,4]]
# assert_frame_equal(result, expected)
with pytest.raises(KeyError):
# If the category is not known, there is nothing we can do
ordered_df.loc["a":"z"]

unordered_df = ordered_df.copy()
unordered_df.index = unordered_df.index.as_unordered()
with pytest.raises(KeyError):
# This operation previously succeeded for an ordered index. Since
# this index is no longer ordered, we cannot perform out of range
# slicing / searchsorted
unordered_df.loc["a": "d"]

def test_boolean_selection(self):
