From bd3d440c6c0047141c802551c93184e7c531df7e Mon Sep 17 00:00:00 2001
From: fjetter <fjetter@users.noreply.github.com>
Date: Sun, 13 May 2018 11:35:14 +0200
Subject: [PATCH 1/6] categorical: searchsorted returns a scalar if input was
 scalar

---
 doc/source/whatsnew/v0.23.0.txt            |  3 ++
 pandas/core/arrays/categorical.py          |  2 +
 pandas/core/indexes/category.py            |  9 ++--
 pandas/tests/categorical/test_analytics.py |  6 +--
 pandas/tests/indexing/test_categorical.py  | 51 +++++++++++++++++++---
 5 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index a099fb40c35a7..85bad54a5cdb8 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -1289,6 +1289,9 @@ Indexing
 - Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`)
 - Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`)
 - Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`)
+- Bug in ``CategoricalIndex.searchsorted`` where the method didn't return a scalar when the input values was scalar (:issue:`21019`)
+- Bug in ``CategoricalIndex`` where slicing beyond the range of the data raised a KeyError (:issue:`21019`)
+
 
 MultiIndex
 ^^^^^^^^^^
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 30f9c56d24f02..075f13f1bf9fb 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1342,6 +1342,8 @@ def searchsorted(self, value, side='left', sorter=None):
 
         if -1 in values_as_codes:
             raise ValueError("Value(s) to be inserted must be in categories.")
+        if is_scalar(value):
+            values_as_codes = np.asscalar(values_as_codes)
 
         return self.codes.searchsorted(values_as_codes, side=side,
                                        sorter=sorter)
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 150eca32e229d..f99f1ae32b4ef 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -432,13 +432,14 @@ def get_loc(self, key, method=None):
         >>> monotonic_index.get_loc('b')
         slice(1, 3, None)
 
-        >>> non_monotonic_index = p.dCategoricalIndex(list('abcb'))
+        >>> non_monotonic_index = pd.CategoricalIndex(list('abcb'))
         >>> non_monotonic_index.get_loc('b')
         array([False,  True, False,  True], dtype=bool)
         """
-        codes = self.categories.get_loc(key)
-        if (codes == -1):
-            raise KeyError(key)
+        try:
+            codes = self.categories.get_loc(key)
+        except KeyError:
+            raise KeyError("Category `{}` unknown".format(key))
         return self._engine.get_loc(codes)
 
     def get_value(self, series, key):
diff --git a/pandas/tests/categorical/test_analytics.py b/pandas/tests/categorical/test_analytics.py
index 53d0e596a1d99..6557b98c467b7 100644
--- a/pandas/tests/categorical/test_analytics.py
+++ b/pandas/tests/categorical/test_analytics.py
@@ -86,9 +86,9 @@ def test_searchsorted(self):
         # Searching for single item argument, side='left' (default)
         res_cat = c1.searchsorted('apple')
         res_ser = s1.searchsorted('apple')
-        exp = np.array([2], dtype=np.intp)
-        tm.assert_numpy_array_equal(res_cat, exp)
-        tm.assert_numpy_array_equal(res_ser, exp)
+        exp = np.int64(2)
+        assert res_cat == exp
+        assert res_ser == exp
 
         # Searching for single item array, side='left' (default)
         res_cat = c1.searchsorted(['bread'])
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index 634ad0d8160ed..de50f88bdd6c4 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -627,15 +627,52 @@ def test_reindexing(self):
                       lambda: self.df2.reindex(['a'], limit=2))
 
     def test_loc_slice(self):
-        # slicing
-        # not implemented ATM
-        # GH9748
+        # Raises KeyError since the left slice 'a' is not unique
+        pytest.raises(KeyError, lambda: self.df.loc["a":"b"])
+        result = self.df.loc["b":"c"]
 
-        pytest.raises(TypeError, lambda: self.df.loc[1:5])
+        expected = DataFrame(
+            {"A": [2, 3, 4]},
+            index=CategoricalIndex(
+                ["b", "b", "c"], name="B", categories=list("cab")
+            ),
+        )
+
+        assert_frame_equal(result, expected)
+
+        ordered_df = DataFrame(
+            {"A": range(0, 6)},
+            index=CategoricalIndex(list("aabcde"), name="B", ordered=True),
+        )
+
+        result = ordered_df.loc["a":"b"]
+        expected = DataFrame(
+            {"A": range(0, 3)},
+            index=CategoricalIndex(
+                list("aab"), categories=list("abcde"), name="B", ordered=True
+            ),
+        )
+        assert_frame_equal(result, expected)
+
+        # This should select the entire dataframe
+        result = ordered_df.loc["a":"e"]
+        assert_frame_equal(result, ordered_df)
+
+        df_slice = ordered_df.loc["a":"b"]
+        # Although the edge is not within the slice, this should fall back
+        # to searchsorted slicing since the category is known
+        result = df_slice.loc["a":"e"]
+        assert_frame_equal(result, df_slice)
 
-        # result = df.loc[1:5]
-        # expected = df.iloc[[1,2,3,4]]
-        # assert_frame_equal(result, expected)
+        # If the categorical is not sorted and the requested edge
+        # is not in the slice we cannot perform slicing
+        df_slice.index = df_slice.index.as_unordered()
+        with pytest.raises(KeyError):
+            df_slice.loc["a":"e"]
+
+        with pytest.raises(KeyError):
+            # If the category is not known, there is nothing we can do
+            ordered_df.loc["a":"z"]
 
     def test_boolean_selection(self):
 

From c4249fadb45537c3790f069bb67587c4d388bfff Mon Sep 17 00:00:00 2001
From: fjetter <fjetter@users.noreply.github.com>
Date: Sun, 13 May 2018 16:47:51 +0200
Subject: [PATCH 2/6] Address review comments

---
 doc/source/whatsnew/v0.23.0.txt            | 4 ++--
 pandas/core/indexes/category.py            | 8 ++++----
 pandas/tests/categorical/test_analytics.py | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 85bad54a5cdb8..912e12a1191ca 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -1289,8 +1289,8 @@ Indexing
 - Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`)
 - Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`)
 - Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`)
-- Bug in ``CategoricalIndex.searchsorted`` where the method didn't return a scalar when the input values was scalar (:issue:`21019`)
-- Bug in ``CategoricalIndex`` where slicing beyond the range of the data raised a KeyError (:issue:`21019`)
+- Bug in :func:`CategoricalIndex.searchsorted` where the method did not return a scalar when the input values was scalar (:issue:`21019`)
+- Bug in :class:`CategoricalIndex` where slicing beyond the range of the data raised a KeyError (:issue:`21019`)
 
 
 MultiIndex
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index f99f1ae32b4ef..d0d5f3e9de971 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -436,10 +436,10 @@ def get_loc(self, key, method=None):
         >>> non_monotonic_index.get_loc('b')
         array([False,  True, False,  True], dtype=bool)
         """
-        try:
-            codes = self.categories.get_loc(key)
-        except KeyError:
-            raise KeyError("Category `{}` unknown".format(key))
+        codes = self.categories.get_loc(key)
+        if (codes == -1):
+            raise KeyError(key)
+
         return self._engine.get_loc(codes)
 
     def get_value(self, series, key):
diff --git a/pandas/tests/categorical/test_analytics.py b/pandas/tests/categorical/test_analytics.py
index 6557b98c467b7..ab8d2f30f545a 100644
--- a/pandas/tests/categorical/test_analytics.py
+++ b/pandas/tests/categorical/test_analytics.py
@@ -86,7 +86,7 @@ def test_searchsorted(self):
         # Searching for single item argument, side='left' (default)
         res_cat = c1.searchsorted('apple')
         res_ser = s1.searchsorted('apple')
-        exp = np.int64(2)
+        exp = np.intp(2)
         assert res_cat == exp
         assert res_ser == exp
 

From 1c25d659610a874364ce846fdba6cde163818e12 Mon Sep 17 00:00:00 2001
From: fjetter <fjetter@users.noreply.github.com>
Date: Fri, 25 May 2018 08:48:53 +0200
Subject: [PATCH 3/6] Use item instead of np.asscalar to select value

---
 pandas/core/arrays/categorical.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 075f13f1bf9fb..8f670d21d9c44 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1343,7 +1343,7 @@ def searchsorted(self, value, side='left', sorter=None):
         if -1 in values_as_codes:
             raise ValueError("Value(s) to be inserted must be in categories.")
         if is_scalar(value):
-            values_as_codes = np.asscalar(values_as_codes)
+            values_as_codes = values_as_codes.item()
 
         return self.codes.searchsorted(values_as_codes, side=side,
                                        sorter=sorter)

From d4e9879fe2594a47ef7fc6943a5b3fa310eb50b2 Mon Sep 17 00:00:00 2001
From: fjetter <fjetter@users.noreply.github.com>
Date: Fri, 25 May 2018 08:49:28 +0200
Subject: [PATCH 4/6] refactor categorical loc slicing tests

---
 pandas/tests/indexing/test_categorical.py | 45 ++++++++++++++++++-----
 1 file changed, 36 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index de50f88bdd6c4..695fd82c6f07c 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -645,6 +645,12 @@ def test_loc_slice(self):
             index=CategoricalIndex(list("aabcde"), name="B", ordered=True),
         )
 
+        # This should select the entire dataframe
+        result = ordered_df.loc["a":"e"]
+        assert_frame_equal(result, ordered_df)
+        result_iloc = ordered_df.iloc[0:6]
+        assert_frame_equal(result_iloc, result)
+
         result = ordered_df.loc["a":"b"]
         expected = DataFrame(
             {"A": range(0, 3)},
@@ -654,21 +660,42 @@ def test_loc_slice(self):
         )
         assert_frame_equal(result, expected)
 
-        # This should select the entire dataframe
-        result = ordered_df.loc["a":"e"]
-        assert_frame_equal(result, ordered_df)
+    @pytest.mark.parametrize(
+        "content",
+        [list("aab"), list("bbc"), list('bbc')],
+        ids=["right_edge", "left_edge", "both_edges"],
+    )
+    def test_loc_beyond_edge_slicing(self, content):
+        """
+        This test ensures that no `KeyError` is raised if trying to slice
+        beyond the edges of known, ordered categories.
+        """
+        # This dataframe might be a slice of a larger categorical
+        # (i.e. more categories are known than there are in the column)
+
+        ordered_df = DataFrame(
+            {"A": range(0, 3)},
+            index=CategoricalIndex(
+                content, categories=list("abcde"), name="B", ordered=True
+            ),
+        )
 
-        df_slice = ordered_df.loc["a":"b"]
         # Although the edge is not within the slice, this should fall back
-        # to searchsorted slicing since the category is known
-        result = df_slice.loc["a":"e"]
-        assert_frame_equal(result, df_slice)
+        # to searchsorted slicing since the category is known and the index
+        # is ordered. Since we're selecting a value larger/lower than the
+        # right/left edge we should get the original slice again.
+        result = ordered_df.loc["a": "d"]
+        assert_frame_equal(result, ordered_df)
+
+        # Ensure that index based slicing gives the same result
+        result_iloc = ordered_df.iloc[0:4]
+        assert_frame_equal(result, result_iloc)
 
         # If the categorical is not sorted and the requested edge
         # is not in the slice we cannot perform slicing
-        df_slice.index = df_slice.index.as_unordered()
+        ordered_df.index = ordered_df.index.as_unordered()
         with pytest.raises(KeyError):
-            df_slice.loc["a":"e"]
+            ordered_df.loc["a": "d"]
 
         with pytest.raises(KeyError):
             # If the category is not known, there is nothing we can do

From 25b5fd72e2dd4b5a42c5d8b6e447b23459b64f74 Mon Sep 17 00:00:00 2001
From: fjetter <fjetter@users.noreply.github.com>
Date: Fri, 25 May 2018 08:55:21 +0200
Subject: [PATCH 5/6] Changelog entry

---
 doc/source/whatsnew/v0.23.1.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt
index 9c29c34adb7dd..2e7a2f25833ae 100644
--- a/doc/source/whatsnew/v0.23.1.txt
+++ b/doc/source/whatsnew/v0.23.1.txt
@@ -88,6 +88,8 @@ Indexing
 - Bug in :meth:`MultiIndex.set_names` where error raised for a ``MultiIndex`` with ``nlevels == 1`` (:issue:`21149`)
 - Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, issue:`21253`)
 - Bug in :meth:`MultiIndex.sort_index` which was not guaranteed to sort correctly with ``level=1``; this was also causing data misalignment in particular :meth:`DataFrame.stack` operations (:issue:`20994`, :issue:`20945`, :issue:`21052`)
+- Bug in :func:`CategoricalIndex.searchsorted` where the method did not return a scalar when the input values was scalar (:issue:`21019`)
+- Bug in :class:`CategoricalIndex` where slicing beyond the range of the data raised a KeyError (:issue:`21019`)
 -
 
 I/O

From 04ca52f01c51fc009ddee75ee79f3d87b9435b95 Mon Sep 17 00:00:00 2001
From: fjetter <fjetter@users.noreply.github.com>
Date: Wed, 6 Jun 2018 19:55:42 +0200
Subject: [PATCH 6/6] Address PR comments

---
 doc/source/whatsnew/v0.23.0.txt           |  3 --
 doc/source/whatsnew/v0.23.1.txt           |  3 +-
 pandas/tests/indexing/test_categorical.py | 53 ++++++++++++-----------
 3 files changed, 28 insertions(+), 31 deletions(-)

diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 912e12a1191ca..a099fb40c35a7 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -1289,9 +1289,6 @@ Indexing
 - Bug in performing in-place operations on a ``DataFrame`` with a duplicate ``Index`` (:issue:`17105`)
 - Bug in :meth:`IntervalIndex.get_loc` and :meth:`IntervalIndex.get_indexer` when used with an :class:`IntervalIndex` containing a single interval (:issue:`17284`, :issue:`20921`)
 - Bug in ``.loc`` with a ``uint64`` indexer (:issue:`20722`)
-- Bug in :func:`CategoricalIndex.searchsorted` where the method did not return a scalar when the input values was scalar (:issue:`21019`)
-- Bug in :class:`CategoricalIndex` where slicing beyond the range of the data raised a KeyError (:issue:`21019`)
-
 
 MultiIndex
 ^^^^^^^^^^
diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt
index 2e7a2f25833ae..db39a1c558b97 100644
--- a/doc/source/whatsnew/v0.23.1.txt
+++ b/doc/source/whatsnew/v0.23.1.txt
@@ -89,8 +89,7 @@ Indexing
 - Bug in :class:`IntervalIndex` constructors where creating an ``IntervalIndex`` from categorical data was not fully supported (:issue:`21243`, issue:`21253`)
 - Bug in :meth:`MultiIndex.sort_index` which was not guaranteed to sort correctly with ``level=1``; this was also causing data misalignment in particular :meth:`DataFrame.stack` operations (:issue:`20994`, :issue:`20945`, :issue:`21052`)
 - Bug in :func:`CategoricalIndex.searchsorted` where the method did not return a scalar when the input values was scalar (:issue:`21019`)
-- Bug in :class:`CategoricalIndex` where slicing beyond the range of the data raised a KeyError (:issue:`21019`)
--
+- Bug in :class:`CategoricalIndex` where slicing beyond the range of the data raised a ``KeyError`` (:issue:`21019`)
 
 I/O
 ^^^
diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py
index 695fd82c6f07c..9f745700049ae 100644
--- a/pandas/tests/indexing/test_categorical.py
+++ b/pandas/tests/indexing/test_categorical.py
@@ -627,38 +627,29 @@ def test_reindexing(self):
                       lambda: self.df2.reindex(['a'], limit=2))
 
     def test_loc_slice(self):
-        # Raises KeyError since the left slice 'a' is not unique
-        pytest.raises(KeyError, lambda: self.df.loc["a":"b"])
-        result = self.df.loc["b":"c"]
-
-        expected = DataFrame(
-            {"A": [2, 3, 4]},
-            index=CategoricalIndex(
-                ["b", "b", "c"], name="B", categories=list("cab")
-            ),
+        df = DataFrame(
+            {"A": range(0, 6)},
+            index=CategoricalIndex(list("aabcde"), name="B"),
         )
 
+        # slice on an unordered categorical using in-sample, connected edges
+        result = df.loc["b":"d"]
+        expected = df.iloc[2:5]
         assert_frame_equal(result, expected)
 
-        ordered_df = DataFrame(
-            {"A": range(0, 6)},
-            index=CategoricalIndex(list("aabcde"), name="B", ordered=True),
-        )
-
-        # This should select the entire dataframe
-        result = ordered_df.loc["a":"e"]
-        assert_frame_equal(result, ordered_df)
-        result_iloc = ordered_df.iloc[0:6]
+        # Slice the entire dataframe
+        result = df.loc["a":"e"]
+        assert_frame_equal(result, df)
+        result_iloc = df.iloc[0:6]
         assert_frame_equal(result_iloc, result)
 
-        result = ordered_df.loc["a":"b"]
-        expected = DataFrame(
-            {"A": range(0, 3)},
-            index=CategoricalIndex(
-                list("aab"), categories=list("abcde"), name="B", ordered=True
-            ),
-        )
-        assert_frame_equal(result, expected)
+        # check if the result is identical to an ordinary index
+        df_non_cat_index = df.copy()
+        df_non_cat_index.index = df_non_cat_index.index.astype(str)
+        result = df.loc["a":"e"]
+        result_non_cat = df_non_cat_index.loc["a": "e"]
+        result.index = result.index.astype(str)
+        assert_frame_equal(result_non_cat, result)
 
     @pytest.mark.parametrize(
         "content",
@@ -669,6 +660,8 @@ def test_loc_beyond_edge_slicing(self, content):
         """
         This test ensures that no `KeyError` is raised if trying to slice
         beyond the edges of known, ordered categories.
+
+        see GH21019
         """
         # This dataframe might be a slice of a larger categorical
         # (i.e. more categories are known than there are in the column)
@@ -701,6 +694,14 @@ def test_loc_beyond_edge_slicing(self, content):
             # If the category is not known, there is nothing we can do
             ordered_df.loc["a":"z"]
 
+        unordered_df = ordered_df.copy()
+        unordered_df.index = unordered_df.index.as_unordered()
+        with pytest.raises(KeyError):
+            # This operation previously succeeded for an ordered index. Since
+            # this index is no longer ordered, we cannot perfom out of range
+            # slicing / searchsorted
+            unordered_df.loc["a": "d"]
+
     def test_boolean_selection(self):
 
         df3 = self.df3