11# -*- coding: utf-8 -*-
22
33import pytest
4+ import sys
45
56import numpy as np
67
78import pandas .util .testing as tm
8- from pandas import Categorical
9+ from pandas import Categorical , Index , Series
10+
11+ from pandas .compat import PYPY
912
1013
1114class TestCategoricalAnalytics (object ):
@@ -16,17 +19,20 @@ def test_min_max(self):
1619 cat = Categorical (["a" , "b" , "c" , "d" ], ordered = False )
1720 pytest .raises (TypeError , lambda : cat .min ())
1821 pytest .raises (TypeError , lambda : cat .max ())
22+
1923 cat = Categorical (["a" , "b" , "c" , "d" ], ordered = True )
2024 _min = cat .min ()
2125 _max = cat .max ()
2226 assert _min == "a"
2327 assert _max == "d"
28+
2429 cat = Categorical (["a" , "b" , "c" , "d" ],
2530 categories = ['d' , 'c' , 'b' , 'a' ], ordered = True )
2631 _min = cat .min ()
2732 _max = cat .max ()
2833 assert _min == "d"
2934 assert _max == "a"
35+
3036 cat = Categorical ([np .nan , "b" , "c" , np .nan ],
3137 categories = ['d' , 'c' , 'b' , 'a' ], ordered = True )
3238 _min = cat .min ()
@@ -51,36 +57,264 @@ def test_min_max(self):
5157 _max = cat .max (numeric_only = True )
5258 assert _max == 1
5359
@pytest.mark.parametrize("values,categories,exp_mode", [
    # a single most frequent value
    ([1, 1, 2, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5]),
    # two values tied for most frequent
    ([1, 1, 1, 4, 5, 5, 5], [5, 4, 3, 2, 1], [5, 1]),
    # every value occurs exactly once
    ([1, 2, 3, 4, 5], [5, 4, 3, 2, 1], [5, 4, 3, 2, 1]),
    # NaN should not become the mode, even when it is the most frequent
    ([np.nan, np.nan, np.nan, 4, 5], [5, 4, 3, 2, 1], [5, 4]),
    ([np.nan, np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
    ([np.nan, np.nan, 4, 5, 4], [5, 4, 3, 2, 1], [4]),
])
def test_mode(self, values, categories, exp_mode):
    """Check ``Categorical.mode`` for each parametrized input.

    The expected result is an ordered Categorical built from ``exp_mode``
    with the same ``categories`` as the input.
    """
    observed = Categorical(values, categories=categories,
                           ordered=True).mode()
    expected = Categorical(exp_mode, categories=categories, ordered=True)
    tm.assert_categorical_equal(observed, expected)
72+
def test_searchsorted(self):
    """Exercise ``searchsorted`` on a Categorical and on a Series wrapping it.

    See https://github.com/pandas-dev/pandas/issues/8420 and
    https://github.com/pandas-dev/pandas/issues/14522.
    """
    values = ['cheese', 'milk', 'apple', 'bread', 'bread']
    cats = ['cheese', 'milk', 'apple', 'bread']

    c1 = Categorical(values, categories=cats, ordered=True)
    s1 = Series(c1)
    c2 = Categorical(values, categories=cats, ordered=False)
    s2 = Series(c2)

    # Single scalar argument, side='left' (the default)
    expected = np.array([2], dtype=np.intp)
    tm.assert_numpy_array_equal(c1.searchsorted('apple'), expected)
    tm.assert_numpy_array_equal(s1.searchsorted('apple'), expected)

    # One-element list argument, side='left' (the default)
    expected = np.array([3], dtype=np.intp)
    tm.assert_numpy_array_equal(c1.searchsorted(['bread']), expected)
    tm.assert_numpy_array_equal(s1.searchsorted(['bread']), expected)

    # Several items at once, side='right'
    expected = np.array([3, 5], dtype=np.intp)
    tm.assert_numpy_array_equal(
        c1.searchsorted(['apple', 'bread'], side='right'), expected)
    tm.assert_numpy_array_equal(
        s1.searchsorted(['apple', 'bread'], side='right'), expected)

    # A single value that is not one of the categories
    with pytest.raises(ValueError):
        c1.searchsorted('cucumber')
    with pytest.raises(ValueError):
        s1.searchsorted('cucumber')

    # Multiple values, one of which is not one of the categories
    with pytest.raises(ValueError):
        c1.searchsorted(['bread', 'cucumber'])
    with pytest.raises(ValueError):
        s1.searchsorted(['bread', 'cucumber'])

    # searchsorted on an unordered Categorical
    with pytest.raises(ValueError):
        c2.searchsorted('apple')
    with pytest.raises(ValueError):
        s2.searchsorted('apple')

    # Passing the needle through the ``v`` keyword must emit a FutureWarning
    with tm.assert_produces_warning(FutureWarning):
        result = c1.searchsorted(v=['bread'])
        expected = np.array([3], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)
125+
def test_unique(self):
    """Check ``Categorical.unique`` for unordered categoricals.

    Each scenario checks both the categories of the result and the
    result itself.
    """
    # Every declared category is used, in declaration order
    original = Categorical(["a", "b"])
    expected_cats = Index(["a", "b"])
    uniques = original.unique()
    tm.assert_index_equal(uniques.categories, expected_cats)
    tm.assert_categorical_equal(uniques, original)

    # Category "c" is declared but never used
    original = Categorical(["a", "b", "a", "a"],
                           categories=["a", "b", "c"])
    uniques = original.unique()
    tm.assert_index_equal(uniques.categories, expected_cats)
    tm.assert_categorical_equal(uniques, Categorical(expected_cats))

    # Values appear in a different order than the declared categories
    original = Categorical(["c", "a", "b", "a", "a"],
                           categories=["a", "b", "c"])
    expected_cats = Index(["c", "a", "b"])
    uniques = original.unique()
    tm.assert_index_equal(uniques.categories, expected_cats)
    tm.assert_categorical_equal(
        uniques, Categorical(expected_cats, categories=['c', 'a', 'b']))

    # NaN must not show up among the categories of the result
    original = Categorical(["b", np.nan, "b", np.nan, "a"],
                           categories=["a", "b", "c"])
    uniques = original.unique()
    expected_cats = Index(["b", "a"])
    tm.assert_index_equal(uniques.categories, expected_cats)
    tm.assert_categorical_equal(
        uniques, Categorical(["b", np.nan, "a"], categories=["b", "a"]))
155+
def test_unique_ordered(self):
    """Check ``Categorical.unique`` for ordered categoricals.

    Each row is (values, declared categories, expected values, expected
    categories); the expected categories keep their declared order.
    """
    scenarios = [
        (['b', 'a', 'b'], ['a', 'b'],
         ['b', 'a'], ['a', 'b']),
        (['c', 'b', 'a', 'a'], ['a', 'b', 'c'],
         ['c', 'b', 'a'], ['a', 'b', 'c']),
        (['b', 'a', 'a'], ['a', 'b', 'c'],
         ['b', 'a'], ['a', 'b']),
        (['b', 'b', np.nan, 'a'], ['a', 'b', 'c'],
         ['b', np.nan, 'a'], ['a', 'b']),
    ]

    for values, cats, exp_values, exp_cats in scenarios:
        uniques = Categorical(values, categories=cats,
                              ordered=True).unique()
        expected = Categorical(exp_values, categories=exp_cats,
                               ordered=True)
        tm.assert_categorical_equal(uniques, expected)
182+
def test_unique_index_series(self):
    """Check ``unique`` agrees across Categorical, Index and Series.

    For every scenario the same expected Categorical is compared with
    ``Categorical.unique()``, ``Index(...).unique()`` and
    ``Series(...).unique()``.
    """
    scenarios = [
        # unordered: Categorical.unique sorts categories by appearance
        # order
        (Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1]),
         Categorical([3, 1, 2], categories=[3, 1, 2])),
        # unordered, with category 3 declared but unused
        (Categorical([1, 1, 2, 2], categories=[3, 2, 1]),
         Categorical([1, 2], categories=[1, 2])),
        # ordered: Categorical.unique keeps the categories order
        (Categorical([3, 1, 2, 2, 1], categories=[3, 2, 1], ordered=True),
         Categorical([3, 1, 2], categories=[3, 2, 1], ordered=True)),
    ]

    for source, expected in scenarios:
        tm.assert_categorical_equal(source.unique(), expected)
        tm.assert_index_equal(Index(source).unique(), Index(expected))
        tm.assert_categorical_equal(Series(source).unique(), expected)
206+
207+ def test_shift (self ):
208+ # GH 9416
209+ cat = Categorical (['a' , 'b' , 'c' , 'd' , 'a' ])
210+
211+ # shift forward
212+ sp1 = cat .shift (1 )
213+ xp1 = Categorical ([np .nan , 'a' , 'b' , 'c' , 'd' ])
214+ tm .assert_categorical_equal (sp1 , xp1 )
215+ tm .assert_categorical_equal (cat [:- 1 ], sp1 [1 :])
216+
217+ # shift back
218+ sn2 = cat .shift (- 2 )
219+ xp2 = Categorical (['c' , 'd' , 'a' , np .nan , np .nan ],
220+ categories = ['a' , 'b' , 'c' , 'd' ])
221+ tm .assert_categorical_equal (sn2 , xp2 )
222+ tm .assert_categorical_equal (cat [2 :], sn2 [:- 2 ])
223+
224+ # shift by zero
225+ tm .assert_categorical_equal (cat , cat .shift (0 ))
226+
227+ def test_nbytes (self ):
228+ cat = Categorical ([1 , 2 , 3 ])
229+ exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories
230+ assert cat .nbytes == exp
231+
232+ def test_memory_usage (self ):
233+ cat = Categorical ([1 , 2 , 3 ])
234+
235+ # .categories is an index, so we include the hashtable
236+ assert 0 < cat .nbytes <= cat .memory_usage ()
237+ assert 0 < cat .nbytes <= cat .memory_usage (deep = True )
238+
239+ cat = Categorical (['foo' , 'foo' , 'bar' ])
240+ assert cat .memory_usage (deep = True ) > cat .nbytes
241+
242+ if not PYPY :
243+ # sys.getsizeof will call the .memory_usage with
244+ # deep=True, and add on some GC overhead
245+ diff = cat .memory_usage (deep = True ) - sys .getsizeof (cat )
246+ assert abs (diff ) < 100
247+
248+ def test_map (self ):
249+ c = Categorical (list ('ABABC' ), categories = list ('CBA' ), ordered = True )
250+ result = c .map (lambda x : x .lower ())
251+ exp = Categorical (list ('ababc' ), categories = list ('cba' ), ordered = True )
252+ tm .assert_categorical_equal (result , exp )
253+
254+ c = Categorical (list ('ABABC' ), categories = list ('ABC' ), ordered = False )
255+ result = c .map (lambda x : x .lower ())
256+ exp = Categorical (list ('ababc' ), categories = list ('abc' ), ordered = False )
257+ tm .assert_categorical_equal (result , exp )
258+
259+ result = c .map (lambda x : 1 )
260+ # GH 12766: Return an index not an array
261+ tm .assert_index_equal (result , Index (np .array ([1 ] * 5 , dtype = np .int64 )))
262+
def test_validate_inplace(self):
    """Check that a non-boolean ``inplace`` argument raises ``ValueError``.

    Each invalid value is passed as ``inplace`` to every Categorical
    method exercised below.
    """
    cat = Categorical(['A', 'B', 'B', 'C', 'A'])
    invalid_values = [1, "True", [1, 2, 3], 5.0]

    for bad in invalid_values:
        # The thunks are invoked within this same iteration, so closing
        # over the loop variable ``bad`` is safe.
        attempts = [
            lambda: cat.set_ordered(value=True, inplace=bad),
            lambda: cat.as_ordered(inplace=bad),
            lambda: cat.as_unordered(inplace=bad),
            lambda: cat.set_categories(['X', 'Y', 'Z'], rename=True,
                                       inplace=bad),
            lambda: cat.rename_categories(['X', 'Y', 'Z'], inplace=bad),
            lambda: cat.reorder_categories(['X', 'Y', 'Z'], ordered=True,
                                           inplace=bad),
            lambda: cat.add_categories(new_categories=['D', 'E', 'F'],
                                       inplace=bad),
            lambda: cat.remove_categories(removals=['D', 'E', 'F'],
                                          inplace=bad),
            lambda: cat.remove_unused_categories(inplace=bad),
            lambda: cat.sort_values(inplace=bad),
        ]
        for attempt in attempts:
            with pytest.raises(ValueError):
                attempt()
299+
300+ def test_repeat (self ):
301+ # GH10183
302+ cat = Categorical (["a" , "b" ], categories = ["a" , "b" ])
303+ exp = Categorical (["a" , "a" , "b" , "b" ], categories = ["a" , "b" ])
304+ res = cat .repeat (2 )
86305 tm .assert_categorical_equal (res , exp )
306+
def test_numpy_repeat(self):
    """Check ``np.repeat`` on a Categorical.

    Without ``axis`` the call must give a Categorical with each element
    doubled; with ``axis=1`` it must raise ``ValueError`` with a message
    saying the 'axis' parameter is not supported.
    """
    cat = Categorical(["a", "b"], categories=["a", "b"])
    exp = Categorical(["a", "a", "b", "b"], categories=["a", "b"])
    tm.assert_categorical_equal(np.repeat(cat, 2), exp)

    # Use pytest's own regex matching, like the other tests in this
    # module that call pytest.raises directly, instead of the
    # pandas-private tm.assert_raises_regex shim.
    msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.repeat(cat, 2, axis=1)
314+
315+ def test_isna (self ):
316+ exp = np .array ([False , False , True ])
317+ c = Categorical (["a" , "b" , np .nan ])
318+ res = c .isna ()
319+
320+ tm .assert_numpy_array_equal (res , exp )
0 commit comments