66import numpy as np
77
88import pandas .util .testing as tm
9- from pandas import Categorical , Index , Series
9+ from pandas import Categorical , CategoricalIndex , Index , Series , DataFrame
1010
1111from pandas .compat import PYPY
1212from pandas .core .categorical import _recode_for_categories
13+ from pandas .tests .categorical .common import TestCategorical
1314
1415
1516class TestCategoricalAPI (object ):
@@ -511,47 +512,6 @@ def f():
511512 exp = np .array ([0 , 1 , 2 , 0 , 2 ], dtype = 'int8' )
512513 tm .assert_numpy_array_equal (c .codes , exp )
513514
def test_min_max(self):
    """Check ``Categorical.min``/``max`` on unordered, ordered and NaN data."""
    # unordered cats have no min/max
    cat = Categorical(["a", "b", "c", "d"], ordered=False)
    with pytest.raises(TypeError):
        cat.min()
    with pytest.raises(TypeError):
        cat.max()

    # ordered, categories taken from the values themselves
    cat = Categorical(["a", "b", "c", "d"], ordered=True)
    assert cat.min() == "a"
    assert cat.max() == "d"

    # explicit categories listed as d, c, b, a: the expected extremes
    # follow that listing, not the alphabetical order of the values
    cat = Categorical(["a", "b", "c", "d"],
                      categories=['d', 'c', 'b', 'a'], ordered=True)
    assert cat.min() == "d"
    assert cat.max() == "a"

    # string data containing NaN: min() is expected to be NaN
    cat = Categorical([np.nan, "b", "c", np.nan],
                      categories=['d', 'c', 'b', 'a'], ordered=True)
    assert np.isnan(cat.min())
    assert cat.max() == "b"

    # with numeric_only=True the expected minimum is a real category
    assert cat.min(numeric_only=True) == "c"
    assert cat.max(numeric_only=True) == "b"

    # same checks with integer categories
    cat = Categorical([np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1],
                      ordered=True)
    assert np.isnan(cat.min())
    assert cat.max() == 1

    assert cat.min(numeric_only=True) == 2
    assert cat.max(numeric_only=True) == 1
555515 def test_unique (self ):
556516 # categories are reordered based on value when ordered=False
557517 cat = Categorical (["a" , "b" ])
@@ -633,40 +593,6 @@ def test_unique_index_series(self):
633593 tm .assert_index_equal (Index (c ).unique (), Index (exp ))
634594 tm .assert_categorical_equal (Series (c ).unique (), exp )
635595
636- def test_mode (self ):
637- s = Categorical ([1 , 1 , 2 , 4 , 5 , 5 , 5 ], categories = [5 , 4 , 3 , 2 , 1 ],
638- ordered = True )
639- res = s .mode ()
640- exp = Categorical ([5 ], categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
641- tm .assert_categorical_equal (res , exp )
642- s = Categorical ([1 , 1 , 1 , 4 , 5 , 5 , 5 ], categories = [5 , 4 , 3 , 2 , 1 ],
643- ordered = True )
644- res = s .mode ()
645- exp = Categorical ([5 , 1 ], categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
646- tm .assert_categorical_equal (res , exp )
647- s = Categorical ([1 , 2 , 3 , 4 , 5 ], categories = [5 , 4 , 3 , 2 , 1 ],
648- ordered = True )
649- res = s .mode ()
650- exp = Categorical ([5 , 4 , 3 , 2 , 1 ],
651- categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
652- tm .assert_categorical_equal (res , exp )
653- # NaN should not become the mode!
654- s = Categorical ([np .nan , np .nan , np .nan , 4 , 5 ],
655- categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
656- res = s .mode ()
657- exp = Categorical ([5 , 4 ], categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
658- tm .assert_categorical_equal (res , exp )
659- s = Categorical ([np .nan , np .nan , np .nan , 4 , 5 , 4 ],
660- categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
661- res = s .mode ()
662- exp = Categorical ([4 ], categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
663- tm .assert_categorical_equal (res , exp )
664- s = Categorical ([np .nan , np .nan , 4 , 5 , 4 ], categories = [5 , 4 , 3 , 2 , 1 ],
665- ordered = True )
666- res = s .mode ()
667- exp = Categorical ([4 ], categories = [5 , 4 , 3 , 2 , 1 ], ordered = True )
668- tm .assert_categorical_equal (res , exp )
669-
670596 def test_shift (self ):
671597 # GH 9416
672598 cat = Categorical (['a' , 'b' , 'c' , 'd' , 'a' ])
@@ -774,13 +700,6 @@ def test_validate_inplace(self):
774700 with pytest .raises (ValueError ):
775701 cat .sort_values (inplace = value )
776702
@pytest.mark.xfail(reason="Imaginary values not supported in Categorical")
def test_imaginary(self):
    """Build a Categorical from data with a complex value (marked xfail)."""
    mixed = [1, 2, 3 + 1j]
    cat = Categorical(mixed)

    # categories and the array round-trip should both equal the input
    expected_categories = Index(mixed)
    expected_array = np.array(mixed)
    tm.assert_index_equal(cat.categories, expected_categories)
    tm.assert_numpy_array_equal(np.array(cat), expected_array)
784703 def test_repeat (self ):
785704 # GH10183
786705 cat = Categorical (["a" , "b" ], categories = ["a" , "b" ])
@@ -804,17 +723,61 @@ def test_astype_categorical(self):
804723
805724 pytest .raises (ValueError , lambda : cat .astype (float ))
806725
807- def test_cat_tab_completition (self ):
808- # test the tab completion display
809- ok_for_cat = [ 'categories' , 'codes' , 'ordered' , 'set_categories' ,
810- 'add_categories' , 'remove_categories' ,
811- 'rename_categories' , 'reorder_categories' ,
812- 'remove_unused_categories' , 'as_ordered' , 'as_unordered' ]
726+ def test_isna (self ):
727+ exp = np . array ([ False , False , True ])
728+ c = Categorical ([ "a" , "b" , np . nan ])
729+ res = c . isna ()
730+
731+ tm . assert_numpy_array_equal ( res , exp )
813732
814- def get_dir (s ):
815- results = [r for r in s .cat .__dir__ () if not r .startswith ('_' )]
816- return list (sorted (set (results )))
817733
818- s = Series (list ('aabbcde' )).astype ('category' )
819- results = get_dir (s )
820- tm .assert_almost_equal (results , list (sorted (set (ok_for_cat ))))
class TestCategoricalAPIWithFactor(TestCategorical):
    """Categorical API tests that run against ``self.factor``.

    ``self.factor`` is not defined in this class; presumably it is set up
    by the ``TestCategorical`` base class (TODO confirm). The expectations
    below assume an ordered Categorical of 8 values whose categories
    ``a``/``b``/``c`` occur 3/2/3 times.
    """

    def test_describe(self):
        """Check the counts/freqs frame returned by ``describe()``."""
        # string type
        desc = self.factor.describe()
        assert self.factor.ordered
        exp_index = CategoricalIndex(['a', 'b', 'c'], name='categories',
                                     ordered=self.factor.ordered)
        expected = DataFrame({'counts': [3, 2, 3],
                              'freqs': [3 / 8., 2 / 8., 3 / 8.]},
                             index=exp_index)
        tm.assert_frame_equal(desc, expected)

        # check unused categories
        # Use the Categorical returned by set_categories() rather than
        # mutating in place: this test is about describe(), and in-place
        # mutation is covered by test_set_categories_inplace.
        cat = self.factor.set_categories(["a", "b", "c", "d"])
        desc = cat.describe()

        exp_index = CategoricalIndex(
            list('abcd'), ordered=self.factor.ordered, name='categories')
        expected = DataFrame({'counts': [3, 2, 3, 0],
                              'freqs': [3 / 8., 2 / 8., 3 / 8., 0]},
                             index=exp_index)
        tm.assert_frame_equal(desc, expected)

        # check an integer one
        cat = Categorical([1, 2, 3, 1, 2, 3, 3, 2, 1, 1, 1])
        desc = cat.describe()
        exp_index = CategoricalIndex([1, 2, 3], ordered=cat.ordered,
                                     name='categories')
        expected = DataFrame({'counts': [5, 3, 3],
                              'freqs': [5 / 11., 3 / 11., 3 / 11.]},
                             index=exp_index)
        tm.assert_frame_equal(desc, expected)

        # https://github.com/pandas-dev/pandas/issues/3678
        # describe should work with NaN
        cat = Categorical([np.nan, 1, 2, 2])
        desc = cat.describe()
        expected = DataFrame({'counts': [1, 2, 1],
                              'freqs': [1 / 4., 2 / 4., 1 / 4.]},
                             index=CategoricalIndex([1, 2, np.nan],
                                                    categories=[1, 2],
                                                    name='categories'))
        tm.assert_frame_equal(desc, expected)

    def test_set_categories_inplace(self):
        """Check that ``set_categories(..., inplace=True)`` mutates ``cat``."""
        # Work on a copy so the shared fixture is left untouched.
        # NOTE(review): later pandas releases reportedly deprecate the
        # ``inplace`` argument -- confirm against the targeted version.
        cat = self.factor.copy()
        cat.set_categories(['a', 'b', 'c', 'd'], inplace=True)
        tm.assert_index_equal(cat.categories, Index(['a', 'b', 'c', 'd']))
0 commit comments