1515import pandas .core .common as com
1616from pandas .util .decorators import cache_readonly
1717
18- from pandas .core .common import isnull
18+ from pandas .core .common import (CategoricalDtype , ABCSeries , isnull , notnull ,
19+ is_categorical_dtype , is_integer_dtype , is_object_dtype ,
20+ _possibly_infer_to_datetimelike , get_dtype_kinds ,
21+ is_list_like , _is_sequence ,
22+ _ensure_platform_int , _ensure_object , _ensure_int64 ,
23+ _coerce_indexer_dtype , _values_from_object , take_1d )
1924from pandas .util .terminal import get_terminal_size
2025from pandas .core .config import get_option
2126from pandas .core import format as fmt
@@ -69,11 +74,11 @@ def f(self, other):
6974
7075def _is_categorical (array ):
7176 """ return if we are a categorical possibility """
72- return isinstance (array , Categorical ) or isinstance (array .dtype , com . CategoricalDtype )
77+ return isinstance (array , Categorical ) or isinstance (array .dtype , CategoricalDtype )
7378
7479def _maybe_to_categorical (array ):
7580 """ coerce to a categorical if a series is given """
76- if isinstance (array , com . ABCSeries ):
81+ if isinstance (array , ABCSeries ):
7782 return array .values
7883 return array
7984
@@ -175,7 +180,7 @@ class Categorical(PandasObject):
175180 >>> a.min()
176181 'c'
177182 """
178- dtype = com . CategoricalDtype ()
183+ dtype = CategoricalDtype ()
179184 """The dtype (always "category")"""
180185
181186 ordered = None
@@ -203,7 +208,7 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
203208
204209 if fastpath :
205210 # fast path
206- self ._codes = com . _coerce_indexer_dtype (values , categories )
211+ self ._codes = _coerce_indexer_dtype (values , categories )
207212 self .name = name
208213 self .categories = categories
209214 self .ordered = ordered
@@ -223,11 +228,11 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
223228 "use only 'categories'" )
224229
225230 # sanitize input
226- if com . is_categorical_dtype (values ):
231+ if is_categorical_dtype (values ):
227232
228233 # we are either a Series or a Categorical
229234 cat = values
230- if isinstance (values , com . ABCSeries ):
235+ if isinstance (values , ABCSeries ):
231236 cat = values .values
232237 if categories is None :
233238 categories = cat .categories
@@ -244,7 +249,7 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
244249 # which is fine, but since factorize does this correctly no need here
245250 # this is an issue because _sanitize_array also coerces np.nan to a string
246251 # under certain versions of numpy as well
247- values = com . _possibly_infer_to_datetimelike (values , convert_dates = True )
252+ values = _possibly_infer_to_datetimelike (values , convert_dates = True )
248253 if not isinstance (values , np .ndarray ):
249254 values = _convert_to_list_like (values )
250255 from pandas .core .series import _sanitize_array
@@ -286,11 +291,11 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
286291 codes = _get_codes_for_values (values , categories )
287292
288293 # TODO: check for old style usage. These warnings should be removes after 0.18/ in 2016
289- if com . is_integer_dtype (values ) and not com . is_integer_dtype (categories ):
294+ if is_integer_dtype (values ) and not is_integer_dtype (categories ):
290295 warn ("Values and categories have different dtypes. Did you mean to use\n "
291296 "'Categorical.from_codes(codes, categories)'?" , RuntimeWarning )
292297
293- if com . is_integer_dtype (values ) and (codes == - 1 ).all ():
298+ if is_integer_dtype (values ) and (codes == - 1 ).all ():
294299 warn ("None of the categories were found in values. Did you mean to use\n "
295300 "'Categorical.from_codes(codes, categories)'?" , RuntimeWarning )
296301
@@ -302,7 +307,7 @@ def __init__(self, values, categories=None, ordered=None, name=None, fastpath=Fa
302307 self .ordered = False if ordered is None else ordered
303308 self .categories = categories
304309 self .name = name
305- self ._codes = com . _coerce_indexer_dtype (codes , categories )
310+ self ._codes = _coerce_indexer_dtype (codes , categories )
306311
307312 def copy (self ):
308313 """ Copy constructor. """
@@ -409,7 +414,7 @@ def _validate_categories(cls, categories):
409414 # on categories with NaNs, int values would be converted to float.
410415 # Use "object" dtype to prevent this.
411416 if isnull (categories ).any ():
412- without_na = np .array ([x for x in categories if com . notnull (x )])
417+ without_na = np .array ([x for x in categories if notnull (x )])
413418 with_na = np .array (categories )
414419 if with_na .dtype != without_na .dtype :
415420 dtype = "object"
@@ -617,7 +622,7 @@ def add_categories(self, new_categories, inplace=False):
617622 remove_unused_categories
618623 set_categories
619624 """
620- if not com . is_list_like (new_categories ):
625+ if not is_list_like (new_categories ):
621626 new_categories = [new_categories ]
622627 already_included = set (new_categories ) & set (self ._categories )
623628 if len (already_included ) != 0 :
@@ -627,7 +632,7 @@ def add_categories(self, new_categories, inplace=False):
627632 new_categories = self ._validate_categories (new_categories )
628633 cat = self if inplace else self .copy ()
629634 cat ._categories = new_categories
630- cat ._codes = com . _coerce_indexer_dtype (cat ._codes , new_categories )
635+ cat ._codes = _coerce_indexer_dtype (cat ._codes , new_categories )
631636 if not inplace :
632637 return cat
633638
@@ -662,7 +667,7 @@ def remove_categories(self, removals, inplace=False):
662667 remove_unused_categories
663668 set_categories
664669 """
665- if not com . is_list_like (removals ):
670+ if not is_list_like (removals ):
666671 removals = [removals ]
667672 removals = set (list (removals ))
668673 not_included = removals - set (self ._categories )
@@ -696,7 +701,7 @@ def remove_unused_categories(self, inplace=False):
696701 """
697702 cat = self if inplace else self .copy ()
698703 _used = sorted (np .unique (cat ._codes ))
699- new_categories = cat .categories .take (com . _ensure_platform_int (_used ))
704+ new_categories = cat .categories .take (_ensure_platform_int (_used ))
700705 new_categories = _ensure_index (new_categories )
701706 cat ._codes = _get_codes_for_values (cat .__array__ (), new_categories )
702707 cat ._categories = new_categories
@@ -734,7 +739,7 @@ def __array__(self, dtype=None):
734739 A numpy array of either the specified dtype or, if dtype==None (default), the same
735740 dtype as categorical.categories.dtype
736741 """
737- ret = com . take_1d (self .categories .values , self ._codes )
742+ ret = take_1d (self .categories .values , self ._codes )
738743 if dtype and dtype != self .categories .dtype :
739744 return np .asarray (ret , dtype )
740745 return ret
@@ -822,8 +827,8 @@ def get_values(self):
822827
823828 # if we are a period index, return a string repr
824829 if isinstance (self .categories , PeriodIndex ):
825- return com . take_1d (np .array (self .categories .to_native_types (), dtype = object ),
826- self ._codes )
830+ return take_1d (np .array (self .categories .to_native_types (), dtype = object ),
831+ self ._codes )
827832
828833 return np .array (self )
829834
@@ -1010,7 +1015,7 @@ def fillna(self, fill_value=None, method=None, limit=None, **kwargs):
10101015
10111016 else :
10121017
1013- if not com . isnull (fill_value ) and fill_value not in self .categories :
1018+ if not isnull (fill_value ) and fill_value not in self .categories :
10141019 raise ValueError ("fill value must be in categories" )
10151020
10161021 mask = values == - 1
@@ -1031,7 +1036,7 @@ def take_nd(self, indexer, allow_fill=True, fill_value=None):
10311036 # but is passed thru internally
10321037 assert isnull (fill_value )
10331038
1034- codes = com . take_1d (self ._codes , indexer , allow_fill = True , fill_value = - 1 )
1039+ codes = take_1d (self ._codes , indexer , allow_fill = True , fill_value = - 1 )
10351040 result = Categorical (codes , categories = self .categories , ordered = self .ordered ,
10361041 name = self .name , fastpath = True )
10371042 return result
@@ -1178,7 +1183,7 @@ def __setitem__(self, key, value):
11781183 raise ValueError ("Cannot set a Categorical with another, without identical "
11791184 "categories" )
11801185
1181- rvalue = value if com . is_list_like (value ) else [value ]
1186+ rvalue = value if is_list_like (value ) else [value ]
11821187 to_add = Index (rvalue ).difference (self .categories )
11831188 # no assignments of values not in categories, but it's always ok to set something to np.nan
11841189 if len (to_add ) and not isnull (to_add ).all ():
@@ -1221,7 +1226,7 @@ def __setitem__(self, key, value):
12211226 # float categories do currently return -1 for np.nan, even if np.nan is included in the
12221227 # index -> "repair" this here
12231228 if isnull (rvalue ).any () and isnull (self .categories ).any ():
1224- nan_pos = np .where (com . isnull (self .categories ))[0 ]
1229+ nan_pos = np .where (isnull (self .categories ))[0 ]
12251230 lindexer [lindexer == - 1 ] = nan_pos
12261231
12271232 key = self ._maybe_coerce_indexer (key )
@@ -1304,7 +1309,7 @@ def mode(self):
13041309
13051310 import pandas .hashtable as htable
13061311 good = self ._codes != - 1
1307- result = Categorical (sorted (htable .mode_int64 (com . _ensure_int64 (self ._codes [good ]))),
1312+ result = Categorical (sorted (htable .mode_int64 (_ensure_int64 (self ._codes [good ]))),
13081313 categories = self .categories ,ordered = self .ordered , name = self .name ,
13091314 fastpath = True )
13101315 return result
@@ -1373,9 +1378,9 @@ def describe(self):
13731378 categories = np .arange (0 ,len (self .categories )+ 1 ,dtype = object )
13741379 categories [:- 1 ] = self .categories
13751380 categories [- 1 ] = np .nan
1376- result .index = categories .take (com . _ensure_platform_int (result .index ))
1381+ result .index = categories .take (_ensure_platform_int (result .index ))
13771382 else :
1378- result .index = self .categories .take (com . _ensure_platform_int (result .index ))
1383+ result .index = self .categories .take (_ensure_platform_int (result .index ))
13791384 result = result .reindex (self .categories )
13801385 result .index .name = 'categories'
13811386
@@ -1447,23 +1452,72 @@ def _get_codes_for_values(values, categories):
14471452
14481453 from pandas .core .algorithms import _get_data_algo , _hashtables
14491454 if values .dtype != categories .dtype :
1450- values = com . _ensure_object (values )
1451- categories = com . _ensure_object (categories )
1455+ values = _ensure_object (values )
1456+ categories = _ensure_object (categories )
14521457 (hash_klass , vec_klass ), vals = _get_data_algo (values , _hashtables )
14531458 t = hash_klass (len (categories ))
1454- t .map_locations (com . _values_from_object (categories ))
1455- return com . _coerce_indexer_dtype (t .lookup (values ), categories )
1459+ t .map_locations (_values_from_object (categories ))
1460+ return _coerce_indexer_dtype (t .lookup (values ), categories )
14561461
14571462def _convert_to_list_like (list_like ):
14581463 if hasattr (list_like , "dtype" ):
14591464 return list_like
14601465 if isinstance (list_like , list ):
14611466 return list_like
1462- if (com . _is_sequence (list_like ) or isinstance (list_like , tuple )
1463- or isinstance (list_like , types .GeneratorType )):
1467+ if (_is_sequence (list_like ) or isinstance (list_like , tuple )
1468+ or isinstance (list_like , types .GeneratorType )):
14641469 return list (list_like )
14651470 elif np .isscalar (list_like ):
14661471 return [list_like ]
14671472 else :
14681473 # is this reached?
14691474 return [list_like ]
1475+
def _concat_compat(to_concat, axis=0):
    """
    provide concatenation of an object/categorical array of arrays each of which is a single dtype

    Inputs are only combined into a Categorical here when their dtype kinds
    (as reported by ``get_dtype_kinds``) are limited to 'object' and
    'category'. If any other kind is present, every input is converted to
    object dtype and handed to ``pandas.core.common._concat_compat``.

    Parameters
    ----------
    to_concat : array of arrays
    axis : axis to provide concatenation

    Returns
    -------
    a single array, preserving the combined dtypes

    Raises
    ------
    ValueError
        if the categorical inputs do not all have equal categories
    """

    def convert_categorical(x):
        # coerce to object dtype
        if is_categorical_dtype(x.dtype):
            return x.get_values()
        return x.ravel()

    typs = get_dtype_kinds(to_concat)
    if typs - set(['object', 'category']):

        # something other than object & category is present:
        # convert to object type and perform a regular concat.
        # The import is aliased so it does not shadow this function's name.
        from pandas.core.common import _concat_compat as _generic_concat
        return _generic_concat([np.asarray(x).astype('object') for x in to_concat],
                               axis=axis)

    # we could have object blocks and categoricals here
    # if all categoricals share the same categories then combine everything
    # else it is a non-compat categorical
    categoricals = [x for x in to_concat if is_categorical_dtype(x.dtype)]

    # validate the categories: every categorical must match the first one.
    # With no categorical input at all, categories stays None and is passed
    # to the Categorical constructor as such.
    categories = None
    if categoricals:
        categories = categoricals[0].categories
        for x in categoricals[1:]:
            if not categories.equals(x.categories):
                raise ValueError("incompatible categories in categorical concat")

    # concat them
    # NOTE(review): non-categorical inputs are raveled by convert_categorical
    # before concatenation, so axis values other than 0 look unsupported on
    # this path -- confirm against callers.
    combined = np.concatenate([convert_categorical(x) for x in to_concat], axis=axis)
    return Categorical(combined, categories=categories)
0 commit comments