@@ -200,6 +200,62 @@ def contains(cat, key, container):
200200 return any (loc_ in container for loc_ in loc )
201201
202202
def create_categorical_dtype(values=None, categories=None, ordered=None,
                             dtype=None):
    """
    Construct a :class:`CategoricalDtype` from typical :class:`Categorical`
    inputs.

    The dtype is resolved in the following order:

    1. `dtype` is given: the string ``'category'`` builds a new dtype from
       `categories` and `ordered`; any other non-string `dtype` is returned
       unchanged and must not be combined with `categories` or `ordered`.
    2. `dtype` is None and `values` is categorical: the dtype is derived
       from ``values.dtype``, honoring `categories` and `ordered`.
    3. Otherwise a new dtype is built from `categories` and `ordered`
       (both of which may be None).

    Parameters
    ----------
    values : array-like or Categorical, (1-dimensional), optional
        Only consulted when `dtype` is None.
    categories : list-like, optional
        Categories for the CategoricalDtype.
    ordered : bool, optional
        Designates whether the categories are ordered.
    dtype : CategoricalDtype or the string 'category', optional
        A non-string `dtype` cannot be used in combination with
        `categories` or `ordered`.

    Returns
    -------
    CategoricalDtype

    Raises
    ------
    ValueError
        If `dtype` is a string other than ``'category'``, or if a
        non-string `dtype` is given together with `categories` or
        `ordered`.

    Examples
    --------
    >>> create_categorical_dtype()
    CategoricalDtype(categories=None, ordered=None)
    >>> create_categorical_dtype(categories=['a', 'b'], ordered=True)
    CategoricalDtype(categories=['a', 'b'], ordered=True)
    >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True)
    >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False)
    >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True)
    >>> create_categorical_dtype(c, ['x', 'y'], ordered=True, dtype=dtype2)
    Traceback (most recent call last):
        ...
    ValueError: Cannot specify `categories` or `ordered` together with `dtype`.

    The supplied dtype takes precedence over the dtype of `values`:

    >>> create_categorical_dtype(c, dtype=dtype2)
    CategoricalDtype(categories=['x', 'y'], ordered=False)
    """
    if dtype is not None:
        # The dtype argument takes precedence over values.dtype (if any)
        if isinstance(dtype, compat.string_types):
            if dtype != 'category':
                msg = "Unknown dtype {dtype!r}"
                raise ValueError(msg.format(dtype=dtype))
            dtype = CategoricalDtype(categories, ordered)
        elif categories is not None or ordered is not None:
            raise ValueError("Cannot specify `categories` or `ordered` "
                             "together with `dtype`.")
        # NOTE(review): a non-string dtype is returned as-is without
        # checking that it is a CategoricalDtype -- confirm callers never
        # pass anything else.
    elif is_categorical(values):
        # If no "dtype" was passed, use the one from "values", but honor
        # the "ordered" and "categories" arguments
        dtype = values.dtype._from_categorical_dtype(values.dtype,
                                                     categories, ordered)
    else:
        # If dtype=None and values is not categorical, create a new dtype.
        # Note: This could potentially have categories=None and ordered=None.
        dtype = CategoricalDtype(categories, ordered)

    return dtype
257+
258+
203259_codes_doc = """\
204260 The category codes of this categorical.
205261
@@ -316,50 +372,18 @@ class Categorical(ExtensionArray, PandasObject):
316372 def __init__ (self , values , categories = None , ordered = None , dtype = None ,
317373 fastpath = False ):
318374
319- # Ways of specifying the dtype (prioritized ordered)
320- # 1. dtype is a CategoricalDtype
321- # a.) with known categories, use dtype.categories
322- # b.) else with Categorical values, use values.dtype
323- # c.) else, infer from values
324- # d.) specifying dtype=CategoricalDtype and categories is an error
325- # 2. dtype is a string 'category'
326- # a.) use categories, ordered
327- # b.) use values.dtype
328- # c.) infer from values
329- # 3. dtype is None
330- # a.) use categories, ordered
331- # b.) use values.dtype
332- # c.) infer from values
333- if dtype is not None :
334- # The dtype argument takes precedence over values.dtype (if any)
335- if isinstance (dtype , compat .string_types ):
336- if dtype == 'category' :
337- dtype = CategoricalDtype (categories , ordered )
338- else :
339- msg = "Unknown `dtype` {dtype}"
340- raise ValueError (msg .format (dtype = dtype ))
341- elif categories is not None or ordered is not None :
342- raise ValueError ("Cannot specify both `dtype` and `categories`"
343- " or `ordered`." )
344- elif is_categorical (values ):
345- # If no "dtype" was passed, use the one from "values", but honor
346- # the "ordered" and "categories" arguments
347- dtype = values .dtype ._from_categorical_dtype (values .dtype ,
348- categories , ordered )
375+ dtype = create_categorical_dtype (values , categories , ordered , dtype )
376+ # At this point, dtype is always a CategoricalDtype, but
377+ # we may have dtype.categories be None, and we need to
378+ # infer categories in a factorization step further below
349379
380+ if is_categorical (values ):
350381 # GH23814, for perf, if values._values already an instance of
351382 # Categorical, set values to codes, and run fastpath
352383 if (isinstance (values , (ABCSeries , ABCIndexClass )) and
353384 isinstance (values ._values , type (self ))):
354385 values = values ._values .codes .copy ()
355386 fastpath = True
356- else :
357- # If dtype=None and values is not categorical, create a new dtype
358- dtype = CategoricalDtype (categories , ordered )
359-
360- # At this point, dtype is always a CategoricalDtype and you should not
361- # use categories and ordered seperately.
362- # if dtype.categories is None, we are inferring
363387
364388 if fastpath :
365389 self ._codes = coerce_indexer_dtype (values , dtype .categories )
@@ -656,6 +680,8 @@ def from_codes(cls, codes, categories, ordered=False):
656680 categorical. If not given, the resulting categorical will be
657681 unordered.
658682 """
683+ dtype = create_categorical_dtype (codes , categories , ordered )
684+
659685 codes = np .asarray (codes ) # #21767
660686 if not is_integer_dtype (codes ):
661687 msg = "codes need to be array-like integers"
@@ -675,14 +701,12 @@ def from_codes(cls, codes, categories, ordered=False):
675701 raise ValueError (
676702 "codes need to be convertible to an arrays of integers" )
677703
678- categories = CategoricalDtype .validate_categories (categories )
679-
680- if len (codes ) and (codes .max () >= len (categories ) or codes .min () < - 1 ):
704+ if len (codes ) and (
705+ codes .max () >= len (dtype .categories ) or codes .min () < - 1 ):
681706 raise ValueError ("codes need to be between -1 and "
682707 "len(categories)-1" )
683708
684- return cls (codes , categories = categories , ordered = ordered ,
685- fastpath = True )
709+ return cls (codes , dtype = dtype , fastpath = True )
686710
687711 _codes = None
688712
0 commit comments