|
13 | 13 | from pandas import compat |
14 | 14 |
|
15 | 15 | from .base import ExtensionDtype, _DtypeOpsMixin |
| 16 | +from .inference import is_list_like |
16 | 17 |
|
17 | 18 |
|
18 | 19 | def register_extension_dtype(cls): |
@@ -240,6 +241,90 @@ def _from_categorical_dtype(cls, dtype, categories=None, ordered=None): |
240 | 241 | ordered = dtype.ordered |
241 | 242 | return cls(categories, ordered) |
242 | 243 |
|
| 244 | + @classmethod |
| 245 | + def _from_values_or_dtype(cls, values=None, categories=None, ordered=None, |
| 246 | + dtype=None): |
| 247 | + """ |
| 248 | + Construct dtype from the input parameters used in :class:`Categorical`. |
| 249 | +
|
| 250 | + This constructor method specifically does not do the factorization |
| 251 | + step, if that is needed to find the categories. This constructor may |
| 252 | + therefore return ``CategoricalDtype(categories=None, ordered=None)``, |
| 253 | + which may not be useful. Additional steps may therefore have to be |
| 254 | + taken to create the final dtype. |
| 255 | +
|
| 256 | + The return dtype is specified from the inputs in this prioritized |
| 257 | + order: |
| 258 | + 1. if dtype is a CategoricalDtype, return dtype |
| 259 | + 2. if dtype is the string 'category', create a CategoricalDtype from |
| 260 | + the supplied categories and ordered parameters, and return that. |
| 261 | + 3. if values is a categorical, use values.dtype, but override it with |
| 262 | + categories and ordered if either/both of those are not None. |
| 263 | + 4. if dtype is None and values is not a categorical, construct the |
| 264 | + dtype from categories and ordered, even if either of those is None. |
| 265 | +
|
| 266 | + Parameters |
| 267 | + ---------- |
| 268 | + values : list-like, optional |
| 269 | + The list-like must be 1-dimensional. |
| 270 | + categories : list-like, optional |
| 271 | + Categories for the CategoricalDtype. |
| 272 | + ordered : bool, optional |
| 273 | + Designating if the categories are ordered. |
| 274 | + dtype : CategoricalDtype or the string "category", optional |
| 275 | + If ``CategoricalDtype``, cannot be used together with |
| 276 | + `categories` or `ordered`. |
| 277 | +
|
| 278 | + Returns |
| 279 | + ------- |
| 280 | + CategoricalDtype |
| 281 | +
|
| 282 | + Examples |
| 283 | + -------- |
| 284 | + >>> CategoricalDtype._from_values_or_dtype() |
| 285 | + CategoricalDtype(categories=None, ordered=None) |
| 286 | + >>> CategoricalDtype._from_values_or_dtype(categories=['a', 'b'], |
| 287 | + ... ordered=True) |
| 288 | + CategoricalDtype(categories=['a', 'b'], ordered=True) |
| 289 | + >>> dtype1 = CategoricalDtype(['a', 'b'], ordered=True) |
| 290 | + >>> dtype2 = CategoricalDtype(['x', 'y'], ordered=False) |
| 291 | + >>> c = Categorical([0, 1], dtype=dtype1, fastpath=True) |
| 292 | + >>> CategoricalDtype._from_values_or_dtype(c, ['x', 'y'], ordered=True, |
| 293 | + ... dtype=dtype2) |
| 294 | + ValueError: Cannot specify `categories` or `ordered` together with |
| 295 | + `dtype`. |
| 296 | +
|
| 297 | + The supplied dtype takes precedence over values' dtype: |
| 298 | +
|
| 299 | + >>> CategoricalDtype._from_values_or_dtype(c, dtype=dtype2) |
| 300 | + CategoricalDtype(categories=['x', 'y'], ordered=False) |
| 301 | + """ |
| 302 | + from pandas.core.dtypes.common import is_categorical |
| 303 | + |
| 304 | + if dtype is not None: |
| 305 | + # The dtype argument takes precedence over values.dtype (if any) |
| 306 | + if isinstance(dtype, compat.string_types): |
| 307 | + if dtype == 'category': |
| 308 | + dtype = CategoricalDtype(categories, ordered) |
| 309 | + else: |
| 310 | + msg = "Unknown dtype {dtype!r}" |
| 311 | + raise ValueError(msg.format(dtype=dtype)) |
| 312 | + elif categories is not None or ordered is not None: |
| 313 | + raise ValueError("Cannot specify `categories` or `ordered` " |
| 314 | + "together with `dtype`.") |
| 315 | + elif is_categorical(values): |
| 316 | + # If no "dtype" was passed, use the one from "values", but honor |
| 317 | + # the "ordered" and "categories" arguments |
| 318 | + dtype = values.dtype._from_categorical_dtype(values.dtype, |
| 319 | + categories, ordered) |
| 320 | + else: |
| 321 | + # If dtype=None and values is not categorical, create a new dtype. |
| 322 | + # Note: This could potentially have categories=None and |
| 323 | + # ordered=None. |
| 324 | + dtype = CategoricalDtype(categories, ordered) |
| 325 | + |
| 326 | + return dtype |
| 327 | + |
243 | 328 | def _finalize(self, categories, ordered, fastpath=False): |
244 | 329 |
|
245 | 330 | if ordered is not None: |
@@ -408,7 +493,10 @@ def validate_categories(categories, fastpath=False): |
408 | 493 | """ |
409 | 494 | from pandas import Index |
410 | 495 |
|
411 | | - if not isinstance(categories, ABCIndexClass): |
| 496 | + if not fastpath and not is_list_like(categories): |
| 497 | + msg = "Parameter 'categories' must be list-like, was {!r}" |
| 498 | + raise TypeError(msg.format(categories)) |
| 499 | + elif not isinstance(categories, ABCIndexClass): |
412 | 500 | categories = Index(categories, tupleize_cols=False) |
413 | 501 |
|
414 | 502 | if not fastpath: |
|
0 commit comments