@@ -73,32 +73,32 @@ def _maybe_to_categorical(array):
73
73
Level codes are an array of integers which are the positions of the real
74
74
values in the categories array.
75
75
76
- There is not setter, used the other categorical methods and the item setter on
77
- Categorical to change values in the categorical.
76
+ There is no setter; use the other categorical methods and the normal item setter to change
77
+ values in the categorical.
78
78
"""
79
79
80
80
_categories_doc = """The categories of this categorical.
81
81
82
82
Setting assigns new values to each category (effectively a rename of
83
83
each individual category).
84
84
85
- The assigned value has to be a list-like object. If the number of
86
- category-items is less than number of category-items in the current category,
87
- all category-items at a higher position are set to NaN. If the number of
88
- category-items is more that the current number of category-items, new
89
- (unused) categories are added at the end.
90
-
91
- To add category-items in between, use `reorder_categories`.
85
+ The assigned value has to be a list-like object. All items must be unique and the number of items
86
+ in the new categories must be the same as the number of items in the old categories.
92
87
93
88
Raises
94
89
------
95
90
ValueError
96
- If the new categories do not validate as categories
91
+ If the new categories do not validate as categories or if the number of new categories is
92
+ not equal to the number of old categories
97
93
98
94
See also
99
95
--------
100
- Categorical.reorder_categories
101
- Categorical.remove_unused_categories
96
+ rename_categories
97
+ reorder_categories
98
+ add_categories
99
+ remove_categories
100
+ remove_unused_categories
101
+ set_categories
102
102
"""
103
103
class Categorical (PandasObject ):
104
104
@@ -399,10 +399,9 @@ def _validate_categories(cls, categories):
399
399
def _set_categories(self, categories):
    """ Sets new categories (a rename of the existing ones).

    Parameters
    ----------
    categories : list-like
        The replacement categories; they are passed through `_validate_categories`
        before being stored.

    Raises
    ------
    ValueError
        If categories are already set and the number of new categories differs from
        the number of old categories. Errors raised by `_validate_categories`
        propagate unchanged.
    """
    validated = self._validate_categories(categories)
    current = self._categories
    # A length change would silently invalidate existing codes, so it is rejected;
    # the very first assignment (no categories yet) is always allowed.
    if current is not None and len(validated) != len(current):
        raise ValueError("new categories need to have the same number of items than the old "
                         "categories!")
    self._categories = validated
407
406
408
407
def _get_categories (self ):
@@ -425,18 +424,118 @@ def _get_levels(self):
425
424
# TODO: Remove after deprecation period in 2017/ after 0.18
426
425
levels = property (fget = _get_levels , fset = _set_levels )
427
426
427
def set_categories(self, new_categories, ordered=None, rename=False, inplace=False):
    """ Sets the categories to the specified new_categories.

    `new_categories` can include new categories (which will result in unused categories)
    or remove old categories (which results in values set to NaN). If `rename==True`,
    the categories will simply be renamed (less or more items than in old categories will
    result in values set to NaN or in unused categories respectively).

    This method can be used to perform more than one action of adding, removing,
    and reordering simultaneously and is therefore faster than performing the individual
    steps via the more specialised methods.

    On the other hand this method does not do checks (e.g., whether the old categories are
    included in the new categories on a reorder), which can result in surprising changes,
    for example when using special string dtypes on python3, which does not consider a S1
    string equal to a single char python string.

    Parameters
    ----------
    new_categories : Index-like
        The categories in new order.
    ordered : boolean, optional
        Whether or not the categorical is treated as an ordered categorical. If not given,
        do not change the ordered information.
    rename : boolean (default: False)
        Whether or not the new_categories should be considered as a rename of the old
        categories or as reordered categories.
    inplace : boolean (default: False)
        Whether or not to set the categories inplace or return a copy of this categorical
        with the new categories.

    Returns
    -------
    cat : Categorical with the new categories or None if inplace.

    Raises
    ------
    ValueError
        If new_categories does not validate as categories

    See also
    --------
    rename_categories
    reorder_categories
    add_categories
    remove_categories
    remove_unused_categories
    """
    new_categories = self._validate_categories(new_categories)
    cat = self if inplace else self.copy()
    if rename:
        if cat._categories is not None and len(new_categories) < len(cat._categories):
            # Codes pointing past the end of the shortened categories become -1/NaN.
            # This must operate on `cat` (not `self`) so that a non-inplace call
            # leaves the original untouched and the returned copy is consistent.
            cat._codes[cat._codes >= len(new_categories)] = -1
        cat._categories = new_categories
    else:
        # Re-encode the actual values against the new categories; values which are
        # no longer present are handled by `_get_codes_for_values`.
        values = cat.__array__()
        cat._codes = _get_codes_for_values(values, new_categories)
        cat._categories = new_categories

    if ordered is not None:
        cat.ordered = ordered

    if not inplace:
        return cat
492
+
493
def rename_categories(self, new_categories, inplace=False):
    """ Renames categories.

    `new_categories` has to be a list-like object. All items must be unique and the
    number of items must be the same as the number of items in the old categories.
    The assignment goes through the `categories` setter, which enforces this.

    Parameters
    ----------
    new_categories : Index-like
        The renamed categories.
    inplace : boolean (default: False)
        Whether or not to rename the categories inplace or return a copy of this
        categorical with renamed categories.

    Returns
    -------
    cat : Categorical with renamed categories or None if inplace.

    Raises
    ------
    ValueError
        If the new categories do not have the same number of items as the current
        categories or do not validate as categories

    See also
    --------
    reorder_categories
    add_categories
    remove_categories
    remove_unused_categories
    set_categories
    """
    if inplace:
        self.categories = new_categories
        return None
    renamed = self.copy()
    renamed.categories = new_categories
    return renamed
428
529
429
530
def reorder_categories (self , new_categories , ordered = None , inplace = False ):
430
531
""" Reorders categories as specified in new_categories.
431
532
432
- `new_categories` do not need to include all old categories and can also include new
433
- category items. All old categories not in new categories are replaced by NaN. In
434
- contrast to assigning to `categories`, new category items can be in arbitrary positions.
533
+ `new_categories` need to include all old categories and no new category items.
435
534
436
535
Raises
437
536
------
438
537
ValueError
439
- If the new categories do not contain all old category items
538
+ If the new categories do not contain all old category items or any new ones
440
539
441
540
Parameters
442
541
----------
@@ -445,38 +544,131 @@ def reorder_categories(self, new_categories, ordered=None, inplace=False):
445
544
ordered : boolean, optional
446
545
Whether or not the categorical is treated as a ordered categorical. If not given,
447
546
do not change the ordered information.
448
- inplace : bool (default: False)
547
+ inplace : boolean (default: False)
449
548
Whether or not to reorder the categories inplace or return a copy of this categorical
450
549
with reordered categories.
451
550
452
551
Returns
453
552
-------
454
553
cat : Categorical with reordered categories or None if inplace.
554
+
555
+ See also
556
+ --------
557
+ rename_categories
558
+ add_categories
559
+ remove_categories
560
+ remove_unused_categories
561
+ set_categories
455
562
"""
456
- new_categories = self ._validate_categories (new_categories )
563
+ if set (self ._categories ) != set (new_categories ):
564
+ raise ValueError ("items in new_categories are not the same as in old categories" )
565
+ return self .set_categories (new_categories , ordered = ordered , inplace = inplace )
566
+
567
def add_categories(self, new_categories, inplace=False):
    """ Add new categories.

    `new_categories` will be included at the last/highest place in the categories and
    will be unused directly after this call.

    Parameters
    ----------
    new_categories : category or list-like of category
        The new categories to be included.
    inplace : boolean (default: False)
        Whether or not to add the categories inplace or return a copy of this categorical
        with added categories.

    Returns
    -------
    cat : Categorical with new categories added or None if inplace.

    Raises
    ------
    ValueError
        If the new categories include old categories or do not validate as categories

    See also
    --------
    rename_categories
    reorder_categories
    remove_categories
    remove_unused_categories
    set_categories
    """
    if not com.is_list_like(new_categories):
        new_categories = [new_categories]
    # Materialise once: any list-like (tuple, Index, ndarray, ...) is accepted, and
    # plain `list + <non-list>` would either raise TypeError or broadcast elementwise.
    new_categories = list(new_categories)
    already_included = set(new_categories) & set(self._categories)
    if len(already_included) != 0:
        msg = "new categories must not include old categories: %s" % str(already_included)
        raise ValueError(msg)
    new_categories = list(self._categories) + new_categories
    new_categories = self._validate_categories(new_categories)
    cat = self if inplace else self.copy()
    # Existing codes stay valid because the new items are appended at the end.
    cat._categories = new_categories
    if not inplace:
        return cat
466
610
611
def remove_categories(self, removals, inplace=False):
    """ Removes the specified categories.

    `removals` must be included in the old categories. Values which were in the removed
    categories will be set to NaN. The relative order of the remaining categories is
    preserved.

    Parameters
    ----------
    removals : category or list of categories
        The categories which should be removed.
    inplace : boolean (default: False)
        Whether or not to remove the categories inplace or return a copy of this
        categorical with removed categories.

    Returns
    -------
    cat : Categorical with removed categories or None if inplace.

    Raises
    ------
    ValueError
        If the removals are not contained in the categories

    See also
    --------
    rename_categories
    reorder_categories
    add_categories
    remove_unused_categories
    set_categories
    """
    if not com.is_list_like(removals):
        removals = [removals]
    # Build the lookup set once (also safe if `removals` is a one-shot iterable).
    removal_set = set(removals)
    not_included = removal_set - set(self._categories)
    if len(not_included) != 0:
        raise ValueError("removals must all be in old categories: %s" % str(not_included))
    # Filter instead of using set difference: a set would scramble the category
    # order, which is significant (especially for ordered categoricals).
    new_categories = [c for c in self._categories if c not in removal_set]
    return self.set_categories(new_categories, ordered=self.ordered, rename=False,
                               inplace=inplace)
650
+
651
+
467
652
def remove_unused_categories (self , inplace = False ):
468
653
""" Removes categories which are not used.
469
654
470
655
Parameters
471
656
----------
472
- inplace : bool (default: False)
657
+ inplace : boolean (default: False)
473
658
Whether or not to drop unused categories inplace or return a copy of this categorical
474
659
with unused categories dropped.
475
660
476
661
Returns
477
662
-------
478
663
cat : Categorical with unused categories dropped or None if inplace.
479
664
665
+ See also
666
+ --------
667
+ rename_categories
668
+ reorder_categories
669
+ add_categories
670
+ remove_categories
671
+ set_categories
480
672
"""
481
673
cat = self if inplace else self .copy ()
482
674
_used = sorted (np .unique (cat ._codes ))
@@ -1176,8 +1368,12 @@ def _delegate_method(self, name, *args, **kwargs):
1176
1368
accessors = ["categories" , "ordered" ],
1177
1369
typ = 'property' )
1178
1370
CategoricalAccessor ._add_delegate_accessors (delegate = Categorical ,
1179
- accessors = ["reorder_categories" ,
1180
- "remove_unused_categories" ],
1371
+ accessors = ["rename_categories" ,
1372
+ "reorder_categories" ,
1373
+ "add_categories" ,
1374
+ "remove_categories" ,
1375
+ "remove_unused_categories" ,
1376
+ "set_categories" ],
1181
1377
typ = 'method' )
1182
1378
1183
1379
##### utility routines #####
0 commit comments