Skip to content

Commit 2f00159

Browse files
toobaz authored and jorisvandenbossche committed
CLN: Index.append() refactoring (#16236)
1 parent a4c4ede commit 2f00159

File tree

6 files changed

+63
-67
lines changed

6 files changed

+63
-67
lines changed

pandas/core/dtypes/concat.py

+47-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
_TD_DTYPE)
2020
from pandas.core.dtypes.generic import (
2121
ABCDatetimeIndex, ABCTimedeltaIndex,
22-
ABCPeriodIndex)
22+
ABCPeriodIndex, ABCRangeIndex)
2323

2424

2525
def get_dtype_kinds(l):
@@ -41,6 +41,8 @@ def get_dtype_kinds(l):
4141
typ = 'category'
4242
elif is_sparse(arr):
4343
typ = 'sparse'
44+
elif isinstance(arr, ABCRangeIndex):
45+
typ = 'range'
4446
elif is_datetimetz(arr):
4547
# if to_concat contains different tz,
4648
# the result must be object dtype
@@ -559,3 +561,47 @@ def convert_sparse(x, axis):
559561
# coerce to object if needed
560562
result = result.astype('object')
561563
return result
564+
565+
566+
def _concat_rangeindex_same_dtype(indexes):
567+
"""
568+
Concatenates multiple RangeIndex instances. All members of "indexes" must
569+
be of type RangeIndex; result will be RangeIndex if possible, Int64Index
570+
otherwise. E.g.:
571+
indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
572+
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5])
573+
"""
574+
575+
start = step = next = None
576+
577+
for obj in indexes:
578+
if not len(obj):
579+
continue
580+
581+
if start is None:
582+
# This is set by the first non-empty index
583+
start = obj._start
584+
if step is None and len(obj) > 1:
585+
step = obj._step
586+
elif step is None:
587+
# First non-empty index had only one element
588+
if obj._start == start:
589+
return _concat_index_asobject(indexes)
590+
step = obj._start - start
591+
592+
non_consecutive = ((step != obj._step and len(obj) > 1) or
593+
(next is not None and obj._start != next))
594+
if non_consecutive:
595+
# Int64Index._append_same_dtype([ix.astype(int) for ix in indexes])
596+
# would be preferred... but it currently resorts to
597+
# _concat_index_asobject anyway.
598+
return _concat_index_asobject(indexes)
599+
600+
if step is not None:
601+
next = obj[-1] + step
602+
603+
if start is None:
604+
start = obj._start
605+
step = obj._step
606+
stop = obj._stop if next is None else next
607+
return indexes[0].__class__(start, stop, step)

pandas/core/indexes/base.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -1745,18 +1745,17 @@ def append(self, other):
17451745
names = set([obj.name for obj in to_concat])
17461746
name = None if len(names) > 1 else self.name
17471747

1748-
if self.is_categorical():
1749-
# if calling index is category, don't check dtype of others
1750-
from pandas.core.indexes.category import CategoricalIndex
1751-
return CategoricalIndex._append_same_dtype(self, to_concat, name)
1748+
return self._concat(to_concat, name)
1749+
1750+
def _concat(self, to_concat, name):
    """
    Concatenate the indexes in to_concat, giving the result the name `name`.

    Uses self._concat_same_dtype when get_dtype_kinds reports exactly one
    kind for to_concat; otherwise falls back to _concat_index_asobject.
    """
    kinds = _concat.get_dtype_kinds(to_concat)

    if len(kinds) != 1:
        # Mixed kinds: no class-specific fast path applies
        return _concat._concat_index_asobject(to_concat, name=name)
    return self._concat_same_dtype(to_concat, name=name)
17581757

1759-
def _append_same_dtype(self, to_concat, name):
1758+
def _concat_same_dtype(self, to_concat, name):
17601759
"""
17611760
Concatenate to_concat which has the same class
17621761
"""

pandas/core/indexes/category.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -633,7 +633,11 @@ def insert(self, loc, item):
633633
codes = np.concatenate((codes[:loc], code, codes[loc:]))
634634
return self._create_from_codes(codes)
635635

636-
def _append_same_dtype(self, to_concat, name):
636+
def _concat(self, to_concat, name):
    """
    Concatenate to_concat by always taking the categorical path.

    If the calling index is category, the dtype of the others is not
    checked: everything is handed to CategoricalIndex._concat_same_dtype.
    """
    concatenated = CategoricalIndex._concat_same_dtype(self, to_concat, name)
    return concatenated
639+
640+
def _concat_same_dtype(self, to_concat, name):
637641
"""
638642
Concatenate to_concat which has the same class
639643
ValueError if other is not in the categories

pandas/core/indexes/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,7 @@ def summary(self, name=None):
837837
result = result.replace("'", "")
838838
return result
839839

840-
def _append_same_dtype(self, to_concat, name):
840+
def _concat_same_dtype(self, to_concat, name):
841841
"""
842842
Concatenate to_concat which has the same class
843843
"""

pandas/core/indexes/interval.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -876,7 +876,7 @@ def _as_like_interval_index(self, other, error_msg):
876876
raise ValueError(error_msg)
877877
return other
878878

879-
def _append_same_dtype(self, to_concat, name):
879+
def _concat_same_dtype(self, to_concat, name):
880880
"""
881881
assert that we all have the same .closed
882882
we allow a 0-len index here as well
@@ -885,7 +885,7 @@ def _append_same_dtype(self, to_concat, name):
885885
msg = ('can only append two IntervalIndex objects '
886886
'that are closed on the same side')
887887
raise ValueError(msg)
888-
return super(IntervalIndex, self)._append_same_dtype(to_concat, name)
888+
return super(IntervalIndex, self)._concat_same_dtype(to_concat, name)
889889

890890
@Appender(_index_shared_docs['take'] % _index_doc_kwargs)
891891
def take(self, indices, axis=0, allow_fill=True,

pandas/core/indexes/range.py

+3-56
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from pandas.compat.numpy import function as nv
1515
from pandas.core.indexes.base import Index, _index_shared_docs
1616
from pandas.util._decorators import Appender, cache_readonly
17+
import pandas.core.dtypes.concat as _concat
1718
import pandas.core.indexes.base as ibase
1819

1920
from pandas.core.indexes.numeric import Int64Index
@@ -447,62 +448,8 @@ def join(self, other, how='left', level=None, return_indexers=False,
447448
return super(RangeIndex, self).join(other, how, level, return_indexers,
448449
sort)
449450

450-
def append(self, other):
451-
"""
452-
Append a collection of Index options together
453-
454-
Parameters
455-
----------
456-
other : Index or list/tuple of indices
457-
458-
Returns
459-
-------
460-
appended : RangeIndex if all indexes are consecutive RangeIndexes,
461-
otherwise Int64Index or Index
462-
"""
463-
464-
to_concat = [self]
465-
466-
if isinstance(other, (list, tuple)):
467-
to_concat = to_concat + list(other)
468-
else:
469-
to_concat.append(other)
470-
471-
if not all([isinstance(i, RangeIndex) for i in to_concat]):
472-
return super(RangeIndex, self).append(other)
473-
474-
start = step = next = None
475-
476-
for obj in to_concat:
477-
if not len(obj):
478-
continue
479-
480-
if start is None:
481-
# This is set by the first non-empty index
482-
start = obj._start
483-
if step is None and len(obj) > 1:
484-
step = obj._step
485-
elif step is None:
486-
# First non-empty index had only one element
487-
if obj._start == start:
488-
return super(RangeIndex, self).append(other)
489-
step = obj._start - start
490-
491-
non_consecutive = ((step != obj._step and len(obj) > 1) or
492-
(next is not None and obj._start != next))
493-
if non_consecutive:
494-
return super(RangeIndex, self).append(other)
495-
496-
if step is not None:
497-
next = obj[-1] + step
498-
499-
if start is None:
500-
start = obj._start
501-
step = obj._step
502-
stop = obj._stop if next is None else next
503-
names = set([obj.name for obj in to_concat])
504-
name = None if len(names) > 1 else self.name
505-
return RangeIndex(start, stop, step, name=name)
451+
def _concat_same_dtype(self, indexes, name):
    """
    Concatenate indexes via _concat._concat_rangeindex_same_dtype and
    return the result renamed to `name`.
    """
    combined = _concat._concat_rangeindex_same_dtype(indexes)
    return combined.rename(name)
506453

507454
def __len__(self):
508455
"""

0 commit comments

Comments
 (0)