Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Change default for Index.union sort #25007

Closed
37 changes: 31 additions & 6 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2245,18 +2245,34 @@ def _get_reconciled_name_object(self, other):
return self._shallow_copy(name=name)
return self

def union(self, other, sort=True):
def union(self, other, sort=None):
"""
Form the union of two Index objects.

Parameters
----------
other : Index or array-like
sort : bool, default True
Sort the resulting index if possible
sort : bool or None, default None
Whether to sort the resulting Index.

* None : Sort the result, except when

1. `self` and `other` are equal.
2. `self` or `other` has length 0.
3. Some values in `self` or `other` cannot be compared.
A RuntimeWarning is issued in this case.

* True : sort the result. A TypeError is raised when the
values cannot be compared.
* False : do not sort the result.

.. versionadded:: 0.24.0

.. versionchanged:: 0.24.0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `versionchanged` directive should say "0.24.1".


Changed the default `sort` to None, matching the
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this being changed? This is certainly not a regression; this was the default behavior.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To be clear: no behaviour is changed. It was indeed the default, and it stays the default. Only the value that encodes the default is changed (True -> None), so that True can mean something else (= always sort).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, maybe it should be made clearer in the docstring.

behavior of pandas 0.23.4 and earlier.

Returns
-------
union : Index
Expand All @@ -2273,10 +2289,16 @@ def union(self, other, sort=True):
other = ensure_index(other)

if len(other) == 0 or self.equals(other):
return self._get_reconciled_name_object(other)
result = self._get_reconciled_name_object(other)
if sort:
result = result.sort_values()
return result

if len(self) == 0:
return other._get_reconciled_name_object(self)
result = other._get_reconciled_name_object(self)
if sort:
result = result.sort_values()
return result

# TODO: is_dtype_union_equal is a hack around
# 1. buggy set ops with duplicates (GH #13432)
Expand Down Expand Up @@ -2319,13 +2341,16 @@ def union(self, other, sort=True):
else:
result = lvals

if sort:
if sort is None:
try:
result = sorting.safe_sort(result)
except TypeError as e:
warnings.warn("{}, sort order is undefined for "
"incomparable objects".format(e),
RuntimeWarning, stacklevel=3)
elif sort:
# raise if not sortable.
result = sorting.safe_sort(result)

# for subclasses
return self._wrap_setop_result(other, result)
Expand Down
46 changes: 46 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,6 +794,52 @@ def test_union(self, sort):
tm.assert_index_equal(union, everything.sort_values())
assert tm.equalContents(union, everything)

def test_union_sort_other_equal(self):
    """Check ``Index.union`` of an unsorted Index with itself.

    The default call and ``sort=False`` are expected to hand back the
    values in their original order, while ``sort=True`` is expected to
    return them sorted.
    """
    unsorted = pd.Index([1, 0, 2])
    in_order = pd.Index([0, 1, 2])

    # No ``sort`` argument: equal operands come back unchanged.
    tm.assert_index_equal(unsorted.union(unsorted), unsorted)

    # Explicit ``sort=True``: the result is sorted.
    tm.assert_index_equal(unsorted.union(unsorted, sort=True), in_order)

    # Explicit ``sort=False``: original order is kept.
    tm.assert_index_equal(unsorted.union(unsorted, sort=False), unsorted)

def test_union_sort_other_empty(self):
    """Check ``Index.union`` when one of the operands is empty.

    Both operand orders are exercised (non-empty with empty, and empty
    with non-empty). The default call and ``sort=False`` are expected to
    keep the original order; ``sort=True`` is expected to sort.
    """
    unsorted = pd.Index([1, 0, 2])
    empty = unsorted[:0]
    in_order = pd.Index([0, 1, 2])

    # No ``sort`` argument: the non-empty operand comes back unchanged.
    tm.assert_index_equal(unsorted.union(empty), unsorted)
    tm.assert_index_equal(empty.union(unsorted), unsorted)

    # Explicit ``sort=True``: the result is sorted in both directions.
    tm.assert_index_equal(unsorted.union(empty, sort=True), in_order)
    tm.assert_index_equal(empty.union(unsorted, sort=True), in_order)

    # Explicit ``sort=False``: original order is kept in both directions.
    tm.assert_index_equal(unsorted.union(empty, sort=False), unsorted)
    tm.assert_index_equal(empty.union(unsorted, sort=False), unsorted)

def test_union_sort_other_incomparable(self):
    """Check ``Index.union`` on an Index mixing an int and a Timestamp.

    The default call is expected to emit a ``RuntimeWarning`` and return
    the values unsorted, ``sort=True`` is expected to raise ``TypeError``,
    and ``sort=False`` is expected to return the values unsorted without
    any warning check.
    """
    mixed = pd.Index([1, pd.Timestamp('2000')])

    # No ``sort`` argument: a warning is produced and order is unchanged.
    with tm.assert_produces_warning(RuntimeWarning):
        warned = mixed.union(mixed[:1])
    tm.assert_index_equal(warned, mixed)

    # Explicit ``sort=True``: sorting is mandatory, so the call must raise.
    with pytest.raises(TypeError, match='.*'):
        mixed.union(mixed[:1], sort=True)

    # Explicit ``sort=False``: no sorting is attempted.
    tm.assert_index_equal(mixed.union(mixed[:1], sort=False), mixed)

@pytest.mark.parametrize("klass", [
np.array, Series, list])
@pytest.mark.parametrize("sort", [True, False])
Expand Down