Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,63 @@ In cases with mixed-resolution inputs, the highest resolution is used:
In [2]: pd.to_datetime([pd.Timestamp("2024-03-22 11:43:01"), "2024-03-22 11:43:01.002"]).dtype
Out[2]: dtype('<M8[ns]')

.. _whatsnew_300.api_breaking.concat_datetime_sorting:

:func:`concat` no longer ignores ``sort`` when all objects have a :class:`DatetimeIndex`
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

When all objects passed to :func:`concat` have a :class:`DatetimeIndex`,
passing ``sort=False`` will now result in the non-concatenation axis not
being sorted. Previously, the result would always be sorted along
the non-concatenation axis even when ``sort=False`` was passed (:issue:`57335`).

If you do not specify the ``sort`` argument, pandas will continue to return a
sorted result, but this behavior is deprecated and you will receive a warning.
In order to make this less noisy for users, pandas checks if not sorting would
impact the result and only warns when it would. This check can be expensive,
and users can skip the check by explicitly specifying ``sort=True`` or
``sort=False``.

This deprecation can also impact pandas' internal usage of :func:`concat`.
While we have investigated uses of :func:`concat` to determine if this could lead
to a change in behavior of other functions and methods in the API, it is
possible some have been missed. In order to be cautious here, pandas has *not*
added ``sort=False`` to any internal calls where we believe behavior should not change.
If we have missed something, users will not experience a behavior change but they
will receive a warning about :func:`concat` even though they are not directly
calling this function. If this does occur, we ask users to open an issue so that
we may address any potential behavior changes.

.. ipython:: python

idx1 = pd.date_range("2025-01-02", periods=3, freq="h")
df1 = pd.DataFrame({"a": [1, 2, 3]}, index=idx1)
df1

idx2 = pd.date_range("2025-01-01", periods=3, freq="h")
df2 = pd.DataFrame({"b": [1, 2, 3]}, index=idx2)
df2

*Old behavior*

.. code-block:: ipython

In [3]: pd.concat([df1, df2], axis=1, sort=False)
Out[3]:
a b
2025-01-01 00:00:00 NaN 1.0
2025-01-01 01:00:00 NaN 2.0
2025-01-01 02:00:00 NaN 3.0
2025-01-02 00:00:00 1.0 NaN
2025-01-02 01:00:00 2.0 NaN
2025-01-02 02:00:00 3.0 NaN

*New behavior*

.. ipython:: python

pd.concat([df1, df2], axis=1, sort=False)

.. _whatsnew_300.api_breaking.value_counts_sorting:

Changed behavior in :meth:`DataFrame.value_counts` and :meth:`DataFrameGroupBy.value_counts` when ``sort=False``
Expand Down Expand Up @@ -1173,6 +1230,7 @@ Groupby/resample/rolling
- Bug in :meth:`DataFrameGroupBy.apply` with ``as_index=False`` that was returning :class:`MultiIndex` instead of returning :class:`Index`. (:issue:`58291`)
- Bug in :meth:`DataFrameGroupBy.cumsum` and :meth:`DataFrameGroupBy.cumprod` where ``numeric_only`` parameter was passed indirectly through kwargs instead of passing directly. (:issue:`58811`)
- Bug in :meth:`DataFrameGroupBy.cumsum` where it did not return the correct dtype when the label contained ``None``. (:issue:`58811`)
- Bug in :meth:`DataFrameGroupBy.shift` where the resulting index would be sorted if the input has a :class:`DatetimeIndex` and multiple periods are specified. (:issue:`62843`)
- Bug in :meth:`DataFrameGroupBy.transform` and :meth:`SeriesGroupBy.transform` with a reducer and ``observed=False`` that coerces dtype to float when there are unobserved categories. (:issue:`55326`)
- Bug in :meth:`Rolling.apply` for ``method="table"`` where column order was not being respected due to the columns getting sorted by default. (:issue:`59666`)
- Bug in :meth:`Rolling.apply` where the applied function could be called on fewer than ``min_periods`` periods if ``method="table"``. (:issue:`58868`)
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def transform_dict_like(self, func) -> DataFrame:
for name, how in func.items():
colg = obj._gotitem(name, ndim=1)
results[name] = colg.transform(how, 0, *args, **kwargs)
return concat(results, axis=1)
return concat(results, axis=1) # nobug

def transform_str_or_callable(self, func) -> DataFrame | Series:
"""
Expand Down Expand Up @@ -485,7 +485,7 @@ def wrap_results_list_like(
obj = self.obj

try:
return concat(results, keys=keys, axis=1, sort=False)
return concat(results, keys=keys, axis=1, sort=False) # maybebug
except TypeError as err:
# we are concatting non-NDFrame objects,
# e.g. a list of scalars
Expand Down Expand Up @@ -635,7 +635,7 @@ def wrap_results_dict_like(
keys_to_use = ktu

axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1
result = concat(
result = concat( # maybebug
results,
axis=axis,
keys=keys_to_use,
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/arrow/accessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,6 @@ def explode(self) -> DataFrame:
from pandas import concat

pa_type = self._pa_array.type
return concat(
return concat( # nobug
[self.field(i) for i in range(pa_type.num_fields)], axis="columns"
)
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2690,7 +2690,7 @@ def describe(self) -> DataFrame:
from pandas import Index
from pandas.core.reshape.concat import concat

result = concat([counts, freqs], ignore_index=True, axis=1)
result = concat([counts, freqs], ignore_index=True, axis=1) # nobug
result.columns = Index(["counts", "freqs"])
result.index.name = "categories"

Expand Down
14 changes: 8 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6130,7 +6130,7 @@ def shift(
.shift(periods=period, freq=freq, axis=axis, fill_value=fill_value)
.add_suffix(f"{suffix}_{period}" if suffix else f"_{period}")
)
return concat(shifted_dataframes, axis=1)
return concat(shifted_dataframes, axis=1) # bug
elif suffix:
raise ValueError("Cannot specify `suffix` if `periods` is an int.")
periods = cast(int, periods)
Expand Down Expand Up @@ -11168,7 +11168,7 @@ def _append_internal(

from pandas.core.reshape.concat import concat

result = concat(
result = concat( # possible bug
[self, row_df],
ignore_index=ignore_index,
)
Expand Down Expand Up @@ -11396,12 +11396,12 @@ def join(
# join indexes only using concat
if can_concat:
if how == "left":
res = concat(
res = concat( # nobug
frames, axis=1, join="outer", verify_integrity=True, sort=sort
)
return res.reindex(self.index)
else:
return concat(
return concat( # bug
frames, axis=1, join=how, verify_integrity=True, sort=sort
)

Expand Down Expand Up @@ -11590,7 +11590,9 @@ def _series_round(ser: Series, decimals: int) -> Series:

if new_cols is not None and len(new_cols) > 0:
return self._constructor(
concat(new_cols, axis=1), index=self.index, columns=self.columns
concat(new_cols, axis=1),
index=self.index,
columns=self.columns, # nobug
).__finalize__(self, method="round")
else:
return self.copy(deep=False)
Expand Down Expand Up @@ -14173,7 +14175,7 @@ def isin(self, values: Series | DataFrame | Sequence | Mapping) -> DataFrame:
from pandas.core.reshape.concat import concat

values = collections.defaultdict(list, values)
result = concat(
result = concat( # nobug
(
self.iloc[:, [i]].isin(values[col])
for i, col in enumerate(self.columns)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6525,7 +6525,7 @@ def astype(
return self.copy(deep=False)

# GH 19920: retain column metadata after concat
result = concat(results, axis=1)
result = concat(results, axis=1) # nobug
# GH#40810 retain subclass
# error: Incompatible types in assignment
# (expression has type "Self", variable has type "DataFrame")
Expand Down Expand Up @@ -9507,7 +9507,7 @@ def compare(

# error: List item 0 has incompatible type "NDFrame"; expected
# "Union[Series, DataFrame]"
diff = concat(
diff = concat( # bug
[self, other], # type: ignore[list-item]
axis=axis,
keys=result_names,
Expand Down
12 changes: 7 additions & 5 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame:
if any(isinstance(x, DataFrame) for x in results.values()):
from pandas import concat

res_df = concat(
res_df = concat( # nobug
results.values(), axis=1, keys=[key.label for key in results]
)
return res_df
Expand Down Expand Up @@ -722,7 +722,7 @@ def _transform_general(
if results:
from pandas.core.reshape.concat import concat

concatenated = concat(results, ignore_index=True)
concatenated = concat(results, ignore_index=True) # nobug
result = self._set_result_index_ordered(concatenated)
else:
result = self.obj._constructor(dtype=np.float64)
Expand Down Expand Up @@ -2238,7 +2238,7 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs):
applied.append(res)

concat_index = obj.columns
concatenated = concat(
concatenated = concat( # nobug
applied, axis=0, verify_integrity=False, ignore_index=True
)
concatenated = concatenated.reindex(concat_index, axis=1)
Expand Down Expand Up @@ -2530,7 +2530,7 @@ def _apply_to_column_groupbys(self, func) -> DataFrame:
# concat would raise
res_df = DataFrame([], columns=columns, index=self._grouper.result_index)
else:
res_df = concat(results, keys=columns, axis=1)
res_df = concat(results, keys=columns, axis=1) # nobug

if not self.as_index:
res_df.index = default_index(len(res_df))
Expand Down Expand Up @@ -3390,7 +3390,9 @@ def _wrap_transform_general_frame(
# other dimension; this will preserve dtypes
# GH14457
if res.index.is_(obj.index):
res_frame = concat([res] * len(group.columns), axis=1, ignore_index=True)
res_frame = concat(
[res] * len(group.columns), axis=1, ignore_index=True
) # nobug
res_frame.columns = group.columns
res_frame.index = group.index
else:
Expand Down
10 changes: 5 additions & 5 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1143,7 +1143,7 @@ def _concat_objects(
group_levels = self._grouper.levels
group_names = self._grouper.names

result = concat(
result = concat( # maybebug
values,
axis=0,
keys=group_keys,
Expand All @@ -1152,10 +1152,10 @@ def _concat_objects(
sort=False,
)
else:
result = concat(values, axis=0)
result = concat(values, axis=0) # maybebug

elif not not_indexed_same:
result = concat(values, axis=0)
result = concat(values, axis=0) # maybebug

ax = self._selected_obj.index
if self.dropna:
Expand All @@ -1178,7 +1178,7 @@ def _concat_objects(
result = result.reindex(ax, axis=0)

else:
result = concat(values, axis=0)
result = concat(values, axis=0) # maybebug

if self.obj.ndim == 1:
name = self.obj.name
Expand Down Expand Up @@ -5238,7 +5238,7 @@ def shift(
return (
shifted_dataframes[0]
if len(shifted_dataframes) == 1
else concat(shifted_dataframes, axis=1)
else concat(shifted_dataframes, axis=1, sort=False) # nobug
)

@final
Expand Down
24 changes: 14 additions & 10 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def get_objs_combined_axis(
objs,
intersect: bool = False,
axis: Axis = 0,
sort: bool = True,
sort: bool | lib.NoDefault = True,
) -> Index:
"""
Extract combined index: return intersection or union (depending on the
Expand All @@ -81,7 +81,8 @@ def get_objs_combined_axis(
axis : {0 or 'index', 1 or 'outer'}, default 0
The axis to extract indexes from.
sort : bool, default True
Whether the result index should come out sorted or not.
Whether the result index should come out sorted or not. NoDefault is
used for the deprecation in GH#57335.

Returns
-------
Expand All @@ -108,7 +109,7 @@ def _get_distinct_objs(objs: list[Index]) -> list[Index]:
def _get_combined_index(
indexes: list[Index],
intersect: bool = False,
sort: bool = False,
sort: bool | lib.NoDefault = False,
) -> Index:
"""
Return the union or intersection of indexes.
Expand All @@ -121,7 +122,8 @@ def _get_combined_index(
If True, calculate the intersection between indexes. Otherwise,
calculate the union.
sort : bool, default False
Whether the result index should come out sorted or not.
Whether the result index should come out sorted or not. NoDefault is
used for the deprecation in GH#57335.

Returns
-------
Expand All @@ -138,10 +140,10 @@ def _get_combined_index(
for other in indexes[1:]:
index = index.intersection(other)
else:
index = union_indexes(indexes, sort=False)
index = union_indexes(indexes, sort=sort if sort is lib.no_default else False)
index = ensure_index(index)

if sort:
if sort and sort is not lib.no_default:
index = safe_sort_index(index)
return index

Expand Down Expand Up @@ -180,7 +182,7 @@ def safe_sort_index(index: Index) -> Index:
return index


def union_indexes(indexes, sort: bool | None = True) -> Index:
def union_indexes(indexes, sort: bool | lib.NoDefault = True) -> Index:
"""
Return the union of indexes.

Expand All @@ -190,7 +192,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
----------
indexes : list of Index or list objects
sort : bool, default True
Whether the result index should come out sorted or not.
Whether the result index should come out sorted or not. NoDefault is
used for the deprecation in GH#57335.

Returns
-------
Expand All @@ -201,7 +204,7 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
if len(indexes) == 1:
result = indexes[0]
if isinstance(result, list):
if not sort:
if not sort or sort is lib.no_default:
result = Index(result)
else:
result = Index(sorted(result))
Expand All @@ -227,7 +230,8 @@ def union_indexes(indexes, sort: bool | None = True) -> Index:
raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

if num_dtis == len(indexes):
sort = True
if sort is lib.no_default:
sort = True
result = indexes[0]

elif num_dtis > 1:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5379,9 +5379,9 @@ def append(self, other: Index | Sequence[Index]) -> Index:
names = {obj.name for obj in to_concat}
name = None if len(names) > 1 else self.name

return self._concat(to_concat, name)
return self._concat(to_concat, name) # nobug

def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
def _concat(self, to_concat: list[Index], name: Hashable) -> Index: # nobug
"""
Concatenate multiple Index objects.
"""
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -1181,7 +1181,7 @@ def insert(self, loc: int, item) -> Index:

return super().insert(loc, item)

def _concat(self, indexes: list[Index], name: Hashable) -> Index:
def _concat(self, indexes: list[Index], name: Hashable) -> Index: # nobug
"""
Overriding parent method for the case of all RangeIndex instances.

Expand All @@ -1191,7 +1191,7 @@ def _concat(self, indexes: list[Index], name: Hashable) -> Index:
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Index([0,1,2,4,5], dtype='int64')
"""
if not all(isinstance(x, RangeIndex) for x in indexes):
result = super()._concat(indexes, name)
result = super()._concat(indexes, name) # nobug
if result.dtype.kind == "i":
return self._shallow_copy(result._values)
return result
Expand Down
Loading