diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index cd91e89554b678..7fe1d55ba55be6 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1634,7 +1634,9 @@ def func(df): return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmax" - result = self._python_apply_general(func, self._obj_with_exclusions) + result = self._python_apply_general( + func, self._obj_with_exclusions, not_indexed_same=True + ) self._maybe_warn_numeric_only_depr("idxmax", result, numeric_only) return result @@ -1673,7 +1675,9 @@ def func(df): return df._constructor_sliced(result, index=res.index) func.__name__ = "idxmin" - result = self._python_apply_general(func, self._obj_with_exclusions) + result = self._python_apply_general( + func, self._obj_with_exclusions, not_indexed_same=True + ) self._maybe_warn_numeric_only_depr("idxmin", result, numeric_only) return result diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 16ee154156616c..89c9f3701a4245 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1040,7 +1040,10 @@ def curried(x): return self._obj_with_exclusions result = self._python_apply_general( - curried, self._obj_with_exclusions, is_transform=is_transform + curried, + self._obj_with_exclusions, + is_transform=is_transform, + not_indexed_same=not is_transform, ) if self._selected_obj.ndim != 1 and self.axis != 1 and result.ndim != 1: diff --git a/pandas/core/interchange/from_dataframe.py b/pandas/core/interchange/from_dataframe.py index 6e1b2de10e8e68..4602819b4834a7 100644 --- a/pandas/core/interchange/from_dataframe.py +++ b/pandas/core/interchange/from_dataframe.py @@ -497,7 +497,7 @@ def set_nulls( null_pos = None if null_kind == ColumnNullType.USE_SENTINEL: - null_pos = data == sentinel_val + null_pos = pd.Series(data) == sentinel_val elif null_kind in (ColumnNullType.USE_BITMASK, ColumnNullType.USE_BYTEMASK): assert validity, "Expected to have a validity buffer for the mask" valid_buff, valid_dtype = validity diff --git a/pandas/plotting/_matplotlib/core.py b/pandas/plotting/_matplotlib/core.py index 7d8c7da6dd9aa0..0b6e5b346062ab 100644 --- a/pandas/plotting/_matplotlib/core.py +++ b/pandas/plotting/_matplotlib/core.py @@ -56,6 +56,7 @@ from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib.converter import register_pandas_matplotlib_converters from pandas.plotting._matplotlib.groupby import reconstruct_data_with_by +from pandas.plotting._matplotlib.misc import unpack_single_str_list from pandas.plotting._matplotlib.style import get_standard_colors from pandas.plotting._matplotlib.timeseries import ( decorate_axes, @@ -177,7 +178,7 @@ def __init__( # For `hist` plot, need to get grouped original data before `self.data` is # updated later if self.by is not None and self._kind == "hist": - self._grouped = data.groupby(self.by) + self._grouped = data.groupby(unpack_single_str_list(self.by)) self.kind = kind diff --git a/pandas/plotting/_matplotlib/hist.py b/pandas/plotting/_matplotlib/hist.py index 3ca00ae41d5879..d69f68d9e0b667 100644 --- a/pandas/plotting/_matplotlib/hist.py +++ b/pandas/plotting/_matplotlib/hist.py @@ -63,7 +63,6 @@ def __init__( MPLPlot.__init__(self, data, **kwargs) def _args_adjust(self): - # calculate bin number separately in different subplots # where subplots are created based on by argument if is_integer(self.bins): diff --git a/pandas/plotting/_matplotlib/misc.py b/pandas/plotting/_matplotlib/misc.py index 4b74b067053a6e..633cb63664823b 100644 --- a/pandas/plotting/_matplotlib/misc.py +++ b/pandas/plotting/_matplotlib/misc.py @@ -479,7 +479,6 @@ def r(h): def unpack_single_str_list(keys): # GH 42795 - if isinstance(keys, list): - if len(keys) == 1 and isinstance(keys[0], str): - keys = keys[0] + if isinstance(keys, list) and len(keys) == 1: + keys = keys[0] return keys diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index f0a3219d0b419f..7e7f1a628da6e6 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -188,21 +188,20 @@ def test_ngroup_cumcount_pair(self): tm.assert_series_equal(g.ngroup(), Series(ngroupd)) tm.assert_series_equal(g.cumcount(), Series(cumcounted)) - def test_ngroup_respects_groupby_order(self): + def test_ngroup_respects_groupby_order(self, sort): np.random.seed(0) df = DataFrame({"a": np.random.choice(list("abcdef"), 100)}) - for sort_flag in (False, True): - g = df.groupby(["a"], sort=sort_flag) - df["group_id"] = -1 - df["group_index"] = -1 - - for i, (_, group) in enumerate(g): - df.loc[group.index, "group_id"] = i - for j, ind in enumerate(group.index): - df.loc[ind, "group_index"] = j - - tm.assert_series_equal(Series(df["group_id"].values), g.ngroup()) - tm.assert_series_equal(Series(df["group_index"].values), g.cumcount()) + g = df.groupby("a", sort=sort) + df["group_id"] = -1 + df["group_index"] = -1 + + for i, (_, group) in enumerate(g): + df.loc[group.index, "group_id"] = i + for j, ind in enumerate(group.index): + df.loc[ind, "group_index"] = j + + tm.assert_series_equal(Series(df["group_id"].values), g.ngroup()) + tm.assert_series_equal(Series(df["group_index"].values), g.cumcount()) @pytest.mark.parametrize( "datetimelike", diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 90b29a022f8011..7ba22c09cd26d7 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1590,11 +1590,11 @@ def test_corrwith_with_1_axis(): tm.assert_series_equal(result, expected) -@pytest.mark.filterwarnings("ignore:The 'mad' method.*:FutureWarning") +@pytest.mark.filterwarnings("ignore:.* is deprecated:FutureWarning") def test_multiindex_group_all_columns_when_empty(groupby_func): # GH 32464 df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) - gb = df.groupby(["a", "b", "c"]) + gb = df.groupby(["a", "b", "c"], group_keys=False) method = getattr(gb, groupby_func) args = get_groupby_method_args(groupby_func, df) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 41b2e78d093ea3..2b7ecbcdf9f801 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -33,7 +33,7 @@ def data_test_ix(request, dirpath): for k in range(df.shape[1]): col = df.iloc[:, k] if col.dtype == np.int64: - df.iloc[:, k] = df.iloc[:, k].astype(np.float64) + df.isetitem(k, df.iloc[:, k].astype(np.float64)) return df, test_ix diff --git a/pandas/tests/plotting/frame/test_hist_box_by.py b/pandas/tests/plotting/frame/test_hist_box_by.py index e568016c858fdc..999118144b58d0 100644 --- a/pandas/tests/plotting/frame/test_hist_box_by.py +++ b/pandas/tests/plotting/frame/test_hist_box_by.py @@ -83,7 +83,9 @@ class TestHistWithBy(TestPlotBase): ) def test_hist_plot_by_argument(self, by, column, titles, legends, hist_df): # GH 15079 - axes = _check_plot_works(hist_df.plot.hist, column=column, by=by) + axes = _check_plot_works( + hist_df.plot.hist, column=column, by=by, default_axes=True + ) result_titles = [ax.get_title() for ax in axes] result_legends = [ [legend.get_text() for legend in ax.get_legend().texts] for ax in axes @@ -120,7 +122,7 @@ def test_hist_plot_by_0(self, by, column, titles, legends, hist_df): df = hist_df.copy() df = df.rename(columns={"C": 0}) - axes = _check_plot_works(df.plot.hist, column=column, by=by) + axes = _check_plot_works(df.plot.hist, default_axes=True, column=column, by=by) result_titles = [ax.get_title() for ax in axes] result_legends = [ [legend.get_text() for legend in ax.get_legend().texts] for ax in axes @@ -142,7 +144,9 @@ def test_hist_plot_empty_list_string_tuple_by(self, by, column, hist_df): # GH 15079 msg = "No group keys passed" with pytest.raises(ValueError, match=msg): - _check_plot_works(hist_df.plot.hist, column=column, by=by) + _check_plot_works( + hist_df.plot.hist, default_axes=True, column=column, by=by + ) @pytest.mark.slow @pytest.mark.parametrize( @@ -274,7 +278,9 @@ class TestBoxWithBy(TestPlotBase): ) def test_box_plot_by_argument(self, by, column, titles, xticklabels, hist_df): # GH 15079 - axes = _check_plot_works(hist_df.plot.box, column=column, by=by) + axes = _check_plot_works( + hist_df.plot.box, default_axes=True, column=column, by=by + ) result_titles = [ax.get_title() for ax in axes] result_xticklabels = [ [label.get_text() for label in ax.get_xticklabels()] for ax in axes @@ -313,7 +319,7 @@ def test_box_plot_by_0(self, by, column, titles, xticklabels, hist_df): df = hist_df.copy() df = df.rename(columns={"C": 0}) - axes = _check_plot_works(df.plot.box, column=column, by=by) + axes = _check_plot_works(df.plot.box, default_axes=True, column=column, by=by) result_titles = [ax.get_title() for ax in axes] result_xticklabels = [ [label.get_text() for label in ax.get_xticklabels()] for ax in axes @@ -335,7 +341,7 @@ def test_box_plot_with_none_empty_list_by(self, by, column, hist_df): # GH 15079 msg = "No group keys passed" with pytest.raises(ValueError, match=msg): - _check_plot_works(hist_df.plot.box, column=column, by=by) + _check_plot_works(hist_df.plot.box, default_axes=True, column=column, by=by) @pytest.mark.slow @pytest.mark.parametrize( @@ -351,7 +357,9 @@ def test_box_plot_with_none_empty_list_by(self, by, column, hist_df): ) def test_box_plot_layout_with_by(self, by, column, layout, axes_num, hist_df): # GH 15079 - axes = _check_plot_works(hist_df.plot.box, column=column, by=by, layout=layout) + axes = _check_plot_works( + hist_df.plot.box, default_axes=True, column=column, by=by, layout=layout + ) self._check_axes_shape(axes, axes_num=axes_num, layout=layout) @pytest.mark.parametrize(