diff --git a/doc/source/release.rst b/doc/source/release.rst index ea5af9165b483..47a2ef82c78dc 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -139,11 +139,11 @@ API Changes - Following keywords are now acceptable for :meth:`DataFrame.plot(kind='bar')` and :meth:`DataFrame.plot(kind='barh')`. - `width`: Specify the bar width. In previous versions, static value 0.5 was passed to matplotlib and it cannot be overwritten. (:issue:`6604`) - + - `align`: Specify the bar alignment. Default is `center` (different from matplotlib). In previous versions, pandas passes `align='edge'` to matplotlib and adjust the location to `center` by itself, and it results `align` keyword is not applied as expected. (:issue:`4525`) - + - `position`: Specify relative alignments for bar plot layout. From 0 (left/bottom-end) to 1(right/top-end). Default is 0.5 (center). (:issue:`6604`) - + - Define and document the order of column vs index names in query/eval (:issue:`6676`) @@ -289,6 +289,7 @@ Bug Fixes - Bug in binary operations with a rhs of a Series not aligning (:issue:`6681`) - Bug in ``DataFrame.to_stata`` which incorrectly handles nan values and ignores 'with_index' keyword argument (:issue:`6685`) - Bug in resample with extra bins when using an evenly divisible frequency (:issue:`4076`) +- Bug in groupby aggregation where passing a custom function directly gave results inconsistent with passing the same function in a list (:issue:`6715`) pandas 0.13.1 ------------- diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 208f9f1a8e19a..996a691eca082 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1081,10 +1081,13 @@ def apply(self, f, data, axis=0): try: values, mutated = splitter.fast_apply(f, group_keys) return group_keys, values, mutated - except Exception: + except (lib.InvalidApply): # we detect a mutation of some kind # so take slow path pass + except (Exception) as e: + # NOTE(review): error is swallowed, not raised; falls to slow path + pass result_values = [] for key, (i, group) in zip(group_keys, splitter): @@ -2295,7 
+2298,15 @@ def aggregate(self, arg, *args, **kwargs): if self.grouper.nkeys > 1: return self._python_agg_general(arg, *args, **kwargs) else: - result = self._aggregate_generic(arg, *args, **kwargs) + + # first try treating the single function as a one-element list + try: + assert not args and not kwargs + result = self._aggregate_multiple_funcs([arg]) + result.columns = Index(result.columns.levels[0], + name=self._selected_obj.columns.name) + except: + result = self._aggregate_generic(arg, *args, **kwargs) if not self.as_index: if isinstance(result.index, MultiIndex): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index b14c355f44a1c..79eac770f547e 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1946,6 +1946,29 @@ def test_grouping_ndarray(self): expected = self.df.groupby('A').sum() assert_frame_equal(result, expected, check_names=False) # Note: no names when grouping by value + def test_agg_consistency(self): + # agg(func) and agg([func]) must give consistent results + # GH 6715 + + def P1(a): + try: + return np.percentile(a.dropna(), q=1) + except: + return np.nan + + import datetime as dt + df = DataFrame({'col1':[1,2,3,4], + 'col2':[10,25,26,31], + 'date':[dt.date(2013,2,10),dt.date(2013,2,10),dt.date(2013,2,11),dt.date(2013,2,11)]}) + + g = df.groupby('date') + + expected = g.agg([P1]) + expected.columns = expected.columns.levels[0] + + result = g.agg(P1) + assert_frame_equal(result, expected) + def test_apply_typecast_fail(self): df = DataFrame({'d': [1., 1., 1., 2., 2., 2.], 'c': np.tile(['a', 'b', 'c'], 2),