From 8bae0d42db5e4fbd93407a016b35267f830a4ace Mon Sep 17 00:00:00 2001 From: dsm054 Date: Sun, 22 Mar 2015 11:38:18 -0400 Subject: [PATCH] BUG: ensure we use group sizes, not group counts, in transform (GH9697) --- doc/source/whatsnew/v0.16.1.txt | 2 ++ pandas/core/groupby.py | 2 +- pandas/tests/test_groupby.py | 13 +++++++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index bf01d3b21f3fa..5801d1b811790 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -31,3 +31,5 @@ Performance Improvements Bug Fixes ~~~~~~~~~ + +- Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`) diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 73439fb1e535d..6d98b3b99021b 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2453,7 +2453,7 @@ def _transform_fast(self, func): if isinstance(func, compat.string_types): func = getattr(self,func) values = func().values - counts = self.count().values + counts = self.size().values values = np.repeat(values, com._ensure_platform_int(counts)) return self._set_result_index_ordered(Series(values)) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 79ebb80fc9ebb..e7001eb09f20c 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1058,6 +1058,19 @@ def test_transform_function_aliases(self): expected = self.df.groupby('A')['C'].transform(np.mean) assert_series_equal(result, expected) + def test_transform_length(self): + # GH 9697 + df = pd.DataFrame({'col1':[1,1,2,2], 'col2':[1,2,3,np.nan]}) + expected = pd.Series([3.0]*4) + def nsum(x): + return np.nansum(x) + results = [df.groupby('col1').transform(sum)['col2'], + df.groupby('col1')['col2'].transform(sum), + df.groupby('col1').transform(nsum)['col2'], + df.groupby('col1')['col2'].transform(nsum)] + for result in results: + assert_series_equal(result, expected) + def test_with_na(self): index = Index(np.arange(10))