diff --git a/_unittests/ut_df/test_pandas_groupbynan.py b/_unittests/ut_df/test_pandas_groupbynan.py index 7146c96..141568b 100644 --- a/_unittests/ut_df/test_pandas_groupbynan.py +++ b/_unittests/ut_df/test_pandas_groupbynan.py @@ -35,7 +35,8 @@ def test_pandas_groupbynan(self): self.assertTrue(numpy.isnan(li[-1])) except AssertionError as e: raise AssertionError( - "Issue with value {0}\n--df--\n{1}\n--co--\n{2}".format(li, df, co)) from e + "Issue with value {}\n--df--\n{}\n--gr--\n{}\n--co--\n{}".format( + li, df, gr.count(), co)) from e for ty in types: data = [{"this": "cst", "type": "tt1=" + str(ty[0]), "value": ty[1]}, diff --git a/pandas_streaming/df/dataframe_helpers.py b/pandas_streaming/df/dataframe_helpers.py index df02ab9..18ead2e 100644 --- a/pandas_streaming/df/dataframe_helpers.py +++ b/pandas_streaming/df/dataframe_helpers.py @@ -419,14 +419,27 @@ def pandas_groupby_nan(df, by, axis=0, as_index=False, suffix=None, nanback=True res.grouper.groupings[0]._group_index = Index(new_val) res.grouper.groupings[0].obj[b].replace( fnan, numpy.nan, inplace=True) - if isinstance(res.grouper.groupings[0].grouper, numpy.ndarray): - arr = numpy.array(new_val) - res.grouper.groupings[0].grouper = arr - if hasattr(res.grouper.groupings[0], '_cache') and 'result_index' in res.grouper.groupings[0]._cache: - del res.grouper.groupings[0]._cache['result_index'] + if hasattr(res.grouper, 'grouping'): + if isinstance(res.grouper.groupings[0].grouper, numpy.ndarray): + arr = numpy.array(new_val) + res.grouper.groupings[0].grouper = arr + if (hasattr(res.grouper.groupings[0], '_cache') and + 'result_index' in res.grouper.groupings[0]._cache): + del res.grouper.groupings[0]._cache['result_index'] + else: + raise NotImplementedError("Not implemented for type: {0}".format( + type(res.grouper.groupings[0].grouper))) else: - raise NotImplementedError("Not implemented for type: {0}".format( - type(res.grouper.groupings[0].grouper))) + grouper = res.grouper._get_grouper() + if isinstance(grouper, numpy.ndarray): + arr = numpy.array(new_val) + res.grouper.groupings[0].grouping_vector = arr + if (hasattr(res.grouper.groupings[0], '_cache') and + 'result_index' in res.grouper.groupings[0]._cache): + res.grouper.groupings[0]._cache = {} + else: + raise NotImplementedError("Not implemented for type: {0}".format( + type(res.grouper.groupings[0].grouper))) res.grouper._cache['result_index'] = res.grouper.groupings[0]._group_index else: if not nanback: