Skip to content

Commit

Permalink
BUG: fix regression with SeriesGrouper with Timestamp index (pandas-de…
Browse files Browse the repository at this point in the history
…v#42390)

This fixes a regression introduced in c355ed1 where the cache is not
initialized with the correct state of islider and vslider.

On a Timestamp index this triggers a "ValueError: Length of values does not match length of index".

Closes pandas-dev#42390

Signed-off-by: Philippe Pepiot <phil@lowatt.fr>
  • Loading branch information
philpep committed Jul 5, 2021
1 parent 5672766 commit e09091c
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 8 deletions.
18 changes: 10 additions & 8 deletions pandas/_libs/reduction.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -165,10 +165,6 @@ cdef class SeriesBinGrouper(_BaseGrouper):

result = np.empty(self.ngroups, dtype='O')

cached_index, cached_series = self._init_dummy_series_and_index(
islider, vslider
)

start = 0
try:
for i in range(self.ngroups):
Expand All @@ -178,6 +174,11 @@ cdef class SeriesBinGrouper(_BaseGrouper):
islider.move(start, end)
vslider.move(start, end)

if cached_index is None:
cached_index, cached_series = self._init_dummy_series_and_index(
islider, vslider
)

self._update_cached_objs(
cached_series, cached_index, islider, vslider)

Expand Down Expand Up @@ -254,10 +255,6 @@ cdef class SeriesGrouper(_BaseGrouper):

result = np.empty(self.ngroups, dtype='O')

cached_index, cached_series = self._init_dummy_series_and_index(
islider, vslider
)

start = 0
try:
for i in range(n):
Expand All @@ -275,6 +272,11 @@ cdef class SeriesGrouper(_BaseGrouper):
islider.move(start, end)
vslider.move(start, end)

if cached_index is None:
cached_index, cached_series = self._init_dummy_series_and_index(
islider, vslider
)

self._update_cached_objs(
cached_series, cached_index, islider, vslider)

Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/groupby/test_bin_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,23 @@ def test_series_grouper():
tm.assert_almost_equal(counts, exp_counts)


def test_series_grouper_timestamp():
    # GH 42390
    # Regression test: with a tz-aware Timestamp index, SeriesGrouper must
    # build its cached dummy Series/Index from the sliders *after* they have
    # been moved onto the first group; otherwise calling the aggregation
    # raised "ValueError: Length of values does not match length of index".
    obj = Series(
        [1],
        index=[pd.Timestamp("2018-01-16 00:00:00+00:00")],
        dtype=np.intp,
    )
    # Use an explicit intp dtype so the labels do not depend on the platform's
    # default integer width (np.array([0]) is int32 on some platforms).
    # NOTE(review): presumably SeriesGrouper takes an intp-typed labels
    # buffer, as the other tests in this module pass -- confirm against
    # reduction.pyx.
    labels = np.array([0], dtype=np.intp)

    def agg(series):
        # Touching the cached series here is what used to raise; the
        # all-NA branch is never taken for this input.
        if series.isna().values.all():
            return None
        return np.sum(series)

    grouper = libreduction.SeriesGrouper(obj, agg, labels, 1)
    result, counts = grouper.get_result()

    tm.assert_almost_equal(result, np.array([1], dtype=object))
    # NOTE(review): counts is assumed to be int64 regardless of platform;
    # pin the expected dtype so the comparison is not platform-dependent.
    tm.assert_almost_equal(counts, np.array([1], dtype=np.int64))


def test_series_grouper_result_length_difference():
# GH 40014
obj = Series(np.random.randn(10), dtype="float64")
Expand Down

0 comments on commit e09091c

Please sign in to comment.