BUG: Groupby with categorical multiIndex and timedelta returns incorrect type.

Groupby (`observed=False`) with a categorical MultiIndex and integer data values returns zero for categories that do not appear in the data, as seen in the first example (there are no wild parrots).

``` python
import pandas as pd

animals = ['Falcon', 'Parrot']
types = ['Captive', 'Wild']

df = pd.DataFrame({
    'animal': pd.Categorical(['Falcon', 'Falcon', 'Parrot', 'Parrot'],
                             categories=animals),
    'type': pd.Categorical(['Captive', 'Wild', 'Captive', 'Captive'],
                           categories=types),
    'time': [1, 2, 3, 4]
     })

df.set_index(['animal', 'type'], inplace=True)
df.groupby(level=['animal', 'type'], observed=False).sum()
```

| index                      | time |
|------------------------|-----|
| ('Falcon', 'Captive') |    1 |
| ('Falcon', 'Wild')    |    2 |
| ('Parrot', 'Captive') |    7 |
| ('Parrot', 'Wild')    |    0 |

But when the data values are `Timedelta`, the same call raises a `TypeError`: the unobserved categories are filled with the `int` value `0` instead of a `Timedelta`.

``` python
import pandas as pd

animals = ['Falcon', 'Parrot']
types = ['Captive', 'Wild']

df = pd.DataFrame({
    'animal': pd.Categorical(['Falcon', 'Falcon', 'Parrot', 'Parrot'],
                             categories=animals),
    'type': pd.Categorical(['Captive', 'Wild', 'Captive', 'Captive'],
                           categories=types),
    'time': [1, 2, 3, 4]
     })
# Convert time to time delta.
df['time'] = pd.to_timedelta(df['time'])
df.set_index(['animal', 'type'], inplace=True)
df.groupby(level=['animal', 'type'], observed=False).sum()
```

Error:
```
  ---------------------------------------------------------------------------
  TypeError                                 Traceback (most recent call last)
  /tmp/ipykernel_106514/2243147148.py in <module>
       14 df['time'] = pd.to_timedelta(df['time'])
       15 df.set_index(['animal', 'type'], inplace=True)
  ---> 16 df.groupby(level=['animal', 'type'], observed=False).sum()

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/groupby/groupby.py in sum(self, numeric_only, min_count)
     1851             )
     1852 
  -> 1853         return self._reindex_output(result, fill_value=0)
     1854 
     1855     @final

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/groupby/groupby.py in _reindex_output(self, output, fill_value)
     3169                 "fill_value": fill_value,
     3170             }
  -> 3171             return output.reindex(**d)
     3172 
     3173         # GH 13204

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/util/_decorators.py in wrapper(*args, **kwargs)
      322         @wraps(func)
      323         def wrapper(*args, **kwargs) -> Callable[..., Any]:
  --> 324             return func(*args, **kwargs)
      325 
      326         kind = inspect.Parameter.POSITIONAL_OR_KEYWORD

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/frame.py in reindex(self, *args, **kwargs)
     4770         kwargs.pop("axis", None)
     4771         kwargs.pop("labels", None)
  -> 4772         return super().reindex(**kwargs)
     4773 
     4774     @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"])

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/generic.py in reindex(self, *args, **kwargs)
     4816 
     4817         # perform the reindex on the axes
  -> 4818         return self._reindex_axes(
     4819             axes, level, limit, tolerance, method, fill_value, copy
     4820         ).__finalize__(self, method="reindex")

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
     4595         index = axes["index"]
     4596         if index is not None:
  -> 4597             frame = frame._reindex_index(
     4598                 index, method, copy, level, fill_value, limit, tolerance
     4599             )

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
     4614             new_index, method=method, level=level, limit=limit, tolerance=tolerance
     4615         )
  -> 4616         return self._reindex_with_indexers(
     4617             {0: [new_index, indexer]},
     4618             copy=copy,

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
     4881 
     4882             # TODO: speed up on homogeneous DataFrame objects
  -> 4883             new_data = new_data.reindex_indexer(
     4884                 index,
     4885                 indexer,

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/internals/managers.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy, consolidate, only_slice)
      678             )
      679         else:
  --> 680             new_blocks = [
      681                 blk.take_nd(
      682                     indexer,

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/internals/managers.py in <listcomp>(.0)
      679         else:
      680             new_blocks = [
  --> 681                 blk.take_nd(
      682                     indexer,
      683                     axis=1,

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/internals/blocks.py in take_nd(self, indexer, axis, new_mgr_locs, fill_value)
     1143             allow_fill = True
     1144 
  -> 1145         new_values = algos.take_nd(
     1146             values, indexer, axis=axis, allow_fill=allow_fill, fill_value=fill_value
     1147         )

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/array_algos/take.py in take_nd(arr, indexer, axis, fill_value, allow_fill)
       99             # i.e. DatetimeArray, TimedeltaArray
      100             arr = cast("NDArrayBackedExtensionArray", arr)
  --> 101             return arr.take(
      102                 indexer, fill_value=fill_value, allow_fill=allow_fill, axis=axis
      103             )

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/arrays/_mixins.py in take(self, indices, allow_fill, fill_value, axis)
       95     ) -> NDArrayBackedExtensionArrayT:
       96         if allow_fill:
  ---> 97             fill_value = self._validate_scalar(fill_value)
       98 
       99         new_data = take(

  ~/.local/share/virtualenvs/dv-eRvxryI1/lib/python3.9/site-packages/pandas/core/arrays/datetimelike.py in _validate_scalar(self, value, allow_listlike, setitem, unbox)
      643         else:
      644             msg = self._validation_error_message(value, allow_listlike)
  --> 645             raise TypeError(msg)
      646 
      647         if not unbox:

  TypeError: value should be a 'Timedelta' or 'NaT'. Got 'int' instead.
```

pd.show_versions()

INSTALLED VERSIONS
------------------
commit           : 73c68257545b5f8530b7044f56647bd2db92e2ba
python           : 3.9.2.final.0
python-bits      : 64
OS               : Linux
OS-release       : 5.10.0-8-amd64
Version          : #1 SMP Debian 5.10.46-5 (2021-09-23)
machine          : x86_64
processor        : 
byteorder        : little
LC_ALL           : None
LANG             : en_GB.UTF-8
LOCALE           : en_GB.UTF-8

pandas           : 1.3.3
numpy            : 1.19.5
pytz             : 2021.3
dateutil         : 2.8.2
pip              : 21.2.4
setuptools       : 58.1.0
Cython           : 0.29.24
pytest           : 6.2.5
hypothesis       : None
sphinx           : None
blosc            : None
feather          : None
xlsxwriter       : None
lxml.etree       : 4.6.3
html5lib         : None
pymysql          : None
psycopg2         : None
jinja2           : 3.0.2
IPython          : 7.28.0
pandas_datareader: None
bs4              : 4.10.0
bottleneck       : 1.3.2
fsspec           : 2021.10.0
fastparquet      : None
gcsfs            : None
matplotlib       : 3.4.3
numexpr          : 2.7.3
odfpy            : None
openpyxl         : None
pandas_gbq       : None
pyarrow          : None
pyxlsb           : None
s3fs             : None
scipy            : 1.7.1
sqlalchemy       : 1.3.24
tables           : 3.6.1
tabulate         : 0.8.9
xarray           : 0.19.0
xlrd             : 2.0.1
xlwt             : None
numba            : 0.54.0

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

BUG: Groupby with categorical multiIndex and timedelta returns incorrect type. #43891

INSTALLED VERSIONS

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

index	time
('Falcon', 'Captive')	1
('Falcon', 'Wild')	2
('Parrot', 'Captive')	7
('Parrot', 'Wild')	0

Uh oh!

BUG: Groupby with categorical multiIndex and timedelta returns incorrect type. #43891

Description

INSTALLED VERSIONS

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions