Skip to content

Commit

Permalink
Fix DataFrame reduction for upcoming Pandas
Browse files Browse the repository at this point in the history
Issue hgrecco#174 reports that DataFrame reductions were broken by the latest Pint-Pandas changes. This commit adapts Pint-Pandas to work with the upcoming Pandas 2.1 release, currently scheduled for Aug 20, 2023.

Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com>
  • Loading branch information
MichaelTiemannOSC committed Jul 23, 2023
1 parent 1506df2 commit 772636b
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 7 deletions.
19 changes: 13 additions & 6 deletions pint_pandas/pint_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,8 +208,8 @@ def __repr__(self):
float: pd.Float64Dtype(),
np.float64: pd.Float64Dtype(),
np.float32: pd.Float32Dtype(),
np.complex128: pd.core.dtypes.dtypes.PandasDtype("complex128"),
np.complex64: pd.core.dtypes.dtypes.PandasDtype("complex64"),
np.complex128: pd.core.dtypes.dtypes.NumpyEADtype("complex128"),
np.complex64: pd.core.dtypes.dtypes.NumpyEADtype("complex64"),
# np.float16: pd.Float16Dtype(),
}
dtypeunmap = {v: k for k, v in dtypemap.items()}
Expand Down Expand Up @@ -520,7 +520,10 @@ def take(self, indices, allow_fill=False, fill_value=None):
# magnitude is in fact an array scalar, which will get rejected by pandas.
fill_value = fill_value[()]

result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)
with warnings.catch_warnings():
warnings.simplefilter("ignore")
# Turn off warning that PandasArray is deprecated for ``take``
result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill)

return PintArray(result, dtype=self.dtype)

Expand Down Expand Up @@ -990,7 +993,7 @@ def _to_array_of_quantity(self, copy=False):
qtys = [
self._Q(item, self._dtype.units)
if item is not self.dtype.na_value.m
else item
else self.dtype.na_value
for item in self._data
]
with warnings.catch_warnings(record=True):
Expand Down Expand Up @@ -1048,7 +1051,7 @@ def searchsorted(self, value, side="left", sorter=None):
value = [item.to(self.units).magnitude for item in value]
return arr.searchsorted(value, side=side, sorter=sorter)

def _reduce(self, name, **kwds):
def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds):
"""
Return a scalar result of performing the reduction operation.
Expand Down Expand Up @@ -1092,14 +1095,18 @@ def _reduce(self, name, **kwds):

if isinstance(self._data, ExtensionArray):
try:
result = self._data._reduce(name, **kwds)
result = self._data._reduce(name, skipna=skipna, keepdims=keepdims, **kwds)
except NotImplementedError:
result = functions[name](self.numpy_data, **kwds)

if name in {"all", "any", "kurt", "skew"}:
return result
if name == "var":
if keepdims:
return PintArray(result, f"pint[({self.units})**2]")
return self._Q(result, self.units**2)
if keepdims:
return PintArray(result, self.dtype)
return self._Q(result, self.units)

def _accumulate(self, name: str, *, skipna: bool = True, **kwds):
Expand Down
13 changes: 13 additions & 0 deletions pint_pandas/testsuite/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,3 +202,16 @@ def test_issue_139():
assert np.all(a_m[0:4] == a_cm[0:4])
for x, y in zip(a_m[4:], a_cm[4:]):
assert unp.isnan(x) == unp.isnan(y)

class TestIssue174(BaseExtensionTests):
    """Regression test for issue #174: summing a ``pint[m]`` DataFrame."""

    def test_sum(self):
        """Sum a 2x3 ``pint[m]`` frame along each axis and check the result."""
        frame = pd.DataFrame([[0, 1, 2], [3, 4, 5]]).astype("pint[m]")

        # axis=0 collapses the rows, leaving one total per column.
        per_column = frame.sum(axis=0)
        self.assert_series_equal(
            per_column, pd.Series([3, 5, 7], dtype="pint[m]")
        )

        # axis=1 collapses the columns, leaving one total per row.
        per_row = frame.sum(axis=1)
        self.assert_series_equal(
            per_row, pd.Series([3, 12], dtype="pint[m]")
        )
6 changes: 5 additions & 1 deletion pint_pandas/testsuite/test_pandas_extensiontests.py
Original file line number Diff line number Diff line change
Expand Up @@ -523,7 +523,7 @@ def _check_divmod_op(self, s, op, other, exc=None):
divmod(s, other)

def _get_exception(self, data, op_name):
if data.data.dtype == pd.core.dtypes.dtypes.PandasDtype("complex128"):
if data.data.dtype == pd.core.dtypes.dtypes.NumpyEADtype("complex128"):
if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]:
return op_name, TypeError
if op_name in ["__pow__", "__rpow__"]:
Expand Down Expand Up @@ -627,6 +627,10 @@ def check_reduce(self, s, op_name, skipna):
expected = expected_m
assert result == expected

@pytest.mark.skip("tests not written yet")
def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
    """Placeholder check that intentionally does nothing.

    Marked as skipped with the reason "tests not written yet". The
    arguments ``ser``, ``op_name`` and ``skipna`` are accepted but unused.
    """
    return None

@pytest.mark.parametrize("skipna", [True, False])
def test_reduce_scaling(
self, data, all_numeric_reductions, skipna, USE_UNCERTAINTIES
Expand Down

0 comments on commit 772636b

Please sign in to comment.