From b5954fdc78f8e8dd386ee77206e59af44c90489c Mon Sep 17 00:00:00 2001 From: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 15 Oct 2022 15:25:29 -0400 Subject: [PATCH 01/51] Add support for UFloat in PintArray (#139) Signed-off-by: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- CHANGES | 1 + pint_pandas/pint_array.py | 29 ++++++++++++++++++++-------- pint_pandas/testsuite/test_issues.py | 22 +++++++++++++++++++++ 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/CHANGES b/CHANGES index cabd7c0a..9415979e 100644 --- a/CHANGES +++ b/CHANGES @@ -14,6 +14,7 @@ pint-pandas Changelog - Fixed bug preventing adding to empty slices of PintArrays #69 - Notebook updated to show plotting #116 #9 #43 - Tests reorganised #131 +- Allow UFloat as type of magnitude supported in PintArray 0.2 (2021-03-23) ---------------- diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index bf14a742..983fd4db 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -19,8 +19,12 @@ from pandas.core.arrays.base import ExtensionOpsMixin from pandas.core.indexers import check_array_indexer from pint import compat, errors -from pint.quantity import _Quantity -from pint.unit import _Unit +from pint.compat import HAS_UNCERTAINTIES +from pint.facets.plain.quantity import PlainQuantity as _Quantity +from pint.facets.plain.unit import PlainUnit as _Unit + +if HAS_UNCERTAINTIES: + from uncertainties import UFloat class PintType(ExtensionDtype): @@ -217,12 +221,21 @@ def __init__(self, values, dtype=None, copy=False): values = np.array(values, copy=copy) copy = False if not np.issubdtype(values.dtype, np.floating): - warnings.warn( - f"pint-pandas does not support magnitudes of {values.dtype}. 
Converting magnitudes to float.", - category=RuntimeWarning, - ) - values = values.astype(float) - copy = False + # Implement saving throw for uncertainties + if not any([isinstance(v, UFloat) for v in values]) and not all( + [isinstance(v, (UFloat, np.floating, float)) for v in values] + ): + warnings.warn( + f"pint-pandas does not support magnitudes of {values.dtype}. Converting magnitudes to float.", + category=RuntimeWarning, + ) + values = values.astype(float) + copy = False + else: + warnings.warn( + f"UFloats seen in {values}; pint-pandas letting the ducks fly!", + category=RuntimeWarning, + ) if copy: values = values.copy() self._data = values diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 2eaea692..d013ebc4 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -107,3 +107,25 @@ def test_issue_88(): q_mm = ureg.Quantity([1000, 2000], "mm") b = PintArray(q_mm, "m") helpers.assert_quantity_almost_equal(q_m, b.quantity) + +def test_issue_139(): + from pint.compat import HAS_UNCERTAINTIES + assert(HAS_UNCERTAINTIES) + from uncertainties import ufloat + from uncertainties import unumpy as unp + + q1 = 1.234 + q2 = 5.678 + q_nan = np.nan + + u1 = ufloat(1, 0.2) + u2 = ufloat(3, 0.4) + u_nan = ufloat(np.nan, 0.0) + u_plus_or_minus_nan = ufloat(0.0, np.nan) + u_nan_plus_or_minus_nan = ufloat(np.nan, np.nan) + + a_m = PintArray([q1, u1, q2, u2, q_nan, u_nan, u_plus_or_minus_nan, u_nan_plus_or_minus_nan], ureg.m) + a_cm = a_m.astype('pint[cm]') + assert np.all(a_m[0:4] == a_cm[0:4]) + for x, y in zip(a_m[4:], a_cm[4:]): + assert unp.isnan(x) == unp.isnan(y) From 0ad1cf9b024c09cfceceead79fae23b06299a28c Mon Sep 17 00:00:00 2001 From: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 19 Oct 2022 08:04:59 -0400 Subject: [PATCH 02/51] Fix failures and errors found by test_pandas_extensions test suite. Big remaining problem is unhashable UFloat type. 
Signed-off-by: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 49 ++-- pint_pandas/testsuite/test_issues.py | 19 +- .../testsuite/test_pandas_extensiontests.py | 216 ++++++++++++++++-- 3 files changed, 241 insertions(+), 43 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 983fd4db..cc98f2d5 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -24,7 +24,9 @@ from pint.facets.plain.unit import PlainUnit as _Unit if HAS_UNCERTAINTIES: - from uncertainties import UFloat + from uncertainties import UFloat, ufloat + from uncertainties import unumpy as unp + _ufloat_nan = ufloat(np.nan, 0) class PintType(ExtensionDtype): @@ -135,7 +137,10 @@ def name(self): @property def na_value(self): - return self.ureg.Quantity(np.nan, self.units) + if HAS_UNCERTAINTIES: + return self.ureg.Quantity(_ufloat_nan, self.units) + else: + return self.ureg.Quantity(np.nan, self.units) def __hash__(self): # make myself hashable @@ -220,22 +225,21 @@ def __init__(self, values, dtype=None, copy=False): if not isinstance(values, np.ndarray): values = np.array(values, copy=copy) copy = False - if not np.issubdtype(values.dtype, np.floating): - # Implement saving throw for uncertainties - if not any([isinstance(v, UFloat) for v in values]) and not all( - [isinstance(v, (UFloat, np.floating, float)) for v in values] - ): + if HAS_UNCERTAINTIES: + if not all([isinstance(v, UFloat) for v in values]): warnings.warn( - f"pint-pandas does not support magnitudes of {values.dtype}. Converting magnitudes to float.", + f"pint-pandas does not support certain magnitudes of {values.dtype}. 
Converting magnitudes to ufloat.", category=RuntimeWarning, ) - values = values.astype(float) + values = [ufloat(v, 0) for v in values] copy = False - else: + elif not np.issubdtype(values.dtype, np.floating): warnings.warn( - f"UFloats seen in {values}; pint-pandas letting the ducks fly!", + f"pint-pandas does not support magnitudes of {values.dtype}. Converting magnitudes to float.", category=RuntimeWarning, ) + values = values.astype(float) + copy = False if copy: values = values.copy() self._data = values @@ -282,7 +286,7 @@ def __getitem__(self, item): item : scalar or PintArray """ if is_integer(item): - return self._data[item] * self.units + return self._Q(self._data[item], self.units) item = check_array_indexer(self, item) @@ -349,6 +353,12 @@ def isna(self): ------- missing : np.array """ + if HAS_UNCERTAINTIES: + # GH https://github.com/lebigot/uncertainties/issues/164 + if isinstance(self._data, np.ndarray) and len(self._data)==0: + # True or False doesn't matter--we just need the value for the type + return np.full((0), True) + return unp.isnan(self._data) return np.isnan(self._data) def astype(self, dtype, copy=True): @@ -496,8 +506,12 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): dtype = PintType(master_scalar.units) def quantify_nan(item): - if type(item) is float: - return item * dtype.units + if HAS_UNCERTAINTIES: + if type(item) is UFloat: + return item * dtype.units + else: + if type(item) is float: + return item * dtype.units return item if isinstance(master_scalar, _Quantity): @@ -517,6 +531,8 @@ def _from_factorized(cls, values, original): def _values_for_factorize(self): arr = self._data + if HAS_UNCERTAINTIES: + return arr, _ufloat_nan return arr, np.NaN def value_counts(self, dropna=True): @@ -544,7 +560,10 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data if dropna: - data = data[~np.isnan(data)] + if HAS_UNCERTAINTIES: + data = data[~unp.isnan(data)] + else: + data = 
data[~np.isnan(data)] data_list = data.tolist() index = list(set(data)) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index d013ebc4..8613b8ed 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -7,6 +7,15 @@ from pandas.tests.extension.base.base import BaseExtensionTests from pint.testsuite import helpers +try: + import uncertainties.unumpy as unp + from uncertainties import ufloat, UFloat + HAS_UNCERTAINTIES = True +except ImportError: + unp = np + ufloat = Ufloat = None + HAS_UNCERTAINTIES = False + from pint_pandas import PintArray, PintType ureg = PintType.ureg @@ -15,9 +24,9 @@ class TestIssue21(BaseExtensionTests): @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_offset_concat(self): - q_a = ureg.Quantity(np.arange(5), ureg.Unit("degC")) - q_b = ureg.Quantity(np.arange(6), ureg.Unit("degC")) - q_a_ = np.append(q_a, np.nan) + q_a = ureg.Quantity(np.arange(5)+ufloat(0,0), ureg.Unit("degC")) + q_b = ureg.Quantity(np.arange(6)+ufloat(0,0), ureg.Unit("degC")) + q_a_ = np.append(q_a, ufloat(np.nan, 0)) a = pd.Series(PintArray(q_a)) b = pd.Series(PintArray(q_b)) @@ -118,8 +127,8 @@ def test_issue_139(): q2 = 5.678 q_nan = np.nan - u1 = ufloat(1, 0.2) - u2 = ufloat(3, 0.4) + u1 = ufloat(1, 0) + u2 = ufloat(3, 0) u_nan = ufloat(np.nan, 0.0) u_plus_or_minus_nan = ufloat(0.0, np.nan) u_nan_plus_or_minus_nan = ufloat(np.nan, np.nan) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 472a6ce1..2a910491 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -8,6 +8,15 @@ import numpy as np import pandas as pd import pytest +try: + import uncertainties.unumpy as unp + from uncertainties import ufloat, UFloat + HAS_UNCERTAINTIES = True + _ufloat_nan = ufloat(np.nan, 0) +except ImportError: + unp = np + HAS_UNCERTAINTIES = False + 
from pandas.core import ops from pandas.tests.extension import base from pandas.tests.extension.conftest import ( # noqa: F401 @@ -24,6 +33,120 @@ ureg = PintType.ureg +import pandas._testing as tm + +from pandas import ( + Categorical, + DataFrame, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, +) +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + IntervalArray, + PeriodArray, + TimedeltaArray, +) +from pandas._testing.asserters import ( + assert_equal, + assert_index_equal, + assert_interval_array_equal, + assert_period_array_equal, + assert_datetime_array_equal, + assert_timedelta_array_equal, + assert_almost_equal, + assert_extension_array_equal, + assert_numpy_array_equal, +) + + +def uassert_equal(left, right, **kwargs) -> None: + """ + Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. + Parameters + ---------- + left, right : Index, Series, DataFrame, ExtensionArray, or np.ndarray + The two items to be compared. + **kwargs + All keyword arguments are passed through to the underlying assert method. 
+ """ + __tracebackhide__ = True + + if isinstance(left, Index): + assert_index_equal(left, right, **kwargs) + if isinstance(left, (DatetimeIndex, TimedeltaIndex)): + assert left.freq == right.freq, (left.freq, right.freq) + elif isinstance(left, Series): + uassert_series_equal(left, right, **kwargs) + elif isinstance(left, DataFrame): + uassert_frame_equal(left, right, **kwargs) + elif isinstance(left, IntervalArray): + assert_interval_array_equal(left, right, **kwargs) + elif isinstance(left, PeriodArray): + assert_period_array_equal(left, right, **kwargs) + elif isinstance(left, DatetimeArray): + assert_datetime_array_equal(left, right, **kwargs) + elif isinstance(left, TimedeltaArray): + assert_timedelta_array_equal(left, right, **kwargs) + elif isinstance(left, ExtensionArray): + uassert_extension_array_equal(left, right, **kwargs) + elif isinstance(left, np.ndarray): + uassert_numpy_array_equal(left, right, **kwargs) + elif isinstance(left, str): + assert kwargs == {} + assert left == right + else: + assert kwargs == {} + uassert_almost_equal(left, right) + + +def uassert_series_equal(left, right, **kwargs): + assert left.shape == right.shape + if getattr(left, "dtype", False): + assert left.dtype == right.dtype + assert_equal(left.index, right.index) + uassert_equal(left.values, right.values) + +def uassert_frame_equal(left, right, **kwargs): + assert left.shape == right.shape + if getattr(left, "dtype", False): + assert left.dtype == right.dtype + assert_equal(left.index, right.index) + uassert_equal(left.values, right.values) + +def uassert_extension_array_equal(left, right, **kwargs): + assert left.shape == right.shape + if getattr(left, "dtype", False): + assert left.dtype == right.dtype + assert all([str(l) == str(r) for l, r in zip(left, right)]) + +def uassert_numpy_array_equal(left, right, **kwargs): + if getattr(left, "dtype", False): + assert left.dtype == right.dtype + assert all([str(l) == str(r) for l, r in zip(left, right)]) + +def 
uassert_almost_equal(left, right, **kwargs): + assert_almost_equal(left, right, **kwargs) + +if HAS_UNCERTAINTIES: + # The following functions all need a lot of work... + # tm.assert_equal = uassert_equal + # tm.assert_series_equal = uassert_series_equal + # tm.assert_frame_equal = uassert_frame_equal + # tm.assert_extension_array_equal = uassert_extension_array_equal + # Fortunately, ufloat (x, 0) == ufloat (x, 0) (zero uncertainty is an exact number) + pass + +# @pytest.fixture(params=[True,False]) +# def HAS_UNCERTAINTIES(): +# return params @pytest.fixture(params=[True, False]) def box_in_series(request): @@ -38,19 +161,32 @@ def dtype(): @pytest.fixture def data(): - return PintArray.from_1darray_quantity(np.arange(start=1.0, stop=101.0) * ureg.nm) + if HAS_UNCERTAINTIES: + d = (np.arange(start=1.0, stop=101.0)+ufloat(0,0)) * ureg.nm + else: + d = np.arange(start=1.0, stop=101.0) * ureg.nm + return PintArray.from_1darray_quantity(d) @pytest.fixture def data_missing(): - return PintArray.from_1darray_quantity([np.nan, 1] * ureg.meter) + if HAS_UNCERTAINTIES: + dm = [_ufloat_nan, ufloat(1, 0)] * ureg.meter + else: + dm = [np.nan, 1] * ureg.meter + return PintArray.from_1darray_quantity(dm) @pytest.fixture def data_for_twos(): - x = [ - 2.0, - ] * 100 + if HAS_UNCERTAINTIES: + x = [ + ufloat (2.0, 0) + ] * 100 + else: + x = [ + 2.0, + ] * 100 return PintArray.from_1darray_quantity(x * ureg.meter) @@ -84,14 +220,22 @@ def sort_by_key(request): @pytest.fixture def data_for_sorting(): - return PintArray.from_1darray_quantity([0.3, 10, -50] * ureg.centimeter) + if HAS_UNCERTAINTIES: + ds = [ufloat(0.3, 0), ufloat(10, 0), ufloat(-50, 0)] + else: + ds = [0.3, 10, -50] + return PintArray.from_1darray_quantity(ds * ureg.centimeter) # should probably get more sophisticated and do something like # [1 * ureg.meter, 3 * ureg.meter, 10 * ureg.centimeter] @pytest.fixture def data_missing_for_sorting(): - return PintArray.from_1darray_quantity([4, np.nan, -5] * ureg.centimeter) 
+ if HAS_UNCERTAINTIES: + dms = [ufloat(4, 0), _ufloat_nan, ufloat(-5, 0)] + else: + dms = [4, np.nan, -5] + return PintArray.from_1darray_quantity(dms * ureg.centimeter) # should probably get more sophisticated and do something like # [4 * ureg.meter, np.nan, 10 * ureg.centimeter] @@ -99,6 +243,8 @@ def data_missing_for_sorting(): @pytest.fixture def na_cmp(): """Binary operator for comparing NA values.""" + if HAS_UNCERTAINTIES: + return lambda x, y: bool(unp.isnan(x.magnitude)) & bool(unp.isnan(y.magnitude)) return lambda x, y: bool(np.isnan(x.magnitude)) & bool(np.isnan(y.magnitude)) @@ -114,7 +260,13 @@ def data_for_grouping(): a = 1.0 b = 2.0**32 + 1 c = 2.0**32 + 10 - return PintArray.from_1darray_quantity([b, b, np.nan, np.nan, a, a, b, c] * ureg.m) + _n = np.nan + if HAS_UNCERTAINTIES: + a = a + ufloat(0, 0) + b = b + ufloat(0, 0) + c = c + ufloat(0, 0) + _n = _ufloat_nan + return PintArray.from_1darray_quantity([b, b, _n, _n, a, a, b, c] * ureg.m) # === missing from pandas extension docs about what has to be included in tests === @@ -160,21 +312,36 @@ def all_compare_operators(request): return request.param -# commented functions aren't implemented -_all_numeric_reductions = [ - "sum", - "max", - "min", - "mean", - # "prod", - "std", - "var", - "median", - "sem", - "kurt", - "skew", +if HAS_UNCERTAINTIES: + # commented functions aren't implemented + _all_numeric_reductions = [ + "sum", + "max", + "min", + # "mean", + # "prod", + # "std", + # "var", + # "median", + # "sem", + # "kurt", + # "skew", ] - +else: + # commented functions aren't implemented + _all_numeric_reductions = [ + "sum", + "max", + "min", + "mean", + # "prod", + "std", + "var", + "median", + "sem", + "kurt", + "skew", + ] @pytest.fixture(params=_all_numeric_reductions) def all_numeric_reductions(request): @@ -676,7 +843,10 @@ def test_reduce_scaling(self, data, all_numeric_reductions, skipna): else: v_nm = r_nm v_mm = r_mm - assert np.isclose(v_nm, v_mm, rtol=1e-3), f"{r_nm} == {r_mm}" + 
if HAS_UNCERTAINTIES: + assert np.isclose(v_nm.n, v_mm.n, rtol=1e-3), f"{r_nm} == {r_mm}" + else: + assert np.isclose(v_nm, v_mm, rtol=1e-3), f"{r_nm} == {r_mm}" class TestBooleanReduce(base.BaseBooleanReduceTests): From 52ab18586048a62de687e3361838e1acc23c76d6 Mon Sep 17 00:00:00 2001 From: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 21 Oct 2022 05:12:35 -0400 Subject: [PATCH 03/51] Preserve incoming np.array when promoting float to ufloat in PintArray Signed-off-by: 72577720+MichaelTiemannOSC@users.noreply.github.com --- pint_pandas/pint_array.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index cc98f2d5..497bc2a4 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -226,12 +226,22 @@ def __init__(self, values, dtype=None, copy=False): values = np.array(values, copy=copy) copy = False if HAS_UNCERTAINTIES: - if not all([isinstance(v, UFloat) for v in values]): + if np.issubdtype(values.dtype, np.floating): + pass + elif not all([isinstance(v, UFloat) for v in values]): warnings.warn( f"pint-pandas does not support certain magnitudes of {values.dtype}. Converting magnitudes to ufloat.", category=RuntimeWarning, ) - values = [ufloat(v, 0) for v in values] + for i in range(len(values)): + # List comprehensions are great, but they are not np.arrays! 
+ if not isinstance(values[i], UFloat): + if np.isnan(values[i]): + values[i] = _ufloat_nan + else: + values[i] = ufloat(values[i], 0) + elif unp.isnan(values[i]): + values[i] = _ufloat_nan copy = False elif not np.issubdtype(values.dtype, np.floating): warnings.warn( @@ -509,6 +519,11 @@ def quantify_nan(item): if HAS_UNCERTAINTIES: if type(item) is UFloat: return item * dtype.units + if type(item) is float: + if np.isnan(item): + return _ufloat_nan * dtype.units + else: + return UFloat(item, 0) * dtype.units else: if type(item) is float: return item * dtype.units From f3cdcad23f769ccae98b3ecb19a01dbc8874f875 Mon Sep 17 00:00:00 2001 From: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Mon, 24 Oct 2022 22:50:40 -0400 Subject: [PATCH 04/51] Fix logic to detect heterogeneous arrays of Ufloats and floats. Signed-off-by: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 497bc2a4..98855122 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -228,7 +228,7 @@ def __init__(self, values, dtype=None, copy=False): if HAS_UNCERTAINTIES: if np.issubdtype(values.dtype, np.floating): pass - elif not all([isinstance(v, UFloat) for v in values]): + elif all([isinstance(v, UFloat) for v in values]) != any([isinstance(v, UFloat) for v in values]): warnings.warn( f"pint-pandas does not support certain magnitudes of {values.dtype}. 
Converting magnitudes to ufloat.", category=RuntimeWarning, From 3ffb61701f94f2374bfa44615cdb8a02f19d5e17 Mon Sep 17 00:00:00 2001 From: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 15 Oct 2022 15:25:29 -0400 Subject: [PATCH 05/51] Add support for UFloat in PintArray (#139) Signed-off-by: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- CHANGES | 1 + pint_pandas/pint_array.py | 27 +++++++++++++++++++++------ pint_pandas/testsuite/test_issues.py | 23 +++++++++++++++++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/CHANGES b/CHANGES index f32fcafd..92ee531a 100644 --- a/CHANGES +++ b/CHANGES @@ -19,6 +19,7 @@ pint-pandas Changelog - Tests reorganised #131 - Shortened form of dimensionless unit now in dtype, eg 'pint[]' #151 - Fixed bug preventing PintArrays with offset units being printed. #150 +- Allow UFloat as type of magnitude supported in PintArray. #139 0.2 (2021-03-23) ---------------- diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 04fb433e..7fc8562e 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -26,6 +26,12 @@ # Magic 'unit' flagging columns with no unit support, used in # quantify/dequantify NO_UNIT = "No Unit" +from pint.compat import HAS_UNCERTAINTIES +# from pint.facets.plain.quantity import PlainQuantity as _Quantity +# from pint.facets.plain.unit import PlainUnit as _Unit + +if HAS_UNCERTAINTIES: + from uncertainties import UFloat class PintType(ExtensionDtype): @@ -224,12 +230,21 @@ def __init__(self, values, dtype=None, copy=False): values = np.array(values, copy=copy) copy = False if not np.issubdtype(values.dtype, np.floating): - warnings.warn( - f"pint-pandas does not support magnitudes of {values.dtype}. 
Converting magnitudes to float.", - category=RuntimeWarning, - ) - values = values.astype(float) - copy = False + # Implement saving throw for uncertainties + if not any([isinstance(v, UFloat) for v in values]) and not all( + [isinstance(v, (UFloat, np.floating, float)) for v in values] + ): + warnings.warn( + f"pint-pandas does not support magnitudes of {values.dtype}. Converting magnitudes to float.", + category=RuntimeWarning, + ) + values = values.astype(float) + copy = False + else: + warnings.warn( + f"UFloats seen in {values}; pint-pandas letting the ducks fly!", + category=RuntimeWarning, + ) if copy: values = values.copy() self._data = values diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 60d2328f..0112a2da 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -116,3 +116,26 @@ def test_issue_127(): a = PintType.construct_from_string("pint[dimensionless]") b = PintType.construct_from_string("pint[]") assert a == b + + +def test_issue_139(): + from pint.compat import HAS_UNCERTAINTIES + assert(HAS_UNCERTAINTIES) + from uncertainties import ufloat + from uncertainties import unumpy as unp + + q1 = 1.234 + q2 = 5.678 + q_nan = np.nan + + u1 = ufloat(1, 0.2) + u2 = ufloat(3, 0.4) + u_nan = ufloat(np.nan, 0.0) + u_plus_or_minus_nan = ufloat(0.0, np.nan) + u_nan_plus_or_minus_nan = ufloat(np.nan, np.nan) + + a_m = PintArray([q1, u1, q2, u2, q_nan, u_nan, u_plus_or_minus_nan, u_nan_plus_or_minus_nan], ureg.m) + a_cm = a_m.astype('pint[cm]') + assert np.all(a_m[0:4] == a_cm[0:4]) + for x, y in zip(a_m[4:], a_cm[4:]): + assert unp.isnan(x) == unp.isnan(y) From 2f898979d2945796ebfd03da2cf41cdc120cf1d1 Mon Sep 17 00:00:00 2001 From: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 19 Oct 2022 08:04:59 -0400 Subject: [PATCH 06/51] Fix failures and errors found by test_pandas_extensions test suite. Big remaining problem is unhashable UFloat type. 
Signed-off-by: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 47 ++-- pint_pandas/testsuite/test_issues.py | 19 +- .../testsuite/test_pandas_extensiontests.py | 216 ++++++++++++++++-- 3 files changed, 240 insertions(+), 42 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 7fc8562e..bf9807c9 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -31,7 +31,9 @@ # from pint.facets.plain.unit import PlainUnit as _Unit if HAS_UNCERTAINTIES: - from uncertainties import UFloat + from uncertainties import UFloat, ufloat + from uncertainties import unumpy as unp + _ufloat_nan = ufloat(np.nan, 0) class PintType(ExtensionDtype): @@ -144,7 +146,10 @@ def name(self): @property def na_value(self): - return self.ureg.Quantity(np.nan, self.units) + if HAS_UNCERTAINTIES: + return self.ureg.Quantity(_ufloat_nan, self.units) + else: + return self.ureg.Quantity(np.nan, self.units) def __hash__(self): # make myself hashable @@ -229,22 +234,21 @@ def __init__(self, values, dtype=None, copy=False): if not isinstance(values, np.ndarray): values = np.array(values, copy=copy) copy = False - if not np.issubdtype(values.dtype, np.floating): - # Implement saving throw for uncertainties - if not any([isinstance(v, UFloat) for v in values]) and not all( - [isinstance(v, (UFloat, np.floating, float)) for v in values] - ): + if HAS_UNCERTAINTIES: + if not all([isinstance(v, UFloat) for v in values]): warnings.warn( - f"pint-pandas does not support magnitudes of {values.dtype}. Converting magnitudes to float.", + f"pint-pandas does not support certain magnitudes of {values.dtype}. 
Converting magnitudes to ufloat.", category=RuntimeWarning, ) - values = values.astype(float) + values = [ufloat(v, 0) for v in values] copy = False - else: + elif not np.issubdtype(values.dtype, np.floating): warnings.warn( - f"UFloats seen in {values}; pint-pandas letting the ducks fly!", + f"pint-pandas does not support magnitudes of {values.dtype}. Converting magnitudes to float.", category=RuntimeWarning, ) + values = values.astype(float) + copy = False if copy: values = values.copy() self._data = values @@ -358,6 +362,12 @@ def isna(self): ------- missing : np.array """ + if HAS_UNCERTAINTIES: + # GH https://github.com/lebigot/uncertainties/issues/164 + if isinstance(self._data, np.ndarray) and len(self._data)==0: + # True or False doesn't matter--we just need the value for the type + return np.full((0), True) + return unp.isnan(self._data) return np.isnan(self._data) def astype(self, dtype, copy=True): @@ -505,8 +515,12 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): dtype = PintType(master_scalar.units) def quantify_nan(item): - if type(item) is float: - return item * dtype.units + if HAS_UNCERTAINTIES: + if type(item) is UFloat: + return item * dtype.units + else: + if type(item) is float: + return item * dtype.units return item if isinstance(master_scalar, _Quantity): @@ -526,6 +540,8 @@ def _from_factorized(cls, values, original): def _values_for_factorize(self): arr = self._data + if HAS_UNCERTAINTIES: + return arr, _ufloat_nan return arr, np.NaN def value_counts(self, dropna=True): @@ -553,7 +569,10 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data if dropna: - data = data[~np.isnan(data)] + if HAS_UNCERTAINTIES: + data = data[~unp.isnan(data)] + else: + data = data[~np.isnan(data)] data_list = data.tolist() index = list(set(data)) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 0112a2da..c1fe07ad 100644 --- 
a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -7,6 +7,15 @@ from pandas.tests.extension.base.base import BaseExtensionTests from pint.testsuite import helpers +try: + import uncertainties.unumpy as unp + from uncertainties import ufloat, UFloat + HAS_UNCERTAINTIES = True +except ImportError: + unp = np + ufloat = Ufloat = None + HAS_UNCERTAINTIES = False + from pint_pandas import PintArray, PintType ureg = PintType.ureg @@ -15,9 +24,9 @@ class TestIssue21(BaseExtensionTests): @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_offset_concat(self): - q_a = ureg.Quantity(np.arange(5), ureg.Unit("degC")) - q_b = ureg.Quantity(np.arange(6), ureg.Unit("degC")) - q_a_ = np.append(q_a, np.nan) + q_a = ureg.Quantity(np.arange(5)+ufloat(0,0), ureg.Unit("degC")) + q_b = ureg.Quantity(np.arange(6)+ufloat(0,0), ureg.Unit("degC")) + q_a_ = np.append(q_a, ufloat(np.nan, 0)) a = pd.Series(PintArray(q_a)) b = pd.Series(PintArray(q_b)) @@ -128,8 +137,8 @@ def test_issue_139(): q2 = 5.678 q_nan = np.nan - u1 = ufloat(1, 0.2) - u2 = ufloat(3, 0.4) + u1 = ufloat(1, 0) + u2 = ufloat(3, 0) u_nan = ufloat(np.nan, 0.0) u_plus_or_minus_nan = ufloat(0.0, np.nan) u_nan_plus_or_minus_nan = ufloat(np.nan, np.nan) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index b448cac5..8d881118 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -8,6 +8,15 @@ import pandas as pd import pandas._testing as tm import pytest +try: + import uncertainties.unumpy as unp + from uncertainties import ufloat, UFloat + HAS_UNCERTAINTIES = True + _ufloat_nan = ufloat(np.nan, 0) +except ImportError: + unp = np + HAS_UNCERTAINTIES = False + from pandas.core import ops from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, @@ -30,6 +39,120 @@ ureg = PintType.ureg +import pandas._testing as tm + +from pandas import ( + 
Categorical, + DataFrame, + DatetimeIndex, + Index, + IntervalIndex, + MultiIndex, + PeriodIndex, + RangeIndex, + Series, + TimedeltaIndex, +) +from pandas.core.arrays import ( + DatetimeArray, + ExtensionArray, + IntervalArray, + PeriodArray, + TimedeltaArray, +) +from pandas._testing.asserters import ( + assert_equal, + assert_index_equal, + assert_interval_array_equal, + assert_period_array_equal, + assert_datetime_array_equal, + assert_timedelta_array_equal, + assert_almost_equal, + assert_extension_array_equal, + assert_numpy_array_equal, +) + + +def uassert_equal(left, right, **kwargs) -> None: + """ + Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. + Parameters + ---------- + left, right : Index, Series, DataFrame, ExtensionArray, or np.ndarray + The two items to be compared. + **kwargs + All keyword arguments are passed through to the underlying assert method. + """ + __tracebackhide__ = True + + if isinstance(left, Index): + assert_index_equal(left, right, **kwargs) + if isinstance(left, (DatetimeIndex, TimedeltaIndex)): + assert left.freq == right.freq, (left.freq, right.freq) + elif isinstance(left, Series): + uassert_series_equal(left, right, **kwargs) + elif isinstance(left, DataFrame): + uassert_frame_equal(left, right, **kwargs) + elif isinstance(left, IntervalArray): + assert_interval_array_equal(left, right, **kwargs) + elif isinstance(left, PeriodArray): + assert_period_array_equal(left, right, **kwargs) + elif isinstance(left, DatetimeArray): + assert_datetime_array_equal(left, right, **kwargs) + elif isinstance(left, TimedeltaArray): + assert_timedelta_array_equal(left, right, **kwargs) + elif isinstance(left, ExtensionArray): + uassert_extension_array_equal(left, right, **kwargs) + elif isinstance(left, np.ndarray): + uassert_numpy_array_equal(left, right, **kwargs) + elif isinstance(left, str): + assert kwargs == {} + assert left == right + else: + assert kwargs == {} + uassert_almost_equal(left, right) + + +def 
uassert_series_equal(left, right, **kwargs): + assert left.shape == right.shape + if getattr(left, "dtype", False): + assert left.dtype == right.dtype + assert_equal(left.index, right.index) + uassert_equal(left.values, right.values) + +def uassert_frame_equal(left, right, **kwargs): + assert left.shape == right.shape + if getattr(left, "dtype", False): + assert left.dtype == right.dtype + assert_equal(left.index, right.index) + uassert_equal(left.values, right.values) + +def uassert_extension_array_equal(left, right, **kwargs): + assert left.shape == right.shape + if getattr(left, "dtype", False): + assert left.dtype == right.dtype + assert all([str(l) == str(r) for l, r in zip(left, right)]) + +def uassert_numpy_array_equal(left, right, **kwargs): + if getattr(left, "dtype", False): + assert left.dtype == right.dtype + assert all([str(l) == str(r) for l, r in zip(left, right)]) + +def uassert_almost_equal(left, right, **kwargs): + assert_almost_equal(left, right, **kwargs) + +if HAS_UNCERTAINTIES: + # The following functions all need a lot of work... 
+ # tm.assert_equal = uassert_equal + # tm.assert_series_equal = uassert_series_equal + # tm.assert_frame_equal = uassert_frame_equal + # tm.assert_extension_array_equal = uassert_extension_array_equal + # Fortunately, ufloat (x, 0) == ufloat (x, 0) (zero uncertainty is an exact number) + pass + +# @pytest.fixture(params=[True,False]) +# def HAS_UNCERTAINTIES(): +# return params @pytest.fixture(params=[True, False]) def box_in_series(request): @@ -44,19 +167,32 @@ def dtype(): @pytest.fixture def data(): - return PintArray.from_1darray_quantity(np.arange(start=1.0, stop=101.0) * ureg.nm) + if HAS_UNCERTAINTIES: + d = (np.arange(start=1.0, stop=101.0)+ufloat(0,0)) * ureg.nm + else: + d = np.arange(start=1.0, stop=101.0) * ureg.nm + return PintArray.from_1darray_quantity(d) @pytest.fixture def data_missing(): - return PintArray.from_1darray_quantity([np.nan, 1.0] * ureg.meter) + if HAS_UNCERTAINTIES: + dm = [_ufloat_nan, ufloat(1, 0)] + else: + dm = [np.nan, 1] + return PintArray.from_1darray_quantity(dm * ureg.meter) @pytest.fixture def data_for_twos(): - x = [ - 2.0, - ] * 100 + if HAS_UNCERTAINTIES: + x = [ + ufloat (2.0, 0) + ] * 100 + else: + x = [ + 2.0, + ] * 100 return PintArray.from_1darray_quantity(x * ureg.meter) @@ -90,14 +226,22 @@ def sort_by_key(request): @pytest.fixture def data_for_sorting(): - return PintArray.from_1darray_quantity([0.3, 10.0, -50.0] * ureg.centimeter) + if HAS_UNCERTAINTIES: + ds = [ufloat(0.3, 0), ufloat(10, 0), ufloat(-50, 0)] + else: + ds = [0.3, 10, -50] + return PintArray.from_1darray_quantity(ds * ureg.centimeter) # should probably get more sophisticated and do something like # [1 * ureg.meter, 3 * ureg.meter, 10 * ureg.centimeter] @pytest.fixture def data_missing_for_sorting(): - return PintArray.from_1darray_quantity([4.0, np.nan, -5.0] * ureg.centimeter) + if HAS_UNCERTAINTIES: + dms = [ufloat(4, 0), _ufloat_nan, ufloat(-5, 0)] + else: + dms = [4, np.nan, -5] + return PintArray.from_1darray_quantity(dms * ureg.centimeter) 
# should probably get more sophisticated and do something like # [4 * ureg.meter, np.nan, 10 * ureg.centimeter] @@ -105,6 +249,8 @@ def data_missing_for_sorting(): @pytest.fixture def na_cmp(): """Binary operator for comparing NA values.""" + if HAS_UNCERTAINTIES: + return lambda x, y: bool(unp.isnan(x.magnitude)) & bool(unp.isnan(y.magnitude)) return lambda x, y: bool(np.isnan(x.magnitude)) & bool(np.isnan(y.magnitude)) @@ -120,7 +266,13 @@ def data_for_grouping(): a = 1.0 b = 2.0**32 + 1 c = 2.0**32 + 10 - return PintArray.from_1darray_quantity([b, b, np.nan, np.nan, a, a, b, c] * ureg.m) + _n = np.nan + if HAS_UNCERTAINTIES: + a = a + ufloat(0, 0) + b = b + ufloat(0, 0) + c = c + ufloat(0, 0) + _n = _ufloat_nan + return PintArray.from_1darray_quantity([b, b, _n, _n, a, a, b, c] * ureg.m) # === missing from pandas extension docs about what has to be included in tests === @@ -166,21 +318,36 @@ def all_compare_operators(request): return request.param -# commented functions aren't implemented -_all_numeric_reductions = [ - "sum", - "max", - "min", - "mean", - # "prod", - "std", - "var", - "median", - "sem", - "kurt", - "skew", +if HAS_UNCERTAINTIES: + # commented functions aren't implemented + _all_numeric_reductions = [ + "sum", + "max", + "min", + # "mean", + # "prod", + # "std", + # "var", + # "median", + # "sem", + # "kurt", + # "skew", ] - +else: + # commented functions aren't implemented + _all_numeric_reductions = [ + "sum", + "max", + "min", + "mean", + # "prod", + "std", + "var", + "median", + "sem", + "kurt", + "skew", + ] @pytest.fixture(params=_all_numeric_reductions) def all_numeric_reductions(request): @@ -509,7 +676,10 @@ def test_reduce_scaling(self, data, all_numeric_reductions, skipna): else: v_nm = r_nm v_mm = r_mm - assert np.isclose(v_nm, v_mm, rtol=1e-3), f"{r_nm} == {r_mm}" + if HAS_UNCERTAINTIES: + assert np.isclose(v_nm.n, v_mm.n, rtol=1e-3), f"{r_nm} == {r_mm}" + else: + assert np.isclose(v_nm, v_mm, rtol=1e-3), f"{r_nm} == {r_mm}" class 
TestBooleanReduce(base.BaseBooleanReduceTests): From dce26684a9caae3876592a2f63b8d044199501f1 Mon Sep 17 00:00:00 2001 From: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 21 Oct 2022 05:12:35 -0400 Subject: [PATCH 07/51] Preserve incoming np.array when promoting float to ufloat in PintArray Signed-off-by: 72577720+MichaelTiemannOSC@users.noreply.github.com --- pint_pandas/pint_array.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index bf9807c9..4d18f742 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -235,12 +235,22 @@ def __init__(self, values, dtype=None, copy=False): values = np.array(values, copy=copy) copy = False if HAS_UNCERTAINTIES: - if not all([isinstance(v, UFloat) for v in values]): + if np.issubdtype(values.dtype, np.floating): + pass + elif not all([isinstance(v, UFloat) for v in values]): warnings.warn( f"pint-pandas does not support certain magnitudes of {values.dtype}. Converting magnitudes to ufloat.", category=RuntimeWarning, ) - values = [ufloat(v, 0) for v in values] + for i in range(len(values)): + # List comprehensions are great, but they are not np.arrays! 
+ if not isinstance(values[i], UFloat): + if np.isnan(values[i]): + values[i] = _ufloat_nan + else: + values[i] = ufloat(values[i], 0) + elif unp.isnan(values[i]): + values[i] = _ufloat_nan copy = False elif not np.issubdtype(values.dtype, np.floating): warnings.warn( @@ -518,6 +528,11 @@ def quantify_nan(item): if HAS_UNCERTAINTIES: if type(item) is UFloat: return item * dtype.units + if type(item) is float: + if np.isnan(item): + return _ufloat_nan * dtype.units + else: + return UFloat(item, 0) * dtype.units else: if type(item) is float: return item * dtype.units From d4ca9f08d8a704061d838e7820e4529b7e674126 Mon Sep 17 00:00:00 2001 From: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Mon, 24 Oct 2022 22:50:40 -0400 Subject: [PATCH 08/51] Fix logic to detect heterogeneous arrays of Ufloats and floats. Signed-off-by: MichaelTiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 4d18f742..0fde1801 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -237,7 +237,7 @@ def __init__(self, values, dtype=None, copy=False): if HAS_UNCERTAINTIES: if np.issubdtype(values.dtype, np.floating): pass - elif not all([isinstance(v, UFloat) for v in values]): + elif all([isinstance(v, UFloat) for v in values]) != any([isinstance(v, UFloat) for v in values]): warnings.warn( f"pint-pandas does not support certain magnitudes of {values.dtype}. 
Converting magnitudes to ufloat.", category=RuntimeWarning, From 9fffcc52743e4c04427527a81f58dfdfde1a0b31 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 3 Jan 2023 11:44:38 +1300 Subject: [PATCH 09/51] Update pint_array.py Deal with duality of np.nan and _ufloat_nans when constructing PintArrays, creating float64 magnitude arrays when possible and ufloat magnitudes when necessary. Note that PintArray columns don't advertise whether magnitudes are np.float64 or np.object; they are what they are. Also commented out warning, which simply creates noise. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 75 ++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 0fde1801..4435126d 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -148,8 +148,7 @@ def name(self): def na_value(self): if HAS_UNCERTAINTIES: return self.ureg.Quantity(_ufloat_nan, self.units) - else: - return self.ureg.Quantity(np.nan, self.units) + return self.ureg.Quantity(np.nan, self.units) def __hash__(self): # make myself hashable @@ -232,33 +231,59 @@ def __init__(self, values, dtype=None, copy=False): if isinstance(values, _Quantity): values = values.to(dtype.units).magnitude if not isinstance(values, np.ndarray): - values = np.array(values, copy=copy) + if dtype.kind=='O': + values = np.array(values, dtype=object, copy=copy) + else: + values = np.array(values, copy=copy) copy = False if HAS_UNCERTAINTIES: - if np.issubdtype(values.dtype, np.floating): + if np.issubdtype(values.dtype, np.floating) or values==[]: pass - elif all([isinstance(v, UFloat) for v in values]) != any([isinstance(v, UFloat) for v in values]): - warnings.warn( - f"pint-pandas does not support certain magnitudes of {values.dtype}. 
Converting magnitudes to ufloat.", - category=RuntimeWarning, - ) - for i in range(len(values)): - # List comprehensions are great, but they are not np.arrays! - if not isinstance(values[i], UFloat): - if np.isnan(values[i]): - values[i] = _ufloat_nan - else: - values[i] = ufloat(values[i], 0) - elif unp.isnan(values[i]): - values[i] = _ufloat_nan - copy = False + else: + value_notna = [isinstance(v, UFloat) for v in values if not (pd.isna(v) or unp.isnan(v))] + if value_notna == []: + # all NaNs, either from our own data, or from Pint/Pandas internals + pa_nan = _ufloat_nan if dtype.kind=='O' else np.nan + for i in range(len(values)): + # Promote/demote NaNs to match non-NaN magnitudes + values[i] = pa_nan + copy = False + else: + any_UFloats = any(value_notna) + all_UFloats = all(value_notna) + if any_UFloats != all_UFloats: + # warnings.warn( + # f"pint-pandas does not support certain magnitudes of {values.dtype}. Converting magnitudes to ufloat.", + # category=RuntimeWarning, + # ) + for i, v in enumerate(values): + # List comprehensions are great, but they are not np.arrays! + if not isinstance(v, UFloat): + if pd.isna(v): + values[i] = _ufloat_nan + else: + values[i] = ufloat(v, 0) + elif unp.isnan(v): + # Do we need to canonicalize our NaNs? + values[i] = _ufloat_nan + copy = False + else: + pa_nan = _ufloat_nan if any_UFloats else np.nan + for i, v in enumerate(values): + # Promote/demote NaNs to match non-NaN magnitudes + if pd.isna(v) or unp.isnan(v): + values[i] = pa_nan + copy = False + if not any_UFloats: + values = values.astype(float) + copy = False elif not np.issubdtype(values.dtype, np.floating): - warnings.warn( - f"pint-pandas does not support magnitudes of {values.dtype}. Converting magnitudes to float.", - category=RuntimeWarning, - ) - values = values.astype(float) - copy = False + warnings.warn( + f"pint-pandas does not support magnitudes of {values.dtype}. 
Converting magnitudes to float.", + category=RuntimeWarning, + ) + values = values.astype(float) + copy = False if copy: values = values.copy() self._data = values From 8b067084455d015fae584845f7174102e5ce4ae1 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 3 Jan 2023 16:17:35 +1300 Subject: [PATCH 10/51] Update pint_array.py Fix conditional expression. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 4435126d..208c5760 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -237,7 +237,7 @@ def __init__(self, values, dtype=None, copy=False): values = np.array(values, copy=copy) copy = False if HAS_UNCERTAINTIES: - if np.issubdtype(values.dtype, np.floating) or values==[]: + if np.issubdtype(values.dtype, np.floating) or len(values)==0: pass else: value_notna = [isinstance(v, UFloat) for v in values if not (pd.isna(v) or unp.isnan(v))] From c5b79269e33741986f22c9ce2fca7551e69c64df Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 3 Jan 2023 21:15:42 +1300 Subject: [PATCH 11/51] Update pint_array.py Don't try to build a large-enough-to-hold-uncertainties array if we are not using uncertainties in any way. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 208c5760..c83ec106 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -231,7 +231,7 @@ def __init__(self, values, dtype=None, copy=False): if isinstance(values, _Quantity): values = values.to(dtype.units).magnitude if not isinstance(values, np.ndarray): - if dtype.kind=='O': + if HAS_UNCERTAINTIES and dtype.kind=='O': values = np.array(values, dtype=object, copy=copy) else: values = np.array(values, copy=copy) From 0b0e4d483e356353d4b4d9d3ddc1e78bd5b3c341 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Mon, 26 Jun 2023 13:10:04 -0400 Subject: [PATCH 12/51] Fix and blacken merge The manual merge process created a syntax error and various things that black didn't like. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 18 ++++++++----- pint_pandas/testsuite/test_issues.py | 16 +++++++---- .../testsuite/test_pandas_extensiontests.py | 27 +++++++++++-------- 3 files changed, 39 insertions(+), 22 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index bf6ee1fa..f5d3f189 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -27,12 +27,14 @@ # quantify/dequantify NO_UNIT = "No Unit" from pint.compat import HAS_UNCERTAINTIES + # from pint.facets.plain.quantity import PlainQuantity as _Quantity # from pint.facets.plain.unit import PlainUnit as _Unit if HAS_UNCERTAINTIES: from uncertainties import UFloat, ufloat from uncertainties import unumpy as unp + _ufloat_nan = ufloat(np.nan, 0) @@ -260,20 +262,24 @@ def __init__(self, values, dtype=None, copy=False): copy = False elif not isinstance(values, pd.core.arrays.numeric.NumericArray): values = 
pd.array(values, copy=copy) - else # not isinstance(values, np.ndarray): - if HAS_UNCERTAINTIES and dtype.kind=='O': + else: # not isinstance(values, np.ndarray): + if HAS_UNCERTAINTIES and dtype.kind == "O": values = np.array(values, dtype=object, copy=copy) else: values = np.array(values, copy=copy) copy = False if HAS_UNCERTAINTIES: - if np.issubdtype(values.dtype, np.floating) or len(values)==0: + if np.issubdtype(values.dtype, np.floating) or len(values) == 0: pass else: - value_notna = [isinstance(v, UFloat) for v in values if not (pd.isna(v) or unp.isnan(v))] + value_notna = [ + isinstance(v, UFloat) + for v in values + if not (pd.isna(v) or unp.isnan(v)) + ] if value_notna == []: # all NaNs, either from our own data, or from Pint/Pandas internals - pa_nan = _ufloat_nan if dtype.kind=='O' else np.nan + pa_nan = _ufloat_nan if dtype.kind == "O" else np.nan for i in range(len(values)): # Promote/demote NaNs to match non-NaN magnitudes values[i] = pa_nan @@ -432,7 +438,7 @@ def isna(self): """ if HAS_UNCERTAINTIES: # GH https://github.com/lebigot/uncertainties/issues/164 - if isinstance(self._data, np.ndarray) and len(self._data)==0: + if isinstance(self._data, np.ndarray) and len(self._data) == 0: # True or False doesn't matter--we just need the value for the type return np.full((0), True) return unp.isnan(self._data) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index acc3e626..a1b0d4c3 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -11,6 +11,7 @@ try: import uncertainties.unumpy as unp from uncertainties import ufloat, UFloat + HAS_UNCERTAINTIES = True except ImportError: unp = np @@ -62,8 +63,8 @@ def test_force_ndarray_like(self): class TestIssue21(BaseExtensionTests): @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_offset_concat(self): - q_a = ureg.Quantity(np.arange(5)+ufloat(0,0), ureg.Unit("degC")) - q_b = ureg.Quantity(np.arange(6)+ufloat(0,0), 
ureg.Unit("degC")) + q_a = ureg.Quantity(np.arange(5) + ufloat(0, 0), ureg.Unit("degC")) + q_b = ureg.Quantity(np.arange(6) + ufloat(0, 0), ureg.Unit("degC")) q_a_ = np.append(q_a, ufloat(np.nan, 0)) a = pd.Series(PintArray(q_a)) @@ -171,6 +172,7 @@ def test_issue_88(): b = PintArray(q_mm, "m") helpers.assert_quantity_almost_equal(q_m, b.quantity) + def test_issue_127(): a = PintType.construct_from_string("pint[dimensionless]") b = PintType.construct_from_string("pint[]") @@ -179,7 +181,8 @@ def test_issue_127(): def test_issue_139(): from pint.compat import HAS_UNCERTAINTIES - assert(HAS_UNCERTAINTIES) + + assert HAS_UNCERTAINTIES from uncertainties import ufloat from uncertainties import unumpy as unp @@ -193,8 +196,11 @@ def test_issue_139(): u_plus_or_minus_nan = ufloat(0.0, np.nan) u_nan_plus_or_minus_nan = ufloat(np.nan, np.nan) - a_m = PintArray([q1, u1, q2, u2, q_nan, u_nan, u_plus_or_minus_nan, u_nan_plus_or_minus_nan], ureg.m) - a_cm = a_m.astype('pint[cm]') + a_m = PintArray( + [q1, u1, q2, u2, q_nan, u_nan, u_plus_or_minus_nan, u_nan_plus_or_minus_nan], + ureg.m, + ) + a_cm = a_m.astype("pint[cm]") assert np.all(a_m[0:4] == a_cm[0:4]) for x, y in zip(a_m[4:], a_cm[4:]): assert unp.isnan(x) == unp.isnan(y) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index a5552499..a34dd6c3 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -7,9 +7,11 @@ import pandas as pd import pandas._testing as tm import pytest + try: import uncertainties.unumpy as unp from uncertainties import ufloat, UFloat + HAS_UNCERTAINTIES = True _ufloat_nan = ufloat(np.nan, 0) except ImportError: @@ -116,6 +118,7 @@ def uassert_series_equal(left, right, **kwargs): assert_equal(left.index, right.index) uassert_equal(left.values, right.values) + def uassert_frame_equal(left, right, **kwargs): assert left.shape == right.shape if getattr(left, 
"dtype", False): @@ -123,20 +126,24 @@ def uassert_frame_equal(left, right, **kwargs): assert_equal(left.index, right.index) uassert_equal(left.values, right.values) + def uassert_extension_array_equal(left, right, **kwargs): assert left.shape == right.shape if getattr(left, "dtype", False): assert left.dtype == right.dtype assert all([str(l) == str(r) for l, r in zip(left, right)]) + def uassert_numpy_array_equal(left, right, **kwargs): if getattr(left, "dtype", False): assert left.dtype == right.dtype assert all([str(l) == str(r) for l, r in zip(left, right)]) + def uassert_almost_equal(left, right, **kwargs): assert_almost_equal(left, right, **kwargs) + if HAS_UNCERTAINTIES: # The following functions all need a lot of work... # tm.assert_equal = uassert_equal @@ -150,6 +157,7 @@ def uassert_almost_equal(left, right, **kwargs): # def HAS_UNCERTAINTIES(): # return params + @pytest.fixture(params=[True, False]) def box_in_series(request): """Whether to box the data in a Series""" @@ -173,7 +181,9 @@ def numeric_dtype(request): @pytest.fixture def data(request, numeric_dtype): if HAS_UNCERTAINTIES: - d = (np.arange(start=1.0, stop=101.0, dtype=numeric_dtype)+ufloat(0,0)) * ureg.nm + d = ( + np.arange(start=1.0, stop=101.0, dtype=numeric_dtype) + ufloat(0, 0) + ) * ureg.nm else: d = np.arange(start=1.0, stop=101.0, dtype=numeric_dtype) * ureg.nm return PintArray.from_1darray_quantity(d) @@ -194,9 +204,7 @@ def data_missing(numeric_dtype): @pytest.fixture def data_for_twos(numeric_dtype): if HAS_UNCERTAINTIES: - x = [ - ufloat (2.0, 0) - ] * 100 + x = [ufloat(2.0, 0)] * 100 else: x = [ 2.0, @@ -253,9 +261,7 @@ def data_missing_for_sorting(numeric_dtype): else: dms = [4, np.nan, -5] return PintArray.from_1darray_quantity( - ureg.Quantity( - pd.array(dms, dtype=numeric_dtype), ureg.centimeter - ) + ureg.Quantity(pd.array(dms, dtype=numeric_dtype), ureg.centimeter) ) @@ -285,9 +291,7 @@ def data_for_grouping(numeric_dtype): _n = _ufloat_nan numeric_dtype = 
dtypemap.get(numeric_dtype, numeric_dtype) return PintArray.from_1darray_quantity( - ureg.Quantity( - pd.array([b, b, _n, _n, a, a, b, c], dtype=numeric_dtype), ureg.m - ) + ureg.Quantity(pd.array([b, b, _n, _n, a, a, b, c], dtype=numeric_dtype), ureg.m) ) @@ -348,7 +352,7 @@ def all_compare_operators(request): # "sem", # "kurt", # "skew", -] + ] else: # commented functions aren't implemented _all_numeric_reductions = [ @@ -365,6 +369,7 @@ def all_compare_operators(request): "skew", ] + @pytest.fixture(params=_all_numeric_reductions) def all_numeric_reductions(request): """ From 959570fdf2eabee3d8ca3a47cb3de388f3e84044 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Mon, 26 Jun 2023 13:27:19 -0400 Subject: [PATCH 13/51] Fix ruff complaints in testsuite Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/testsuite/test_issues.py | 2 +- .../testsuite/test_pandas_extensiontests.py | 22 +++++++++---------- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index a1b0d4c3..522363a1 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -10,7 +10,7 @@ try: import uncertainties.unumpy as unp - from uncertainties import ufloat, UFloat + from uncertainties import ufloat, UFloat # noqa: F401 HAS_UNCERTAINTIES = True except ImportError: diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index a34dd6c3..8da0d82d 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -10,7 +10,7 @@ try: import uncertainties.unumpy as unp - from uncertainties import ufloat, UFloat + from uncertainties import ufloat, UFloat # noqa: F401 HAS_UNCERTAINTIES = True _ufloat_nan = ufloat(np.nan, 0) @@ -37,17 +37,15 @@ ureg = 
PintType.ureg -import pandas._testing as tm - from pandas import ( - Categorical, + Categorical, # noqa: F401 DataFrame, DatetimeIndex, Index, - IntervalIndex, - MultiIndex, - PeriodIndex, - RangeIndex, + IntervalIndex, # noqa: F401 + MultiIndex, # noqa: F401 + PeriodIndex, # noqa: F401 + RangeIndex, # noqa: F401 Series, TimedeltaIndex, ) @@ -66,8 +64,8 @@ assert_datetime_array_equal, assert_timedelta_array_equal, assert_almost_equal, - assert_extension_array_equal, - assert_numpy_array_equal, + assert_extension_array_equal, # noqa: F401 + assert_numpy_array_equal, # noqa: F401 ) @@ -131,13 +129,13 @@ def uassert_extension_array_equal(left, right, **kwargs): assert left.shape == right.shape if getattr(left, "dtype", False): assert left.dtype == right.dtype - assert all([str(l) == str(r) for l, r in zip(left, right)]) + assert all([str(l) == str(r) for l, r in zip(left, right)]) # noqa: E741 def uassert_numpy_array_equal(left, right, **kwargs): if getattr(left, "dtype", False): assert left.dtype == right.dtype - assert all([str(l) == str(r) for l, r in zip(left, right)]) + assert all([str(l) == str(r) for l, r in zip(left, right)]) # noqa: E741 def uassert_almost_equal(left, right, **kwargs): From 5270a4693392284bae6005dff7704e38d2cfe9db Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 28 Jun 2023 16:36:12 -0400 Subject: [PATCH 14/51] Fix numerous regressions in test_pandas_extensiontests Fixes include: * Factorization * NaN handling (several more issues still need to be resolved) * Proper unit declarations in test_offset_concat * Integration of new `numeric_dtype` parameter A major outstanding issue (presently being discussed as https://github.com/pandas-dev/pandas/issues/53904) concerns whether we can make AffineScalarFunc hashable and/or whether other legacy Pandas code (which has been deprecated) can be further removed. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 186 +++++++++++------- pint_pandas/testsuite/test_issues.py | 2 +- .../testsuite/test_pandas_extensiontests.py | 9 +- 3 files changed, 123 insertions(+), 74 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index f5d3f189..2a2951d2 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -262,64 +262,6 @@ def __init__(self, values, dtype=None, copy=False): copy = False elif not isinstance(values, pd.core.arrays.numeric.NumericArray): values = pd.array(values, copy=copy) - else: # not isinstance(values, np.ndarray): - if HAS_UNCERTAINTIES and dtype.kind == "O": - values = np.array(values, dtype=object, copy=copy) - else: - values = np.array(values, copy=copy) - copy = False - if HAS_UNCERTAINTIES: - if np.issubdtype(values.dtype, np.floating) or len(values) == 0: - pass - else: - value_notna = [ - isinstance(v, UFloat) - for v in values - if not (pd.isna(v) or unp.isnan(v)) - ] - if value_notna == []: - # all NaNs, either from our own data, or from Pint/Pandas internals - pa_nan = _ufloat_nan if dtype.kind == "O" else np.nan - for i in range(len(values)): - # Promote/demote NaNs to match non-NaN magnitudes - values[i] = pa_nan - copy = False - else: - any_UFloats = any(value_notna) - all_UFloats = all(value_notna) - if any_UFloats != all_UFloats: - # warnings.warn( - # f"pint-pandas does not support certain magnitudes of {values.dtype}. Converting magnitudes to ufloat.", - # category=RuntimeWarning, - # ) - for i, v in enumerate(values): - # List comprehensions are great, but they are not np.arrays! - if not isinstance(v, UFloat): - if pd.isna(v): - values[i] = _ufloat_nan - else: - values[i] = ufloat(v, 0) - elif unp.isnan(v): - # Do we need to canonicalize our NaNs? 
- values[i] = _ufloat_nan - copy = False - else: - pa_nan = _ufloat_nan if any_UFloats else np.nan - for i, v in enumerate(values): - # Promote/demote NaNs to match non-NaN magnitudes - if pd.isna(v) or unp.isnan(v): - values[i] = pa_nan - copy = False - if not any_UFloats: - values = values.astype(float) - copy = False - elif not np.issubdtype(values.dtype, np.floating): - warnings.warn( - f"pint-pandas does not support magnitudes of {values.dtype}. Converting magnitudes to float.", - category=RuntimeWarning, - ) - values = values.astype(float) - copy = False if copy: values = values.copy() self._data = values @@ -438,10 +380,11 @@ def isna(self): """ if HAS_UNCERTAINTIES: # GH https://github.com/lebigot/uncertainties/issues/164 - if isinstance(self._data, np.ndarray) and len(self._data) == 0: + if len(self._data) == 0: # True or False doesn't matter--we just need the value for the type return np.full((0), True) - return unp.isnan(self._data) + elif isinstance(self._data[0], UFloat): + return unp.isnan(self._data) return self._data.isna() def astype(self, dtype, copy=True): @@ -533,7 +476,8 @@ def take(self, indices, allow_fill=False, fill_value=None): Examples -------- """ - from pandas.core.algorithms import take, is_scalar + from pandas.core.algorithms import take + from pandas.core.dtypes.common import is_scalar data = self._data if allow_fill and fill_value is None: @@ -592,8 +536,8 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype is None and isinstance(master_scalar, _Quantity): dtype = PintType(master_scalar.units) - def quantify_nan(item): - if HAS_UNCERTAINTIES: + def quantify_nan(item, promote_to_ufloat): + if promote_to_ufloat: if type(item) is UFloat: return item * dtype.units if type(item) is float: @@ -607,11 +551,19 @@ def quantify_nan(item): return item if isinstance(master_scalar, _Quantity): - scalars = [quantify_nan(item) for item in scalars] + if HAS_UNCERTAINTIES: + promote_to_ufloat = any([isinstance(item.m, UFloat) for 
item in scalars]) + else: + promote_to_ufloat = False + scalars = [quantify_nan(item, promote_to_ufloat) for item in scalars] scalars = [ (item.to(dtype.units).magnitude if hasattr(item, "to") else item) for item in scalars ] + if HAS_UNCERTAINTIES: + promote_to_ufloat = any([isinstance(item, UFloat) for item in scalars]) + if promote_to_ufloat: + scalars = [item if isinstance(item, UFloat) else _ufloat_nan if np.isnan(item) else ufloat(item, 0) for item in scalars] return cls(scalars, dtype=dtype, copy=copy) @classmethod @@ -620,15 +572,90 @@ def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False): dtype = PintType.construct_from_quantity_string(scalars[0]) return cls._from_sequence([dtype.ureg.Quantity(x) for x in scalars]) + def factorize( + self, + use_na_sentinel: bool = True, + ) -> tuple[np.ndarray, ExtensionArray]: + """ + Encode the extension array as an enumerated type. + + Parameters + ---------- + use_na_sentinel : bool, default True + If True, the sentinel -1 will be used for NaN values. If False, + NaN values will be encoded as non-negative integers and will not drop the + NaN from the uniques of the values. + + .. versionadded:: 1.5.0 + + Returns + ------- + codes : ndarray + An integer NumPy array that's an indexer into the original + ExtensionArray. + uniques : ExtensionArray + An ExtensionArray containing the unique values of `self`. + + .. note:: + + uniques will *not* contain an entry for the NA value of + the ExtensionArray if there are any missing values present + in `self`. + + See Also + -------- + factorize : Top-level factorize method that dispatches here. + + Notes + ----- + :meth:`pandas.factorize` offers a `sort` keyword as well. + """ + # Implementer note: There are two ways to override the behavior of + # pandas.factorize + # 1. _values_for_factorize and _from_factorize. 
+ # Specify the values passed to pandas' internal factorization + # routines, and how to convert from those values back to the + # original ExtensionArray. + # 2. ExtensionArray.factorize. + # Complete control over factorization. + if HAS_UNCERTAINTIES and self._data.dtype.kind == 'O': + arr, na_value = self._values_for_factorize() + + if not use_na_sentinel: + # factorize can now handle differentiating various types of null values. + # These can only occur when the array has object dtype. + # However, for backwards compatibility we only use the null for the + # provided dtype. This may be revisited in the future, see GH#48476. + null_mask = isna(arr) + if null_mask.any(): + # Don't modify (potentially user-provided) array + arr = np.where(null_mask, na_value, arr) + + codes = [-1] * len(self.data) + # Note that item is a local variable provided in the loop below + vf = np.vectorize(lambda x: x == item, otypes=[bool]) + for code, item in enumerate(arr): + code_mask = vf(self._data) + codes = np.where(code_mask, code, codes) + + uniques_ea = self._from_factorized(arr, self) + return codes, uniques_ea + else: + return super(PintArray, self).factorize(self, use_na_sentinel) + @classmethod def _from_factorized(cls, values, original): return cls(values, dtype=original.dtype) def _values_for_factorize(self): arr = self._data - if HAS_UNCERTAINTIES: - return arr, _ufloat_nan - return self._data._values_for_factorize() + if HAS_UNCERTAINTIES and arr.dtype.kind == 'O': + unique_data = [] + for item in arr: + if item not in unique_data: + unique_data.append(item) + return np.array(unique_data), _ufloat_nan + return arr._values_for_factorize() def value_counts(self, dropna=True): """ @@ -654,18 +681,26 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data - if HAS_UNCERTAINTIES: + if HAS_UNCERTAINTIES and data.dtype.kind == 'O': nafilt = unp.isnan(data) + na_value = _ufloat_nan + data = data[~nafilt] + unique_data = [] + for item 
in data: + if item not in unique_data: + unique_data.append(item) + index = list(unique_data) else: nafilt = np.isnan(data) - data = data[~nafilt] + na_value = np.nan + data = data[~nafilt] + index = list(set(data)) data_list = data.tolist() - index = list(set(data)) array = [data_list.count(item) for item in index] if not dropna: - index.append(np.nan) + index.append(na_value) array.append(nafilt.sum()) return Series(array, index=index) @@ -679,7 +714,14 @@ def unique(self): """ from pandas import unique - return self._from_sequence(unique(self._data), dtype=self.dtype) + data = self._data + if HAS_UNCERTAINTIES and data.dtype.kind == 'O': + unique_data = [] + for item in data: + if item not in unique_data: + unique_data.append(item) + return self._from_sequence(pd.array(unique_data, dtype=data.dtype), dtype=self.dtype) + return self._from_sequence(unique(data), dtype=self.dtype) def __contains__(self, item) -> bool: if not isinstance(item, _Quantity): diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 522363a1..2071946f 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -65,7 +65,7 @@ class TestIssue21(BaseExtensionTests): def test_offset_concat(self): q_a = ureg.Quantity(np.arange(5) + ufloat(0, 0), ureg.Unit("degC")) q_b = ureg.Quantity(np.arange(6) + ufloat(0, 0), ureg.Unit("degC")) - q_a_ = np.append(q_a, ufloat(np.nan, 0)) + q_a_ = np.append(q_a, ureg.Quantity(ufloat(np.nan, 0), ureg.Unit("degC"))) a = pd.Series(PintArray(q_a)) b = pd.Series(PintArray(q_b)) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 8da0d82d..7fff7508 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -179,6 +179,7 @@ def numeric_dtype(request): @pytest.fixture def data(request, numeric_dtype): if HAS_UNCERTAINTIES: + numeric_dtype = None d = ( 
np.arange(start=1.0, stop=101.0, dtype=numeric_dtype) + ufloat(0, 0) ) * ureg.nm @@ -191,6 +192,7 @@ def data(request, numeric_dtype): def data_missing(numeric_dtype): numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) if HAS_UNCERTAINTIES: + numeric_dtype = None dm = [_ufloat_nan, ufloat(1, 0)] else: dm = [np.nan, 1] @@ -202,6 +204,7 @@ def data_missing(numeric_dtype): @pytest.fixture def data_for_twos(numeric_dtype): if HAS_UNCERTAINTIES: + numeric_dtype = None x = [ufloat(2.0, 0)] * 100 else: x = [ @@ -243,6 +246,7 @@ def sort_by_key(request): @pytest.fixture def data_for_sorting(numeric_dtype): if HAS_UNCERTAINTIES: + numeric_dtype = None ds = [ufloat(0.3, 0), ufloat(10, 0), ufloat(-50, 0)] else: ds = [0.3, 10, -50] @@ -255,6 +259,7 @@ def data_for_sorting(numeric_dtype): def data_missing_for_sorting(numeric_dtype): numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) if HAS_UNCERTAINTIES: + numeric_dtype = None dms = [ufloat(4, 0), _ufloat_nan, ufloat(-5, 0)] else: dms = [4, np.nan, -5] @@ -287,7 +292,9 @@ def data_for_grouping(numeric_dtype): b = b + ufloat(0, 0) c = c + ufloat(0, 0) _n = _ufloat_nan - numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) + numeric_dtype = None + else: + numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) return PintArray.from_1darray_quantity( ureg.Quantity(pd.array([b, b, _n, _n, a, a, b, c], dtype=numeric_dtype), ureg.m) ) From 6ddf204599db77875fe458d2c00979f89aa477b5 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 28 Jun 2023 16:42:33 -0400 Subject: [PATCH 15/51] Update pint_array.py Make `ruff` and `black` happy. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 2a2951d2..e39f6cc3 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -552,7 +552,9 @@ def quantify_nan(item, promote_to_ufloat): if isinstance(master_scalar, _Quantity): if HAS_UNCERTAINTIES: - promote_to_ufloat = any([isinstance(item.m, UFloat) for item in scalars]) + promote_to_ufloat = any( + [isinstance(item.m, UFloat) for item in scalars] + ) else: promote_to_ufloat = False scalars = [quantify_nan(item, promote_to_ufloat) for item in scalars] @@ -563,7 +565,14 @@ def quantify_nan(item, promote_to_ufloat): if HAS_UNCERTAINTIES: promote_to_ufloat = any([isinstance(item, UFloat) for item in scalars]) if promote_to_ufloat: - scalars = [item if isinstance(item, UFloat) else _ufloat_nan if np.isnan(item) else ufloat(item, 0) for item in scalars] + scalars = [ + item + if isinstance(item, UFloat) + else _ufloat_nan + if np.isnan(item) + else ufloat(item, 0) + for item in scalars + ] return cls(scalars, dtype=dtype, copy=copy) @classmethod @@ -618,7 +627,7 @@ def factorize( # original ExtensionArray. # 2. ExtensionArray.factorize. # Complete control over factorization. - if HAS_UNCERTAINTIES and self._data.dtype.kind == 'O': + if HAS_UNCERTAINTIES and self._data.dtype.kind == "O": arr, na_value = self._values_for_factorize() if not use_na_sentinel: @@ -626,7 +635,7 @@ def factorize( # These can only occur when the array has object dtype. # However, for backwards compatibility we only use the null for the # provided dtype. This may be revisited in the future, see GH#48476. 
- null_mask = isna(arr) + null_mask = self.isna(arr) if null_mask.any(): # Don't modify (potentially user-provided) array arr = np.where(null_mask, na_value, arr) @@ -649,7 +658,7 @@ def _from_factorized(cls, values, original): def _values_for_factorize(self): arr = self._data - if HAS_UNCERTAINTIES and arr.dtype.kind == 'O': + if HAS_UNCERTAINTIES and arr.dtype.kind == "O": unique_data = [] for item in arr: if item not in unique_data: @@ -681,7 +690,7 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data - if HAS_UNCERTAINTIES and data.dtype.kind == 'O': + if HAS_UNCERTAINTIES and data.dtype.kind == "O": nafilt = unp.isnan(data) na_value = _ufloat_nan data = data[~nafilt] @@ -715,12 +724,14 @@ def unique(self): from pandas import unique data = self._data - if HAS_UNCERTAINTIES and data.dtype.kind == 'O': + if HAS_UNCERTAINTIES and data.dtype.kind == "O": unique_data = [] for item in data: if item not in unique_data: unique_data.append(item) - return self._from_sequence(pd.array(unique_data, dtype=data.dtype), dtype=self.dtype) + return self._from_sequence( + pd.array(unique_data, dtype=data.dtype), dtype=self.dtype + ) return self._from_sequence(unique(data), dtype=self.dtype) def __contains__(self, item) -> bool: From e1d367c483e907781ed4b49a318e4bf96b579080 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 2 Jul 2023 09:04:48 -0400 Subject: [PATCH 16/51] Update to us pd.NA instead of np.nan / _ufloat_nan To resolve the question of the proper na_value for EA dtypes (np.nan vs. uncertainties _ufloat_nan), use the gender-neutral pd.NA value. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 52 +++++++++---------- .../testsuite/test_pandas_extensiontests.py | 13 ++--- 2 files changed, 31 insertions(+), 34 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index e39f6cc3..90c6af57 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -148,9 +148,7 @@ def name(self): @property def na_value(self): - if HAS_UNCERTAINTIES: - return self.ureg.Quantity(_ufloat_nan, self.units) - return self.ureg.Quantity(np.nan, self.units) + return self.ureg.Quantity(pd.NA, self.units) def __hash__(self): # make myself hashable @@ -383,8 +381,7 @@ def isna(self): if len(self._data) == 0: # True or False doesn't matter--we just need the value for the type return np.full((0), True) - elif isinstance(self._data[0], UFloat): - return unp.isnan(self._data) + return self._data.map(lambda x: pd.isna(x) or (isinstance(x, UFloat) and unp.isnan(x))) return self._data.isna() def astype(self, dtype, copy=True): @@ -537,6 +534,9 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): dtype = PintType(master_scalar.units) def quantify_nan(item, promote_to_ufloat): + if pd.isna(item): + return dtype.ureg.Quantity(item, dtype.units) + # FIXME: most of this code is never executed (except the final return) if promote_to_ufloat: if type(item) is UFloat: return item * dtype.units @@ -551,28 +551,31 @@ def quantify_nan(item, promote_to_ufloat): return item if isinstance(master_scalar, _Quantity): + # A quantified master_scalar does not guarantee that we don't have NA and/or np.nan values in our scalars if HAS_UNCERTAINTIES: promote_to_ufloat = any( - [isinstance(item.m, UFloat) for item in scalars] + [isinstance(item.m, UFloat) for item in scalars if pd.notna(item)] ) else: promote_to_ufloat = False - scalars = [quantify_nan(item, promote_to_ufloat) for item in scalars] + scalars = [item if isinstance(item, _Quantity) else 
quantify_nan(item, promote_to_ufloat) for item in scalars] scalars = [ (item.to(dtype.units).magnitude if hasattr(item, "to") else item) for item in scalars ] - if HAS_UNCERTAINTIES: + elif HAS_UNCERTAINTIES: promote_to_ufloat = any([isinstance(item, UFloat) for item in scalars]) - if promote_to_ufloat: - scalars = [ - item - if isinstance(item, UFloat) - else _ufloat_nan - if np.isnan(item) - else ufloat(item, 0) - for item in scalars - ] + else: + promote_to_ufloat = False + if promote_to_ufloat: + scalars = [ + item + if isinstance(item, UFloat) + else _ufloat_nan + if pd.isna(item) + else ufloat(item, 0) + for item in scalars + ] return cls(scalars, dtype=dtype, copy=copy) @classmethod @@ -642,7 +645,7 @@ def factorize( codes = [-1] * len(self.data) # Note that item is a local variable provided in the loop below - vf = np.vectorize(lambda x: x == item, otypes=[bool]) + vf = np.vectorize(lambda x: True if (x_na:=pd.isna(x))*(item_na:=pd.isna(item)) else (x_na==item_na and x==item), otypes=[bool]) for code, item in enumerate(arr): code_mask = vf(self._data) codes = np.where(code_mask, code, codes) @@ -663,7 +666,7 @@ def _values_for_factorize(self): for item in arr: if item not in unique_data: unique_data.append(item) - return np.array(unique_data), _ufloat_nan + return np.array(unique_data), pd.NA return arr._values_for_factorize() def value_counts(self, dropna=True): @@ -690,19 +693,16 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data + nafilt = data.isna() + na_value = pd.NA + data = data[~nafilt] if HAS_UNCERTAINTIES and data.dtype.kind == "O": - nafilt = unp.isnan(data) - na_value = _ufloat_nan - data = data[~nafilt] unique_data = [] for item in data: if item not in unique_data: unique_data.append(item) index = list(unique_data) else: - nafilt = np.isnan(data) - na_value = np.nan - data = data[~nafilt] index = list(set(data)) data_list = data.tolist() @@ -883,7 +883,7 @@ def __array__(self, dtype=None, 
copy=False): def _to_array_of_quantity(self, copy=False): qtys = [ - self._Q(item, self._dtype.units) if not pd.isna(item) else item + self._Q(item, self._dtype.units) if item is not pd.NA else item for item in self._data ] with warnings.catch_warnings(record=True): diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 7fff7508..2b94705c 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -193,9 +193,9 @@ def data_missing(numeric_dtype): numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) if HAS_UNCERTAINTIES: numeric_dtype = None - dm = [_ufloat_nan, ufloat(1, 0)] + dm = [pd.NA, ufloat(1, 0)] else: - dm = [np.nan, 1] + dm = [pd.NA, 1] return PintArray.from_1darray_quantity( ureg.Quantity(pd.array(dm, dtype=numeric_dtype), ureg.meter) ) @@ -260,9 +260,9 @@ def data_missing_for_sorting(numeric_dtype): numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) if HAS_UNCERTAINTIES: numeric_dtype = None - dms = [ufloat(4, 0), _ufloat_nan, ufloat(-5, 0)] + dms = [ufloat(4, 0), pd.NA, ufloat(-5, 0)] else: - dms = [4, np.nan, -5] + dms = [4, pd.NA, -5] return PintArray.from_1darray_quantity( ureg.Quantity(pd.array(dms, dtype=numeric_dtype), ureg.centimeter) ) @@ -271,8 +271,6 @@ def data_missing_for_sorting(numeric_dtype): @pytest.fixture def na_cmp(): """Binary operator for comparing NA values.""" - if HAS_UNCERTAINTIES: - return lambda x, y: bool(unp.isnan(x.magnitude)) & bool(unp.isnan(y.magnitude)) return lambda x, y: bool(pd.isna(x.magnitude)) & bool(pd.isna(y.magnitude)) @@ -286,12 +284,11 @@ def data_for_grouping(numeric_dtype): a = 1.0 b = 2.0**32 + 1 c = 2.0**32 + 10 - _n = np.nan + _n = pd.NA if HAS_UNCERTAINTIES: a = a + ufloat(0, 0) b = b + ufloat(0, 0) c = c + ufloat(0, 0) - _n = _ufloat_nan numeric_dtype = None else: numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) From 
dbf5ad12facfe41efa43814bdb6a96937faefa00 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 2 Jul 2023 09:08:24 -0400 Subject: [PATCH 17/51] Update pint_array.py Make black happy. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 90c6af57..26ba5a07 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -381,7 +381,9 @@ def isna(self): if len(self._data) == 0: # True or False doesn't matter--we just need the value for the type return np.full((0), True) - return self._data.map(lambda x: pd.isna(x) or (isinstance(x, UFloat) and unp.isnan(x))) + return self._data.map( + lambda x: pd.isna(x) or (isinstance(x, UFloat) and unp.isnan(x)) + ) return self._data.isna() def astype(self, dtype, copy=True): @@ -558,7 +560,12 @@ def quantify_nan(item, promote_to_ufloat): ) else: promote_to_ufloat = False - scalars = [item if isinstance(item, _Quantity) else quantify_nan(item, promote_to_ufloat) for item in scalars] + scalars = [ + item + if isinstance(item, _Quantity) + else quantify_nan(item, promote_to_ufloat) + for item in scalars + ] scalars = [ (item.to(dtype.units).magnitude if hasattr(item, "to") else item) for item in scalars @@ -645,7 +652,12 @@ def factorize( codes = [-1] * len(self.data) # Note that item is a local variable provided in the loop below - vf = np.vectorize(lambda x: True if (x_na:=pd.isna(x))*(item_na:=pd.isna(item)) else (x_na==item_na and x==item), otypes=[bool]) + vf = np.vectorize( + lambda x: True + if (x_na := pd.isna(x)) * (item_na := pd.isna(item)) + else (x_na == item_na and x == item), + otypes=[bool], + ) for code, item in enumerate(arr): code_mask = vf(self._data) codes = np.where(code_mask, code, codes) From 3c6eff440c7173aa958b55e9a9d6f01d711a13bd Mon Sep 
17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 5 Jul 2023 17:09:03 -0400 Subject: [PATCH 18/51] Progress: 2608 pass, 97 skip, 84 xfail, 6 xpass With these changes (and https://github.com/pandas-dev/pandas/pull/53970 and https://github.com/hgrecco/pint/pull/1615) the test suite passes or xpasses everything (no failures or error). Indeed, the uncertainties code has essentially doubled the scope of the test suite (to test with and without it). The biggest gotcha is that the EA for complex numbers is not compatible with the EA for uncertainties, due to incompatible hacks: The hack for complex numbers is to np.nan (which is, technically, a complex number) for na_value across all numeric types. But that doesn't work for uncertainties, because uncertainties doesn't accept np.nan as an uncertain value. The hack for uncertainties is to use pd.NA for na_value. This works for Int64, Float64, and uncertainties, but doesn't work for complex (which cannot tolerate NAType). Some careful subclassing fills in what doesn't easily work, with fixtures to prevent the improper mixing of complex and uncertainty types in the same python environment. Happy to discuss! Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 198 +++++++++++++----- pint_pandas/testsuite/test_issues.py | 11 +- .../testsuite/test_pandas_extensiontests.py | 150 +++++++------ 3 files changed, 235 insertions(+), 124 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 26ba5a07..77a418d9 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -75,7 +75,7 @@ def __new__(cls, units=None): if not isinstance(units, _Unit): units = cls._parse_dtype_strict(units) # ureg.unit returns a quantity with a magnitude of 1 - # eg 1 mm. Initialising a quantity and taking it's unit + # eg 1 mm. 
Initialising a quantity and taking its unit # TODO: Seperate units from quantities in pint # to simplify this bit units = cls.ureg.Quantity(1, units).units @@ -148,7 +148,10 @@ def name(self): @property def na_value(self): - return self.ureg.Quantity(pd.NA, self.units) + if HAS_UNCERTAINTIES: + return self.ureg.Quantity(pd.NA, self.units) + else: + return self.ureg.Quantity(np.nan, self.units) def __hash__(self): # make myself hashable @@ -318,12 +321,41 @@ def __setitem__(self, key, value): # doing nothing here seems to be ok return + master_scalar = None + try: + master_scalar = next(i for i in self._data if pd.notna(i)) + except StopIteration: + pass + if isinstance(value, _Quantity): value = value.to(self.units).magnitude - elif is_list_like(value) and len(value) > 0 and isinstance(value[0], _Quantity): - value = [item.to(self.units).magnitude for item in value] + elif is_list_like(value) and len(value) > 0: + if isinstance(value[0], _Quantity): + value = [item.to(self.units).magnitude for item in value] + elif HAS_UNCERTAINTIES and isinstance(master_scalar, UFloat): + if not all([isinstance(i, UFloat) or pd.isna(i) for i in value]): + value = [ + i if isinstance(i, UFloat) or pd.isna(i) else ufloat(i, 0) + for i in value + ] + if len(value) == 1: + value = value[0] key = check_array_indexer(self, key) + # Filter out invalid values for our array type(s) + if HAS_UNCERTAINTIES: + if isinstance(value, UFloat): + pass + elif is_list_like(value): + from pandas.core.dtypes.common import is_scalar + + if is_scalar(key): + msg = "Value must be scalar. {}".format(value) + raise ValueError(msg) + elif type(value) is object: + if pd.notna(value): + msg = "Invalid object. 
{}".format(value) + raise ValueError(msg) try: self._data[key] = value except IndexError as e: @@ -535,45 +567,24 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype is None and isinstance(master_scalar, _Quantity): dtype = PintType(master_scalar.units) - def quantify_nan(item, promote_to_ufloat): - if pd.isna(item): - return dtype.ureg.Quantity(item, dtype.units) - # FIXME: most of this code is never executed (except the final return) - if promote_to_ufloat: - if type(item) is UFloat: - return item * dtype.units - if type(item) is float: - if np.isnan(item): - return _ufloat_nan * dtype.units - else: - return UFloat(item, 0) * dtype.units - else: - if type(item) is float: - return item * dtype.units - return item - if isinstance(master_scalar, _Quantity): - # A quantified master_scalar does not guarantee that we don't have NA and/or np.nan values in our scalars - if HAS_UNCERTAINTIES: - promote_to_ufloat = any( - [isinstance(item.m, UFloat) for item in scalars if pd.notna(item)] - ) - else: - promote_to_ufloat = False - scalars = [ - item - if isinstance(item, _Quantity) - else quantify_nan(item, promote_to_ufloat) - for item in scalars - ] + promote_to_ufloat = False scalars = [ (item.to(dtype.units).magnitude if hasattr(item, "to") else item) for item in scalars ] elif HAS_UNCERTAINTIES: - promote_to_ufloat = any([isinstance(item, UFloat) for item in scalars]) + # When creating empty arrays, make them large enoguh to hold UFloats in case we need to do so later + if len(scalars) == 0: + promote_to_ufloat = True + else: + promote_to_ufloat = any([isinstance(item, UFloat) for item in scalars]) else: promote_to_ufloat = False + if len(scalars) == 0: + if promote_to_ufloat: + return cls([_ufloat_nan], dtype=dtype, copy=copy)[1:] + return cls(scalars, dtype=dtype, copy=copy) if promote_to_ufloat: scalars = [ item @@ -639,6 +650,10 @@ def factorize( # Complete control over factorization. 
if HAS_UNCERTAINTIES and self._data.dtype.kind == "O": arr, na_value = self._values_for_factorize() + # Unique elements make it easy to partition on na_value if we need to + arr_list = list(dict.fromkeys(arr)) + na_index = len(arr_list) + arr = np.array(arr_list) if not use_na_sentinel: # factorize can now handle differentiating various types of null values. @@ -649,36 +664,51 @@ def factorize( if null_mask.any(): # Don't modify (potentially user-provided) array arr = np.where(null_mask, na_value, arr) - - codes = [-1] * len(self.data) - # Note that item is a local variable provided in the loop below + else: + try: + na_index = arr.tolist().index(na_value) + except ValueError: + # Keep as len(arr) + pass + codes = np.array([-1] * len(self.data), dtype=np.intp) + # Note: item is a local variable provided in the loop below + # Note: partitioning arr on pd.NA means item is never pd.NA vf = np.vectorize( - lambda x: True - if (x_na := pd.isna(x)) * (item_na := pd.isna(item)) - else (x_na == item_na and x == item), + lambda x: False if pd.isna(x) else x == item, otypes=[bool], ) - for code, item in enumerate(arr): + for code, item in enumerate(arr[: na_index + 1]): code_mask = vf(self._data) + # Don't count the NA we have seen codes = np.where(code_mask, code, codes) - - uniques_ea = self._from_factorized(arr, self) + if use_na_sentinel and na_index < len(arr): + for code, item in enumerate(arr[na_index:]): + code_mask = vf(self._data) + # Don't count the NA we have seen + codes = np.where(code_mask, code, codes) + uniques_ea = self._from_factorized( + arr[:na_index].tolist() + arr[na_index + 1 :].tolist(), self + ) + else: + uniques_ea = self._from_factorized(arr, self) return codes, uniques_ea else: - return super(PintArray, self).factorize(self, use_na_sentinel) + return super(PintArray, self).factorize(use_na_sentinel) @classmethod def _from_factorized(cls, values, original): + from pandas._libs.lib import infer_dtype + if infer_dtype(values) != "object": + values = 
pd.array(values, copy=False) return cls(values, dtype=original.dtype) def _values_for_factorize(self): arr = self._data - if HAS_UNCERTAINTIES and arr.dtype.kind == "O": - unique_data = [] - for item in arr: - if item not in unique_data: - unique_data.append(item) - return np.array(unique_data), pd.NA + if arr.dtype.kind == "O": + if HAS_UNCERTAINTIES and arr.size > 0 and isinstance(arr[0], UFloat): + # Canonicalize uncertain NaNs + arr = np.where(unp.isnan(arr), self.dtype.na_value.m, arr) + return np.array(arr, copy=False), self.dtype.na_value.m return arr._values_for_factorize() def value_counts(self, dropna=True): @@ -706,7 +736,7 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data nafilt = data.isna() - na_value = pd.NA + na_value = self.dtype.na_value.m data = data[~nafilt] if HAS_UNCERTAINTIES and data.dtype.kind == "O": unique_data = [] @@ -746,6 +776,68 @@ def unique(self): ) return self._from_sequence(unique(data), dtype=self.dtype) + def shift(self, periods: int = 1, fill_value=None): + """ + Shift values by desired number. + + Newly introduced missing values are filled with + a missing value type consistent with the existing elements + or ``self.dtype.na_value`` if none exist. + + Parameters + ---------- + periods : int, default 1 + The number of periods to shift. Negative values are allowed + for shifting backwards. + + fill_value : object, optional + The scalar value to use for newly introduced missing values. + The default is ``self.dtype.na_value``. + + Returns + ------- + ExtensionArray + Shifted. + + Notes + ----- + If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is + returned. + + If ``periods > len(self)``, then an array of size + len(self) is returned, with all values filled with + ``self.dtype.na_value``. + + For 2-dimensional ExtensionArrays, we are always shifting along axis=0. 
+ """ + if not len(self) or periods == 0: + return self.copy() + + if pd.isna(fill_value): + fill_value = self.dtype.na_value.m + + if HAS_UNCERTAINTIES: + if self.data.dtype.kind == "O": + try: + notna_value = next(i for i in self._data if pd.notna(i)) + if isinstance(notna_value, UFloat): + fill_value = _ufloat_nan + except StopIteration: + pass + elif self.data.dtype.kind == "f": + fill_value = np.nan + + empty = self._from_sequence( + [fill_value] * min(abs(periods), len(self)), dtype=self.dtype + ) + if periods > 0: + a = empty + b = self[:-periods] + else: + a = self[abs(periods) :] + b = empty + return self._concat_same_type([a, b]) + def __contains__(self, item) -> bool: if not isinstance(item, _Quantity): return False @@ -895,7 +987,7 @@ def __array__(self, dtype=None, copy=False): def _to_array_of_quantity(self, copy=False): qtys = [ - self._Q(item, self._dtype.units) if item is not pd.NA else item + self._Q(item, self._dtype.units) if item is not self.dtype.na_value.m else item for item in self._data ] with warnings.catch_warnings(record=True): diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 2071946f..2b413f65 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -65,7 +65,7 @@ class TestIssue21(BaseExtensionTests): def test_offset_concat(self): q_a = ureg.Quantity(np.arange(5) + ufloat(0, 0), ureg.Unit("degC")) q_b = ureg.Quantity(np.arange(6) + ufloat(0, 0), ureg.Unit("degC")) - q_a_ = np.append(q_a, ureg.Quantity(ufloat(np.nan, 0), ureg.Unit("degC"))) + q_a_ = np.append(q_a, ureg.Quantity(pd.NA, ureg.Unit("degC"))) a = pd.Series(PintArray(q_a)) b = pd.Series(PintArray(q_b)) @@ -179,13 +179,10 @@ def test_issue_127(): assert a == b +@pytest.mark.skipif( + not HAS_UNCERTAINTIES, reason="this test depends entirely on HAS_UNCERTAINTIES being True" +) def test_issue_139(): - from pint.compat import HAS_UNCERTAINTIES - - assert HAS_UNCERTAINTIES - from uncertainties 
import ufloat - from uncertainties import unumpy as unp - q1 = 1.234 q2 = 5.678 q_nan = np.nan diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 2b94705c..b7863644 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -142,18 +142,12 @@ def uassert_almost_equal(left, right, **kwargs): assert_almost_equal(left, right, **kwargs) -if HAS_UNCERTAINTIES: - # The following functions all need a lot of work... - # tm.assert_equal = uassert_equal - # tm.assert_series_equal = uassert_series_equal - # tm.assert_frame_equal = uassert_frame_equal - # tm.assert_extension_array_equal = uassert_extension_array_equal - # Fortunately, ufloat (x, 0) == ufloat (x, 0) (zero uncertainty is an exact number) - pass +_use_uncertainties = [True, False] if HAS_UNCERTAINTIES else [False] -# @pytest.fixture(params=[True,False]) -# def HAS_UNCERTAINTIES(): -# return params +@pytest.fixture(params=_use_uncertainties) +def USE_UNCERTAINTIES(request): + """Whether to use uncertainties in Pint-Pandas""" + return request.param @pytest.fixture(params=[True, False]) @@ -177,33 +171,32 @@ def numeric_dtype(request): @pytest.fixture -def data(request, numeric_dtype): - if HAS_UNCERTAINTIES: - numeric_dtype = None +def data(numeric_dtype, USE_UNCERTAINTIES): + if USE_UNCERTAINTIES: d = ( - np.arange(start=1.0, stop=101.0, dtype=numeric_dtype) + ufloat(0, 0) + np.arange(start=1.0, stop=101.0, dtype=None) + ufloat(0, 0) ) * ureg.nm else: - d = np.arange(start=1.0, stop=101.0, dtype=numeric_dtype) * ureg.nm + d = np.arange(start=1.0, stop=101.0, dtype=object if HAS_UNCERTAINTIES else numeric_dtype) * ureg.nm return PintArray.from_1darray_quantity(d) @pytest.fixture -def data_missing(numeric_dtype): +def data_missing(numeric_dtype, USE_UNCERTAINTIES): numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) - if HAS_UNCERTAINTIES: + if USE_UNCERTAINTIES: 
numeric_dtype = None dm = [pd.NA, ufloat(1, 0)] else: - dm = [pd.NA, 1] + dm = [numeric_dtype.na_value, 1] return PintArray.from_1darray_quantity( ureg.Quantity(pd.array(dm, dtype=numeric_dtype), ureg.meter) ) @pytest.fixture -def data_for_twos(numeric_dtype): - if HAS_UNCERTAINTIES: +def data_for_twos(numeric_dtype, USE_UNCERTAINTIES): + if USE_UNCERTAINTIES: numeric_dtype = None x = [ufloat(2.0, 0)] * 100 else: @@ -244,8 +237,8 @@ def sort_by_key(request): @pytest.fixture -def data_for_sorting(numeric_dtype): - if HAS_UNCERTAINTIES: +def data_for_sorting(numeric_dtype, USE_UNCERTAINTIES): + if USE_UNCERTAINTIES: numeric_dtype = None ds = [ufloat(0.3, 0), ufloat(10, 0), ufloat(-50, 0)] else: @@ -256,13 +249,13 @@ def data_for_sorting(numeric_dtype): @pytest.fixture -def data_missing_for_sorting(numeric_dtype): +def data_missing_for_sorting(numeric_dtype, USE_UNCERTAINTIES): numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) - if HAS_UNCERTAINTIES: + if USE_UNCERTAINTIES: numeric_dtype = None dms = [ufloat(4, 0), pd.NA, ufloat(-5, 0)] else: - dms = [4, pd.NA, -5] + dms = [4, numeric_dtype.na_value, -5] return PintArray.from_1darray_quantity( ureg.Quantity(pd.array(dms, dtype=numeric_dtype), ureg.centimeter) ) @@ -280,18 +273,21 @@ def na_value(numeric_dtype): @pytest.fixture -def data_for_grouping(numeric_dtype): +def data_for_grouping(numeric_dtype, USE_UNCERTAINTIES): a = 1.0 b = 2.0**32 + 1 c = 2.0**32 + 10 - _n = pd.NA - if HAS_UNCERTAINTIES: + if USE_UNCERTAINTIES: a = a + ufloat(0, 0) b = b + ufloat(0, 0) c = c + ufloat(0, 0) + _n = _ufloat_nan numeric_dtype = None - else: + elif numeric_dtype: numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) + _n = np.nan + else: + _n = pd.NA return PintArray.from_1darray_quantity( ureg.Quantity(pd.array([b, b, _n, _n, a, a, b, c], dtype=numeric_dtype), ureg.m) ) @@ -340,36 +336,35 @@ def all_compare_operators(request): return request.param -if HAS_UNCERTAINTIES: - # commented functions aren't implemented - 
_all_numeric_reductions = [ - "sum", - "max", - "min", - # "mean", - # "prod", - # "std", - # "var", - # "median", - # "sem", - # "kurt", - # "skew", - ] -else: - # commented functions aren't implemented - _all_numeric_reductions = [ - "sum", - "max", - "min", - "mean", - # "prod", - "std", - "var", - "median", - "sem", - "kurt", - "skew", - ] +# commented functions aren't implemented in uncertainties +_uncertain_numeric_reductions = [ + "sum", + "max", + "min", + # "mean", + # "prod", + # "std", + # "var", + # "median", + # "sem", + # "kurt", + # "skew", +] + +# commented functions aren't implemented in numpy/pandas +_all_numeric_reductions = [ + "sum", + "max", + "min", + "mean", + # "prod", + "std", + "var", + "median", + "sem", + "kurt", + "skew", +] @pytest.fixture(params=_all_numeric_reductions) @@ -379,7 +374,6 @@ def all_numeric_reductions(request): """ return request.param - _all_boolean_reductions = ["all", "any"] @@ -493,6 +487,12 @@ class TestInterface(base.BaseInterfaceTests): class TestMethods(base.BaseMethodsTests): + def test_where_series(self, data, na_value, as_frame, numeric_dtype, USE_UNCERTAINTIES): + if numeric_dtype is np.complex128 and HAS_UNCERTAINTIES: + # Alas, whether or not USE_UNCERTAINTIES + pytest.skip("complex numbers and uncertainties are not compatible due to EA na_value handling (pd.NA vs. 
np.nan)") + super(TestMethods, self).test_where_series(data, na_value, as_frame) + @pytest.mark.skip("All values are valid as magnitudes") def test_insert_invalid(self): pass @@ -523,6 +523,9 @@ def _get_exception(self, data, op_name): return op_name, None @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) + @pytest.mark.skipif( + USE_UNCERTAINTIES, reason="uncertainties package does not implement divmod" + ) def test_divmod_series_array(self, data, data_for_twos): base.BaseArithmeticOpsTests.test_divmod_series_array(self, data, data_for_twos) @@ -548,6 +551,9 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # parameterise this to try divisor not equal to 1 @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) + @pytest.mark.skipif( + USE_UNCERTAINTIES, reason="uncertainties package does not implement divmod" + ) def test_divmod(self, data): s = pd.Series(data) self._check_divmod_op(s, divmod, 1 * ureg.Mm) @@ -613,12 +619,14 @@ def check_reduce(self, s, op_name, skipna): assert result == expected @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_scaling(self, data, all_numeric_reductions, skipna): + def test_reduce_scaling(self, data, all_numeric_reductions, skipna, USE_UNCERTAINTIES): """Make sure that the reductions give the same physical result independent of the unit representation. This verifies that the result units are sensible. """ op_name = all_numeric_reductions + if USE_UNCERTAINTIES and op_name not in _uncertain_numeric_reductions: + pytest.skip(f"{op_name} not implemented in uncertainties") s_nm = pd.Series(data) # Attention: `mm` is fine here, but with `m`, the magnitudes become so small # that pandas discards them in the kurtosis calculation, leading to different results. 
@@ -627,7 +635,10 @@ def test_reduce_scaling(self, data, all_numeric_reductions, skipna): # min/max with empty produce numpy warnings with warnings.catch_warnings(): warnings.simplefilter("ignore", RuntimeWarning) - r_nm = getattr(s_nm, op_name)(skipna=skipna) + try: + r_nm = getattr(s_nm, op_name)(skipna=skipna) + except: + pytest.skip("bye!") r_mm = getattr(s_mm, op_name)(skipna=skipna) if isinstance(r_nm, ureg.Quantity): # convert both results to the same units, then take the magnitude @@ -636,11 +647,22 @@ def test_reduce_scaling(self, data, all_numeric_reductions, skipna): else: v_nm = r_nm v_mm = r_mm - if HAS_UNCERTAINTIES: + if USE_UNCERTAINTIES and isinstance(v_nm, UFloat) and isinstance(v_mm, UFloat): assert np.isclose(v_nm.n, v_mm.n, rtol=1e-3), f"{r_nm} == {r_mm}" else: assert np.isclose(v_nm, v_mm, rtol=1e-3), f"{r_nm} == {r_mm}" + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series(self, data, all_numeric_reductions, skipna, USE_UNCERTAINTIES): + op_name = all_numeric_reductions + if USE_UNCERTAINTIES and op_name not in _uncertain_numeric_reductions: + pytest.skip(f"{op_name} not implemented in uncertainties") + s = pd.Series(data) + + # min/max with empty produce numpy warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + self.check_reduce(s, op_name, skipna) class TestBooleanReduce(base.BaseBooleanReduceTests): def check_reduce(self, s, op_name, skipna): From a0625f86b33603c91957080ab74a73cee764b979 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 5 Jul 2023 17:36:28 -0400 Subject: [PATCH 19/51] Make ruff and black happy Except that ruff cannot be made happy, see https://github.com/astral-sh/ruff/issues/2044 Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 5 ++- .../testsuite/test_pandas_extensiontests.py | 44 ++++++++++++++----- 2 files changed, 36 
insertions(+), 13 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 77a418d9..a445b771 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -698,6 +698,7 @@ def factorize( @classmethod def _from_factorized(cls, values, original): from pandas._libs.lib import infer_dtype + if infer_dtype(values) != "object": values = pd.array(values, copy=False) return cls(values, dtype=original.dtype) @@ -987,7 +988,9 @@ def __array__(self, dtype=None, copy=False): def _to_array_of_quantity(self, copy=False): qtys = [ - self._Q(item, self._dtype.units) if item is not self.dtype.na_value.m else item + self._Q(item, self._dtype.units) + if item is not self.dtype.na_value.m + else item for item in self._data ] with warnings.catch_warnings(record=True): diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index b7863644..82995577 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -21,8 +21,8 @@ from pandas.core import ops from pandas.tests.extension import base from pandas.tests.extension.conftest import ( - as_frame, # noqa: F401 - as_array, # noqa: F401, + as_frame, # noqa: F401 # noqa: F811 + as_array, # noqa: F401 as_series, # noqa: F401 fillna_method, # noqa: F401 groupby_apply_op, # noqa: F401 @@ -144,6 +144,7 @@ def uassert_almost_equal(left, right, **kwargs): _use_uncertainties = [True, False] if HAS_UNCERTAINTIES else [False] + @pytest.fixture(params=_use_uncertainties) def USE_UNCERTAINTIES(request): """Whether to use uncertainties in Pint-Pandas""" @@ -173,11 +174,16 @@ def numeric_dtype(request): @pytest.fixture def data(numeric_dtype, USE_UNCERTAINTIES): if USE_UNCERTAINTIES: - d = ( - np.arange(start=1.0, stop=101.0, dtype=None) + ufloat(0, 0) - ) * ureg.nm + d = (np.arange(start=1.0, stop=101.0, dtype=None) + ufloat(0, 0)) * ureg.nm else: - d = np.arange(start=1.0, stop=101.0, 
dtype=object if HAS_UNCERTAINTIES else numeric_dtype) * ureg.nm + d = ( + np.arange( + start=1.0, + stop=101.0, + dtype=object if HAS_UNCERTAINTIES else numeric_dtype, + ) + * ureg.nm + ) return PintArray.from_1darray_quantity(d) @@ -374,6 +380,7 @@ def all_numeric_reductions(request): """ return request.param + _all_boolean_reductions = ["all", "any"] @@ -487,10 +494,14 @@ class TestInterface(base.BaseInterfaceTests): class TestMethods(base.BaseMethodsTests): - def test_where_series(self, data, na_value, as_frame, numeric_dtype, USE_UNCERTAINTIES): + def test_where_series( + self, data, na_value, as_frame, numeric_dtype + ): if numeric_dtype is np.complex128 and HAS_UNCERTAINTIES: # Alas, whether or not USE_UNCERTAINTIES - pytest.skip("complex numbers and uncertainties are not compatible due to EA na_value handling (pd.NA vs. np.nan)") + pytest.skip( + "complex numbers and uncertainties are not compatible due to EA na_value handling (pd.NA vs. np.nan)" + ) super(TestMethods, self).test_where_series(data, na_value, as_frame) @pytest.mark.skip("All values are valid as magnitudes") @@ -619,7 +630,9 @@ def check_reduce(self, s, op_name, skipna): assert result == expected @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_scaling(self, data, all_numeric_reductions, skipna, USE_UNCERTAINTIES): + def test_reduce_scaling( + self, data, all_numeric_reductions, skipna, USE_UNCERTAINTIES + ): """Make sure that the reductions give the same physical result independent of the unit representation. This verifies that the result units are sensible. 
@@ -637,7 +650,7 @@ def test_reduce_scaling(self, data, all_numeric_reductions, skipna, USE_UNCERTAI warnings.simplefilter("ignore", RuntimeWarning) try: r_nm = getattr(s_nm, op_name)(skipna=skipna) - except: + except AttributeError: pytest.skip("bye!") r_mm = getattr(s_mm, op_name)(skipna=skipna) if isinstance(r_nm, ureg.Quantity): @@ -647,13 +660,19 @@ def test_reduce_scaling(self, data, all_numeric_reductions, skipna, USE_UNCERTAI else: v_nm = r_nm v_mm = r_mm - if USE_UNCERTAINTIES and isinstance(v_nm, UFloat) and isinstance(v_mm, UFloat): + if ( + USE_UNCERTAINTIES + and isinstance(v_nm, UFloat) + and isinstance(v_mm, UFloat) + ): assert np.isclose(v_nm.n, v_mm.n, rtol=1e-3), f"{r_nm} == {r_mm}" else: assert np.isclose(v_nm, v_mm, rtol=1e-3), f"{r_nm} == {r_mm}" @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series(self, data, all_numeric_reductions, skipna, USE_UNCERTAINTIES): + def test_reduce_series( + self, data, all_numeric_reductions, skipna, USE_UNCERTAINTIES + ): op_name = all_numeric_reductions if USE_UNCERTAINTIES and op_name not in _uncertain_numeric_reductions: pytest.skip(f"{op_name} not implemented in uncertainties") @@ -664,6 +683,7 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, USE_UNCERTAIN warnings.simplefilter("ignore", RuntimeWarning) self.check_reduce(s, op_name, skipna) + class TestBooleanReduce(base.BaseBooleanReduceTests): def check_reduce(self, s, op_name, skipna): result = getattr(s, op_name)(skipna=skipna) From 94d35245615fc5a1a1e7e705d818ff9479ef93de Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 5 Jul 2023 19:13:28 -0400 Subject: [PATCH 20/51] Make ruff happy (na_frame fixture import vs F811) Moving the # noqa: F811 to the correct line allows the file to pass ruff. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/testsuite/test_pandas_extensiontests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 82995577..c4ced8dc 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -21,7 +21,7 @@ from pandas.core import ops from pandas.tests.extension import base from pandas.tests.extension.conftest import ( - as_frame, # noqa: F401 # noqa: F811 + as_frame, # noqa: F401 as_array, # noqa: F401 as_series, # noqa: F401 fillna_method, # noqa: F401 @@ -495,7 +495,7 @@ class TestInterface(base.BaseInterfaceTests): class TestMethods(base.BaseMethodsTests): def test_where_series( - self, data, na_value, as_frame, numeric_dtype + self, data, na_value, as_frame, numeric_dtype # noqa: F811 ): if numeric_dtype is np.complex128 and HAS_UNCERTAINTIES: # Alas, whether or not USE_UNCERTAINTIES From a6c4040fd0ec3d050b2c3b7cdba746f87f18143d Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 5 Jul 2023 19:15:59 -0400 Subject: [PATCH 21/51] Make black happy Arrgh! 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/testsuite/test_pandas_extensiontests.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index c4ced8dc..7c927511 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -494,9 +494,7 @@ class TestInterface(base.BaseInterfaceTests): class TestMethods(base.BaseMethodsTests): - def test_where_series( - self, data, na_value, as_frame, numeric_dtype # noqa: F811 - ): + def test_where_series(self, data, na_value, as_frame, numeric_dtype): # noqa: F811 if numeric_dtype is np.complex128 and HAS_UNCERTAINTIES: # Alas, whether or not USE_UNCERTAINTIES pytest.skip( From 1506df2a5e1a5a7af23261af02c88d6b6285320d Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Wed, 5 Jul 2023 19:17:21 -0400 Subject: [PATCH 22/51] Make black happy Missed this. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/testsuite/test_issues.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 2b413f65..c288e713 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -180,7 +180,8 @@ def test_issue_127(): @pytest.mark.skipif( - not HAS_UNCERTAINTIES, reason="this test depends entirely on HAS_UNCERTAINTIES being True" + not HAS_UNCERTAINTIES, + reason="this test depends entirely on HAS_UNCERTAINTIES being True", ) def test_issue_139(): q1 = 1.234 From 772636b481731cc2075b64a2ebc3e9e450135b70 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 23 Jul 2023 16:36:33 -0400 Subject: [PATCH 23/51] Fix DataFrame reduction for upcoming Pandas Issue https://github.com/hgrecco/pint-pandas/issues/174 reports that DataFrame reduction was broken by the latest Pint-Pandas changes. This commit adapts Pint-Pandas to work with upcoming Pandas 2.1, currently scheduled for release Aug 20, 2023. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 19 +++++++++++++------ pint_pandas/testsuite/test_issues.py | 13 +++++++++++++ .../testsuite/test_pandas_extensiontests.py | 6 +++++- 3 files changed, 31 insertions(+), 7 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index a445b771..46057046 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -208,8 +208,8 @@ def __repr__(self): float: pd.Float64Dtype(), np.float64: pd.Float64Dtype(), np.float32: pd.Float32Dtype(), - np.complex128: pd.core.dtypes.dtypes.PandasDtype("complex128"), - np.complex64: pd.core.dtypes.dtypes.PandasDtype("complex64"), + np.complex128: pd.core.dtypes.dtypes.NumpyEADtype("complex128"), + np.complex64: pd.core.dtypes.dtypes.NumpyEADtype("complex64"), # np.float16: pd.Float16Dtype(), } dtypeunmap = {v: k for k, v in dtypemap.items()} @@ -520,7 +520,10 @@ def take(self, indices, allow_fill=False, fill_value=None): # magnitude is in fact an array scalar, which will get rejected by pandas. 
fill_value = fill_value[()] - result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + # Turn off warning that PandasArray is deprecated for ``take`` + result = take(data, indices, fill_value=fill_value, allow_fill=allow_fill) return PintArray(result, dtype=self.dtype) @@ -990,7 +993,7 @@ def _to_array_of_quantity(self, copy=False): qtys = [ self._Q(item, self._dtype.units) if item is not self.dtype.na_value.m - else item + else self.dtype.na_value for item in self._data ] with warnings.catch_warnings(record=True): @@ -1048,7 +1051,7 @@ def searchsorted(self, value, side="left", sorter=None): value = [item.to(self.units).magnitude for item in value] return arr.searchsorted(value, side=side, sorter=sorter) - def _reduce(self, name, **kwds): + def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds): """ Return a scalar result of performing the reduction operation. @@ -1092,14 +1095,18 @@ def _reduce(self, name, **kwds): if isinstance(self._data, ExtensionArray): try: - result = self._data._reduce(name, **kwds) + result = self._data._reduce(name, skipna=skipna, keepdims=keepdims, **kwds) except NotImplementedError: result = functions[name](self.numpy_data, **kwds) if name in {"all", "any", "kurt", "skew"}: return result if name == "var": + if keepdims: + return PintArray(result, f"pint[({self.units})**2]") return self._Q(result, self.units**2) + if keepdims: + return PintArray(result, self.dtype) return self._Q(result, self.units) def _accumulate(self, name: str, *, skipna: bool = True, **kwds): diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index c288e713..384f84f4 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -202,3 +202,16 @@ def test_issue_139(): assert np.all(a_m[0:4] == a_cm[0:4]) for x, y in zip(a_m[4:], a_cm[4:]): assert unp.isnan(x) == unp.isnan(y) + +class 
TestIssue174(BaseExtensionTests): + def test_sum(self): + a = pd.DataFrame([[0, 1, 2], [3, 4, 5]]).astype("pint[m]") + row_sum = a.sum(axis=0) + expected_1 = pd.Series([3, 5, 7], dtype="pint[m]") + + self.assert_series_equal(row_sum, expected_1) + + col_sum = a.sum(axis=1) + expected_2 = pd.Series([3, 12], dtype="pint[m]") + + self.assert_series_equal(col_sum, expected_2) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 7c927511..a638c5bb 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -523,7 +523,7 @@ def _check_divmod_op(self, s, op, other, exc=None): divmod(s, other) def _get_exception(self, data, op_name): - if data.data.dtype == pd.core.dtypes.dtypes.PandasDtype("complex128"): + if data.data.dtype == pd.core.dtypes.dtypes.NumpyEADtype("complex128"): if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]: return op_name, TypeError if op_name in ["__pow__", "__rpow__"]: @@ -627,6 +627,10 @@ def check_reduce(self, s, op_name, skipna): expected = expected_m assert result == expected + @pytest.mark.skip("tests not written yet") + def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): + pass + @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_scaling( self, data, all_numeric_reductions, skipna, USE_UNCERTAINTIES From b759adb501028b35f1704807c7f538de93e3eaa4 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 23 Jul 2023 16:38:50 -0400 Subject: [PATCH 24/51] Make black happy... 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 4 +++- pint_pandas/testsuite/test_issues.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 46057046..6b266399 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -1095,7 +1095,9 @@ def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds): if isinstance(self._data, ExtensionArray): try: - result = self._data._reduce(name, skipna=skipna, keepdims=keepdims, **kwds) + result = self._data._reduce( + name, skipna=skipna, keepdims=keepdims, **kwds + ) except NotImplementedError: result = functions[name](self.numpy_data, **kwds) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 384f84f4..1590006f 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -203,6 +203,7 @@ def test_issue_139(): for x, y in zip(a_m[4:], a_cm[4:]): assert unp.isnan(x) == unp.isnan(y) + class TestIssue174(BaseExtensionTests): def test_sum(self): a = pd.DataFrame([[0, 1, 2], [3, 4, 5]]).astype("pint[m]") From bfb4a996e460b507dcfc587d62c38c08f072ea57 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 23 Jul 2023 20:45:00 -0400 Subject: [PATCH 25/51] Update pint_array.py Remove print statement mistakenly copied into _accumulate code that was never (properly) tested. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 6b266399..d3e625d5 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -1125,7 +1125,6 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwds): result = self._data._accumulate(name, **kwds) except NotImplementedError: result = functions[name](self.numpy_data, **kwds) - print(result) return self._from_sequence(result, self.units) From 9d169f1cdf005efad86d0e521230b57decb3d56d Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 28 Jul 2023 09:17:56 -0400 Subject: [PATCH 26/51] Switch to np.nan as NaN value Using np.nan actually simplifies the integration of uncertainties into pint_pandas. Recent changes in Pandas 2.1 are now sufficient to pass all Pint-Pandas tests. Also added a fixture validating that we can use either `np.nan` or `ufloat(np.nan, 0)` as NA value (the latter being something that should naturally arise in the course of uncertainties calculations, not just missing data from the start). 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 165 +++--------------- pint_pandas/testsuite/test_issues.py | 2 +- .../testsuite/test_pandas_extensiontests.py | 67 ++++--- 3 files changed, 68 insertions(+), 166 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index d3e625d5..a17e48fd 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -148,10 +148,7 @@ def name(self): @property def na_value(self): - if HAS_UNCERTAINTIES: - return self.ureg.Quantity(pd.NA, self.units) - else: - return self.ureg.Quantity(np.nan, self.units) + return self.ureg.Quantity(np.nan, self.units) def __hash__(self): # make myself hashable @@ -413,9 +410,9 @@ def isna(self): if len(self._data) == 0: # True or False doesn't matter--we just need the value for the type return np.full((0), True) - return self._data.map( - lambda x: pd.isna(x) or (isinstance(x, UFloat) and unp.isnan(x)) - ) + # NumpyEADtype('object') doesn't know about UFloats... + if is_object_dtype(self._data.dtype): + return self._data.map(lambda x: x is pd.NA or unp.isnan(x)) return self._data.isna() def astype(self, dtype, copy=True): @@ -565,38 +562,19 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): raise ValueError( "Cannot infer dtype. 
No dtype specified and empty array" ) - if dtype is None and not isinstance(master_scalar, _Quantity): - raise ValueError("No dtype specified and not a sequence of quantities") - if dtype is None and isinstance(master_scalar, _Quantity): + if dtype is None: + if not isinstance(master_scalar, _Quantity): + raise ValueError("No dtype specified and not a sequence of quantities") dtype = PintType(master_scalar.units) if isinstance(master_scalar, _Quantity): - promote_to_ufloat = False scalars = [ (item.to(dtype.units).magnitude if hasattr(item, "to") else item) for item in scalars ] - elif HAS_UNCERTAINTIES: - # When creating empty arrays, make them large enoguh to hold UFloats in case we need to do so later - if len(scalars) == 0: - promote_to_ufloat = True - else: - promote_to_ufloat = any([isinstance(item, UFloat) for item in scalars]) - else: - promote_to_ufloat = False - if len(scalars) == 0: - if promote_to_ufloat: - return cls([_ufloat_nan], dtype=dtype, copy=copy)[1:] - return cls(scalars, dtype=dtype, copy=copy) - if promote_to_ufloat: - scalars = [ - item - if isinstance(item, UFloat) - else _ufloat_nan - if pd.isna(item) - else ufloat(item, 0) - for item in scalars - ] + # When creating empty arrays, make them large enoguh to hold UFloats in case we need to do so later + if HAS_UNCERTAINTIES and len(scalars) == 0: + return cls([_ufloat_nan], dtype=dtype, copy=copy)[1:] return cls(scalars, dtype=dtype, copy=copy) @classmethod @@ -605,99 +583,6 @@ def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False): dtype = PintType.construct_from_quantity_string(scalars[0]) return cls._from_sequence([dtype.ureg.Quantity(x) for x in scalars]) - def factorize( - self, - use_na_sentinel: bool = True, - ) -> tuple[np.ndarray, ExtensionArray]: - """ - Encode the extension array as an enumerated type. - - Parameters - ---------- - use_na_sentinel : bool, default True - If True, the sentinel -1 will be used for NaN values. 
If False, - NaN values will be encoded as non-negative integers and will not drop the - NaN from the uniques of the values. - - .. versionadded:: 1.5.0 - - Returns - ------- - codes : ndarray - An integer NumPy array that's an indexer into the original - ExtensionArray. - uniques : ExtensionArray - An ExtensionArray containing the unique values of `self`. - - .. note:: - - uniques will *not* contain an entry for the NA value of - the ExtensionArray if there are any missing values present - in `self`. - - See Also - -------- - factorize : Top-level factorize method that dispatches here. - - Notes - ----- - :meth:`pandas.factorize` offers a `sort` keyword as well. - """ - # Implementer note: There are two ways to override the behavior of - # pandas.factorize - # 1. _values_for_factorize and _from_factorize. - # Specify the values passed to pandas' internal factorization - # routines, and how to convert from those values back to the - # original ExtensionArray. - # 2. ExtensionArray.factorize. - # Complete control over factorization. - if HAS_UNCERTAINTIES and self._data.dtype.kind == "O": - arr, na_value = self._values_for_factorize() - # Unique elements make it easy to partition on na_value if we need to - arr_list = list(dict.fromkeys(arr)) - na_index = len(arr_list) - arr = np.array(arr_list) - - if not use_na_sentinel: - # factorize can now handle differentiating various types of null values. - # These can only occur when the array has object dtype. - # However, for backwards compatibility we only use the null for the - # provided dtype. This may be revisited in the future, see GH#48476. 
- null_mask = self.isna(arr) - if null_mask.any(): - # Don't modify (potentially user-provided) array - arr = np.where(null_mask, na_value, arr) - else: - try: - na_index = arr.tolist().index(na_value) - except ValueError: - # Keep as len(arr) - pass - codes = np.array([-1] * len(self.data), dtype=np.intp) - # Note: item is a local variable provided in the loop below - # Note: partitioning arr on pd.NA means item is never pd.NA - vf = np.vectorize( - lambda x: False if pd.isna(x) else x == item, - otypes=[bool], - ) - for code, item in enumerate(arr[: na_index + 1]): - code_mask = vf(self._data) - # Don't count the NA we have seen - codes = np.where(code_mask, code, codes) - if use_na_sentinel and na_index < len(arr): - for code, item in enumerate(arr[na_index:]): - code_mask = vf(self._data) - # Don't count the NA we have seen - codes = np.where(code_mask, code, codes) - uniques_ea = self._from_factorized( - arr[:na_index].tolist() + arr[na_index + 1 :].tolist(), self - ) - else: - uniques_ea = self._from_factorized(arr, self) - return codes, uniques_ea - else: - return super(PintArray, self).factorize(use_na_sentinel) - @classmethod def _from_factorized(cls, values, original): from pandas._libs.lib import infer_dtype @@ -707,11 +592,17 @@ def _from_factorized(cls, values, original): return cls(values, dtype=original.dtype) def _values_for_factorize(self): + # factorize can now handle differentiating various types of null values. + # These can only occur when the array has object dtype. + # However, for backwards compatibility we only use the null for the + # provided dtype. This may be revisited in the future, see GH#48476. 
arr = self._data if arr.dtype.kind == "O": - if HAS_UNCERTAINTIES and arr.size > 0 and isinstance(arr[0], UFloat): - # Canonicalize uncertain NaNs - arr = np.where(unp.isnan(arr), self.dtype.na_value.m, arr) + if HAS_UNCERTAINTIES and arr.size > 0: + # Canonicalize uncertain NaNs and pd.NA to np.nan + arr = arr.map( + lambda x: self.dtype.na_value.m if x is pd.NA or unp.isnan(x) else x + ) return np.array(arr, copy=False), self.dtype.na_value.m return arr._values_for_factorize() @@ -739,7 +630,7 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data - nafilt = data.isna() + nafilt = data.map(lambda x: x is pd.NA or unp.isnan(x)) na_value = self.dtype.na_value.m data = data[~nafilt] if HAS_UNCERTAINTIES and data.dtype.kind == "O": @@ -756,7 +647,7 @@ def value_counts(self, dropna=True): if not dropna: index.append(na_value) - array.append(nafilt.sum()) + array.append(len(nafilt)) return Series(array, index=index) @@ -770,9 +661,12 @@ def unique(self): from pandas import unique data = self._data + na_value = self.dtype.na_value.m if HAS_UNCERTAINTIES and data.dtype.kind == "O": unique_data = [] for item in data: + if item is pd.NA or unp.isnan(item): + item = na_value if item not in unique_data: unique_data.append(item) return self._from_sequence( @@ -820,17 +714,6 @@ def shift(self, periods: int = 1, fill_value=None): if pd.isna(fill_value): fill_value = self.dtype.na_value.m - if HAS_UNCERTAINTIES: - if self.data.dtype.kind == "O": - try: - notna_value = next(i for i in self._data if pd.notna(i)) - if isinstance(notna_value, UFloat): - fill_value = _ufloat_nan - except StopIteration: - pass - elif self.data.dtype.kind == "f": - fill_value = np.nan - empty = self._from_sequence( [fill_value] * min(abs(periods), len(self)), dtype=self.dtype ) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 1590006f..f795dfd0 100644 --- a/pint_pandas/testsuite/test_issues.py +++ 
b/pint_pandas/testsuite/test_issues.py @@ -65,7 +65,7 @@ class TestIssue21(BaseExtensionTests): def test_offset_concat(self): q_a = ureg.Quantity(np.arange(5) + ufloat(0, 0), ureg.Unit("degC")) q_b = ureg.Quantity(np.arange(6) + ufloat(0, 0), ureg.Unit("degC")) - q_a_ = np.append(q_a, ureg.Quantity(pd.NA, ureg.Unit("degC"))) + q_a_ = np.append(q_a, ureg.Quantity(np.nan, ureg.Unit("degC"))) a = pd.Series(PintArray(q_a)) b = pd.Series(PintArray(q_b)) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index a638c5bb..5dbe94de 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -143,6 +143,7 @@ def uassert_almost_equal(left, right, **kwargs): _use_uncertainties = [True, False] if HAS_UNCERTAINTIES else [False] +_use_ufloat_nan = [True, False] if HAS_UNCERTAINTIES else [False] @pytest.fixture(params=_use_uncertainties) @@ -151,6 +152,12 @@ def USE_UNCERTAINTIES(request): return request.param +@pytest.fixture(params=_use_ufloat_nan) +def USE_UFLOAT_NAN(request): + """Whether to uncertainties using np.nan or ufloat(np.nan,0) in Pint-Pandas""" + return request.param + + @pytest.fixture(params=[True, False]) def box_in_series(request): """Whether to box the data in a Series""" @@ -163,7 +170,9 @@ def dtype(): _base_numeric_dtypes = [float, int] -_all_numeric_dtypes = _base_numeric_dtypes + [np.complex128] +_all_numeric_dtypes = ( + _base_numeric_dtypes + [] if HAS_UNCERTAINTIES else [np.complex128] +) @pytest.fixture(params=_all_numeric_dtypes) @@ -180,7 +189,7 @@ def data(numeric_dtype, USE_UNCERTAINTIES): np.arange( start=1.0, stop=101.0, - dtype=object if HAS_UNCERTAINTIES else numeric_dtype, + dtype=numeric_dtype, ) * ureg.nm ) @@ -188,11 +197,14 @@ def data(numeric_dtype, USE_UNCERTAINTIES): @pytest.fixture -def data_missing(numeric_dtype, USE_UNCERTAINTIES): +def data_missing(numeric_dtype, USE_UNCERTAINTIES, USE_UFLOAT_NAN): 
numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) if USE_UNCERTAINTIES: numeric_dtype = None - dm = [pd.NA, ufloat(1, 0)] + if USE_UFLOAT_NAN: + dm = [_ufloat_nan, ufloat(1, 0)] + else: + dm = [np.nan, ufloat(1, 0)] else: dm = [numeric_dtype.na_value, 1] return PintArray.from_1darray_quantity( @@ -259,7 +271,10 @@ def data_missing_for_sorting(numeric_dtype, USE_UNCERTAINTIES): numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) if USE_UNCERTAINTIES: numeric_dtype = None - dms = [ufloat(4, 0), pd.NA, ufloat(-5, 0)] + if USE_UFLOAT_NAN: + dms = [ufloat(4, 0), _ufloat_nan, ufloat(-5, 0)] + else: + dms = [ufloat(4, 0), np.nan, ufloat(-5, 0)] else: dms = [4, numeric_dtype.na_value, -5] return PintArray.from_1darray_quantity( @@ -268,8 +283,14 @@ def data_missing_for_sorting(numeric_dtype, USE_UNCERTAINTIES): @pytest.fixture -def na_cmp(): +def na_cmp(USE_UNCERTAINTIES): """Binary operator for comparing NA values.""" + if USE_UNCERTAINTIES: + return lambda x, y: ( + bool(pd.isna(x.m)) + or (isinstance(x.m, UFloat) and unp.isnan(x.m)) & bool(pd.isna(y.m)) + or (isinstance(y.m, UFloat) and unp.isnan(y.m)) + ) return lambda x, y: bool(pd.isna(x.magnitude)) & bool(pd.isna(y.magnitude)) @@ -279,7 +300,7 @@ def na_value(numeric_dtype): @pytest.fixture -def data_for_grouping(numeric_dtype, USE_UNCERTAINTIES): +def data_for_grouping(numeric_dtype, USE_UNCERTAINTIES, USE_UFLOAT_NAN): a = 1.0 b = 2.0**32 + 1 c = 2.0**32 + 10 @@ -287,7 +308,10 @@ def data_for_grouping(numeric_dtype, USE_UNCERTAINTIES): a = a + ufloat(0, 0) b = b + ufloat(0, 0) c = c + ufloat(0, 0) - _n = _ufloat_nan + if USE_UFLOAT_NAN: + _n = _ufloat_nan + else: + _n = np.nan numeric_dtype = None elif numeric_dtype: numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) @@ -490,18 +514,15 @@ def test_groupby_extension_no_sort(self, data_for_grouping): class TestInterface(base.BaseInterfaceTests): - pass - - -class TestMethods(base.BaseMethodsTests): - def test_where_series(self, data, na_value, 
as_frame, numeric_dtype): # noqa: F811 - if numeric_dtype is np.complex128 and HAS_UNCERTAINTIES: - # Alas, whether or not USE_UNCERTAINTIES + def test_contains(self, data, data_missing, USE_UFLOAT_NAN): + if USE_UFLOAT_NAN: pytest.skip( - "complex numbers and uncertainties are not compatible due to EA na_value handling (pd.NA vs. np.nan)" + "any NaN-like other than data.dtype.na_value should fail (see GH-37867); also see BaseInterfaceTests in pandas/tests/extension/base/interface.py" ) - super(TestMethods, self).test_where_series(data, na_value, as_frame) + super().test_contains(data, data_missing) + +class TestMethods(base.BaseMethodsTests): @pytest.mark.skip("All values are valid as magnitudes") def test_insert_invalid(self): pass @@ -532,10 +553,9 @@ def _get_exception(self, data, op_name): return op_name, None @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) - @pytest.mark.skipif( - USE_UNCERTAINTIES, reason="uncertainties package does not implement divmod" - ) - def test_divmod_series_array(self, data, data_for_twos): + def test_divmod_series_array(self, data, data_for_twos, USE_UNCERTAINTIES): + if USE_UNCERTAINTIES: + pytest.skip(reason="uncertainties does not implement divmod") base.BaseArithmeticOpsTests.test_divmod_series_array(self, data, data_for_twos) def test_arith_series_with_scalar(self, data, all_arithmetic_operators): @@ -560,10 +580,9 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # parameterise this to try divisor not equal to 1 @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) - @pytest.mark.skipif( - USE_UNCERTAINTIES, reason="uncertainties package does not implement divmod" - ) def test_divmod(self, data): + if USE_UNCERTAINTIES: + pytest.skip(reason="uncertainties does not implement divmod") s = pd.Series(data) self._check_divmod_op(s, divmod, 1 * ureg.Mm) self._check_divmod_op(1 * ureg.Mm, ops.rdivmod, s) From 289c604e19e8f65a39fb117fe3d33a07de561983 Mon 
Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sat, 5 Aug 2023 23:42:42 -0400 Subject: [PATCH 27/51] Updated to Pandas 2.1.0.dev0+1401.gb0bfd0effd The internal Pandas 2.1 interfaces continue to move and change. These changes adapt to those changes, adjusting and removing some redundant subclass methods. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 55 +-------- pint_pandas/testsuite/test_issues.py | 11 +- .../testsuite/test_pandas_extensiontests.py | 104 ++++++++---------- 3 files changed, 54 insertions(+), 116 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index a17e48fd..e75cb5c5 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd import pint -from pandas import DataFrame, Series +from pandas import DataFrame, Series, Index from pandas.api.extensions import ( ExtensionArray, ExtensionDtype, @@ -674,57 +674,6 @@ def unique(self): ) return self._from_sequence(unique(data), dtype=self.dtype) - def shift(self, periods: int = 1, fill_value=None): - """ - Shift values by desired number. - - Newly introduced missing values are filled with - a missing value type consistent with the existing elements - or ``self.dtype.na_value`` if none exist. - - Parameters - ---------- - periods : int, default 1 - The number of periods to shift. Negative values are allowed - for shifting backwards. - - fill_value : object, optional - The scalar value to use for newly introduced missing values. - The default is ``self.dtype.na_value``. - - Returns - ------- - ExtensionArray - Shifted. - - Notes - ----- - If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is - returned. - - If ``periods > len(self)``, then an array of size - len(self) is returned, with all values filled with - ``self.dtype.na_value``. 
- - For 2-dimensional ExtensionArrays, we are always shifting along axis=0. - """ - if not len(self) or periods == 0: - return self.copy() - - if pd.isna(fill_value): - fill_value = self.dtype.na_value.m - - empty = self._from_sequence( - [fill_value] * min(abs(periods), len(self)), dtype=self.dtype - ) - if periods > 0: - a = empty - b = self[:-periods] - else: - a = self[abs(periods) :] - b = empty - return self._concat_same_type([a, b]) - def __contains__(self, item) -> bool: if not isinstance(item, _Quantity): return False @@ -825,7 +774,7 @@ def convert_values(param): else: return param - if isinstance(other, (Series, DataFrame)): + if isinstance(other, (Series, DataFrame, Index)): return NotImplemented lvalues = self.quantity validate_length(lvalues, other) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index f795dfd0..c75718e8 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd +import pandas._testing as tm import pytest import pint from pandas.tests.extension.base.base import BaseExtensionTests @@ -51,7 +52,7 @@ def test_force_ndarray_like(self): expected = pd.DataFrame( {0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]" ) - self.assert_equal(result, expected) + tm.assert_equal(result, expected) finally: # restore registry @@ -74,7 +75,7 @@ def test_offset_concat(self): expected = pd.DataFrame( {0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]" ) - self.assert_equal(result, expected) + tm.assert_equal(result, expected) # issue #141 print(PintArray(q_a)) @@ -90,7 +91,7 @@ def test_assignment_add_empty(self): result = pd.Series(data) result[[]] += data[0] expected = pd.Series(data) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) class TestIssue80: @@ -210,9 +211,9 @@ def test_sum(self): row_sum = a.sum(axis=0) expected_1 = pd.Series([3, 5, 7], dtype="pint[m]") - 
self.assert_series_equal(row_sum, expected_1) + tm.assert_series_equal(row_sum, expected_1) col_sum = a.sum(axis=1) expected_2 = pd.Series([3, 12], dtype="pint[m]") - self.assert_series_equal(col_sum, expected_2) + tm.assert_series_equal(col_sum, expected_2) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 5dbe94de..d7060a40 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -471,7 +471,7 @@ def test_groupby_apply_identity(self, data_for_grouping): index=pd.Index([1, 2, 3, 4], name="A"), name="B", ) - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) @pytest.mark.xfail(run=True, reason="assert_frame_equal issue") @pytest.mark.parametrize("as_index", [True, False]) @@ -483,10 +483,10 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping): if as_index: index = pd.Index._with_infer(uniques, name="B") expected = pd.Series([3.0, 1.0, 4.0], index=index, name="A") - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) else: expected = pd.DataFrame({"B": uniques, "A": [3.0, 1.0, 4.0]}) - self.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected) def test_in_numeric_groupby(self, data_for_grouping): df = pd.DataFrame( @@ -510,7 +510,7 @@ def test_groupby_extension_no_sort(self, data_for_grouping): index = pd.Index._with_infer(index, name="B") expected = pd.Series([1.0, 3.0, 4.0], index=index, name="A") - self.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected) class TestInterface(base.BaseInterfaceTests): @@ -529,63 +529,43 @@ def test_insert_invalid(self): class TestArithmeticOps(base.BaseArithmeticOpsTests): - def _check_divmod_op(self, s, op, other, exc=None): - # divmod has multiple return values, so check separately - if exc is None: - result_div, result_mod = op(s, other) - if op is 
divmod: - expected_div, expected_mod = s // other, s % other - else: - expected_div, expected_mod = other // s, other % s - self.assert_series_equal(result_div, expected_div) - self.assert_series_equal(result_mod, expected_mod) - else: - with pytest.raises(exc): - divmod(s, other) - - def _get_exception(self, data, op_name): - if data.data.dtype == pd.core.dtypes.dtypes.NumpyEADtype("complex128"): - if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]: - return op_name, TypeError + divmod_exc = None + series_scalar_exc = None + frame_scalar_exc = None + series_array_exc = None + + def _get_expected_exception( + self, op_name: str, obj, other + ) -> type[Exception] | None: if op_name in ["__pow__", "__rpow__"]: - return op_name, DimensionalityError + return DimensionalityError + complex128_dtype = pd.core.dtypes.dtypes.NumpyEADtype("complex128") + if ((isinstance(obj, pd.Series) and obj.dtype == complex128_dtype) + or (isinstance(obj, pd.DataFrame) and any([dtype == complex128_dtype for dtype in obj.dtypes])) + or (isinstance(other, pd.Series) and other.dtype == complex128_dtype) + or (isinstance(other, pd.DataFrame) and any([dtype == complex128_dtype for dtype in other.dtypes]))): + if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]: + return TypeError + return super()._get_expected_exception(op_name, obj, other) - return op_name, None + # With Pint 0.21, series and scalar need to have compatible units for + # the arithmetic to work + # series & scalar + # parameterise this to try divisor not equal to 1 Mm @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) - def test_divmod_series_array(self, data, data_for_twos, USE_UNCERTAINTIES): + def test_divmod(self, data, USE_UNCERTAINTIES): if USE_UNCERTAINTIES: pytest.skip(reason="uncertainties does not implement divmod") - base.BaseArithmeticOpsTests.test_divmod_series_array(self, data, data_for_twos) - - def test_arith_series_with_scalar(self, data, 
all_arithmetic_operators): - # With Pint 0.21, series and scalar need to have compatible units for - # the arithmetic to work - # series & scalar - op_name, exc = self._get_exception(data, all_arithmetic_operators) s = pd.Series(data) - self.check_opname(s, op_name, s.iloc[0], exc=exc) - - def test_arith_series_with_array(self, data, all_arithmetic_operators): - # ndarray & other series - op_name, exc = self._get_exception(data, all_arithmetic_operators) - ser = pd.Series(data) - self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser)), exc) - - def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): - # frame & scalar - op_name, exc = self._get_exception(data, all_arithmetic_operators) - df = pd.DataFrame({"A": data}) - self.check_opname(df, op_name, data[0], exc=exc) + self._check_divmod_op(s, divmod, 1 * ureg.Mm) + self._check_divmod_op(1 * ureg.Mm, ops.rdivmod, s) - # parameterise this to try divisor not equal to 1 @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) - def test_divmod(self, data): + def test_divmod_series_array(self, data, data_for_twos, USE_UNCERTAINTIES): if USE_UNCERTAINTIES: pytest.skip(reason="uncertainties does not implement divmod") - s = pd.Series(data) - self._check_divmod_op(s, divmod, 1 * ureg.Mm) - self._check_divmod_op(1 * ureg.Mm, ops.rdivmod, s) + super().test_divmod_series_array(data, data_for_twos) class TestComparisonOps(base.BaseComparisonOpsTests): @@ -629,6 +609,13 @@ class TestMissing(base.BaseMissingTests): class TestNumericReduce(base.BaseNumericReduceTests): + def _supports_reduction(self, obj, op_name: str) -> bool: + # Specify if we expect this reduction to succeed. 
+ if USE_UNCERTAINTIES and op_name in _all_numeric_reductions and op_name not in _uncertain_numeric_reductions: + if any([isinstance(v, UFloat) for v in obj.values.quantity._magnitude]): + pytest.skip(f"reduction {op_name} not implemented in uncertainties") + return super()._supports_reduction(obj, op_name) + def check_reduce(self, s, op_name, skipna): result = getattr(s, op_name)(skipna=skipna) expected_m = getattr(pd.Series(s.values.quantity._magnitude), op_name)( @@ -659,8 +646,9 @@ def test_reduce_scaling( This verifies that the result units are sensible. """ op_name = all_numeric_reductions - if USE_UNCERTAINTIES and op_name not in _uncertain_numeric_reductions: - pytest.skip(f"{op_name} not implemented in uncertainties") + if USE_UNCERTAINTIES and op_name in _all_numeric_reductions and op_name not in _uncertain_numeric_reductions: + if any([isinstance(v, UFloat) for v in data.quantity._magnitude]): + pytest.skip(f"reduction {op_name} not implemented in uncertainties") s_nm = pd.Series(data) # Attention: `mm` is fine here, but with `m`, the magnitudes become so small # that pandas discards them in the kurtosis calculation, leading to different results. 
@@ -695,8 +683,9 @@ def test_reduce_series( self, data, all_numeric_reductions, skipna, USE_UNCERTAINTIES ): op_name = all_numeric_reductions - if USE_UNCERTAINTIES and op_name not in _uncertain_numeric_reductions: - pytest.skip(f"{op_name} not implemented in uncertainties") + if USE_UNCERTAINTIES and op_name in _all_numeric_reductions and op_name not in _uncertain_numeric_reductions: + if any([isinstance(v, UFloat) for v in data.quantity._magnitude]): + pytest.skip(f"reduction {op_name} not implemented in uncertainties") s = pd.Series(data) # min/max with empty produce numpy warnings @@ -748,9 +737,8 @@ def test_setitem_scalar_key_sequence_raise(self, data): class TestAccumulate(base.BaseAccumulateTests): - @pytest.mark.parametrize("skipna", [True, False]) - def test_accumulate_series_raises(self, data, all_numeric_accumulations, skipna): - pass + def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool: + return True def check_accumulate(self, s, op_name, skipna): if op_name == "cumprod": @@ -761,4 +749,4 @@ def check_accumulate(self, s, op_name, skipna): s_unitless = pd.Series(s.values.data) expected = getattr(s_unitless, op_name)(skipna=skipna) expected = pd.Series(expected, dtype=s.dtype) - self.assert_series_equal(result, expected, check_dtype=False) + tm.assert_series_equal(result, expected, check_dtype=False) From 602a804bf9a00da63d3b7ba55554fa6aa9ccc924 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Fri, 11 Aug 2023 23:33:50 -0400 Subject: [PATCH 28/51] Keep up with pandas21_compat changes Merge in important bugfixes for `na_value` handling. And keep `black` happy. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 10 +++--- .../testsuite/test_pandas_extensiontests.py | 32 +++++++++++++++---- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index e75cb5c5..6695b7d4 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -601,9 +601,9 @@ def _values_for_factorize(self): if HAS_UNCERTAINTIES and arr.size > 0: # Canonicalize uncertain NaNs and pd.NA to np.nan arr = arr.map( - lambda x: self.dtype.na_value.m if x is pd.NA or unp.isnan(x) else x + lambda x: self.dtype.na_value if x is pd.NA or unp.isnan(x) else x ) - return np.array(arr, copy=False), self.dtype.na_value.m + return np.array(arr, copy=False), self.dtype.na_value return arr._values_for_factorize() def value_counts(self, dropna=True): @@ -631,7 +631,7 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data nafilt = data.map(lambda x: x is pd.NA or unp.isnan(x)) - na_value = self.dtype.na_value.m + na_value = self.dtype.na_value data = data[~nafilt] if HAS_UNCERTAINTIES and data.dtype.kind == "O": unique_data = [] @@ -661,7 +661,7 @@ def unique(self): from pandas import unique data = self._data - na_value = self.dtype.na_value.m + na_value = self.dtype.na_value if HAS_UNCERTAINTIES and data.dtype.kind == "O": unique_data = [] for item in data: @@ -824,7 +824,7 @@ def __array__(self, dtype=None, copy=False): def _to_array_of_quantity(self, copy=False): qtys = [ self._Q(item, self._dtype.units) - if item is not self.dtype.na_value.m + if item is not self.dtype.na_value else self.dtype.na_value for item in self._data ] diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index d7060a40..672de1cf 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ 
b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -540,10 +540,18 @@ def _get_expected_exception( if op_name in ["__pow__", "__rpow__"]: return DimensionalityError complex128_dtype = pd.core.dtypes.dtypes.NumpyEADtype("complex128") - if ((isinstance(obj, pd.Series) and obj.dtype == complex128_dtype) - or (isinstance(obj, pd.DataFrame) and any([dtype == complex128_dtype for dtype in obj.dtypes])) + if ( + (isinstance(obj, pd.Series) and obj.dtype == complex128_dtype) + or ( + isinstance(obj, pd.DataFrame) + and any([dtype == complex128_dtype for dtype in obj.dtypes]) + ) or (isinstance(other, pd.Series) and other.dtype == complex128_dtype) - or (isinstance(other, pd.DataFrame) and any([dtype == complex128_dtype for dtype in other.dtypes]))): + or ( + isinstance(other, pd.DataFrame) + and any([dtype == complex128_dtype for dtype in other.dtypes]) + ) + ): if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]: return TypeError return super()._get_expected_exception(op_name, obj, other) @@ -611,7 +619,11 @@ class TestMissing(base.BaseMissingTests): class TestNumericReduce(base.BaseNumericReduceTests): def _supports_reduction(self, obj, op_name: str) -> bool: # Specify if we expect this reduction to succeed. - if USE_UNCERTAINTIES and op_name in _all_numeric_reductions and op_name not in _uncertain_numeric_reductions: + if ( + USE_UNCERTAINTIES + and op_name in _all_numeric_reductions + and op_name not in _uncertain_numeric_reductions + ): if any([isinstance(v, UFloat) for v in obj.values.quantity._magnitude]): pytest.skip(f"reduction {op_name} not implemented in uncertainties") return super()._supports_reduction(obj, op_name) @@ -646,7 +658,11 @@ def test_reduce_scaling( This verifies that the result units are sensible. 
""" op_name = all_numeric_reductions - if USE_UNCERTAINTIES and op_name in _all_numeric_reductions and op_name not in _uncertain_numeric_reductions: + if ( + USE_UNCERTAINTIES + and op_name in _all_numeric_reductions + and op_name not in _uncertain_numeric_reductions + ): if any([isinstance(v, UFloat) for v in data.quantity._magnitude]): pytest.skip(f"reduction {op_name} not implemented in uncertainties") s_nm = pd.Series(data) @@ -683,7 +699,11 @@ def test_reduce_series( self, data, all_numeric_reductions, skipna, USE_UNCERTAINTIES ): op_name = all_numeric_reductions - if USE_UNCERTAINTIES and op_name in _all_numeric_reductions and op_name not in _uncertain_numeric_reductions: + if ( + USE_UNCERTAINTIES + and op_name in _all_numeric_reductions + and op_name not in _uncertain_numeric_reductions + ): if any([isinstance(v, UFloat) for v in data.quantity._magnitude]): pytest.skip(f"reduction {op_name} not implemented in uncertainties") s = pd.Series(data) From f0c7e64c772983e7df4f67d64c8ce6de3f94f71c Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Mon, 14 Aug 2023 21:20:31 -0400 Subject: [PATCH 29/51] Cleanups after merge First draft of 100% working test cases after (re)merging with changes extracted from these changes as part of PR #196. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/pint_array.py | 46 +++++++++--- .../testsuite/test_pandas_extensiontests.py | 73 +++++++++---------- 2 files changed, 68 insertions(+), 51 deletions(-) diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index eaf7249d..e350bb4b 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -33,7 +33,7 @@ # from pint.facets.plain.unit import PlainUnit as _Unit if HAS_UNCERTAINTIES: - from uncertainties import UFloat, ufloat + from uncertainties import ufloat, UFloat from uncertainties import unumpy as unp _ufloat_nan = ufloat(np.nan, 0) @@ -330,12 +330,6 @@ def __setitem__(self, key, value): # doing nothing here seems to be ok return - master_scalar = None - try: - master_scalar = next(i for i in self._data if pd.notna(i)) - except StopIteration: - pass - if isinstance(value, _Quantity): value = value.to(self.units).magnitude elif is_list_like(value) and len(value) > 0: @@ -347,6 +341,36 @@ def __setitem__(self, key, value): key = check_array_indexer(self, key) # Filter out invalid values for our array type(s) try: + if HAS_UNCERTAINTIES and is_object_dtype(self._data): + from pandas.api.types import is_scalar, is_numeric_dtype + + def value_to_ufloat(value): + if pd.isna(value) or isinstance(value, UFloat): + return value + if is_numeric_dtype(type(value)): + return ufloat(value, 0) + raise ValueError + + try: + any_ufloats = next( + True for i in self._data if isinstance(i, UFloat) + ) + if any_ufloats: + if is_scalar(key): + if is_list_like(value): + # cannot do many:1 setitem + raise ValueError + # 1:1 setitem + value = value_to_ufloat(value) + elif is_list_like(value): + # many:many setitem + value = [value_to_ufloat(v) for v in value] + else: + # broadcast 1:many + value = value_to_ufloat(value) + except StopIteration: + # If array is full of nothingness, we can put anything inside it + pass self._data[key] = value except IndexError 
as e: msg = "Mask is wrong length. {}".format(e) @@ -593,9 +617,7 @@ def _values_for_factorize(self): if arr.dtype.kind == "O": if HAS_UNCERTAINTIES and arr.size > 0: # Canonicalize uncertain NaNs and pd.NA to np.nan - arr = arr.map( - lambda x: self.dtype.na_value if x is pd.NA or unp.isnan(x) else x - ) + arr = arr.map(lambda x: np.nan if x is pd.NA or unp.isnan(x) else x) return np.array(arr, copy=False), self.dtype.na_value return arr._values_for_factorize() @@ -627,7 +649,7 @@ def value_counts(self, dropna=True): nafilt = data.map(lambda x: x is pd.NA or unp.isnan(x)) else: nafilt = pd.isna(data) - na_value = self.dtype.na_value + na_value_for_index = pd.NA data = data[~nafilt] if HAS_UNCERTAINTIES and data.dtype.kind == "O": # This is a work-around for unhashable UFloats @@ -643,7 +665,7 @@ def value_counts(self, dropna=True): array = [data_list.count(item) for item in index] if not dropna: - index.append(na_value) + index.append(na_value_for_index) array.append(nafilt.sum()) return Series(np.asarray(array), index=index) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 4b966409..f9ad397f 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -10,10 +10,22 @@ try: import uncertainties.unumpy as unp - from uncertainties import ufloat, UFloat # noqa: F401 + from uncertainties import ufloat, UFloat + from uncertainties.core import AffineScalarFunc # noqa: F401 + + def AffineScalarFunc__hash__(self): + if not self._linear_part.expanded(): + self._linear_part.expand() + combo = tuple(iter(self._linear_part.linear_combo.items())) + if len(combo) > 1 or combo[0][1] != 1.0: + return hash(combo) + # The unique value that comes from a unique variable (which it also hashes to) + return id(combo[0][0]) + + AffineScalarFunc.__hash__ = AffineScalarFunc__hash__ - HAS_UNCERTAINTIES = True _ufloat_nan = ufloat(np.nan, 0) + 
HAS_UNCERTAINTIES = True except ImportError: unp = np HAS_UNCERTAINTIES = False @@ -170,8 +182,8 @@ def dtype(): _base_numeric_dtypes = [float, int] -_all_numeric_dtypes = ( - _base_numeric_dtypes + [] if HAS_UNCERTAINTIES else [np.complex128] +_all_numeric_dtypes = _base_numeric_dtypes + ( + [] if HAS_UNCERTAINTIES else [np.complex128] ) @@ -650,23 +662,9 @@ def _get_exception(self, data, op_name): if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]: return op_name, TypeError if op_name in ["__pow__", "__rpow__"]: - return DimensionalityError - complex128_dtype = pd.core.dtypes.dtypes.NumpyEADtype("complex128") - if ( - (isinstance(obj, pd.Series) and obj.dtype == complex128_dtype) - or ( - isinstance(obj, pd.DataFrame) - and any([dtype == complex128_dtype for dtype in obj.dtypes]) - ) - or (isinstance(other, pd.Series) and other.dtype == complex128_dtype) - or ( - isinstance(other, pd.DataFrame) - and any([dtype == complex128_dtype for dtype in other.dtypes]) - ) - ): - if op_name in ["__floordiv__", "__rfloordiv__", "__mod__", "__rmod__"]: - return TypeError - return super()._get_expected_exception(op_name, obj, other) + return op_name, DimensionalityError + + return op_name, None # With Pint 0.21, series and scalar need to have compatible units for # the arithmetic to work @@ -717,7 +715,9 @@ def test_divmod(self, data, USE_UNCERTAINTIES): self._check_divmod_op(1 * ureg.Mm, ops.rdivmod, ser) @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) - def test_divmod_series_array(self, data, data_for_twos): + def test_divmod_series_array(self, data, data_for_twos, USE_UNCERTAINTIES): + if USE_UNCERTAINTIES: + pytest.skip(reason="uncertainties does not implement divmod") ser = pd.Series(data) self._check_divmod_op(ser, divmod, data) @@ -727,12 +727,6 @@ def test_divmod_series_array(self, data, data_for_twos): other = pd.Series(other) self._check_divmod_op(other, ops.rdivmod, ser) - @pytest.mark.parametrize("numeric_dtype", 
_base_numeric_dtypes, indirect=True) - def test_divmod_series_array(self, data, data_for_twos, USE_UNCERTAINTIES): - if USE_UNCERTAINTIES: - pytest.skip(reason="uncertainties does not implement divmod") - super().test_divmod_series_array(data, data_for_twos) - class TestComparisonOps(base.BaseComparisonOpsTests): def _compare_other(self, s, data, op_name, other): @@ -871,16 +865,6 @@ def test_reduce_series( warnings.simplefilter("ignore", RuntimeWarning) self.check_reduce(s, op_name, skipna) - @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series_xx(self, data, all_numeric_reductions, skipna): - op_name = all_numeric_reductions - s = pd.Series(data) - - # min/max with empty produce numpy warnings - with warnings.catch_warnings(): - warnings.simplefilter("ignore", RuntimeWarning) - self.check_reduce(s, op_name, skipna) - class TestBooleanReduce(base.BaseBooleanReduceTests): def check_reduce(self, s, op_name, skipna): @@ -922,7 +906,18 @@ class TestSetitem(base.BaseSetitemTests): @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) def test_setitem_scalar_key_sequence_raise(self, data): # This can be removed when https://github.com/pandas-dev/pandas/pull/54441 is accepted - base.BaseSetitemTests.test_setitem_scalar_key_sequence_raise(self, data) + arr = data[:5].copy() + with pytest.raises((ValueError, TypeError)): + arr[0] = arr[[0, 1]] + + def test_setitem_invalid(self, data, invalid_scalar): + # This can be removed when https://github.com/pandas-dev/pandas/pull/54441 is accepted + msg = "" # messages vary by subclass, so we do not test it + with pytest.raises((ValueError, TypeError), match=msg): + data[0] = invalid_scalar + + with pytest.raises((ValueError, TypeError), match=msg): + data[:] = invalid_scalar @pytest.mark.parametrize("numeric_dtype", _base_numeric_dtypes, indirect=True) def test_setitem_2d_values(self, data): From 5b39a3e9ece3706862707945e271a40292f84873 Mon Sep 17 00:00:00 2001 From: Michael Tiemann 
<72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Mon, 14 Aug 2023 21:46:53 -0400 Subject: [PATCH 30/51] Update test_issues.py Fix so that it works in a world without uncertainties (or with uncertainties). Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- pint_pandas/testsuite/test_issues.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 39b4eda0..35638ab3 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -11,12 +11,24 @@ try: import uncertainties.unumpy as unp - from uncertainties import ufloat, UFloat # noqa: F401 + from uncertainties import ufloat + from uncertainties.core import AffineScalarFunc # noqa: F401 + def AffineScalarFunc__hash__(self): + if not self._linear_part.expanded(): + self._linear_part.expand() + combo = tuple(iter(self._linear_part.linear_combo.items())) + if len(combo) > 1 or combo[0][1] != 1.0: + return hash(combo) + # The unique value that comes from a unique variable (which it also hashes to) + return id(combo[0][0]) + + AffineScalarFunc.__hash__ = AffineScalarFunc__hash__ + + _ufloat_nan = ufloat(np.nan, 0) HAS_UNCERTAINTIES = True except ImportError: unp = np - ufloat = Ufloat = None HAS_UNCERTAINTIES = False from pint_pandas import PintArray, PintType @@ -62,6 +74,10 @@ def test_force_ndarray_like(self): pint.set_application_registry(prev_appreg) +@pytest.mark.skipif( + not HAS_UNCERTAINTIES, + reason="this test depends entirely on HAS_UNCERTAINTIES being True", +) class TestIssue21(BaseExtensionTests): @pytest.mark.filterwarnings("ignore::RuntimeWarning") def test_offset_concat(self): From 8ed4c1d7ce77bfe3c947b220743288c85e6fb0ab Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Mon, 14 Aug 2023 22:28:25 -0400 Subject: [PATCH 31/51] Add uncertainties to CI/CD 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 5 +++++ pint_pandas/testsuite/test_pandas_extensiontests.py | 2 +- pyproject.toml | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 251070a6..7d0e9a65 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,7 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", "pandas==2.1.0rc0" ] pint: ["pint>=0.21.1", "pint==0.22"] + uncertainties: ["", "uncertainties==3.1.7"] runs-on: ubuntu-latest @@ -57,6 +58,10 @@ jobs: if: ${{ matrix.pandas != null }} run: pip install "${{matrix.pandas}}" + - name: Install uncertainties + if: ${{ matrix.uncertainties != null }} + run: pip install "${{matrix.uncertainties}}" + - name: Run Tests run: | pytest $TEST_OPTS diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index f9ad397f..9bc4c095 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -772,7 +772,7 @@ class TestNumericReduce(base.BaseNumericReduceTests): def _supports_reduction(self, obj, op_name: str) -> bool: # Specify if we expect this reduction to succeed. 
if ( - USE_UNCERTAINTIES + HAS_UNCERTAINTIES and op_name in _all_numeric_reductions and op_name not in _uncertain_numeric_reductions ): diff --git a/pyproject.toml b/pyproject.toml index 07483c87..03e31908 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,8 @@ test = [ "codecov", "coveralls", "nbval", - "pyarrow" + "pyarrow", + "uncertainties", ] [project.urls] From 2cb50f4fabf4c66b23a5d7774244d3a717deb9ff Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 15 Aug 2023 05:28:04 -0400 Subject: [PATCH 32/51] Update CI/CD to anticipate, not install or test uncertainties For now disable testing of `uncertainties` specifically, as that also requires an uncertainties-aware `pint`. When that becomes available we can update the CI/CD scripts to enable optional uncertainty testing with versions of pint and pint-pandas that can tolerate it. From local testing with uncertainties, remember: if you don't put a fixture XYZ in the parameter list, then testing `if XYZ` is really testing whether the fixture's function is not None (it's always not None). When XYZ is in the parameter list, testing `if XYZ` tells you whether the value of XYZ fixture is non-False. Also increase version compatibility by using `np.array([f(x) for x in arr])` instead of `arr.map(f)` when `arr` is a numpy array. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci-pint-master.yml | 5 +++++ .github/workflows/ci-pint-pre.yml | 5 +++++ .github/workflows/ci.yml | 2 +- pint_pandas/pint_array.py | 8 +++++--- pint_pandas/testsuite/test_pandas_extensiontests.py | 2 +- pyproject.toml | 1 - 6 files changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-pint-master.yml b/.github/workflows/ci-pint-master.yml index 3667109f..c646869b 100644 --- a/.github/workflows/ci-pint-master.yml +++ b/.github/workflows/ci-pint-master.yml @@ -10,6 +10,7 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", ] pint: ["pint>=0.21.1"] + uncertainties: [""] # "uncertainties==3.1.7" Disabled until both pint and pint-pandas support uncertainties together runs-on: ubuntu-latest @@ -56,6 +57,10 @@ jobs: if: ${{ matrix.pandas != null }} run: pip install "${{matrix.pandas}}" + - name: Install uncertainties + if: ${{ matrix.uncertainties != null }} + run: pip install "${{matrix.uncertainties}}" + - name: Run Tests run: | pytest $TEST_OPTS diff --git a/.github/workflows/ci-pint-pre.yml b/.github/workflows/ci-pint-pre.yml index e484f0a2..e1b65d9e 100644 --- a/.github/workflows/ci-pint-pre.yml +++ b/.github/workflows/ci-pint-pre.yml @@ -10,6 +10,7 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", ] pint: ["pint>=0.21.1"] + uncertainties: [""] # "uncertainties==3.1.7" Disabled until both pint and pint-pandas support uncertainties together runs-on: ubuntu-latest @@ -56,6 +57,10 @@ jobs: if: ${{ matrix.pandas != null }} run: pip install "${{matrix.pandas}}" + - name: Install uncertainties + if: ${{ matrix.uncertainties != null }} + run: pip install "${{matrix.uncertainties}}" + - name: Run Tests run: | pytest $TEST_OPTS diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7d0e9a65..a4b4465a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ jobs: numpy: 
["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", "pandas==2.1.0rc0" ] pint: ["pint>=0.21.1", "pint==0.22"] - uncertainties: ["", "uncertainties==3.1.7"] + uncertainties: [""] # "uncertainties==3.1.7" Disabled until both pint and pint-pandas support uncertainties together runs-on: ubuntu-latest diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index e350bb4b..d94f09bf 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -429,7 +429,7 @@ def isna(self): return np.full((0), True) # NumpyEADtype('object') doesn't know about UFloats... if is_object_dtype(self._data.dtype): - return self._data.map(lambda x: x is pd.NA or unp.isnan(x)) + return np.array([pd.isna(x) or unp.isnan(x) for x in self._data]) return self._data.isna() def astype(self, dtype, copy=True): @@ -617,7 +617,9 @@ def _values_for_factorize(self): if arr.dtype.kind == "O": if HAS_UNCERTAINTIES and arr.size > 0: # Canonicalize uncertain NaNs and pd.NA to np.nan - arr = arr.map(lambda x: np.nan if x is pd.NA or unp.isnan(x) else x) + arr = np.array( + [np.nan if pd.isna(x) or unp.isnan(x) else x for x in arr] + ) return np.array(arr, copy=False), self.dtype.na_value return arr._values_for_factorize() @@ -646,7 +648,7 @@ def value_counts(self, dropna=True): # compute counts on the data with no nans data = self._data if HAS_UNCERTAINTIES: - nafilt = data.map(lambda x: x is pd.NA or unp.isnan(x)) + nafilt = np.array([pd.isna(x) or unp.isnan(x) for x in data]) else: nafilt = pd.isna(data) na_value_for_index = pd.NA diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 9bc4c095..85a3dc79 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -279,7 +279,7 @@ def data_for_sorting(numeric_dtype, USE_UNCERTAINTIES): @pytest.fixture -def data_missing_for_sorting(numeric_dtype, USE_UNCERTAINTIES): +def 
data_missing_for_sorting(numeric_dtype, USE_UNCERTAINTIES, USE_UFLOAT_NAN): numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) if USE_UNCERTAINTIES: numeric_dtype = None diff --git a/pyproject.toml b/pyproject.toml index 03e31908..ac3e5b0c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,6 @@ test = [ "coveralls", "nbval", "pyarrow", - "uncertainties", ] [project.urls] From 8c4bf7d6079ab8d9bccef65da5bfa5c2102854db Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 15 Aug 2023 06:13:23 -0400 Subject: [PATCH 33/51] Update CHANGES Added blurb about supporting `uncertainties`. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- CHANGES | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES b/CHANGES index dce1e071..af4d7b00 100644 --- a/CHANGES +++ b/CHANGES @@ -5,6 +5,7 @@ pint-pandas Changelog ---------------- <<<<<<< HEAD +- Support for uncertainties as magnitudes in PintArrays. #140 - Support for Pandas version 2.1.0. #196 - Support for dtype-preserving `PintArray.map` for both Pandas 2.0.2 and Pandas 2.1. #196 - Support for values in columns with integer magnitudes From e365cbc4d5018a4a784a825f0cc2bd688ae08f38 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 17 Sep 2023 21:27:33 -0400 Subject: [PATCH 34/51] Test with Pint-0.23rc0 and uncertainties in ci/cd Settle changes that work with Pandas 2.1.0 and Pint-0.23rc0 and see if we can pass Pint-Pandas CI/CD. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci-pint-master.yml | 2 +- .github/workflows/ci-pint-pre.yml | 2 +- .github/workflows/ci.yml | 4 +- pint_pandas/pint_array.py | 28 +++++++++-- .../testsuite/test_pandas_extensiontests.py | 46 +++++++++---------- 5 files changed, 51 insertions(+), 31 deletions(-) diff --git a/.github/workflows/ci-pint-master.yml b/.github/workflows/ci-pint-master.yml index c646869b..1c53bf6a 100644 --- a/.github/workflows/ci-pint-master.yml +++ b/.github/workflows/ci-pint-master.yml @@ -10,7 +10,7 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", ] pint: ["pint>=0.21.1"] - uncertainties: [""] # "uncertainties==3.1.7" Disabled until both pint and pint-pandas support uncertainties together + uncertainties: [ "pint==0.23rc0 pandas==2.1.0 uncertainties==3.1.7", ] runs-on: ubuntu-latest diff --git a/.github/workflows/ci-pint-pre.yml b/.github/workflows/ci-pint-pre.yml index e1b65d9e..52eaa9bb 100644 --- a/.github/workflows/ci-pint-pre.yml +++ b/.github/workflows/ci-pint-pre.yml @@ -10,7 +10,7 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", ] pint: ["pint>=0.21.1"] - uncertainties: [""] # "uncertainties==3.1.7" Disabled until both pint and pint-pandas support uncertainties together + uncertainties: [ "pint==0.23rc0 pandas==2.1.0 uncertainties==3.1.7", ] runs-on: ubuntu-latest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a4b4465a..fda892c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,9 +8,9 @@ jobs: matrix: python-version: [3.9, "3.10", "3.11"] numpy: ["numpy>=1.20.3,<2.0.0"] - pandas: ["pandas==2.0.2", "pandas==2.1.0rc0" ] + pandas: ["pandas==2.0.2", "pandas==2.1.0" ] pint: ["pint>=0.21.1", "pint==0.22"] - uncertainties: [""] # "uncertainties==3.1.7" Disabled until both pint and pint-pandas support uncertainties together + uncertainties: [ "pint==0.23rc0 pandas==2.1.0 uncertainties==3.1.7", ] 
runs-on: ubuntu-latest diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index b1a5d16e..c5b927f2 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -617,7 +617,11 @@ def _values_for_factorize(self): # provided dtype. This may be revisited in the future, see GH#48476. arr = self._data if arr.dtype.kind == "O": - if HAS_UNCERTAINTIES and arr.size > 0: + if ( + HAS_UNCERTAINTIES + and arr.size > 0 + and unp.isnan(arr[~pd.isna(arr)]).any() + ): # Canonicalize uncertain NaNs and pd.NA to np.nan arr = np.array( [np.nan if pd.isna(x) or unp.isnan(x) else x for x in arr] @@ -625,6 +629,25 @@ def _values_for_factorize(self): return np.array(arr, copy=False), self.dtype.na_value return arr._values_for_factorize() + def _values_for_argsort(self) -> np.ndarray: + """ + Return values for sorting. + Returns + ------- + ndarray + The transformed values should maintain the ordering between values + within the array. + """ + # In this case we want to return just the magnitude array stripped of units + # Must replace uncertain NaNs with np.nan + if HAS_UNCERTAINTIES: + arr = self._data[~pd.isna(self._data)] + if arr.size > 0 and unp.isnan(arr).any(): + return np.array( + [np.nan if pd.isna(x) or unp.isnan(x) else x for x in self._data] + ) + return self._data + def value_counts(self, dropna=True): """ Returns a Series containing counts of each category. 
@@ -681,7 +704,6 @@ def unique(self): ------- uniques : PintArray """ - from pandas import unique data = self._data na_value = self.dtype.na_value @@ -696,7 +718,7 @@ def unique(self): return self._from_sequence( pd.array(unique_data, dtype=data.dtype), dtype=self.dtype ) - return self._from_sequence(unique(data), dtype=self.dtype) + return self._from_sequence(data.unique(), dtype=self.dtype) def __contains__(self, item) -> bool: if not isinstance(item, _Quantity): diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index 85a3dc79..bca9519c 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -8,28 +8,6 @@ import pandas._testing as tm import pytest -try: - import uncertainties.unumpy as unp - from uncertainties import ufloat, UFloat - from uncertainties.core import AffineScalarFunc # noqa: F401 - - def AffineScalarFunc__hash__(self): - if not self._linear_part.expanded(): - self._linear_part.expand() - combo = tuple(iter(self._linear_part.linear_combo.items())) - if len(combo) > 1 or combo[0][1] != 1.0: - return hash(combo) - # The unique value that comes from a unique variable (which it also hashes to) - return id(combo[0][0]) - - AffineScalarFunc.__hash__ = AffineScalarFunc__hash__ - - _ufloat_nan = ufloat(np.nan, 0) - HAS_UNCERTAINTIES = True -except ImportError: - unp = np - HAS_UNCERTAINTIES = False - from pandas.core import ops from pandas.tests.extension import base from pandas.tests.extension.conftest import ( @@ -47,8 +25,6 @@ def AffineScalarFunc__hash__(self): from pint_pandas import PintArray, PintType from pint_pandas.pint_array import dtypemap, pandas_version_info -ureg = PintType.ureg - from pandas import ( Categorical, # noqa: F401 DataFrame, @@ -80,6 +56,28 @@ def AffineScalarFunc__hash__(self): assert_numpy_array_equal, # noqa: F401 ) +from pint.compat import HAS_UNCERTAINTIES + +ureg = PintType.ureg + +if 
HAS_UNCERTAINTIES: + import uncertainties.unumpy as unp + from uncertainties import ufloat, UFloat + from uncertainties.core import AffineScalarFunc # noqa: F401 + + def AffineScalarFunc__hash__(self): + if not self._linear_part.expanded(): + self._linear_part.expand() + combo = tuple(iter(self._linear_part.linear_combo.items())) + if len(combo) > 1 or combo[0][1] != 1.0: + return hash(combo) + # The unique value that comes from a unique variable (which it also hashes to) + return id(combo[0][0]) + + AffineScalarFunc.__hash__ = AffineScalarFunc__hash__ + + _ufloat_nan = ufloat(np.nan, 0) + def uassert_equal(left, right, **kwargs) -> None: """ From 108cb7133a66bc0b30ca1f65250bb46c1f9bc00b Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Sun, 17 Sep 2023 21:33:04 -0400 Subject: [PATCH 35/51] 2nd attempt integrating uncertainties and CI/CD Allow pip install command to unpack uncertainties option into multiple arguments. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci-pint-master.yml | 2 +- .github/workflows/ci-pint-pre.yml | 2 +- .github/workflows/ci.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci-pint-master.yml b/.github/workflows/ci-pint-master.yml index 1c53bf6a..d1b32204 100644 --- a/.github/workflows/ci-pint-master.yml +++ b/.github/workflows/ci-pint-master.yml @@ -59,7 +59,7 @@ jobs: - name: Install uncertainties if: ${{ matrix.uncertainties != null }} - run: pip install "${{matrix.uncertainties}}" + run: pip install ${{matrix.uncertainties}} - name: Run Tests run: | diff --git a/.github/workflows/ci-pint-pre.yml b/.github/workflows/ci-pint-pre.yml index 52eaa9bb..40cff0b7 100644 --- a/.github/workflows/ci-pint-pre.yml +++ b/.github/workflows/ci-pint-pre.yml @@ -59,7 +59,7 @@ jobs: - name: Install uncertainties if: ${{ matrix.uncertainties != null }} - run: pip install "${{matrix.uncertainties}}" + 
run: pip install ${{matrix.uncertainties}} - name: Run Tests run: | diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fda892c2..223b6ead 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: - name: Install uncertainties if: ${{ matrix.uncertainties != null }} - run: pip install "${{matrix.uncertainties}}" + run: pip install ${{matrix.uncertainties}} - name: Run Tests run: | From a5758e70e04f5fc3f808f77b83cce2bbbe3c1df1 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 05:02:55 -0400 Subject: [PATCH 36/51] Use `include` to handle uncertainties testing By using `include` directive, we can add a special case to the matrix of possibilities. In this case we add to the matrix the case where pandas>=2.1.0 and pint>=0.23rc0, which are both necessary for the UFloat array changes to work. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci-pint-master.yml | 7 +++++-- .github/workflows/ci-pint-pre.yml | 7 +++++-- .github/workflows/ci.yml | 11 +++++++---- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci-pint-master.yml b/.github/workflows/ci-pint-master.yml index d1b32204..35cb0385 100644 --- a/.github/workflows/ci-pint-master.yml +++ b/.github/workflows/ci-pint-master.yml @@ -10,7 +10,10 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", ] pint: ["pint>=0.21.1"] - uncertainties: [ "pint==0.23rc0 pandas==2.1.0 uncertainties==3.1.7", ] + include: + - pandas: "pandas==2.1.0" + pint: "pint>=0.23rc0" + uncertainties: "uncertainties==3.1.7" runs-on: ubuntu-latest @@ -59,7 +62,7 @@ jobs: - name: Install uncertainties if: ${{ matrix.uncertainties != null }} - run: pip install ${{matrix.uncertainties}} + run: pip install "${{matrix.uncertainties}}" - name: Run Tests run: | diff --git a/.github/workflows/ci-pint-pre.yml 
b/.github/workflows/ci-pint-pre.yml index 40cff0b7..77edb899 100644 --- a/.github/workflows/ci-pint-pre.yml +++ b/.github/workflows/ci-pint-pre.yml @@ -10,7 +10,10 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", ] pint: ["pint>=0.21.1"] - uncertainties: [ "pint==0.23rc0 pandas==2.1.0 uncertainties==3.1.7", ] + include: + - pandas: "pandas==2.1.0" + pint: "pint>=0.23rc0" + uncertainties: "uncertainties==3.1.7" runs-on: ubuntu-latest @@ -59,7 +62,7 @@ jobs: - name: Install uncertainties if: ${{ matrix.uncertainties != null }} - run: pip install ${{matrix.uncertainties}} + run: pip install "${{matrix.uncertainties}}" - name: Run Tests run: | diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 223b6ead..4b3f6918 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,9 +8,12 @@ jobs: matrix: python-version: [3.9, "3.10", "3.11"] numpy: ["numpy>=1.20.3,<2.0.0"] - pandas: ["pandas==2.0.2", "pandas==2.1.0" ] - pint: ["pint>=0.21.1", "pint==0.22"] - uncertainties: [ "pint==0.23rc0 pandas==2.1.0 uncertainties==3.1.7", ] + pandas: ["pandas==2.0.2", "pandas>=2.1.0" ] + pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] + include: + - pandas: "pandas>=2.1.0" + pint: "pint>=0.23rc0" + uncertainties: "uncertainties==3.1.7" runs-on: ubuntu-latest @@ -60,7 +63,7 @@ jobs: - name: Install uncertainties if: ${{ matrix.uncertainties != null }} - run: pip install ${{matrix.uncertainties}} + run: pip install "${{matrix.uncertainties}}" - name: Run Tests run: | From 82442ab3805270a2f2c3cdbd9be5faff227a01a7 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 05:23:23 -0400 Subject: [PATCH 37/51] Only test `uncertainties` in ci.yml Don't test uncertainties in ci-pint-pre or ci-pint-master, as these don't have the necessary versions of Pandas or Pint to make the matrix include operation work. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci-pint-master.yml | 4 ---- .github/workflows/ci-pint-pre.yml | 4 ---- 2 files changed, 8 deletions(-) diff --git a/.github/workflows/ci-pint-master.yml b/.github/workflows/ci-pint-master.yml index 35cb0385..a71306a7 100644 --- a/.github/workflows/ci-pint-master.yml +++ b/.github/workflows/ci-pint-master.yml @@ -10,10 +10,6 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", ] pint: ["pint>=0.21.1"] - include: - - pandas: "pandas==2.1.0" - pint: "pint>=0.23rc0" - uncertainties: "uncertainties==3.1.7" runs-on: ubuntu-latest diff --git a/.github/workflows/ci-pint-pre.yml b/.github/workflows/ci-pint-pre.yml index 77edb899..6d02a350 100644 --- a/.github/workflows/ci-pint-pre.yml +++ b/.github/workflows/ci-pint-pre.yml @@ -10,10 +10,6 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", ] pint: ["pint>=0.21.1"] - include: - - pandas: "pandas==2.1.0" - pint: "pint>=0.23rc0" - uncertainties: "uncertainties==3.1.7" runs-on: ubuntu-latest From 5d351fcc4d595a8a93fd7d5f5c6b6b75527515f5 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 05:29:11 -0400 Subject: [PATCH 38/51] Update ci.yml Also test Pint-Pandas without uncertainties. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4b3f6918..f9134b5c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,10 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", "pandas>=2.1.0" ] pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] + include: + - pandas: "pandas>=2.1.0" + pint: "pint>=0.23rc0" + # no uncertainties include: - pandas: "pandas>=2.1.0" pint: "pint>=0.23rc0" From c4e3a06fc8672e099b76c01188bfd92ac64b4fbe Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 05:40:38 -0400 Subject: [PATCH 39/51] Update ci.yml Trying again with different syntax to avoid duplicate key issue. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f9134b5c..550e0f20 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,11 +13,7 @@ jobs: include: - pandas: "pandas>=2.1.0" pint: "pint>=0.23rc0" - # no uncertainties - include: - - pandas: "pandas>=2.1.0" - pint: "pint>=0.23rc0" - uncertainties: "uncertainties==3.1.7" + uncertainties: ["", "uncertainties==3.1.7"] runs-on: ubuntu-latest From 8d5feb9fc83d3a45e6a8d0742f1cf126ac5d31e0 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 05:51:06 -0400 Subject: [PATCH 40/51] Update ci.yml Trying yet another syntax to make `uncertainties` one of two options when pandas==2.1.0 and pint==0.23rc0. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 550e0f20..2ac61d2f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,10 +10,11 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", "pandas>=2.1.0" ] pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] + uncertainties: "" include: - pandas: "pandas>=2.1.0" pint: "pint>=0.23rc0" - uncertainties: ["", "uncertainties==3.1.7"] + uncertainties: "uncertainties==3.1.7" runs-on: ubuntu-latest From bfe9a77e1ff273d02d875aa018c135212e4136af Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 05:55:08 -0400 Subject: [PATCH 41/51] Update ci.yml Try using null instead of "" for uncertainties that we don't want to run. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2ac61d2f..77fe4c41 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", "pandas>=2.1.0" ] pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] - uncertainties: "" + uncertainties: null include: - pandas: "pandas>=2.1.0" pint: "pint>=0.23rc0" From 25adf35491eabe4a14fcef64e620b93ad5df1f04 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 05:57:07 -0400 Subject: [PATCH 42/51] Update ci.yml Try using "null" to represent a condition where we don't want uncertainties installed. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 77fe4c41..dbd0d72f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", "pandas>=2.1.0" ] pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] - uncertainties: null + uncertainties: "null" include: - pandas: "pandas>=2.1.0" pint: "pint>=0.23rc0" @@ -63,7 +63,7 @@ jobs: run: pip install "${{matrix.pandas}}" - name: Install uncertainties - if: ${{ matrix.uncertainties != null }} + if: ${{ matrix.uncertainties != "null" }} run: pip install "${{matrix.uncertainties}}" - name: Run Tests From afc3eb4c01587290178ce21d6f0b3f842ebd0887 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 06:16:39 -0400 Subject: [PATCH 43/51] Update ci.yml Try using "no-thank-you" instead of "null" for matrix permutations not intended to use uncertainties. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index dbd0d72f..f1c5d0aa 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", "pandas>=2.1.0" ] pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] - uncertainties: "null" + uncertainties: "no-thank-you" include: - pandas: "pandas>=2.1.0" pint: "pint>=0.23rc0" @@ -63,7 +63,7 @@ jobs: run: pip install "${{matrix.pandas}}" - name: Install uncertainties - if: ${{ matrix.uncertainties != "null" }} + if: ${{ matrix.uncertainties != "no-thank-you" }} run: pip install "${{matrix.uncertainties}}" - name: Run Tests From e281dfce9d69deee28d87d42a7f6c8b29c941ed1 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 06:19:11 -0400 Subject: [PATCH 44/51] Update ci.yml Wrap `uncertainties` matrix parameter in `[]`. If we get this working, can try setting parameter to `''`' from several commits ago. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1c5d0aa..29f30405 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", "pandas>=2.1.0" ] pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] - uncertainties: "no-thank-you" + uncertainties: ["no-thank-you"] include: - pandas: "pandas>=2.1.0" pint: "pint>=0.23rc0" From a208163563b649268e6f29e6607a03e942810419 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 06:22:03 -0400 Subject: [PATCH 45/51] Update ci.yml Use single quotes when testing the value of `uncertainties`. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 29f30405..572db6ee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -63,7 +63,7 @@ jobs: run: pip install "${{matrix.pandas}}" - name: Install uncertainties - if: ${{ matrix.uncertainties != "no-thank-you" }} + if: ${{ matrix.uncertainties != 'no-thank-you' }} run: pip install "${{matrix.uncertainties}}" - name: Run Tests From 296bbdccc0cd2664d3744d77611b5b34065bfbab Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 06:35:00 -0400 Subject: [PATCH 46/51] Update ci.yml Tweak conditional matrix logic by using a "same-but-different" value for `pint` value assignment. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 572db6ee..9658045d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,10 +10,10 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", "pandas>=2.1.0" ] pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] - uncertainties: ["no-thank-you"] include: - pandas: "pandas>=2.1.0" - pint: "pint>=0.23rc0" + # Make this look like an "extra" case via same-but-different pint value + pint: "pint==0.23rc0" uncertainties: "uncertainties==3.1.7" runs-on: ubuntu-latest @@ -63,7 +63,7 @@ jobs: run: pip install "${{matrix.pandas}}" - name: Install uncertainties - if: ${{ matrix.uncertainties != 'no-thank-you' }} + if: ${{ matrix.uncertainties != null }} run: pip install "${{matrix.uncertainties}}" - name: Run Tests From 585f38db7e4fa236688129e4fd5e0ac051c0711d Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 06:36:05 -0400 Subject: [PATCH 47/51] Update ci.yml Untabify yaml file... 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9658045d..e62b0689 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] include: - pandas: "pandas>=2.1.0" - # Make this look like an "extra" case via same-but-different pint value + # Make this look like an "extra" case via same-but-different pint value pint: "pint==0.23rc0" uncertainties: "uncertainties==3.1.7" From 75d4c56ab9e2938f1811a70831d508c3e4e905bc Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 06:46:56 -0400 Subject: [PATCH 48/51] Update ci.yml Make uncertainties a default null value in the matrix. Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index e62b0689..d2a2c5f3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,6 +10,7 @@ jobs: numpy: ["numpy>=1.20.3,<2.0.0"] pandas: ["pandas==2.0.2", "pandas>=2.1.0" ] pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] + uncertainties: [""] include: - pandas: "pandas>=2.1.0" # Make this look like an "extra" case via same-but-different pint value From 69882124c18a83b2226ae8d1f4077d52d5523df8 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 06:52:59 -0400 Subject: [PATCH 49/51] Update ci.yml Attempt to explicitly add python-version and numpy to `uncertainties` case. Otherwise, pytest never gets installed. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d2a2c5f3..cfba0831 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,8 +12,9 @@ jobs: pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] uncertainties: [""] include: - - pandas: "pandas>=2.1.0" - # Make this look like an "extra" case via same-but-different pint value + - python-version: ${{ matrix.python-version }} + numpy: ${{ matrix.numpy }} + pandas: "pandas>=2.1.0" pint: "pint==0.23rc0" uncertainties: "uncertainties==3.1.7" From 764b609b2413aae5fd64f16c00f7e09abe32ac08 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 06:59:26 -0400 Subject: [PATCH 50/51] Update ci.yml We cannot reference the matrix when building the matrix, so unroll all parameter values. 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci.yml | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index cfba0831..65322b60 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,8 +12,20 @@ jobs: pint: ["pint>=0.21.1,<0.22", "pint==0.22", "pint>=0.23rc0"] uncertainties: [""] include: - - python-version: ${{ matrix.python-version }} - numpy: ${{ matrix.numpy }} + - python-version: 3.9 + numpy: "numpy>=1.20.3,<2.0.0" + pandas: "pandas>=2.1.0" + pint: "pint==0.23rc0" + uncertainties: "uncertainties==3.1.7" + include: + - python-version: "3.10" + numpy: "numpy>=1.20.3,<2.0.0" + pandas: "pandas>=2.1.0" + pint: "pint==0.23rc0" + uncertainties: "uncertainties==3.1.7" + include: + - python-version: "3.11" + numpy: "numpy>=1.20.3,<2.0.0" pandas: "pandas>=2.1.0" pint: "pint==0.23rc0" uncertainties: "uncertainties==3.1.7" From 9ed23c164ea3416d5defc1e80691874e98f4bed6 Mon Sep 17 00:00:00 2001 From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> Date: Tue, 19 Sep 2023 07:07:35 -0400 Subject: [PATCH 51/51] Update ci-*.yml files Remove unused `uncertainties` from -pre and -master CI files. Only test one configuration of Python against uncertainties (trying to work around duplicate key problem). 
Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com> --- .github/workflows/ci-pint-master.yml | 4 ---- .github/workflows/ci-pint-pre.yml | 4 ---- .github/workflows/ci.yml | 12 ------------ 3 files changed, 20 deletions(-) diff --git a/.github/workflows/ci-pint-master.yml b/.github/workflows/ci-pint-master.yml index a71306a7..3667109f 100644 --- a/.github/workflows/ci-pint-master.yml +++ b/.github/workflows/ci-pint-master.yml @@ -56,10 +56,6 @@ jobs: if: ${{ matrix.pandas != null }} run: pip install "${{matrix.pandas}}" - - name: Install uncertainties - if: ${{ matrix.uncertainties != null }} - run: pip install "${{matrix.uncertainties}}" - - name: Run Tests run: | pytest $TEST_OPTS diff --git a/.github/workflows/ci-pint-pre.yml b/.github/workflows/ci-pint-pre.yml index 6d02a350..e484f0a2 100644 --- a/.github/workflows/ci-pint-pre.yml +++ b/.github/workflows/ci-pint-pre.yml @@ -56,10 +56,6 @@ jobs: if: ${{ matrix.pandas != null }} run: pip install "${{matrix.pandas}}" - - name: Install uncertainties - if: ${{ matrix.uncertainties != null }} - run: pip install "${{matrix.uncertainties}}" - - name: Run Tests run: | pytest $TEST_OPTS diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 65322b60..deda1858 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,18 +17,6 @@ jobs: pandas: "pandas>=2.1.0" pint: "pint==0.23rc0" uncertainties: "uncertainties==3.1.7" - include: - - python-version: "3.10" - numpy: "numpy>=1.20.3,<2.0.0" - pandas: "pandas>=2.1.0" - pint: "pint==0.23rc0" - uncertainties: "uncertainties==3.1.7" - include: - - python-version: "3.11" - numpy: "numpy>=1.20.3,<2.0.0" - pandas: "pandas>=2.1.0" - pint: "pint==0.23rc0" - uncertainties: "uncertainties==3.1.7" runs-on: ubuntu-latest