diff --git a/CHANGES b/CHANGES index f896b8e..7bfcbf9 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,7 @@ pint-pandas Changelog 0.6 (unreleased) ---------------- +- Fix dequantify duplicate column failure #202 - Fix astype issue #196 diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index fcbfe61..6fb96ee 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -1,7 +1,6 @@ import copy import re import warnings -from collections import OrderedDict from importlib.metadata import version import numpy as np @@ -981,23 +980,35 @@ def formatter_func(dtype): df_columns = df.columns.to_frame() df_columns["units"] = [ - formatter_func(df[col].dtype) - if isinstance(df[col].dtype, PintType) + formatter_func(df.dtypes.iloc[i]) + if isinstance(df.dtypes.iloc[i], PintType) else NO_UNIT - for col in df.columns + for i, col in enumerate(df.columns) ] - data_for_df = OrderedDict() + data_for_df = [] for i, col in enumerate(df.columns): - if isinstance(df[col].dtype, PintType): - data_for_df[tuple(df_columns.iloc[i])] = df[col].values.data + if isinstance(df.dtypes.iloc[i], PintType): + data_for_df.append( + pd.Series( + data=df.iloc[:, i].values.data, + name=tuple(df_columns.iloc[i]), + index=df.index, + copy=False, + ) + ) else: - data_for_df[tuple(df_columns.iloc[i])] = df[col].values - - df_new = DataFrame(data_for_df, columns=data_for_df.keys()) + data_for_df.append( + pd.Series( + data=df.iloc[:, i].values, + name=tuple(df_columns.iloc[i]), + index=df.index, + copy=False, + ) + ) + df_new = pd.concat(data_for_df, axis=1, copy=False) df_new.columns.names = df.columns.names + ["unit"] - df_new.index = df.index return df_new diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index d8d6ce0..fbcd0c6 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -194,3 +194,38 @@ def test_issue_194(dtype): s2 = s1.astype(dtype) tm.assert_series_equal(s0, s2) + + +class TestIssue202(BaseExtensionTests): + def test_dequantify(self): + df = pd.DataFrame() + df["test"] = pd.Series([1, 2, 3], dtype="pint[kN]") + df.insert(0, "test", df["test"], allow_duplicates=True) + + expected = pd.DataFrame.from_dict( + data={ + "index": [0, 1, 2], + "columns": [("test", "kilonewton")], + "data": [[1], [2], [3]], + "index_names": [None], + "column_names": [None, "unit"], + }, + orient="tight", + dtype="Int64", + ) + result = df.iloc[:, 1:].pint.dequantify() + tm.assert_frame_equal(expected, result) + + expected = pd.DataFrame.from_dict( + data={ + "index": [0, 1, 2], + "columns": [("test", "kilonewton"), ("test", "kilonewton")], + "data": [[1, 1], [2, 2], [3, 3]], + "index_names": [None], + "column_names": [None, "unit"], + }, + orient="tight", + dtype="Int64", + ) + result = df.pint.dequantify() + tm.assert_frame_equal(expected, result)