Merge #207

207: Dequantify duplicate cols r=andrewgsavage a=MichaelTiemannOSC

Use enumeration to wade through duplicate column names. This does not preserve default column names when dequantifying, but it doesn't break. Should `pint.dequantify()` also preserve duplicated column names?

- [x] Closes #202
- [x] Executed `pre-commit run --all-files` with no errors
- [x] The change is fully covered by automated unit tests
- [x] Documented in docs/ as appropriate
- [x] Added an entry to the CHANGES file

Co-authored-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com>
hgrecco · Oct 28, 2023 · 34c4b03
2 parents 2e087e6 + 2808bb8
commit 34c4b03
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 11 deletions.
diff --git a/CHANGES b/CHANGES
@@ -4,6 +4,7 @@ pint-pandas Changelog
 0.6 (unreleased)
 ----------------
 
+- Fix dequantify duplicate column failure #202
 - Fix astype issue #196
 
 

diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py
@@ -1,7 +1,6 @@
 import copy
 import re
 import warnings
-from collections import OrderedDict
 from importlib.metadata import version
 
 import numpy as np
@@ -981,23 +980,35 @@ def formatter_func(dtype):
 
         df_columns = df.columns.to_frame()
         df_columns["units"] = [
-            formatter_func(df[col].dtype)
-            if isinstance(df[col].dtype, PintType)
+            formatter_func(df.dtypes.iloc[i])
+            if isinstance(df.dtypes.iloc[i], PintType)
             else NO_UNIT
-            for col in df.columns
+            for i, col in enumerate(df.columns)
         ]
 
-        data_for_df = OrderedDict()
+        data_for_df = []
         for i, col in enumerate(df.columns):
-            if isinstance(df[col].dtype, PintType):
-                data_for_df[tuple(df_columns.iloc[i])] = df[col].values.data
+            if isinstance(df.dtypes.iloc[i], PintType):
+                data_for_df.append(
+                    pd.Series(
+                        data=df.iloc[:, i].values.data,
+                        name=tuple(df_columns.iloc[i]),
+                        index=df.index,
+                        copy=False,
+                    )
+                )
             else:
-                data_for_df[tuple(df_columns.iloc[i])] = df[col].values
-
-        df_new = DataFrame(data_for_df, columns=data_for_df.keys())
+                data_for_df.append(
+                    pd.Series(
+                        data=df.iloc[:, i].values,
+                        name=tuple(df_columns.iloc[i]),
+                        index=df.index,
+                        copy=False,
+                    )
+                )
 
+        df_new = pd.concat(data_for_df, axis=1, copy=False)
         df_new.columns.names = df.columns.names + ["unit"]
-        df_new.index = df.index
 
         return df_new
 

diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py
@@ -194,3 +194,38 @@ def test_issue_194(dtype):
     s2 = s1.astype(dtype)
 
     tm.assert_series_equal(s0, s2)
+
+
+class TestIssue202(BaseExtensionTests):
+    def test_dequantify(self):
+        df = pd.DataFrame()
+        df["test"] = pd.Series([1, 2, 3], dtype="pint[kN]")
+        df.insert(0, "test", df["test"], allow_duplicates=True)
+
+        expected = pd.DataFrame.from_dict(
+            data={
+                "index": [0, 1, 2],
+                "columns": [("test", "kilonewton")],
+                "data": [[1], [2], [3]],
+                "index_names": [None],
+                "column_names": [None, "unit"],
+            },
+            orient="tight",
+            dtype="Int64",
+        )
+        result = df.iloc[:, 1:].pint.dequantify()
+        tm.assert_frame_equal(expected, result)
+
+        expected = pd.DataFrame.from_dict(
+            data={
+                "index": [0, 1, 2],
+                "columns": [("test", "kilonewton"), ("test", "kilonewton")],
+                "data": [[1, 1], [2, 2], [3, 3]],
+                "index_names": [None],
+                "column_names": [None, "unit"],
+            },
+            orient="tight",
+            dtype="Int64",
+        )
+        result = df.pint.dequantify()
+        tm.assert_frame_equal(expected, result)
-Original file line number
+Diff line change
@@ -4,6 +4,7 @@ pint-pandas Changelog
 0.6 (unreleased)
 ----------------
+- Fix dequantify duplicate column failure #202
 - Fix astype issue #196
@@ Expand Down @@