Retrieve data without aggregation fix and numpy + pandas 2.0 compatibility (#180)

* fix: reduced view
* build: numpy 2 support
* test: add retrieval tests
* docs: add release notes
PyPSA · Jul 22, 2024 · 4355783 · 4355783
1 parent 5a9a0b4
commit 4355783
Show file tree

Hide file tree

Showing 6 changed files with 38 additions and 13 deletions.
diff --git a/doc/release-notes.rst b/doc/release-notes.rst
@@ -5,6 +5,10 @@ Release Notes
 Upcoming Version
 ----------------
 
+* Pandas 2.0.0 and Numpy 2.0.0 compatibility
+
+* Bugfix: Fix retrieving data without aggregation
+
 Version 0.5.15 (12.05.2024)
 ---------------------------
 

diff --git a/powerplantmatching/collection.py b/powerplantmatching/collection.py
@@ -241,9 +241,15 @@ def powerplants(
         matched = matched.powerplant.fill_geoposition()
 
     if filter_missing_geopositions:
-        matched = matched[matched.lat.notnull()]
+        if isinstance(matched.columns, pd.MultiIndex):
+            matched = matched[matched.lat.notnull().any(axis=1)]
+        else:
+            matched = matched[matched.lat.notnull()]
 
-    matched.drop_duplicates(["Name", "Fueltype", "Country"])
+    if isinstance(matched.columns, pd.MultiIndex):
+        matched.stack().drop_duplicates(["Name", "Fueltype", "Country"]).unstack(-1)
+    else:
+        matched.drop_duplicates(["Name", "Fueltype", "Country"])
 
     matched.reset_index(drop=True).to_csv(fn, index_label="id", encoding="utf-8")
 

diff --git a/powerplantmatching/heuristics.py b/powerplantmatching/heuristics.py
@@ -319,7 +319,7 @@ def wm(x):
         df.groupby(["Country", "DateIn", "Fueltype", "Technology"])
         .agg(f)
         .reset_index()
-        .replace({"-": np.NaN})
+        .replace({"-": np.nan})
     )
     df.columns = df.columns.droplevel(level=1)
     return df.assign(Set="PP", DateRetrofit=df.DateIn)
@@ -431,7 +431,7 @@ def reduceVintages(addition, mat, life, y_pres):
             mat = pd.DataFrame(
                 columns=range(y_start - life + 1, y_end + life),
                 index=range(y_start - life + 1, y_end),
-            ).astype(np.float)
+            ).astype(float)
             if dfs.Fueltype.iloc[0] in ["Solar", "Wind", "Bioenergy", "Geothermal"]:
                 mat = setInitial_Triangle(mat, dfs, life)
             else:

diff --git a/powerplantmatching/utils.py b/powerplantmatching/utils.py
@@ -615,12 +615,23 @@ def fill_geoposition(
 
     if use_saved_locations:
         logger.info(f"Adding stored geo-position from {fn}")
-        locs = pd.read_csv(fn, index_col=[0, 1])
-        locs = locs[~locs.index.duplicated()]
-        df = df.where(
-            df[["lat", "lon"]].notnull().all(1),
-            df.drop(columns=["lat", "lon"]).join(locs, on=["Name", "Country"]),
-        )
+        locs = pd.read_csv(fn, index_col=[0, 1]).drop_duplicates()
+        if isinstance(df.columns, pd.MultiIndex):
+            new_data = (
+                df.drop(columns=["lat", "lon"])
+                .stack()
+                .join(locs, on=["Name", "Country"])
+                .unstack(-1)
+                .reindex(columns=df.columns)
+            )
+        else:
+            new_data = (
+                df.drop(columns=["lat", "lon"])
+                .join(locs, on=["Name", "Country"])
+                .reindex(columns=df.columns)
+            )
+
+        df = df.where(df[["lat", "lon"]].notnull().all(axis=1), new_data)
     if saved_only:
         return df
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -13,4 +13,5 @@ select = [
     # 'D',   # pydocstyle
     'UP',  # pyupgrade
     'TID', # flake8-tidy-imports
+    'NPY', # numpy
 ]
diff --git a/test/test_data.py b/test/test_data.py
@@ -51,6 +51,9 @@ def test_OPSD_VRE_country():
     assert df.Capacity.sum() > 0
 
 
-# Enable after release of v0.5.0
-# def test_powerplants():
-#     pm.powerplants(from_url=True)
+def test_url_retrieval():
+    pm.powerplants(from_url=True)
+
+
+def test_reduced_retrieval():
+    pm.powerplants(reduced=False)