Skip to content

Commit

Permalink
Retrieve data without aggregation fix and numpy + pandas 2.0 compatibility (#180)
Browse files Browse the repository at this point in the history

* fix: reduced view

* build: numpy 2 support

* test: add retrieval tests

* docs: add release notes
  • Loading branch information
lkstrp authored Jul 22, 2024
1 parent 5a9a0b4 commit 4355783
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 13 deletions.
4 changes: 4 additions & 0 deletions doc/release-notes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ Release Notes
Upcoming Version
----------------

* Pandas 2.0.0 and Numpy 2.0.0 compatibility

* Bugfix: Retrieving data without aggregation (``reduced=False``) works again

Version 0.5.15 (12.05.2024)
---------------------------

Expand Down
10 changes: 8 additions & 2 deletions powerplantmatching/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,9 +241,15 @@ def powerplants(
matched = matched.powerplant.fill_geoposition()

if filter_missing_geopositions:
matched = matched[matched.lat.notnull()]
if isinstance(matched.columns, pd.MultiIndex):
matched = matched[matched.lat.notnull().any(axis=1)]
else:
matched = matched[matched.lat.notnull()]

matched.drop_duplicates(["Name", "Fueltype", "Country"])
if isinstance(matched.columns, pd.MultiIndex):
matched.stack().drop_duplicates(["Name", "Fueltype", "Country"]).unstack(-1)
else:
matched.drop_duplicates(["Name", "Fueltype", "Country"])

matched.reset_index(drop=True).to_csv(fn, index_label="id", encoding="utf-8")

Expand Down
4 changes: 2 additions & 2 deletions powerplantmatching/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def wm(x):
df.groupby(["Country", "DateIn", "Fueltype", "Technology"])
.agg(f)
.reset_index()
.replace({"-": np.NaN})
.replace({"-": np.nan})
)
df.columns = df.columns.droplevel(level=1)
return df.assign(Set="PP", DateRetrofit=df.DateIn)
Expand Down Expand Up @@ -431,7 +431,7 @@ def reduceVintages(addition, mat, life, y_pres):
mat = pd.DataFrame(
columns=range(y_start - life + 1, y_end + life),
index=range(y_start - life + 1, y_end),
).astype(np.float)
).astype(float)
if dfs.Fueltype.iloc[0] in ["Solar", "Wind", "Bioenergy", "Geothermal"]:
mat = setInitial_Triangle(mat, dfs, life)
else:
Expand Down
23 changes: 17 additions & 6 deletions powerplantmatching/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,12 +615,23 @@ def fill_geoposition(

if use_saved_locations:
logger.info(f"Adding stored geo-position from {fn}")
locs = pd.read_csv(fn, index_col=[0, 1])
locs = locs[~locs.index.duplicated()]
df = df.where(
df[["lat", "lon"]].notnull().all(1),
df.drop(columns=["lat", "lon"]).join(locs, on=["Name", "Country"]),
)
locs = pd.read_csv(fn, index_col=[0, 1]).drop_duplicates()
if isinstance(df.columns, pd.MultiIndex):
new_data = (
df.drop(columns=["lat", "lon"])
.stack()
.join(locs, on=["Name", "Country"])
.unstack(-1)
.reindex(columns=df.columns)
)
else:
new_data = (
df.drop(columns=["lat", "lon"])
.join(locs, on=["Name", "Country"])
.reindex(columns=df.columns)
)

df = df.where(df[["lat", "lon"]].notnull().all(axis=1), new_data)
if saved_only:
return df

Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ select = [
# 'D', # pydocstyle
'UP', # pyupgrade
'TID', # flake8-tidy-imports
'NPY', # numpy
]
9 changes: 6 additions & 3 deletions test/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ def test_OPSD_VRE_country():
assert df.Capacity.sum() > 0


# Enable after release of v0.5.0
# def test_powerplants():
# pm.powerplants(from_url=True)
def test_url_retrieval():
    """Smoke-test downloading the power plant dataset with ``from_url=True``."""
    df = pm.powerplants(from_url=True)
    # A bare call only proves that nothing raised; also require that rows
    # actually came back.
    # NOTE(review): assumes ``pm.powerplants`` returns a pandas DataFrame -- confirm.
    assert not df.empty


def test_reduced_retrieval():
    """Smoke-test retrieving the non-aggregated dataset with ``reduced=False``."""
    df = pm.powerplants(reduced=False)
    # ``.empty`` is used instead of a column-based check because the
    # non-reduced result may carry MultiIndex columns.
    # NOTE(review): assumes ``pm.powerplants`` returns a pandas DataFrame -- confirm.
    assert not df.empty

0 comments on commit 4355783

Please sign in to comment.