From f3e6084113cfb9a6041927e7397b697f108e52f1 Mon Sep 17 00:00:00 2001
From: lkstrp <lkstrp@pm.me>
Date: Mon, 10 Jun 2024 15:15:58 +0200
Subject: [PATCH 1/3] refactor: clean up, add and apply `Ruff`

---
 README.md                                |  1 +
 analysis/compare-with-entsoe-stats.py    |  4 +--
 analysis/danish-powerplants.py           |  6 ----
 analysis/german-powerplants.py           |  6 ----
 analysis/interactive-map-with-widgets.py |  3 --
 analysis/interactive-map.py              |  4 ---
 doc/example.ipynb                        | 36 +++++++++++++---------
 powerplantmatching/__init__.py           | 20 ++++++++-----
 powerplantmatching/accessor.py           |  1 -
 powerplantmatching/cleaning.py           |  9 ++----
 powerplantmatching/collection.py         |  3 +-
 powerplantmatching/core.py               |  1 -
 powerplantmatching/data.py               | 20 +++++--------
 powerplantmatching/duke.py               | 16 ++++------
 powerplantmatching/export.py             |  8 ++---
 powerplantmatching/heuristics.py         | 38 +++++++++++-------------
 powerplantmatching/matching.py           | 10 ++-----
 powerplantmatching/plot.py               | 11 ++++---
 powerplantmatching/utils.py              | 10 ++-----
 pyproject.toml                           | 16 ++++++++++
 setup.py                                 |  4 +--
 test/test_cleaning.py                    |  2 --
 test/test_data.py                        |  3 +-
 23 files changed, 104 insertions(+), 128 deletions(-)
 create mode 100644 pyproject.toml

diff --git a/README.md b/README.md
index 5ef73d0b..fcc3f321 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,7 @@
 
  [![pypi](https://img.shields.io/pypi/v/powerplantmatching.svg)](https://pypi.org/project/powerplantmatching/) [![conda](https://img.shields.io/conda/vn/conda-forge/powerplantmatching.svg)](https://anaconda.org/conda-forge/powerplantmatching) ![pythonversion](https://img.shields.io/pypi/pyversions/powerplantmatching) ![LICENSE](https://img.shields.io/pypi/l/powerplantmatching.svg) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3358985.svg)](https://zenodo.org/record/3358985#.XUReFPxS_MU) [![doc](https://readthedocs.org/projects/powerplantmatching/badge/?version=latest)](https://powerplantmatching.readthedocs.io/en/latest/) [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/FRESNA/powerplantmatching/master.svg)](https://results.pre-commit.ci/latest/github/FRESNA/powerplantmatching/master)
 [![Stack Exchange questions](https://img.shields.io/stackexchange/stackoverflow/t/pypsa)](https://stackoverflow.com/questions/tagged/pypsa)
+[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 
 A toolset for cleaning, standardizing and combining multiple power
 plant databases.
diff --git a/analysis/compare-with-entsoe-stats.py b/analysis/compare-with-entsoe-stats.py
index 48b9072c..ff52b1db 100644
--- a/analysis/compare-with-entsoe-stats.py
+++ b/analysis/compare-with-entsoe-stats.py
@@ -36,8 +36,8 @@
 def parse(c):
     try:
         return client.query_installed_generation_capacity(c, **kwargs).iloc[0]
-    except:
-        print(f"Country {c} failed")
+    except Exception as e:
+        print(f"Country {c} failed with {e}")
         return np.nan
 
 
diff --git a/analysis/danish-powerplants.py b/analysis/danish-powerplants.py
index d7958f6b..1aaeee39 100644
--- a/analysis/danish-powerplants.py
+++ b/analysis/danish-powerplants.py
@@ -1,15 +1,9 @@
 import copy
 
-import cartopy.crs as ccrs
 import hvplot
 import hvplot.pandas  # noqa
 import hvplot.xarray  # noqa
-import pandas as pd
 import panel as pn
-import xarray as xr
-from holoviews import opts
-from holoviews.plotting.util import process_cmap
-from xarray import align
 
 import powerplantmatching as pm
 
diff --git a/analysis/german-powerplants.py b/analysis/german-powerplants.py
index 26ff1c16..e3c57c38 100644
--- a/analysis/german-powerplants.py
+++ b/analysis/german-powerplants.py
@@ -1,15 +1,9 @@
 import copy
 
-import cartopy.crs as ccrs
 import hvplot
 import hvplot.pandas  # noqa
 import hvplot.xarray  # noqa
-import pandas as pd
 import panel as pn
-import xarray as xr
-from holoviews import opts
-from holoviews.plotting.util import process_cmap
-from xarray import align
 
 import powerplantmatching as pm
 
diff --git a/analysis/interactive-map-with-widgets.py b/analysis/interactive-map-with-widgets.py
index 00822274..45bc47b8 100644
--- a/analysis/interactive-map-with-widgets.py
+++ b/analysis/interactive-map-with-widgets.py
@@ -1,10 +1,7 @@
 import hvplot
 import hvplot.pandas  # noqa
 import hvplot.xarray  # noqa
-import pandas as pd
 import panel as pn
-import xarray as xr
-from xarray import align
 
 import powerplantmatching as pm
 
diff --git a/analysis/interactive-map.py b/analysis/interactive-map.py
index 4384d53a..cf5a4013 100644
--- a/analysis/interactive-map.py
+++ b/analysis/interactive-map.py
@@ -1,10 +1,6 @@
 import hvplot
 import hvplot.pandas  # noqa
 import hvplot.xarray  # noqa
-import pandas as pd
-import panel as pn
-import xarray as xr
-from xarray import align
 
 import powerplantmatching as pm
 
diff --git a/doc/example.ipynb b/doc/example.ipynb
index fc09ec6c..68afa232 100644
--- a/doc/example.ipynb
+++ b/doc/example.ipynb
@@ -22,8 +22,9 @@
     }
    ],
    "source": [
-    "import powerplantmatching as pm\n",
-    "import pandas as pd"
+    "import pandas as pd\n",
+    "\n",
+    "import powerplantmatching as pm"
    ]
   },
   {
@@ -844,8 +845,8 @@
     }
    ],
    "source": [
-    "print('Total capacity of GEO is: \\n  {} MW \\n'.format(geo.Capacity.sum()));\n",
-    "print('The technology types are: \\n {} '.format(geo.Technology.unique()))"
+    "print(f\"Total capacity of GEO is: \\n  {geo.Capacity.sum()} MW \\n\")\n",
+    "print(f\"The technology types are: \\n {geo.Technology.unique()} \")"
    ]
   },
   {
@@ -888,7 +889,7 @@
     }
    ],
    "source": [
-    "pm.plot.fueltype_totals_bar([geo, entsoe, stats], keys=[\"ENTSOE\", \"GEO\", 'Statistics']);"
+    "pm.plot.fueltype_totals_bar([geo, entsoe, stats], keys=[\"ENTSOE\", \"GEO\", \"Statistics\"]);"
    ]
   },
   {
@@ -1414,7 +1415,7 @@
     }
    ],
    "source": [
-    "pm.plot.fueltype_totals_bar([intersection, stats], keys=[\"Intersection\", 'Statistics']);"
+    "pm.plot.fueltype_totals_bar([intersection, stats], keys=[\"Intersection\", \"Statistics\"]);"
    ]
   },
   {
@@ -1432,7 +1433,9 @@
     }
    ],
    "source": [
-    "combined = intersection.powerplant.extend_by_non_matched(entsoe).powerplant.extend_by_non_matched(geo)"
+    "combined = intersection.powerplant.extend_by_non_matched(\n",
+    "    entsoe\n",
+    ").powerplant.extend_by_non_matched(geo)"
    ]
   },
   {
@@ -1452,7 +1455,7 @@
     }
    ],
    "source": [
-    "pm.plot.fueltype_totals_bar([combined, stats], keys=[\"Combined\", 'Statistics']);"
+    "pm.plot.fueltype_totals_bar([combined, stats], keys=[\"Combined\", \"Statistics\"]);"
    ]
   },
   {
@@ -1511,7 +1514,7 @@
     }
    ],
    "source": [
-    "m.powerplant.plot_map(figsize=(11,8));"
+    "m.powerplant.plot_map(figsize=(11, 8));"
    ]
   },
   {
@@ -1531,7 +1534,7 @@
     }
    ],
    "source": [
-    "pm.plot.fueltype_totals_bar([m, stats], keys=[\"Processed\", 'Statistics']);"
+    "pm.plot.fueltype_totals_bar([m, stats], keys=[\"Processed\", \"Statistics\"]);"
    ]
   },
   {
@@ -1569,7 +1572,7 @@
     }
    ],
    "source": [
-    "pm.plot.factor_comparison([m, stats], keys=['Processed', 'Statistics'])"
+    "pm.plot.factor_comparison([m, stats], keys=[\"Processed\", \"Statistics\"])"
    ]
   },
   {
@@ -1911,9 +1914,14 @@
     }
    ],
    "source": [
-    "pd.concat([m[m.DateIn.notnull()].groupby('Fueltype').DateIn.count(),\n",
-    "          m[m.DateIn.isna()].fillna(1).groupby('Fueltype').DateIn.count()], \n",
-    "          keys=['DateIn existent', 'DateIn missing'], axis=1)"
+    "pd.concat(\n",
+    "    [\n",
+    "        m[m.DateIn.notnull()].groupby(\"Fueltype\").DateIn.count(),\n",
+    "        m[m.DateIn.isna()].fillna(1).groupby(\"Fueltype\").DateIn.count(),\n",
+    "    ],\n",
+    "    keys=[\"DateIn existent\", \"DateIn missing\"],\n",
+    "    axis=1,\n",
+    ")"
    ]
   }
  ],
diff --git a/powerplantmatching/__init__.py b/powerplantmatching/__init__.py
index 988bc773..17870317 100644
--- a/powerplantmatching/__init__.py
+++ b/powerplantmatching/__init__.py
@@ -21,21 +21,25 @@
 power plant databases.
 """
 
-
-from __future__ import absolute_import
-
 __version__ = "0.5.15"
 __author__ = "Fabian Hofmann"
 __copyright__ = "Copyright 2017-2024 Technical University of Berlin"
 # The rough hierarchy of this package is
 # core, utils, heuristics, cleaning, matching, collection, data
 
-# from . import cleaning
-# from . import matching
-# from . import collection
-# Commonly used sub-modules. Imported here to provide end-user
-# convenience.
 from . import core, data, heuristics, plot, utils
 from .accessor import PowerPlantAccessor
 from .collection import powerplants
 from .core import get_config, package_config
+
+__all__ = [
+    "powerplants",
+    "get_config",
+    "package_config",
+    "PowerPlantAccessor",
+    "core",
+    "data",
+    "heuristics",
+    "plot",
+    "utils",
+]
diff --git a/powerplantmatching/accessor.py b/powerplantmatching/accessor.py
index 5d840a94..9f4aa249 100644
--- a/powerplantmatching/accessor.py
+++ b/powerplantmatching/accessor.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
 Created on Tue Jul 16 18:09:56 2019
 
diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 88b2c7a5..72ab7be3 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright 2016-2018 Fabian Hofmann (FIAS), Jonas Hoersch (KIT, IAI) and
 # Fabian Gotzens (FZJ, IEK-STE)
 
@@ -17,11 +16,8 @@
 """
 Functions for vertically cleaning a dataset.
 """
-from __future__ import absolute_import, print_function
 
 import logging
-import os
-import re
 
 import networkx as nx
 import numpy as np
@@ -160,14 +156,13 @@ def gather_and_replace(df, mapping):
     for key, pattern in mapping.items():
         if not pattern:
             # if pattern is not given, fall back to case-insensitive key
-            pattern = r"(?i)\b%s\b" % key
+            pattern = f"(?i)\b{key}\b"
         elif isinstance(pattern, list):
             # if pattern is a list, concat all entries in a case-insensitive regex
             pattern = r"(?i)" + "|".join([rf"\b{p}\b" for p in pattern])
         elif not isinstance(pattern, str):
             raise ValueError(f"Pattern must be string or list, not {type(pattern)}")
-        func = lambda ds: ds.str.contains(pattern)
-        where = df.astype(str).apply(func).any(axis=1)
+        where = df.astype(str).apply(lambda ds: ds.str.contains(pattern)).any(axis=1)
         res = res.where(~where, key)
     return res
 
diff --git a/powerplantmatching/collection.py b/powerplantmatching/collection.py
index a2c88847..66303d14 100644
--- a/powerplantmatching/collection.py
+++ b/powerplantmatching/collection.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright 2016-2018 Fabian Hofmann (FIAS), Jonas Hoersch (KIT, IAI) and
 # Fabian Gotzens (FZJ, IEK-STE)
 
@@ -17,6 +16,7 @@
 """
 Processed datasets of merged and/or adjusted data
 """
+
 import logging
 import os
 
@@ -31,7 +31,6 @@
     parmap,
     projectID_to_dict,
     set_column_name,
-    set_uncommon_fueltypes_to_other,
     to_dict_if_string,
 )
 
diff --git a/powerplantmatching/core.py b/powerplantmatching/core.py
index 2b0f2e72..5e4b7834 100644
--- a/powerplantmatching/core.py
+++ b/powerplantmatching/core.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
 Created on Tue Jul 16 15:47:46 2019
 
diff --git a/powerplantmatching/data.py b/powerplantmatching/data.py
index a4c18869..23e1aa03 100644
--- a/powerplantmatching/data.py
+++ b/powerplantmatching/data.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright 2016-2020 Fabian Hofmann (FIAS), Jonas Hoersch (KIT, IAI) and
 # Fabian Gotzens (FZJ, IEK-STE)
 
@@ -20,8 +19,6 @@
 
 import logging
 import os
-import re
-import xml.etree.ElementTree as ET
 from zipfile import ZipFile
 
 import entsoe
@@ -38,13 +35,12 @@
     gather_specifications,
     gather_technology_info,
 )
-from .core import _data_in, _package_data, get_config
+from .core import _package_data, get_config
 from .heuristics import scale_to_net_capacities
 from .utils import (
     config_filter,
     convert_to_short_name,
     correct_manually,
-    fill_geoposition,
     get_raw_file,
     set_column_name,
 )
@@ -266,7 +262,7 @@ def GEO(raw=False, update=False, config=None):
 
     def to_year(ds):
         years = pd.to_numeric(ds.dropna().astype(str).str[:4], errors="coerce")
-        year = years[lambda x: x > 1900]
+        year = years[lambda x: x > 1900]  # noqa
         return years.reindex_like(ds)
 
     fn = get_raw_file("GEO_units", update=update, config=config)
@@ -527,7 +523,7 @@ def Capacity_stats(
     else:
         source = "Capacity statistics"
 
-    fueltypes = config["target_fueltypes"]
+    fueltypes = config["target_fueltypes"]  # noqa
     df = (
         df.query("year == @year")
         .rename(columns={"technology": "Fueltype"})
@@ -579,8 +575,8 @@ def GPD(raw=False, update=False, config=None, filter_other_dbs=True):
 
     other_dbs = []
     if filter_other_dbs:
-        other_dbs = ["GEODB", "Open Power System Data", "ENTSOE"]
-    countries = config["target_countries"]
+        other_dbs = ["GEODB", "Open Power System Data", "ENTSOE"]  # noqa
+    countries = config["target_countries"]  # noqa
     return (
         df.rename(columns=lambda x: x.title())
         .drop(columns="Country")
@@ -1203,7 +1199,7 @@ def UBA(
         .str[0]
         .astype(float),
         Country="Germany",
-        projectID=["UBA{:03d}".format(i + header + 2) for i in uba.index],
+        projectID=[f"UBA{i + header + 2:03d}" for i in uba.index],
         Technology=uba.Technology.replace(RENAME_TECHNOLOGY),
     )
     uba.loc[uba.CHP.notnull(), "Set"] = "CHP"
@@ -1580,8 +1576,8 @@ def IRENASTAT(raw=False, update=False, config=None):
     df["Fueltype"] = df.Technology.map(fueltype_dict)
     df["Technology"] = df.Technology.replace(technology_dict)
 
-    l = list(set(df.columns).difference(set(["Capacity"])))
-    df = df.groupby(l, as_index=False, dropna=True).sum()
+    non_capacity_columns = list(set(df.columns).difference(set(["Capacity"])))
+    df = df.groupby(non_capacity_columns, as_index=False, dropna=True).sum()
 
     return df
 
diff --git a/powerplantmatching/duke.py b/powerplantmatching/duke.py
index 71bb9d3b..6eefe393 100644
--- a/powerplantmatching/duke.py
+++ b/powerplantmatching/duke.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright 2015-2016 Fabian Hofmann (FIAS), Jonas Hoersch (FIAS)
 
 # This program is free software; you can redistribute it and/or
@@ -14,7 +13,6 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-from __future__ import absolute_import, print_function
 
 import logging
 import os
@@ -78,7 +76,7 @@ def duke(
     """
 
     try:
-        sub.run(["java", "-version"], check=True, stderr=sub.PIPE, stdout=sub.PIPE)
+        sub.run(["java", "-version"], check=True, capture_output=True)
     except sub.CalledProcessError:
         err = "Java is not installed or not in the system's PATH. Please install Java and ensure it is in your system's PATH, then try again."
         logger.error(err)
@@ -112,9 +110,7 @@ def duke(
             if n == 1:
                 shift_by = datasets[0].index.max() + 1
                 df.index += shift_by
-            df.to_csv(
-                os.path.join(tmpdir, "file{}.csv".format(n + 1)), index_label="id"
-            )
+            df.to_csv(os.path.join(tmpdir, f"file{n + 1}.csv"), index_label="id")
             if n == 1:
                 df.index -= shift_by
 
@@ -145,9 +141,9 @@ def duke(
         if showmatches:
             print(_)
 
-        logger.debug("Stderr: {}".format(stderr))
+        logger.debug(f"Stderr: {stderr}")
         if any(word in stderr.lower() for word in ["error", "fehler"]):
-            raise RuntimeError("duke failed: {}".format(stderr))
+            raise RuntimeError(f"duke failed: {stderr}")
 
         if dedup:
             return pd.read_csv(
@@ -168,7 +164,7 @@ def duke(
 
     finally:
         if keepfiles:
-            logger.debug("Files of the duke run are kept in {}".format(tmpdir))
+            logger.debug(f"Files of the duke run are kept in {tmpdir}")
         else:
             shutil.rmtree(tmpdir)
-            logger.debug("Files of the duke run have been deleted in {}".format(tmpdir))
+            logger.debug(f"Files of the duke run have been deleted in {tmpdir}")
diff --git a/powerplantmatching/export.py b/powerplantmatching/export.py
index f62e9bc5..e2b6cfda 100644
--- a/powerplantmatching/export.py
+++ b/powerplantmatching/export.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright 2016-2018 Fabian Hofmann (FIAS), Jonas Hoersch (KIT, IAI) and
 # Fabian Gotzens (FZJ, IEK-STE)
 
@@ -302,9 +301,10 @@ def to_TIMES(df=None, use_scaled_capacity=False, baseyear=2015):
                     logger.error(
                         "For region '{}' and timestype '{}' the value for "
                         "year {} ({0.000}) is higher than in the year before "
-                        "({0.000}).".format(
-                            reg, tt, yr, df_exp.loc[row, reg], df_exp.loc[row - 1, reg]
-                        )
+                        "({0.000}).",
+                        reg,
+                        tt,
+                        yr,
                     )
             df_exp.loc[row, "Pset_Pn"] = tt
             row += 1
diff --git a/powerplantmatching/heuristics.py b/powerplantmatching/heuristics.py
index b0d089a2..1e31e28f 100644
--- a/powerplantmatching/heuristics.py
+++ b/powerplantmatching/heuristics.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright 2016-2018 Fabian Hofmann (FIAS), Jonas Hoersch (KIT, IAI) and
 # Fabian Gotzens (FZJ, IEK-STE)
 
@@ -26,7 +25,7 @@
 from six import iteritems
 
 from .core import _package_data, get_config, get_obj_if_Acc
-from .utils import get_name, lookup
+from .utils import lookup
 
 logger = logging.getLogger(__name__)
 
@@ -134,8 +133,9 @@ def isin(df, matched, label=None):
     if matched.columns.nlevels > 1:
         included_ids = matched["projectID", label].dropna().apply(list).sum()
     else:
-        get = lambda d: d.get(label)
-        included_ids = matched.projectID.map(get).dropna().apply(list).sum()
+        included_ids = (
+            matched.projectID.map(lambda d: d.get(label)).dropna().apply(list).sum()
+        )
     if included_ids == 0:
         included_ids = []
 
@@ -169,12 +169,12 @@ def rescale_capacities_to_country_totals(df, fueltypes=None):
     stats_df = lookup(df).loc[fueltypes]
     stats_entsoe = lookup(Capacity_stats()).loc[fueltypes]
     if ((stats_df == 0) & (stats_entsoe != 0)).any().any():
+        country_list = stats_df.loc[
+            :, ((stats_df == 0) & (stats_entsoe != 0)).any()
+        ].columns.tolist()
         print(
-            "Could not scale powerplants in the countries %s because of no \
-              occurring power plants in these countries"
-            % stats_df.loc[
-                :, ((stats_df == 0) & (stats_entsoe != 0)).any()
-            ].columns.tolist()
+            f"Could not scale powerplants in the countries {country_list} because of "
+            f"no occurring power plants in these countries"
         )
     ratio = (stats_entsoe / stats_df).fillna(1)
     df["Scaled Capacity"] = df.loc[:, "Capacity"]
@@ -243,12 +243,10 @@ def fill_missing_commissioning_years(df):
     if df.DateIn.isnull().any():
         count = len(df[df.DateIn.isnull()])
         logger.warn(
-            """There are still *{0}* empty values for
+            f"""There are still *{count}* empty values for
                         'DateIn' in the DataFrame. These should
                         be either be filled manually or dropped.
-            """.format(
-                count
-            )
+            """
         )
     df["DateIn"] = df.DateIn.astype(float)
     df["DateRetrofit"] = df.DateRetrofit.fillna(df.DateIn)
@@ -309,10 +307,8 @@ def wm(x):
         }
     else:
         raise TypeError(
-            "Value given for `agg_geo_by` is '{}' but must be either \
-                        'NoneType' or 'mean' or 'wm'.".format(
-                agg_geo_by
-            )
+            f"Value given for `agg_geo_by` is '{agg_geo_by}' but must be either \
+                        'NoneType' or 'mean' or 'wm'."
         )
 
     if target_fueltypes is None:
@@ -572,9 +568,9 @@ def scale_to_net_capacities(df, is_gross=True, catch_all=True):
     if is_gross:
         factors = gross_to_net_factors()
         for ftype, tech in factors.index.values:
-            df.loc[
-                (df.Fueltype == ftype) & (df.Technology == tech), "Capacity"
-            ] *= factors.loc[(ftype, tech)]
+            df.loc[(df.Fueltype == ftype) & (df.Technology == tech), "Capacity"] *= (
+                factors.loc[(ftype, tech)]
+            )
         if catch_all:
             for ftype in factors.index.levels[0]:
                 techs = factors.loc[ftype].index.tolist()
@@ -626,6 +622,6 @@ def set_known_retire_years(df):
         if name_match_b.any():
             ppl_de_nuc.loc[name_match_b, "YearRetire"] = year
         else:
-            logger.warn("'{}' was not found in given DataFrame.".format(name))
+            logger.warn(f"'{name}' was not found in given DataFrame.")
     df.loc[ppl_de_nuc.index, "YearRetire"] = ppl_de_nuc["YearRetire"]
     return df
diff --git a/powerplantmatching/matching.py b/powerplantmatching/matching.py
index 0b2fe2c4..dc0e2b48 100644
--- a/powerplantmatching/matching.py
+++ b/powerplantmatching/matching.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright 2016-2018 Fabian Hofmann (FIAS), Jonas Hoersch (KIT, IAI) and
 # Fabian Gotzens (FZJ, IEK-STE)
 
@@ -18,10 +17,7 @@
 Functions for linking and combining different datasets
 """
 
-from __future__ import absolute_import, print_function
-
 import logging
-import os.path
 from itertools import combinations
 
 import numpy as np
@@ -89,7 +85,7 @@ def compare_two_datasets(dfs, labels, country_wise=True, config=None, **dukeargs
         logger.warn(msg + f"{used_deprecated_args}")
 
     dfs = list(map(read_csv_if_string, dfs))
-    if not ("singlematch" in dukeargs):
+    if "singlematch" not in dukeargs:
         dukeargs["singlematch"] = True
 
     def country_link(dfs, country):
@@ -106,7 +102,7 @@ def country_link(dfs, country):
     if country_wise:
         countries = config["target_countries"]
         links = [country_link(dfs, c) for c in countries]
-        links = [l for l in links if not l.empty]
+        links = [link for link in links if not link.empty]
         if links:
             links = pd.concat(links, ignore_index=True)
         else:
@@ -210,7 +206,7 @@ def link_multiple_datasets(
     combs = list(combinations(range(len(labels)), 2))
 
     def comp_dfs(dfs_lbs):
-        logger.info("Comparing data sources `{0}` and `{1}`".format(*dfs_lbs[2:]))
+        logger.info("Comparing data sources `{}` and `{}`".format(*dfs_lbs[2:]))
         return compare_two_datasets(dfs_lbs[:2], dfs_lbs[2:], config=config, **dukeargs)
 
     mapargs = [[dfs[c], dfs[d], labels[c], labels[d]] for c, d in combs]
diff --git a/powerplantmatching/plot.py b/powerplantmatching/plot.py
index 5f371f38..41ccf62a 100644
--- a/powerplantmatching/plot.py
+++ b/powerplantmatching/plot.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright 2016-2018 Fabian Hofmann (FIAS), Jonas Hoersch (KIT, IAI) and
 # Fabian Gotzens (FZJ, IEK-STE)
 
@@ -164,7 +163,7 @@ def fueltype_totals_bar(
     last_as_marker=False,
     axes_style="whitegrid",
     exclude=[],
-    **kwargs
+    **kwargs,
 ):
     dfs = get_obj_if_Acc(dfs)
     dfs = to_list_if_other(dfs)
@@ -190,10 +189,10 @@ def fueltype_totals_bar(
                 marker="D",
                 linestyle="None",
                 markerfacecolor="darkslategray",
-                **kwargs
+                **kwargs,
             )
         ax.legend(loc=0)
-        ax.set_ylabel(r"Capacity [$%s$]" % unit)
+        ax.set_ylabel(f"Capacity [${unit}$]")
         ax.xaxis.grid(False)
         fig.tight_layout(pad=0.5)
         return fig, ax
@@ -215,7 +214,7 @@ def country_totals_hbar(
         countrytotals[::-1][1:].plot(
             kind="barh", ax=ax, legend="reverse", edgecolor="none"
         )
-        ax.set_xlabel("Capacity [%s]" % unit)
+        ax.set_xlabel(f"Capacity [{unit}]")
         ax.yaxis.grid(False)
         ax.set_ylabel("")
         fig.tight_layout(pad=0.5)
@@ -372,7 +371,7 @@ def draw_basemap(
     coast_linewidth=0.4,
     zorder=None,
     fillcontinents=True,
-    **kwds
+    **kwds,
 ):
     if cartopy_present:
         if ax is None:
diff --git a/powerplantmatching/utils.py b/powerplantmatching/utils.py
index 51f4b04d..e7603580 100644
--- a/powerplantmatching/utils.py
+++ b/powerplantmatching/utils.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 # Copyright 2016-2018 Fabian Hofmann (FIAS), Jonas Hoersch (KIT, IAI) and
 # Fabian Gotzens (FZJ, IEK-STE)
 
@@ -18,8 +17,6 @@
 Utility functions for checking data completeness and supporting other functions
 """
 
-from __future__ import absolute_import, print_function
-
 import multiprocessing
 import os
 from ast import literal_eval as liteval
@@ -30,7 +27,6 @@
 import pycountry as pyc
 import requests
 import six
-from deprecation import deprecated
 from numpy import atleast_1d
 from tqdm import tqdm
 
@@ -119,8 +115,8 @@ def config_filter(df, config):
     name = df.powerplant.get_name()
     assert name is not None, "No name given for data source"
 
-    countries = config["target_countries"]
-    fueltypes = config["target_fueltypes"]
+    countries = config["target_countries"]  # noqa
+    fueltypes = config["target_fueltypes"]  # noqa
     cols = config["target_columns"]
 
     target_query = "Country in @countries and Fueltype in @fueltypes"
@@ -355,7 +351,7 @@ def parmap(f, arg_list, config=None):
         config = get_config()
     if config["parallel_duke_processes"]:
         nprocs = min(multiprocessing.cpu_count(), config["process_limit"])
-        logger.info("Run process with {} parallel threads.".format(nprocs))
+        logger.info(f"Run process with {nprocs} parallel threads.")
         q_in = multiprocessing.Queue(1)
         q_out = multiprocessing.Queue()
 
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..70a46dd5
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,16 @@
+# Formatter and linter settings
+
+[tool.ruff]
+line-length = 88
+extend-include = ['*.ipynb']
+
+[tool.ruff.lint]
+select = [
+    'F',   # pyflakes
+    # 'E',   # pycodestyle: Error
+    # 'W',   # pycodestyle: Warning
+    'I',   # isort
+    # 'D',   # pydocstyle
+    'UP',  # pyupgrade
+    'TID', # flake8-tidy-imports
+]
\ No newline at end of file
diff --git a/setup.py b/setup.py
index ff5db3ac..b2bd3a66 100644
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,6 @@
-from __future__ import absolute_import
-
 from codecs import open
 
-from setuptools import find_packages, setup
+from setuptools import setup
 
 with open("README.md", encoding="utf-8") as f:
     long_description = f.read()
diff --git a/test/test_cleaning.py b/test/test_cleaning.py
index 46d5525c..fcdd3963 100644
--- a/test/test_cleaning.py
+++ b/test/test_cleaning.py
@@ -6,9 +6,7 @@
     clean_name,
     gather_and_replace,
     gather_specifications,
-    gather_technology_info,
 )
-from powerplantmatching.data import OPSD
 
 TEST_DATA = {
     "Name": [
diff --git a/test/test_data.py b/test/test_data.py
index 9281310e..b8f03d96 100755
--- a/test/test_data.py
+++ b/test/test_data.py
@@ -1,5 +1,4 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
 """
 Created on Wed Nov 25 08:48:04 2020
 
@@ -46,7 +45,7 @@ def test_OPSD_VRE():
     assert df.Capacity.sum() > 0
 
 
-def test_OPSD_VRE():
+def test_OPSD_VRE_country():
     df = pm.data.OPSD_VRE_country("DE")
     assert not df.empty
     assert df.Capacity.sum() > 0

From ce20df21caa6793ddf4fd7a10dff5448e5a42634 Mon Sep 17 00:00:00 2001
From: lkstrp <lkstrp@pm.me>
Date: Mon, 10 Jun 2024 15:38:33 +0200
Subject: [PATCH 2/3] fix: run `Ruff` in `pre-commit`

---
 .pre-commit-config.yaml | 19 +++++++++++--------
 doc/release-notes.rst   |  2 +-
 2 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 88e63303..faeb2ad3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,15 +7,18 @@ repos:
     rev: v4.6.0
     hooks:
     -   id: check-yaml
--   repo: https://github.com/psf/black
-    rev: 24.4.2
-    hooks:
-    -   id: black
--   repo: https://github.com/PyCQA/isort
-    rev: 5.13.2
+
+# Run ruff to lint and format
+-   repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.4.8
     hooks:
-      - id: isort
-        args: ["--profile", "black", "--filter-files"]
+    # Run the linter.
+    -   id: ruff
+        args: [--fix]
+    # Run the formatter.
+    -   id: ruff-format
+
+# Run codespell to check spelling
 -   repo: https://github.com/codespell-project/codespell
     rev: v2.2.6
     hooks:
diff --git a/doc/release-notes.rst b/doc/release-notes.rst
index fc179ef7..b6135cea 100644
--- a/doc/release-notes.rst
+++ b/doc/release-notes.rst
@@ -13,7 +13,7 @@ Version 0.5.15 (12.05.2024)
 
 * Corrected capacity of some hydro powerplants in the United Kingdom contained
   in the ENTSOE data based on manual search. The error originated from the use
-  of the value 999 in the capacity column to represent NaN values. The chnages
+  of the value 999 in the capacity column to represent NaN values. The changes
   accumulate to a reduction of approximately 5 GW in capacity.
   (https://github.com/PyPSA/powerplantmatching/pull/141)
 

From 94b6a5e550f5ffaee8edf542f130d393db62cd8e Mon Sep 17 00:00:00 2001
From: lkstrp <lkstrp@pm.me>
Date: Mon, 10 Jun 2024 15:48:54 +0200
Subject: [PATCH 3/3] fix: restore raw-string regex in `gather_and_replace` and repair `test_OPSD_VRE`

---
 powerplantmatching/cleaning.py | 2 +-
 test/test_data.py              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/powerplantmatching/cleaning.py b/powerplantmatching/cleaning.py
index 72ab7be3..f4dcb2f0 100644
--- a/powerplantmatching/cleaning.py
+++ b/powerplantmatching/cleaning.py
@@ -156,7 +156,7 @@ def gather_and_replace(df, mapping):
     for key, pattern in mapping.items():
         if not pattern:
             # if pattern is not given, fall back to case-insensitive key
-            pattern = f"(?i)\b{key}\b"
+            pattern = rf"(?i)\b{key}\b"
         elif isinstance(pattern, list):
             # if pattern is a list, concat all entries in a case-insensitive regex
             pattern = r"(?i)" + "|".join([rf"\b{p}\b" for p in pattern])
diff --git a/test/test_data.py b/test/test_data.py
index b8f03d96..6461a8bf 100755
--- a/test/test_data.py
+++ b/test/test_data.py
@@ -40,7 +40,7 @@ def test_data_request_processed(source):
 
 
 def test_OPSD_VRE():
-    df = pm.data.OPSD_VRE().empty
+    df = pm.data.OPSD_VRE()
     assert not df.empty
     assert df.Capacity.sum() > 0