From ad6301eb42155c968f20b2c7e071cbec039acc03 Mon Sep 17 00:00:00 2001 From: Jared Lewis Date: Thu, 4 Aug 2022 21:47:20 +1000 Subject: [PATCH] Add black (#50) * Add black to the test dependency * Run black * Ignore docs directory * Fix stickler errors * Fix stickler errors * Rerun black * Move pd_read/pd_write to utils to stop a circular dependency * Needed to import pd_read from utils --- .gitignore | 4 +- .stickler.yml | 5 +- Makefile | 4 + aneris/__init__.py | 4 +- aneris/_io.py | 74 ++--- aneris/cli.py | 83 +++--- aneris/harmonize.py | 402 +++++++++++++++------------- aneris/methods.py | 102 +++---- aneris/tutorial.py | 36 +-- aneris/utils.py | 357 +++++++++++++----------- pyproject.toml | 10 + setup.cfg | 2 +- setup.py | 70 ++--- tests/ci/download_data.py | 19 +- tests/test_default_decision_tree.py | 136 ++++------ tests/test_harmonize.py | 196 ++++++++------ tests/test_io.py | 36 +-- tests/test_regression.py | 83 +++--- tests/test_tutorials.py | 36 ++- tests/test_utils.py | 293 ++++++++++---------- 20 files changed, 1036 insertions(+), 916 deletions(-) create mode 100644 pyproject.toml diff --git a/.gitignore b/.gitignore index 46273c2..9775ca5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,6 @@ build dist *.egg-info .cache -.* \ No newline at end of file +.* + +venv \ No newline at end of file diff --git a/.stickler.yml b/.stickler.yml index 6e08f83..410a11a 100644 --- a/.stickler.yml +++ b/.stickler.yml @@ -3,10 +3,13 @@ linters: python: 3 max-line-length: 88 fixer: false - ignore: I002, F403, E402, E731, E203 + ignore: I002, F403, E402, E731, E203, W503 # stickler doesn't support 'exclude' for flake8 properly, so we disable it # below with files.ignore: # https://github.com/markstory/lint-review/issues/184 + black: + config: ./pyproject.toml + fixer: false files: ignore: - doc/**/*.py diff --git a/Makefile b/Makefile index d474f0b..7d9c8a1 100644 --- a/Makefile +++ b/Makefile @@ -68,6 +68,10 @@ publish-on-pypi: $(VENV_DIR) ## publish release on PyPI echo run git status --porcelain to find dirty files >&2; \ fi; +.PHONY: black +black: $(VENV_DIR) + black . 
+ .PHONY: ci_dl ci_dl: $(VENV_DIR) ## run all the tests cd tests/ci; python download_data.py diff --git a/aneris/__init__.py b/aneris/__init__.py index 2e8a927..fa86a3e 100644 --- a/aneris/__init__.py +++ b/aneris/__init__.py @@ -1,8 +1,8 @@ - from aneris._io import * from aneris.harmonize import * from aneris.utils import * from ._version import get_versions -__version__ = get_versions()['version'] + +__version__ = get_versions()["version"] del get_versions diff --git a/aneris/_io.py b/aneris/_io.py index 7e723f0..5e535b1 100644 --- a/aneris/_io.py +++ b/aneris/_io.py @@ -8,7 +8,7 @@ import pandas as pd -from aneris.utils import isstr, isnum, iamc_idx +from aneris.utils import isstr, isnum, iamc_idx, pd_read RC_DEFAULTS = """ config: @@ -26,7 +26,7 @@ def _read_data(indfs): - datakeys = sorted([x for x in indfs if x.startswith('data')]) + datakeys = sorted([x for x in indfs if x.startswith("data")]) df = pd.concat([indfs[k] for k in datakeys]) # don't know why reading from excel changes dtype and column types # but I have to reset them manually @@ -50,46 +50,6 @@ def _recursive_update(d, u): return d -def pd_read(f, str_cols=False, *args, **kwargs): - """Try to read a file with pandas, supports CSV and XLSX - - Parameters - ---------- - f : string - the file to read in - str_cols : bool, optional - turn all columns into strings (numerical column names are sometimes - read in as numerical dtypes) - args, kwargs : sent directly to the Pandas read function - - Returns - ------- - df : pd.DataFrame - """ - if f.endswith('csv'): - df = pd.read_csv(f, *args, **kwargs) - else: - df = pd.read_excel(f, *args, **kwargs) - - if str_cols: - df.columns = [str(x) for x in df.columns] - - return df - - -def pd_write(df, f, *args, **kwargs): - """Try to write a file with pandas, supports CSV and XLSX""" - # guess whether to use index, unless we're told otherwise - index = kwargs.pop('index', isinstance(df.index, pd.MultiIndex)) - - if f.endswith('csv'): - df.to_csv(f, index=index, *args, **kwargs) - else: - writer = pd.ExcelWriter(f) - df.to_excel(writer, index=index, *args, **kwargs) - writer.save() - - def read_excel(f): """Read an excel-based input file for harmonization. 
@@ -111,20 +71,23 @@ def read_excel(f): model = _read_data(indfs) # make an empty df which will be caught later - overrides = indfs['harmonization'] if 'harmonization' in indfs \ - else pd.DataFrame([], columns=iamc_idx + ['Unit']) + overrides = ( + indfs["harmonization"] + if "harmonization" in indfs + else pd.DataFrame([], columns=iamc_idx + ["Unit"]) + ) # get run control config = {} - if'Configuration' in overrides: - config = overrides[['Configuration', 'Value']].dropna() - config = config.set_index('Configuration').to_dict()['Value'] - overrides = overrides.drop(['Configuration', 'Value'], axis=1) + if "Configuration" in overrides: + config = overrides[["Configuration", "Value"]].dropna() + config = config.set_index("Configuration").to_dict()["Value"] + overrides = overrides.drop(["Configuration", "Value"], axis=1) # a single row of nans implies only configs provided, # if so, only return the empty df if len(overrides) == 1 and overrides.isnull().values.all(): - overrides = pd.DataFrame([], columns=iamc_idx + ['Unit']) + overrides = pd.DataFrame([], columns=iamc_idx + ["Unit"]) return model, overrides, config @@ -140,10 +103,10 @@ def __init__(self, rc=None, defaults=None): Parameters ---------- rc : string, file, dictionary, optional - a path to a YAML file, a file handle for a YAML file, or a + a path to a YAML file, a file handle for a YAML file, or a dictionary describing run control configuration defaults : string, file, dictionary, optional - a path to a YAML file, a file handle for a YAML file, or a + a path to a YAML file, a file handle for a YAML file, or a dictionary describing **default** run control configuration """ rc = rc or {} @@ -171,14 +134,15 @@ def _get_path(self, key, fyaml, fname): _fname = os.path.join(os.path.dirname(fyaml), fname) if not os.path.exists(_fname): - msg = "YAML key '{}' in {}: {} is not a valid relative " + \ - "or absolute path" + msg = ( + "YAML key '{}' in {}: {} is not a valid relative " + "or absolute path" + ) raise IOError(msg.format(key, fyaml, fname)) return _fname def _fill_relative_paths(self, fyaml, d): file_keys = [ - 'exogenous', + "exogenous", ] for k in file_keys: if k in d: @@ -186,7 +150,7 @@ def _fill_relative_paths(self, fyaml, d): def _load_yaml(self, obj): check_rel_paths = False - if hasattr(obj, 'read'): # it's a file + if hasattr(obj, "read"): # it's a file obj = obj.read() if isstr(obj) and os.path.exists(obj): check_rel_paths = True diff --git a/aneris/cli.py b/aneris/cli.py index ef95289..075fbe5 100644 --- a/aneris/cli.py +++ b/aneris/cli.py @@ -18,46 +18,54 @@ def read_args(): aneris input.xlsx --history history.csv --regions regions.csv """ parser = argparse.ArgumentParser( - description=descr, - formatter_class=argparse.RawDescriptionHelpFormatter + description=descr, formatter_class=argparse.RawDescriptionHelpFormatter ) - input_file = 'Input data file.' - parser.add_argument('input_file', help=input_file) - history = 'Historical emissions in the base year.' - parser.add_argument('--history', help=history, - default=hist_path('history.csv')) - regions = 'Mapping of country iso-codes to native regions.' - parser.add_argument('--regions', help=regions, - default=region_path('message.csv')) - rc = 'Runcontrol YAML file (see http://mattgidden.com/aneris/config.html for examples).' - parser.add_argument('--rc', help=rc, default=None) - output_path = 'Path to use for output file names.' - parser.add_argument('--output_path', help=output_path, default='.') - output_prefix = 'Prefix to use for output file names.' 
- parser.add_argument('--output_prefix', help=output_prefix, default=None) + input_file = "Input data file." + parser.add_argument("input_file", help=input_file) + history = "Historical emissions in the base year." + parser.add_argument("--history", help=history, default=hist_path("history.csv")) + regions = "Mapping of country iso-codes to native regions." + parser.add_argument("--regions", help=regions, default=region_path("message.csv")) + rc = ( + "Runcontrol YAML file " + "(see http://mattgidden.com/aneris/config.html for examples)." + ) + parser.add_argument("--rc", help=rc, default=None) + output_path = "Path to use for output file names." + parser.add_argument("--output_path", help=output_path, default=".") + output_prefix = "Prefix to use for output file names." + parser.add_argument("--output_prefix", help=output_prefix, default=None) args = parser.parse_args() return args -def harmonize(inf, history, regions, rc, output_path, output_prefix, - return_result=False, write_output=True): +def harmonize( + inf, + history, + regions, + rc, + output_path, + output_prefix, + return_result=False, + write_output=True, +): # check files exist check = [inf, history, regions, rc] for f in check: if f and not os.path.exists(f): - raise IOError('{} does not exist on the filesystem.'.format(f)) + raise IOError("{} does not exist on the filesystem.".format(f)) # read input hist = aneris.pd_read(history, str_cols=True) if hist.empty: - raise ValueError('History file is empty') + raise ValueError("History file is empty") regions = aneris.pd_read(regions, str_cols=True) if regions.empty: - raise ValueError('Region definition is empty') + raise ValueError("Region definition is empty") model, overrides, config = aneris.read_excel(inf) rc = aneris.RunControl(rc=rc) - rc.recursive_update('config', config) + rc.recursive_update("config", config) # do core harmonization driver = aneris.HarmonizationDriver(rc, hist, model, overrides, regions) @@ -67,37 +75,40 @@ def harmonize(inf, history, regions, rc, output_path, output_prefix, if write_output: # write to excel - prefix = output_prefix or inf.split('.')[0] - fname = os.path.join(output_path, '{}_harmonized.xlsx'.format(prefix)) - logger().info('Writing result to: {}'.format(fname)) - aneris.pd_write(model, fname, sheet_name='data') + prefix = output_prefix or inf.split(".")[0] + fname = os.path.join(output_path, "{}_harmonized.xlsx".format(prefix)) + logger().info("Writing result to: {}".format(fname)) + aneris.pd_write(model, fname, sheet_name="data") # save data about harmonization - fname = os.path.join(output_path, '{}_metadata.xlsx'.format(prefix)) - logger().info('Writing metadata to: {}'.format(fname)) + fname = os.path.join(output_path, "{}_metadata.xlsx".format(prefix)) + logger().info("Writing metadata to: {}".format(fname)) aneris.pd_write(metadata, fname) # save data about harmonization if not diagnostics.empty: - fname = os.path.join(output_path, - '{}_diagnostics.xlsx'.format(prefix)) - logger().info('Writing diagnostics to: {}'.format(fname)) + fname = os.path.join(output_path, "{}_diagnostics.xlsx".format(prefix)) + logger().info("Writing diagnostics to: {}".format(fname)) aneris.pd_write(diagnostics, fname) if return_result: return model, metadata, diagnostics - - def main(): # parse cli args = read_args() # run program - harmonize(args.input_file, args.history, args.regions, - args.rc, args.output_path, args.output_prefix) + harmonize( + args.input_file, + args.history, + args.regions, + args.rc, + args.output_path, + 
args.output_prefix, + ) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/aneris/harmonize.py b/aneris/harmonize.py index 6dfd5ce..d4f9eb4 100644 --- a/aneris/harmonize.py +++ b/aneris/harmonize.py @@ -6,11 +6,20 @@ from functools import partial from aneris import utils -from aneris import pd_read -from aneris.utils import isin -from aneris.methods import harmonize_factors, constant_offset, reduce_offset, \ - constant_ratio, reduce_ratio, linear_interpolate, model_zero, hist_zero, \ - budget, coeff_of_var, default_methods +from aneris.utils import isin, pd_read +from aneris.methods import ( + harmonize_factors, + constant_offset, + reduce_offset, + constant_ratio, + reduce_ratio, + linear_interpolate, + model_zero, + hist_zero, + budget, + coeff_of_var, + default_methods, +) def _log(msg, *args, **kwargs): @@ -25,19 +34,20 @@ class Harmonizer(object): """A class used to harmonize model data to historical data in the standard calculation format """ + _methods = { - 'model_zero': model_zero, - 'hist_zero': hist_zero, - 'budget': budget, - 'constant_ratio': constant_ratio, - 'constant_offset': constant_offset, - 'reduce_offset_2150_cov': partial(reduce_offset, final_year='2150'), - 'reduce_ratio_2150_cov': partial(reduce_ratio, final_year='2150'), + "model_zero": model_zero, + "hist_zero": hist_zero, + "budget": budget, + "constant_ratio": constant_ratio, + "constant_offset": constant_offset, + "reduce_offset_2150_cov": partial(reduce_offset, final_year="2150"), + "reduce_ratio_2150_cov": partial(reduce_ratio, final_year="2150"), **{ - f'{method.__name__}_{year}': partial(method, final_year=str(year)) + f"{method.__name__}_{year}": partial(method, final_year=str(year)) for year in chain(range(2020, 2101, 10), [2150]) for method in (reduce_offset, reduce_ratio, linear_interpolate) - } + }, } def __init__( @@ -57,77 +67,83 @@ def __init__( different """ if not isinstance(data.index, pd.MultiIndex): - raise ValueError('Data must use utils.df_idx') + raise ValueError("Data must use utils.df_idx") if not isinstance(history.index, pd.MultiIndex): - raise ValueError('History must use utils.df_idx') + raise ValueError("History must use utils.df_idx") if verify_indicies and not data.index.equals(history.index): idx = history.index.difference(data.index) - msg = 'More history than model reports, adding 0 values {}' + msg = "More history than model reports, adding 0 values {}" _warn(msg.format(idx.to_series().head())) df = pd.DataFrame(0, columns=data.columns, index=idx) data = pd.concat([data, df]).sort_index().loc[history.index] assert data.index.equals(history.index) - key = 'harmonize_year' + key = "harmonize_year" # TODO type - self.base_year = str(config[key]) if key in config else '2015' + self.base_year = str(config[key]) if key in config else "2015" self.data = data[utils.numcols(data)] - self.model = pd.Series(index=self.data.index, - name=self.base_year, - dtype=float).to_frame() + self.model = pd.Series( + index=self.data.index, name=self.base_year, dtype=float + ).to_frame() self.history = history self.methods_used = None self.offsets, self.ratios = harmonize_factors( - self.data, self.history, self.base_year) + self.data, self.history, self.base_year + ) self.method_choice = method_choice # get default methods to use in decision tree - self.ratio_method = config.get('default_ratio_method') - self.offset_method = config.get('default_offset_method') - self.luc_method = config.get('default_luc_method') - self.luc_cov_threshold = config.get('luc_cov_threshold') + 
self.ratio_method = config.get("default_ratio_method") + self.offset_method = config.get("default_offset_method") + self.luc_method = config.get("default_luc_method") + self.luc_cov_threshold = config.get("luc_cov_threshold") def metadata(self): """Return pd.DataFrame of method choice metadata""" methods = self.methods_used if isinstance(methods, pd.Series): # only defaults used methods = methods.to_frame() - methods['default'] = methods['method'] - methods['override'] = '' - - meta = pd.concat([ - methods['method'], - methods['default'], - methods['override'], - self.offsets, - self.ratios, - self.history[self.base_year], - self.history.apply(coeff_of_var, axis=1), - self.data[self.base_year], - self.model[self.base_year], - ], axis=1) + methods["default"] = methods["method"] + methods["override"] = "" + + meta = pd.concat( + [ + methods["method"], + methods["default"], + methods["override"], + self.offsets, + self.ratios, + self.history[self.base_year], + self.history.apply(coeff_of_var, axis=1), + self.data[self.base_year], + self.model[self.base_year], + ], + axis=1, + ) meta.columns = [ - 'method', - 'default', - 'override', - 'offset', - 'ratio', - 'history', - 'cov', - 'unharmonized', - 'harmonized', + "method", + "default", + "override", + "offset", + "ratio", + "history", + "cov", + "unharmonized", + "harmonized", ] return meta def _default_methods(self): methods, diagnostics = default_methods( - self.history, self.data, self.base_year, + self.history, + self.data, + self.base_year, method_choice=self.method_choice, ratio_method=self.ratio_method, offset_method=self.offset_method, luc_method=self.luc_method, - luc_cov_threshold=self.luc_cov_threshold + luc_cov_threshold=self.luc_cov_threshold, ) return methods @@ -138,25 +154,23 @@ def _harmonize(self, method, idx, check_len): offsets = self.offsets.loc[idx] ratios = self.ratios.loc[idx] # get delta - delta = hist if method == 'budget' else ratios if 'ratio' in method else offsets + delta = hist if method == "budget" else ratios if "ratio" in method else offsets # checks - assert(not model.isnull().values.any()) - assert(not hist.isnull().values.any()) - assert(not delta.isnull().values.any()) + assert not model.isnull().values.any() + assert not hist.isnull().values.any() + assert not delta.isnull().values.any() if check_len: - assert((len(model) < len(self.data)) & (len(hist) < len(self.history))) + assert (len(model) < len(self.data)) & (len(hist) < len(self.history)) # harmonize model = Harmonizer._methods[method](model, delta, harmonize_year=self.base_year) y = str(self.base_year) if model.isnull().values.any(): - msg = '{} method produced NaNs: {}, {}' + msg = "{} method produced NaNs: {}, {}" where = model.isnull().any(axis=1) - raise ValueError(msg.format(method, - model.loc[where, y], - delta.loc[where])) + raise ValueError(msg.format(method, model.loc[where, y], delta.loc[where])) # construct the full df of history and future return model @@ -172,24 +186,24 @@ def methods(self, overrides=None): oidx = overrides.index # remove duplicate values - dup = oidx.duplicated(keep='last') + dup = oidx.duplicated(keep="last") if dup.any(): - msg = 'Removing duplicated override entries found: {}\n' + msg = "Removing duplicated override entries found: {}\n" _warn(msg.format(overrides.loc[dup])) overrides = overrides.loc[~dup] # get subset of overrides which are in model outidx = oidx.difference(midx) if outidx.size > 0: - msg = 'Removing override methods not in processed model output:\n{}' + msg = "Removing override methods not in 
processed model output:\n{}" _warn(msg.format(overrides.loc[outidx])) inidx = oidx.intersection(midx) overrides = overrides.loc[inidx] # overwrite defaults with overrides final_methods = overrides.combine_first(methods).to_frame() - final_methods['default'] = methods - final_methods['override'] = overrides + final_methods["default"] = methods + final_methods["override"] = overrides methods = final_methods return methods @@ -204,25 +218,25 @@ def harmonize(self, overrides=None): # save for future inspection self.methods_used = methods if isinstance(methods, pd.DataFrame): - methods = methods['method'] # drop default and override info - if (methods == 'unicorn').any(): + methods = methods["method"] # drop default and override info + if (methods == "unicorn").any(): msg = """Values found where model has positive and negative values and is zero in base year. Unsure how to proceed:\n{}\n{}""" - cols = ['history', 'unharmonized'] - df1 = self.metadata().loc[methods == 'unicorn', cols] - df2 = self.data.loc[methods == 'unicorn'] + cols = ["history", "unharmonized"] + df1 = self.metadata().loc[methods == "unicorn", cols] + df2 = self.data.loc[methods == "unicorn"] raise ValueError(msg.format(df1.reset_index(), df2.reset_index())) dfs = [] y = str(self.base_year) for method in methods.unique(): - _log('Harmonizing with {}'.format(method)) + _log("Harmonizing with {}".format(method)) # get subset indicies idx = methods[methods == method].index check_len = len(methods.unique()) > 1 # harmonize df = self._harmonize(method, idx, check_len) - if method not in ['model_zero', 'hist_zero']: + if method not in ["model_zero", "hist_zero"]: close = (df[y] - self.history.loc[df.index, y]).abs() < 1e-5 if not close.all(): report = df[~close][y].reset_index() @@ -256,8 +270,8 @@ def _downselect_scen(self, scenario): def _downselect_var(self): # separate data - select = '|'.join([self.prefix, self.suffix]) - _log('Downselecting {} variables'.format(select)) + select = "|".join([self.prefix, self.suffix]) + _log("Downselecting {} variables".format(select)) hasprefix = lambda df: df.Variable.str.startswith(self.prefix) hassuffix = lambda df: df.Variable.str.endswith(self.suffix) @@ -268,12 +282,12 @@ def _downselect_var(self): self.overrides = subset(self.overrides) if len(self.model) == 0: - msg = 'No Variables found for harmonization. Searched for {}.' + msg = "No Variables found for harmonization. Searched for {}." 
raise ValueError(msg.format(select)) - assert(len(self.hist) > 0) + assert len(self.hist) > 0 def _to_std(self): - _log('Translating to standard format') + _log("Translating to standard format") xlator = utils.FormatTranslator() self.model = ( @@ -288,28 +302,31 @@ def _to_std(self): .sort_index() ) # override with special cases if more are found in history - self.hist = self.hist[~self.hist.index.duplicated(keep='last')] + self.hist = self.hist[~self.hist.index.duplicated(keep="last")] # hackery required because unit needed for df_idx if self.overrides.empty: self.overrides = None else: - self.overrides['Unit'] = 'kt' + self.overrides["Unit"] = "kt" self.overrides = ( xlator.to_std(df=self.overrides.copy(), set_metadata=False) .set_index(utils.df_idx) .sort_index() ) self.overrides.columns = self.overrides.columns.str.lower() - self.overrides = self.overrides['method'] + self.overrides = self.overrides["method"] def _agg_hist(self): # aggregate and clean hist - _log('Aggregating historical values to native regions') + _log("Aggregating historical values to native regions") # must set verify to false for now because some isos aren't included! self.hist = utils.agg_regions( - self.hist, verify=False, mapping=self.regions, - rfrom='ISO Code', rto='Native Region Code' + self.hist, + verify=False, + mapping=self.regions, + rfrom="ISO Code", + rto="Native Region Code", ) def _fill_model_trajectories(self): @@ -317,7 +334,7 @@ def _fill_model_trajectories(self): idx = self.hist.index notin = ~idx.isin(self.model.index) if notin.any(): - msg = 'Not all of self.history is covered by self.model: \n{}' + msg = "Not all of self.history is covered by self.model: \n{}" _df = self.hist.loc[notin].reset_index()[utils.df_idx] _warn(msg.format(_df.head())) zeros = pd.DataFrame(0, index=idx, columns=self.model.columns) @@ -337,8 +354,7 @@ def results(self): class HarmonizationDriver(object): - """A helper class to harmonize all scenarios for a model. 
- """ + """A helper class to harmonize all scenarios for a model.""" def __init__(self, rc, hist, model, overrides, regions): """Parameters @@ -353,69 +369,70 @@ def __init__(self, rc, hist, model, overrides, regions): regions : pd.DataFrame regional aggregation mapping (ISO -> model regions) """ - self.prefix = rc['prefix'] - self.suffix = rc['suffix'] - self.config = rc['config'] - self.add_5regions = rc['add_5regions'] - self.exog_files = rc['exogenous'] if 'exogenous' in rc else [] + self.prefix = rc["prefix"] + self.suffix = rc["suffix"] + self.config = rc["config"] + self.add_5regions = rc["add_5regions"] + self.exog_files = rc["exogenous"] if "exogenous" in rc else [] self.model = model self.hist = hist self.overrides = overrides self.regions = regions - if not self.regions['ISO Code'].isin(['World']).any(): + if not self.regions["ISO Code"].isin(["World"]).any(): glb = { - 'ISO Code': 'World', - 'Country': 'World', - 'Native Region Code': 'World', + "ISO Code": "World", + "Country": "World", + "Native Region Code": "World", } - _log('Manually adding global regional definition: {}'.format(glb)) + _log("Manually adding global regional definition: {}".format(glb)) self.regions = self.regions.append(glb, ignore_index=True) model_names = self.model.Model.unique() if len(model_names) > 1: - raise ValueError('Can not have more than one model to harmonize') + raise ValueError("Can not have more than one model to harmonize") self.model_name = model_names[0] - self._xlator = utils.FormatTranslator(prefix=self.prefix, - suffix=self.suffix) + self._xlator = utils.FormatTranslator(prefix=self.prefix, suffix=self.suffix) self._model_dfs = [] self._metadata_dfs = [] self._diagnostic_dfs = [] self.exogenous_trajectories = self._exogenous_trajectories() # TODO better type checking? - self.config['harmonize_year'] = str(self.config['harmonize_year']) - y = self.config['harmonize_year'] + self.config["harmonize_year"] = str(self.config["harmonize_year"]) + y = self.config["harmonize_year"] if y not in model.columns: - msg = 'Base year {} not found in model data. Existing columns are {}.' + msg = "Base year {} not found in model data. Existing columns are {}." raise ValueError(msg.format(y, model.columns)) if y not in hist.columns: - msg = 'Base year {} not found in hist data. Existing columns are {}.' + msg = "Base year {} not found in hist data. Existing columns are {}." 
raise ValueError(msg.format(y, hist.columns)) def _exogenous_trajectories(self): # add exogenous variables dfs = [] for fname in self.exog_files: - exog = pd_read(fname, sheet_name='data') + exog = pd_read(fname, sheet_name="data") exog.columns = [str(x) for x in exog.columns] - exog['Model'] = self.model_name + exog["Model"] = self.model_name dfs.append(exog) if len(dfs) == 0: # add empty df if none were provided dfs.append(pd.DataFrame(columns=self.model.columns)) return pd.concat(dfs) def _postprocess_trajectories(self, scenario): - _log('Translating to IAMC template') + _log("Translating to IAMC template") # update variable name self._model = self._model.reset_index() self._model.sector = self._model.sector.str.replace( - self.suffix, self.config['replace_suffix']) + self.suffix, self.config["replace_suffix"] + ) self._model = self._model.set_index(utils.df_idx) # from native to iamc format self._model = ( - self._xlator.to_template(self._model, model=self.model_name, - scenario=scenario) + self._xlator.to_template( + self._model, model=self.model_name, scenario=scenario + ) .sort_index() .reset_index() ) @@ -423,7 +440,7 @@ def _postprocess_trajectories(self, scenario): # add exogenous trajectories exog = self.exogenous_trajectories.copy() if not exog.empty: - exog['Scenario'] = scenario + exog["Scenario"] = scenario cols = [c for c in self._model.columns if c in exog.columns] exog = exog[cols] self._model = pd.concat([self._model, exog]) @@ -446,22 +463,31 @@ def harmonize(self, scenario, diagnostic_config=None): self._regions = self.regions.copy() # preprocess - pp = _TrajectoryPreprocessor(self._hist, self._model, self._overrides, - self._regions, self.prefix, self.suffix) + pp = _TrajectoryPreprocessor( + self._hist, + self._model, + self._overrides, + self._regions, + self.prefix, + self.suffix, + ) # TODO, preprocess in init, just process here - self._hist, self._model, self._overrides = pp.process( - scenario).results() + self._hist, self._model, self._overrides = pp.process(scenario).results() unharmonized = self._model.copy() # flag if this run will be with only global trajectories. if so, then # only global totals are harmonized, rest is skipped. 
- global_harmonization_only = self.config['global_harmonization_only'] + global_harmonization_only = self.config["global_harmonization_only"] # global only gases self._glb_model, self._glb_meta = _harmonize_global_total( - self.config, self.prefix, self.suffix, - self._hist, self._model.copy(), self._overrides, + self.config, + self.prefix, + self.suffix, + self._hist, + self._model.copy(), + self._overrides, default_global_gases=not global_harmonization_only, ) @@ -471,9 +497,15 @@ def harmonize(self, scenario, diagnostic_config=None): else: # regional gases self._model, self._meta = _harmonize_regions( - self.config, self.prefix, self.suffix, self._regions, - self._hist, self._model.copy(), self._overrides, - self.config['harmonize_year'], self.add_5regions + self.config, + self.prefix, + self.suffix, + self._regions, + self._hist, + self._model.copy(), + self._overrides, + self.config["harmonize_year"], + self.add_5regions, ) # combine special case results with harmonized results @@ -483,13 +515,14 @@ def harmonize(self, scenario, diagnostic_config=None): # perform any automated diagnostics/analysis self._diag = diagnostics( - unharmonized, self._model, self._meta, config=diagnostic_config) + unharmonized, self._model, self._meta, config=diagnostic_config + ) # collect metadata self._meta = self._meta.reset_index() - self._meta['model'] = self.model_name - self._meta['scenario'] = scenario - self._meta = self._meta.set_index(['model', 'scenario']) + self._meta["model"] = self.model_name + self._meta["scenario"] = scenario + self._meta = self._meta.set_index(["model", "scenario"]) self._postprocess_trajectories(scenario) # store results @@ -499,7 +532,7 @@ def harmonize(self, scenario, diagnostic_config=None): def scenarios(self): """Return all known scenarios""" - return self.model['Scenario'].unique() + return self.model["Scenario"].unique() def harmonized_results(self): """Return 3-tuple of (pd.DataFrame of harmonized trajectories, @@ -522,18 +555,19 @@ def _get_global_overrides(overrides, gases, sector): return o if not o.empty else None -def _harmonize_global_total(config, prefix, suffix, hist, model, overrides, - default_global_gases=True): - all_gases = list(model.index.get_level_values('gas').unique()) +def _harmonize_global_total( + config, prefix, suffix, hist, model, overrides, default_global_gases=True +): + all_gases = list(model.index.get_level_values("gas").unique()) gases = utils.harmonize_total_gases if default_global_gases else all_gases - sector = '|'.join([prefix, suffix]) + sector = "|".join([prefix, suffix]) idx = isin(region="World", gas=gases, sector=sector) h = hist.loc[idx].copy() try: m = model.loc[idx].copy() except TypeError: - _warn('Non-history gases not found in model') + _warn("Non-history gases not found in model") return None, None if m.empty: @@ -542,53 +576,53 @@ def _harmonize_global_total(config, prefix, suffix, hist, model, overrides, # match override methods with global gases, None if no match o = _get_global_overrides(overrides, gases, sector) - utils.check_null(m, 'model') - utils.check_null(h, 'hist', fail=True) + utils.check_null(m, "model") + utils.check_null(h, "hist", fail=True) harmonizer = Harmonizer(m, h, config=config) - _log('Harmonizing (with example methods):') + _log("Harmonizing (with example methods):") _log(harmonizer.methods(overrides=o).head()) if o is not None: - _log('and override methods:') + _log("and override methods:") _log(o.head()) m = harmonizer.harmonize(overrides=o) - utils.check_null(m, 'model') + 
utils.check_null(m, "model") metadata = harmonizer.metadata() return m, metadata -def _harmonize_regions(config, prefix, suffix, regions, hist, model, overrides, - base_year, add_5regions): +def _harmonize_regions( + config, prefix, suffix, regions, hist, model, overrides, base_year, add_5regions +): # clean model - model = utils.subtract_regions_from_world(model, 'model', base_year) + model = utils.subtract_regions_from_world(model, "model", base_year) model = utils.remove_recalculated_sectors(model, prefix, suffix) # remove rows with all 0s model = model[(model.T > 0).any()] # clean hist - hist = utils.subtract_regions_from_world(hist, 'hist', base_year) + hist = utils.subtract_regions_from_world(hist, "hist", base_year) hist = utils.remove_recalculated_sectors(hist, prefix, suffix) # remove rows with all 0s hist = hist[(hist.T > 0).any()] if model.empty: - raise RuntimeError( - 'Model is empty after downselecting regional values') + raise RuntimeError("Model is empty after downselecting regional values") # harmonize - utils.check_null(model, 'model') - utils.check_null(hist, 'hist', fail=True) + utils.check_null(model, "model") + utils.check_null(hist, "hist", fail=True) harmonizer = Harmonizer(model, hist, config=config) - _log('Harmonizing (with example methods):') + _log("Harmonizing (with example methods):") _log(harmonizer.methods(overrides=overrides).head()) if overrides is not None: - _log('and override methods:') + _log("and override methods:") _log(overrides.head()) model = harmonizer.harmonize(overrides=overrides) - utils.check_null(model, 'model') + utils.check_null(model, "model") metadata = harmonizer.metadata() # add aggregate variables. this works in three steps: @@ -596,46 +630,45 @@ def _harmonize_regions(config, prefix, suffix, regions, hist, model, overrides, # be recalculated idx = utils.recalculated_row_idx(model, prefix, suffix) if idx.any(): - msg = 'Removing sector aggregates. Recalculating with harmonized totals.' + msg = "Removing sector aggregates. Recalculating with harmonized totals." 
_warn(msg) model = model[~idx] - totals = '|'.join([prefix, suffix]) + totals = "|".join([prefix, suffix]) sector_total_idx = isin(model, sector=totals) subsector_idx = ~sector_total_idx # step 2: on the "clean" df, recalculate those totals subsectors_with_total_df = ( utils.EmissionsAggregator(model[subsector_idx]) .add_variables(totals=totals, aggregates=False) - .df - .set_index(utils.df_idx) + .df.set_index(utils.df_idx) ) # step 3: recombine with model data that was sector total only sector_total_df = model[sector_total_idx] model = pd.concat([sector_total_df, subsectors_with_total_df]) - utils.check_null(model, 'model') + utils.check_null(model, "model") # combine regional values to send back into template form model.reset_index(inplace=True) model = model.set_index(utils.df_idx).sort_index() - glb = utils.combine_rows(model, 'region', 'World', - sumall=False, rowsonly=True) + glb = utils.combine_rows(model, "region", "World", sumall=False, rowsonly=True) model = glb.combine_first(model) # add 5regions if add_5regions: - _log('Adding 5region values') + _log("Adding 5region values") # explicitly don't add World, it already exists from aggregation - mapping = regions[regions['Native Region Code'] != 'World'].copy() - aggdf = utils.agg_regions(model, mapping=mapping, - rfrom='Native Region Code', rto='5_region') + mapping = regions[regions["Native Region Code"] != "World"].copy() + aggdf = utils.agg_regions( + model, mapping=mapping, rfrom="Native Region Code", rto="5_region" + ) model = pd.concat([model, aggdf]) - assert(not model.isnull().values.any()) + assert not model.isnull().values.any() # duplicates come in from World and World being translated - duplicates = model.index.duplicated(keep='first') + duplicates = model.index.duplicated(keep="first") if duplicates.any(): - regions = model[duplicates].index.get_level_values('region').unique() - msg = 'Dropping duplicate rows found for regions: {}'.format(regions) + regions = model[duplicates].index.get_level_values("region").unique() + msg = "Dropping duplicate rows found for regions: {}".format(regions) _warn(msg) model = model[~duplicates] @@ -664,30 +697,28 @@ def diagnostics(unharmonized, model, metadata, config=None): config : dictionary, optional ratio values to use in diagnostics, key options include 'mid' and 'end'. 
""" - config = config or {'mid': 4.0, 'end': 2.0} + config = config or {"mid": 4.0, "end": 2.0} # # Detect Large Missing Values # - num = metadata['history'] - denom = metadata['history'].groupby(level=['region', 'gas']).sum() + num = metadata["history"] + denom = metadata["history"].groupby(level=["region", "gas"]).sum() # special merge because you can't do operations on multiindex - ratio = pd.merge(num.reset_index(), - denom.reset_index(), - on=['region', 'gas']) - ratio = ratio['history_x'] / ratio['history_y'] + ratio = pd.merge(num.reset_index(), denom.reset_index(), on=["region", "gas"]) + ratio = ratio["history_x"] / ratio["history_y"] ratio.index = num.index - ratio.name = 'fraction' + ratio.name = "fraction" # downselect big = ratio[ratio > 0.2] - bigmethods = metadata.loc[big.index, 'method'] - bad = bigmethods[bigmethods == 'model_zero'] + bigmethods = metadata.loc[big.index, "method"] + bad = bigmethods[bigmethods == "model_zero"] report = big.loc[bad.index].reset_index() if not report.empty: - _warn('LARGE MISSING Values Found!!:\n {}'.format(report)) + _warn("LARGE MISSING Values Found!!:\n {}".format(report)) # # report on large medium an dlong-term differences @@ -696,31 +727,30 @@ def diagnostics(unharmonized, model, metadata, config=None): report = model.copy() mid, end = cols[len(cols) // 2 - 1], cols[-1] - if 'mid' in config: + if "mid" in config: bigmid = np.abs(model[mid] - unharmonized[mid]) / unharmonized[mid] - bigmid = bigmid[bigmid > config['mid']] - report['{}_diff'.format(mid)] = bigmid + bigmid = bigmid[bigmid > config["mid"]] + report["{}_diff".format(mid)] = bigmid - if 'end' in config: + if "end" in config: bigend = np.abs(model[end] - unharmonized[end]) / unharmonized[end] - bigend = bigend[bigend > config['end']] - report['{}_diff'.format(end)] = bigend + bigend = bigend[bigend > config["end"]] + report["{}_diff".format(end)] = bigend - report = report.drop(cols, axis=1).dropna(how='all') + report = report.drop(cols, axis=1).dropna(how="all") idx = metadata.index.intersection(report.index) - report['method'] = metadata.loc[idx, 'method'] - report = report[~report['method'].isin(['model_zero', np.nan])] + report["method"] = metadata.loc[idx, "method"] + report = report[~report["method"].isin(["model_zero", np.nan])] # # Detect non-negative CO2 emissions # m = model.reset_index() - m = m[m.gas != 'CO2'] + m = m[m.gas != "CO2"] neg = m[(m[utils.numcols(m)].T < 0).any()] if not neg.empty: - _warn( - 'Negative Emissions found for non-CO2 gases:\n {}'.format(neg)) - raise ValueError('Harmonization failed due to negative non-CO2 gases') + _warn("Negative Emissions found for non-CO2 gases:\n {}".format(neg)) + raise ValueError("Harmonization failed due to negative non-CO2 gases") return report diff --git a/aneris/methods.py b/aneris/methods.py index 8d9243b..7f88b92 100644 --- a/aneris/methods.py +++ b/aneris/methods.py @@ -11,7 +11,7 @@ from aneris import utils -def harmonize_factors(df, hist, harmonize_year='2015'): +def harmonize_factors(df, hist, harmonize_year="2015"): """Calculate offset and ratio values between data and history Parameters @@ -32,13 +32,13 @@ def harmonize_factors(df, hist, harmonize_year='2015'): """ c, m = hist[harmonize_year], df[harmonize_year] offset = (c - m).fillna(0) - offset.name = 'offset' + offset.name = "offset" ratios = (c / m).replace(np.inf, np.nan).fillna(0) - ratios.name = 'ratio' + ratios.name = "ratio" return offset, ratios -def constant_offset(df, offset, harmonize_year='2015'): +def constant_offset(df, offset, 
harmonize_year="2015"): """Calculate constant offset harmonized trajectory Parameters @@ -62,7 +62,7 @@ def constant_offset(df, offset, harmonize_year='2015'): return df -def constant_ratio(df, ratios, harmonize_year='2015'): +def constant_ratio(df, ratios, harmonize_year="2015"): """Calculate constant ratio harmonized trajectory Parameters @@ -86,7 +86,7 @@ def constant_ratio(df, ratios, harmonize_year='2015'): return df -def linear_interpolate(df, offset, final_year='2050', harmonize_year='2015'): +def linear_interpolate(df, offset, final_year="2050", harmonize_year="2015"): """Calculate linearly interpolated convergence harmonized trajectory Parameters @@ -117,7 +117,7 @@ def linear_interpolate(df, offset, final_year='2050', harmonize_year='2015'): return df -def reduce_offset(df, offset, final_year='2050', harmonize_year='2015'): +def reduce_offset(df, offset, final_year="2050", harmonize_year="2015"): """Calculate offset convergence harmonized trajectory Parameters @@ -144,13 +144,14 @@ def reduce_offset(df, offset, final_year='2050', harmonize_year='2015'): f = lambda year: -(year - yi) / float(yf - yi) + 1 factors = [f(year) if year <= yf else 0.0 for year in numcols_int] # add existing values to offset time series - offsets = pd.DataFrame(np.outer(offset, factors), - columns=numcols, index=offset.index) + offsets = pd.DataFrame( + np.outer(offset, factors), columns=numcols, index=offset.index + ) df[numcols] = df[numcols] + offsets return df -def reduce_ratio(df, ratios, final_year='2050', harmonize_year='2015'): +def reduce_ratio(df, ratios, final_year="2050", harmonize_year="2015"): """Calculate ratio convergence harmonized trajectory Parameters @@ -176,20 +177,20 @@ def reduce_ratio(df, ratios, final_year='2050', harmonize_year='2015'): # get factors that reduce from 1 to 0, but replace with 1s in years prior # to harmonization f = lambda year: -(year - yi) / float(yf - yi) + 1 - prefactors = [f(yi) - for year in numcols_int if year < yi] - postfactors = [f(year) if year <= yf else 0.0 - for year in numcols_int if year >= yi] + prefactors = [f(yi) for year in numcols_int if year < yi] + postfactors = [f(year) if year <= yf else 0.0 for year in numcols_int if year >= yi] factors = prefactors + postfactors # multiply existing values by ratio time series - ratios = pd.DataFrame(np.outer(ratios - 1, factors), - columns=numcols, index=ratios.index) + 1 + ratios = ( + pd.DataFrame(np.outer(ratios - 1, factors), columns=numcols, index=ratios.index) + + 1 + ) df[numcols] = df[numcols] * ratios return df -def budget(df, df_hist, harmonize_year='2015'): +def budget(df, df_hist, harmonize_year="2015"): r"""Calculate budget harmonized trajectory Parameters @@ -244,8 +245,8 @@ def budget(df, df_hist, harmonize_year='2015'): harmonize_year = int(harmonize_year) - df = df.set_axis(df.columns.astype(int), axis='columns') - df_hist = df_hist.set_axis(df_hist.columns.astype(int), axis='columns') + df = df.set_axis(df.columns.astype(int), axis="columns") + df_hist = df_hist.set_axis(df_hist.columns.astype(int), axis="columns") data_years = df.columns hist_years = df_hist.columns @@ -254,10 +255,8 @@ def budget(df, df_hist, harmonize_year='2015'): if data_years[0] not in hist_years: hist_years = hist_years.insert(bisect(hist_years, data_years[0]), data_years[0]) - df_hist = ( - df_hist - .reindex(columns=hist_years) - .interpolate(method='slinear', axis=1) + df_hist = df_hist.reindex(columns=hist_years).interpolate( + method="slinear", axis=1 ) def carbon_budget(years, emissions): @@ -343,7 +342,7 
@@ def l2_norm(): return df_harm -def model_zero(df, offset, harmonize_year='2015'): +def model_zero(df, offset, harmonize_year="2015"): """Returns result of aneris.methods.constant_offset()""" # current decision is to return a simple offset, this will be a straight # line for all time periods. previous behavior was to set df[numcols] = 0, @@ -388,13 +387,13 @@ def default_method_choice( """ # special cases if row.h == 0: - return 'hist_zero' + return "hist_zero" if row.zero_m: - return 'model_zero' + return "model_zero" if np.isinf(row.f) and row.neg_m and row.pos_m: # model == 0 in base year, and model goes negative # and positive - return 'unicorn' # this shouldn't exist! + return "unicorn" # this shouldn't exist! # model 0 in base year? if np.isclose(row.m, 0): @@ -402,15 +401,15 @@ def default_method_choice( if row.neg_m: return offset_method else: - return 'constant_offset' + return "constant_offset" else: # is this co2? # ZN: This gas dependence isn't documented in the default # decision tree - if hasattr(row, "gas") and row.gas == 'CO2': + if hasattr(row, "gas") and row.gas == "CO2": return ratio_method # is cov big? - if np.isfinite(row['cov']) and row['cov'] > luc_cov_threshold: + if np.isfinite(row["cov"]) and row["cov"] > luc_cov_threshold: return luc_method else: # dH small? @@ -419,9 +418,9 @@ def default_method_choice( else: # goes negative? if row.neg_m: - return 'reduce_ratio_2100' + return "reduce_ratio_2100" else: - return 'constant_ratio' + return "constant_ratio" def default_methods(hist, model, base_year, method_choice=None, **kwargs): @@ -464,14 +463,14 @@ def default_methods(hist, model, base_year, method_choice=None, **kwargs): `default_method_choice` """ - if kwargs.get('ratio_method') is None: - kwargs['ratio_method'] = 'reduce_ratio_2080' - if kwargs.get('offset_method') is None: - kwargs['offset_method'] = 'reduce_offset_2080' - if kwargs.get('luc_method') is None: - kwargs['luc_method'] = 'reduce_offset_2150_cov' - if kwargs.get('luc_cov_threshold') is None: - kwargs['luc_cov_threshold'] = 10 + if kwargs.get("ratio_method") is None: + kwargs["ratio_method"] = "reduce_ratio_2080" + if kwargs.get("offset_method") is None: + kwargs["offset_method"] = "reduce_offset_2080" + if kwargs.get("luc_method") is None: + kwargs["luc_method"] = "reduce_offset_2150_cov" + if kwargs.get("luc_cov_threshold") is None: + kwargs["luc_cov_threshold"] = 10 y = str(base_year) try: @@ -489,17 +488,24 @@ def default_methods(hist, model, base_year, method_choice=None, **kwargs): go_neg = ((model.min(axis=1) - h) < 0).any() cov = hist.apply(coeff_of_var, axis=1) - df = pd.DataFrame({ - 'dH': dH, 'f': f, 'dM': dM, - 'neg_m': neg_m, 'pos_m': pos_m, - 'zero_m': zero_m, 'go_neg': go_neg, - 'cov': cov, - 'h': h, 'm': m, - }).join(model.index.to_frame()) + df = pd.DataFrame( + { + "dH": dH, + "f": f, + "dM": dM, + "neg_m": neg_m, + "pos_m": pos_m, + "zero_m": zero_m, + "go_neg": go_neg, + "cov": cov, + "h": h, + "m": m, + } + ).join(model.index.to_frame()) if method_choice is None: method_choice = default_method_choice ret = df.apply(method_choice, axis=1, **kwargs) - ret.name = 'method' + ret.name = "method" return ret, df diff --git a/aneris/tutorial.py b/aneris/tutorial.py index 6be2628..c24b2ab 100644 --- a/aneris/tutorial.py +++ b/aneris/tutorial.py @@ -7,12 +7,15 @@ import aneris -_default_cache_dir = os.path.join('~', '.aneris_tutorial_data') +_default_cache_dir = os.path.join("~", ".aneris_tutorial_data") # idea borrowed from Seaborn -def load_data(cache_dir=_default_cache_dir, 
cache=True, - github_url='https://github.com/iiasa/aneris'): +def load_data( + cache_dir=_default_cache_dir, + cache=True, + github_url="https://github.com/iiasa/aneris", +): """ Load a dataset from the online repository (requires internet). @@ -32,32 +35,31 @@ def load_data(cache_dir=_default_cache_dir, cache=True, os.mkdir(longdir) files = { - 'rc': 'aneris_regions_sectors.yaml', - 'hist': 'history_regions_sectors.xls', - 'model': 'model_regions_sectors.xls', - 'regions': 'regions_regions_sectors.csv', + "rc": "aneris_regions_sectors.yaml", + "hist": "history_regions_sectors.xls", + "model": "model_regions_sectors.xls", + "regions": "regions_regions_sectors.csv", } files = {k: os.path.join(longdir, f) for k, f in files.items()} for localfile in files.values(): if not os.path.exists(localfile): fname = os.path.basename(localfile) - url = '/'.join((github_url, 'raw', 'master', - 'tests', 'test_data', fname)) + url = "/".join((github_url, "raw", "master", "tests", "test_data", fname)) urlretrieve(url, localfile) # read input - hist = aneris.pd_read(files['hist']) + hist = aneris.pd_read(files["hist"]) if hist.empty: - raise ValueError('History file is empty') + raise ValueError("History file is empty") hist.columns = hist.columns.astype(str) # make sure they're all strings - regions = aneris.pd_read(files['regions']) + regions = aneris.pd_read(files["regions"]) if regions.empty: - raise ValueError('Region definition is empty') - model, overrides, config = aneris.read_excel(files['model']) + raise ValueError("Region definition is empty") + model, overrides, config = aneris.read_excel(files["model"]) model.columns = model.columns.astype(str) # make sure they're all strings - rc = aneris.RunControl(rc=files['rc']) - rc.recursive_update('config', config) + rc = aneris.RunControl(rc=files["rc"]) + rc.recursive_update("config", config) # get driver driver = aneris.HarmonizationDriver(rc, hist, model, overrides, regions) @@ -69,7 +71,7 @@ def load_data(cache_dir=_default_cache_dir, cache=True, return model, hist, driver -if __name__ == '__main__': +if __name__ == "__main__": model, hist, driver = load_data(cache=False) for scenario in driver.scenarios(): driver.harmonize(scenario) diff --git a/aneris/utils.py b/aneris/utils.py index 6bcd3c9..7150b39 100644 --- a/aneris/utils.py +++ b/aneris/utils.py @@ -7,66 +7,77 @@ import numpy as np import pandas as pd + # Index for iamc -iamc_idx = ['Model', 'Scenario', 'Region', 'Variable'] +iamc_idx = ["Model", "Scenario", "Region", "Variable"] # default dataframe index -df_idx = ['region', 'gas', 'sector', 'units'] +df_idx = ["region", "gas", "sector", "units"] # paths to data dependencies here = os.path.join(os.path.dirname(os.path.realpath(__file__))) -hist_path = lambda f: os.path.join(here, 'historical', f) -iamc_path = lambda f: os.path.join(here, 'iamc_template', f) -region_path = lambda f: os.path.join(here, 'regional_definitions', f) +hist_path = lambda f: os.path.join(here, "historical", f) +iamc_path = lambda f: os.path.join(here, "iamc_template", f) +region_path = lambda f: os.path.join(here, "regional_definitions", f) # gases reported in kt of species kt_gases = [ - 'N2O', - 'SF6', - 'CF4', # explicit species of PFC - 'C2F6', # explicit species of PFC + "N2O", + "SF6", + "CF4", # explicit species of PFC + "C2F6", # explicit species of PFC # individual f gases removed for now # # hfcs # 'HFC23', 'HFC32', 'HFC43-10', 'HFC125', 'HFC134a', 'HFC143a', 'HFC227ea', 'HFC245fa', # CFCs - 'CFC-11', - 'CFC-12', - 'CFC-113', - 'CFC-114', - 'CFC-115', - 
'CH3CCl3', - 'CCl4', - 'HCFC-22', - 'HCFC-141b', - 'HCFC-142b', - 'Halon1211', - 'Halon1301', - 'Halon2402', - 'Halon1202', - 'CH3Br', - 'CH3Cl', + "CFC-11", + "CFC-12", + "CFC-113", + "CFC-114", + "CFC-115", + "CH3CCl3", + "CCl4", + "HCFC-22", + "HCFC-141b", + "HCFC-142b", + "Halon1211", + "Halon1301", + "Halon2402", + "Halon1202", + "CH3Br", + "CH3Cl", ] # gases reported in co2-equiv co2_eq_gases = [ - 'HFC', + "HFC", ] # gases reported in Mt of species mt_gases = [ # IAMC names - 'BC', 'CH4', 'CO2', 'CO', 'NOx', 'OC', 'Sulfur', 'NH3', 'VOC', + "BC", + "CH4", + "CO2", + "CO", + "NOx", + "OC", + "Sulfur", + "NH3", + "VOC", # non-IAMC names - 'SO2', 'NOX', 'NMVOC', + "SO2", + "NOX", + "NMVOC", ] all_gases = sorted(kt_gases + co2_eq_gases + mt_gases) # gases for which only sectoral totals are reported -total_gases = ['SF6', 'CF4', 'C2F6'] + co2_eq_gases +total_gases = ["SF6", "CF4", "C2F6"] + co2_eq_gases # gases for which only sectoral totals are harmonized -harmonize_total_gases = ['N2O'] + total_gases +harmonize_total_gases = ["N2O"] + total_gases # gases for which full sectoral breakdown is reported sector_gases = sorted(set(all_gases) - set(total_gases)) @@ -75,19 +86,19 @@ # TODO: can we remove this? # TODO: should probably be a dictionary.. std_to_iamc_gases = [ - ('SO2', 'Sulfur'), - ('NOX', 'NOx'), - ('NMVOC', 'VOC'), + ("SO2", "Sulfur"), + ("NOX", "NOx"), + ("NMVOC", "VOC"), ] # mapping from gas name to name to use in units unit_gas_names = { - 'Sulfur': 'SO2', - 'Kyoto Gases': 'CO2-equiv', - 'F-Gases': 'CO2-equiv', - 'HFC': 'CO2-equiv', - 'PFC': 'CO2-equiv', - 'CFC': 'CO2-equiv', + "Sulfur": "SO2", + "Kyoto Gases": "CO2-equiv", + "F-Gases": "CO2-equiv", + "HFC": "CO2-equiv", + "PFC": "CO2-equiv", + "CFC": "CO2-equiv", } _logger = None @@ -99,7 +110,7 @@ def logger(): if _logger is None: logging.basicConfig() _logger = logging.getLogger() - _logger.setLevel('INFO') + _logger.setLevel("INFO") return _logger @@ -123,7 +134,7 @@ def isnum(s): def numcols(df): """Returns all columns in df that have data types of floats or ints""" dtypes = df.dtypes - return [i for i in dtypes.index if dtypes.loc[i].name.startswith(('float', 'int'))] + return [i for i in dtypes.index if dtypes.loc[i].name.startswith(("float", "int"))] def check_null(df, name=None, fail=False): @@ -139,9 +150,9 @@ def check_null(df, name=None, fail=False): """ anynull = df.isnull().values.any() if fail: - assert(not anynull) + assert not anynull if anynull: - msg = 'Null (missing) values found for {} indicies: \n{}' + msg = "Null (missing) values found for {} indicies: \n{}" _df = df[df.isnull().any(axis=1)].reset_index()[df_idx] logger().warning(msg.format(name, _df)) df.dropna(inplace=True, axis=1) @@ -149,8 +160,8 @@ def check_null(df, name=None, fail=False): def gases(var_col): """The gas associated with each variable""" - gasidx = lambda x: x.split('|').index('Emissions') + 1 - return var_col.apply(lambda x: x.split('|')[gasidx(x)]) + gasidx = lambda x: x.split("|").index("Emissions") + 1 + return var_col.apply(lambda x: x.split("|")[gasidx(x)]) def units(var_col): @@ -163,44 +174,42 @@ def units(var_col): gas_col = gas_col.apply(replace) return gas_col.apply( - lambda gas: '{} {}/yr'.format('kt' if gas in kt_gases else 'Mt', gas)) + lambda gas: "{} {}/yr".format("kt" if gas in kt_gases else "Mt", gas) + ) -def remove_emissions_prefix(x, gas='XXX'): +def remove_emissions_prefix(x, gas="XXX"): """Return x with emissions prefix removed, e.g., Emissions|XXX|foo|bar -> foo|bar """ - return 
re.sub(r'^Emissions\|{}\|'.format(gas), '', x) + return re.sub(r"^Emissions\|{}\|".format(gas), "", x) -def recalculated_row_idx(df, prefix='', suffix=''): +def recalculated_row_idx(df, prefix="", suffix=""): """Return a boolean array with rows that need to be recalculated. - These are rows with total values for a gas species which is a sum of - subsectors. - During harmonization, subsector totals change, thus this summation must - be recalculated. + These are rows with total values for a gas species which is a sum of + subsectors. + During harmonization, subsector totals change, thus this summation must + be recalculated. """ df = df.reset_index() - gas_sec_pairs = df[['gas', 'sector']].drop_duplicates() - total_sector = '|'.join([prefix, suffix]) + gas_sec_pairs = df[["gas", "sector"]].drop_duplicates() + total_sector = "|".join([prefix, suffix]) gases_with_subsectors = df.gas.isin( - gas_sec_pairs[gas_sec_pairs.sector != total_sector] - .gas - .unique() + gas_sec_pairs[gas_sec_pairs.sector != total_sector].gas.unique() ) is_sector_total = df.sector == total_sector return np.array(gases_with_subsectors & is_sector_total) -def remove_recalculated_sectors(df, prefix='', suffix=''): - """Return df with Total gas (sum of all sectors) removed - """ - idx = recalculated_row_idx(df, prefix='', suffix='') +def remove_recalculated_sectors(df, prefix="", suffix=""): + """Return df with Total gas (sum of all sectors) removed""" + idx = recalculated_row_idx(df, prefix="", suffix="") return df[~idx] -def subtract_regions_from_world(df, name=None, base_year='2015', threshold=5e-2): +def subtract_regions_from_world(df, name=None, base_year="2015", threshold=5e-2): """Subtract the sum of regional results in each variable from the World total. If the result is a World total below a threshold, set those values to 0. @@ -216,31 +225,39 @@ def subtract_regions_from_world(df, name=None, base_year='2015', threshold=5e-2) """ # make global only global (not global + sum of regions) check_null(df, name) - if (df.loc['World'][base_year] == 0).all(): + if (df.loc["World"][base_year] == 0).all(): # some models (gcam) are not reporting any values in World # without this, you get `0 - sum(other regions)` - logger().warning('Empty global region found in ' + name) + logger().warning("Empty global region found in " + name) return df # sum all rows where region == World - total = combine_rows(df, 'region', 'World', sumall=True, - others=[], rowsonly=True) + total = combine_rows(df, "region", "World", sumall=True, others=[], rowsonly=True) # sum all rows where region != World - nonglb = combine_rows(df, 'region', 'World', sumall=False, - others=None, rowsonly=True) + nonglb = combine_rows( + df, "region", "World", sumall=False, others=None, rowsonly=True + ) glb = total.subtract(nonglb, fill_value=0) # pick up some precision issues # TODO: this precision is large because I have seen model results # be reported with this large of difference due to round off and values # approaching 0 - glb[(glb / total).abs() < threshold] = 0. + glb[(glb / total).abs() < threshold] = 0.0 df = glb.combine_first(df) check_null(df, name) return df -def combine_rows(df, level, main, others=None, sumall=True, dropothers=True, - rowsonly=False, newlabel=None): +def combine_rows( + df, + level, + main, + others=None, + sumall=True, + dropothers=True, + rowsonly=False, + newlabel=None, +): """Combine rows (add values) in a dataframe. 
Rows corresponding to the main and other values in a given level (or column) are added together and reattached taking the main value in the new column. @@ -280,8 +297,7 @@ def combine_rows(df, level, main, others=None, sumall=True, dropothers=True, lvl_values = df[level].unique() # if others is none, then its everything other than the primary - others = others if others is not None else \ - list(set(lvl_values) - set([main])) + others = others if others is not None else list(set(lvl_values) - set([main])) # set up df idx for operations grp_idx = [x for x in df_idx if x != level] @@ -289,27 +305,14 @@ def combine_rows(df, level, main, others=None, sumall=True, dropothers=True, # generate new rows which are summation of subset of old rows sum_subset = [main] + others if sumall else others - rows = ( - df.loc[sum_subset] - .groupby(level=grp_idx) - .sum() - ) + rows = df.loc[sum_subset].groupby(level=grp_idx).sum() rows[level] = newlabel - rows = ( - rows - .set_index(level, append=True) - .reorder_levels(df_idx) - .sort_index() - ) + rows = rows.set_index(level, append=True).reorder_levels(df_idx).sort_index() # get rid of rows that aren't needed in final dataframe drop = [main] + others if dropothers else [main] drop = list(set(drop) & set(lvl_values)) - df = ( - df.drop(drop) - .reset_index() - .set_index(df_idx) - ) + df = df.drop(drop).reset_index().set_index(df_idx) # construct final dataframe df = rows if rowsonly else pd.concat([df, rows]).sort_index() @@ -320,8 +323,9 @@ def combine_rows(df, level, main, others=None, sumall=True, dropothers=True, return df -def agg_regions(df, rfrom='ISO Code', rto='Native Region Code', mapping=None, - verify=True): +def agg_regions( + df, rfrom="ISO Code", rto="Native Region Code", mapping=None, verify=True +): """Aggregate values in a dataframe to a new regional composition Parameters @@ -340,11 +344,11 @@ def agg_regions(df, rfrom='ISO Code', rto='Native Region Code', mapping=None, ------- df : pd.DataFrame """ - mapping = mapping if mapping is not None else \ - pd.read_csv(region_path('message.csv')) + mapping = ( + mapping if mapping is not None else pd.read_csv(region_path("message.csv")) + ) mapping[rfrom] = mapping[rfrom].str.upper() - case_map = pd.Series(mapping[rto].unique(), - index=mapping[rto].str.upper().unique()) + case_map = pd.Series(mapping[rto].unique(), index=mapping[rto].str.upper().unique()) mapping[rto] = mapping[rto].str.upper() mapping = mapping[[rfrom, rto]].drop_duplicates().dropna() @@ -358,17 +362,17 @@ def agg_regions(df, rfrom='ISO Code', rto='Native Region Code', mapping=None, check = mapping[rfrom] notin = list(set(df.region) - set(check)) if len(notin) > 0: - logger().warning( - 'Removing regions without direct mapping: {}'.format(notin)) + logger().warning("Removing regions without direct mapping: {}".format(notin)) df = df[df.region.isin(check)] # map and sum dfto = ( - df - .merge(mapping, left_on='region', right_on=rfrom, how='outer') - .drop([rfrom, 'region'], axis=1) - .rename(columns={rto: 'region'}) - .groupby(df_idx).sum().reset_index() + df.merge(mapping, left_on="region", right_on=rfrom, how="outer") + .drop([rfrom, "region"], axis=1) + .rename(columns={rto: "region"}) + .groupby(df_idx) + .sum() + .reset_index() ) dfto.region = dfto.region.map(case_map) dfto = dfto.set_index(df_idx).sort_index() @@ -379,8 +383,8 @@ def agg_regions(df, rfrom='ISO Code', rto='Native Region Code', mapping=None, end = dfto[numcols(dfto)].values.sum() diff = abs(start - end) if np.isnan(diff) or diff / start > 1e-6: - msg = 
-        raise(ValueError(msg.format(diff)))
+        msg = "Difference between before and after is large: {}"
+        raise ValueError(msg.format(diff))
 
     # revert form if needed
     if not multi_idx:
@@ -407,7 +411,7 @@ def __init__(self, df, model=None, scenario=None):
         self.df = df
         self.model = model
         self.scenario = scenario
-        assert((self.df.units == 'kt').all())
+        assert (self.df.units == "kt").all()
 
     def add_variables(self, totals=None, aggregates=True):
         """Add aggregates and variables with direct mappings.
@@ -433,26 +437,26 @@ def to_template(self, **kwargs):
            first_year: optional, the first year to report values for
         """
         self.df = FormatTranslator(self.df).to_template(
-            model=self.model, scenario=self.scenario, **kwargs)
+            model=self.model, scenario=self.scenario, **kwargs
+        )
         return self.df
 
     def _add_totals(self, totals):
-        assert(not (self.df.sector == totals).any())
-        grp_idx = [x for x in df_idx if x != 'sector']
+        assert not (self.df.sector == totals).any()
+        grp_idx = [x for x in df_idx if x != "sector"]
         rows = self.df.groupby(grp_idx).sum().reset_index()
-        rows['sector'] = totals
+        rows["sector"] = totals
         self.df = pd.concat([self.df, rows])
 
     def _add_aggregates(self):
-        mapping = pd_read(iamc_path('sector_mapping.xlsx'),
-                          sheet_name='Aggregates')
+        mapping = pd_read(iamc_path("sector_mapping.xlsx"), sheet_name="Aggregates")
         mapping = mapping.applymap(remove_emissions_prefix)
 
         rows = []
-        for sector in mapping['IAMC Parent'].unique():
+        for sector in mapping["IAMC Parent"].unique():
             # mapping for aggregate sector for all gases
-            _map = mapping[mapping['IAMC Parent'] == sector]
-            _map = _map.set_index('IAMC Child')['IAMC Parent']
+            _map = mapping[mapping["IAMC Parent"] == sector]
+            _map = _map.set_index("IAMC Child")["IAMC Parent"]
 
             # rename variable column for subset of rows
             subset = self.df[self.df.sector.isin(_map.index)].copy()
@@ -468,7 +472,7 @@ class FormatTranslator(object):
     """Helper class to translate between IAMC and calculation formats"""
 
-    def __init__(self, df=None, prefix='', suffix=''):
+    def __init__(self, df=None, prefix="", suffix=""):
         self.df = df if df is None else df.copy()
         self.model = None
         self.scenario = None
@@ -490,26 +494,25 @@ def to_std(self, df=None, set_metadata=True):
         df.reset_index(inplace=True)
 
         if len(set(iamc_idx) - set(df.columns)):
-            msg = 'Columns do not conform with IAMC index: {}'
+            msg = "Columns do not conform with IAMC index: {}"
             raise ValueError(msg.format(df.columns))
 
         # make sure we're working with good data
-        if len(df['Model'].unique()) > 1:
-            raise ValueError(
-                'Model not unique: {}'.format(df['Model'].unique()))
-        assert(len(df['Scenario'].unique()) <= 1)
-        assert(df['Variable'].apply(lambda x: 'Emissions' in x).all())
+        if len(df["Model"].unique()) > 1:
+            raise ValueError("Model not unique: {}".format(df["Model"].unique()))
+        assert len(df["Scenario"].unique()) <= 1
+        assert df["Variable"].apply(lambda x: "Emissions" in x).all()
 
         # save data
         if set_metadata:
-            self.model = df['Model'].iloc[0]
-            self.scenario = df['Scenario'].iloc[0]
+            self.model = df["Model"].iloc[0]
+            self.scenario = df["Scenario"].iloc[0]
 
         # add std columns needed for conversions
-        df['region'] = df['Region']
-        df['gas'] = gases(df['Variable'])
-        df['units'] = df['Unit'].apply(lambda x: x.split()[0])
-        df['sector'] = df['Variable']
+        df["region"] = df["Region"]
+        df["gas"] = gases(df["Variable"])
+        df["units"] = df["Unit"].apply(lambda x: x.split()[0])
+        df["sector"] = df["Variable"]
 
         # convert gas names
self._convert_gases(df, tostd=True) @@ -519,15 +522,16 @@ def to_std(self, df=None, set_metadata=True): # remove emissions prefix def update_sector(row): - sectors = row.sector.split('|') - idx = sectors.index('Emissions') + sectors = row.sector.split("|") + idx = sectors.index("Emissions") sectors.pop(idx) # emissions sectors.pop(idx) # gas - return '|'.join(sectors).strip('|') + return "|".join(sectors).strip("|") + if not df.empty: - df['sector'] = df.apply(update_sector, axis=1) + df["sector"] = df.apply(update_sector, axis=1) # drop old columns - df.drop(iamc_idx + ['Unit'], axis=1, inplace=True) + df.drop(iamc_idx + ["Unit"], axis=1, inplace=True) # set up index and column order df.set_index(df_idx, inplace=True) @@ -538,8 +542,7 @@ def update_sector(row): return df - def to_template(self, df=None, model=None, scenario=None, - column_style=None): + def to_template(self, df=None, model=None, scenario=None, column_style=None): """Translate a dataframe from standard calculation format to IAMC Parameters @@ -560,7 +563,7 @@ def to_template(self, df=None, model=None, scenario=None, scenario = scenario or self.scenario if set(df.columns) != set(df_idx + numcols(df)): - msg = 'Columns do not conform with standard index: {}' + msg = "Columns do not conform with standard index: {}" raise ValueError(msg.format(df.columns)) # convert gas names @@ -571,31 +574,32 @@ def to_template(self, df=None, model=None, scenario=None, # inject emissions prefix def update_sector(row): - sectors = row.sector.split('|') - idx = self.prefix.count('|') + 1 - sectors.insert(idx, 'Emissions') + sectors = row.sector.split("|") + idx = self.prefix.count("|") + 1 + sectors.insert(idx, "Emissions") sectors.insert(idx + 1, row.gas) - return '|'.join(sectors).strip('|') - df['sector'] = df.apply(update_sector, axis=1) + return "|".join(sectors).strip("|") + + df["sector"] = df.apply(update_sector, axis=1) # write units correctly - df['units'] = units(df.sector) + df["units"] = units(df.sector) # add new columns, remove old - df['Model'] = model - df['Scenario'] = scenario - df['Variable'] = df.sector - df['Region'] = df.region - df['Unit'] = df.units + df["Model"] = model + df["Scenario"] = scenario + df["Variable"] = df.sector + df["Region"] = df.region + df["Unit"] = df.units df.drop(df_idx, axis=1, inplace=True) # unit magic to make it always first, would be easier if it was in idx. 
- hold = df['Unit'] - df.drop('Unit', axis=1, inplace=True) - df.insert(0, 'Unit', hold) + hold = df["Unit"] + df.drop("Unit", axis=1, inplace=True) + df.insert(0, "Unit", hold) # set up index and column order idx = iamc_idx - if column_style == 'upper': + if column_style == "upper": df.columns = df.columns.str.upper() idx = [x.upper() for x in idx] df.set_index(idx, inplace=True) @@ -614,19 +618,19 @@ def _convert_gases(self, df, tostd=True): # from, to for f, t in convert: - for col in ['gas', 'sector']: + for col in ["gas", "sector"]: df[col] = df[col].replace(f, t) def _convert_units(self, df, tostd=True): where = ~df.gas.isin(kt_gases) if tostd: df.loc[where, numcols(df)] *= 1e3 - df.loc[where, 'units'] = 'kt' - assert((df.units == 'kt').all()) + df.loc[where, "units"] = "kt" + assert (df.units == "kt").all() else: - assert((df.units == 'kt').all()) + assert (df.units == "kt").all() df.loc[where, numcols(df)] /= 1e3 - df.loc[where, 'units'] = 'Mt' + df.loc[where, "units"] = "Mt" def isin(df=None, **filters): @@ -638,8 +642,49 @@ def isin(df=None, **filters): or with explicit df to get boolean mask > isin(df, region="World", gas=["CO2", "N2O"]) """ + def tester(df): tests = (df.index.isin(np.atleast_1d(v), level=k) for k, v in filters.items()) return reduce(and_, tests, next(tests)) return tester if df is None else tester(df) + + +def pd_read(f, str_cols=False, *args, **kwargs): + """Try to read a file with pandas, supports CSV and XLSX + + Parameters + ---------- + f : string + the file to read in + str_cols : bool, optional + turn all columns into strings (numerical column names are sometimes + read in as numerical dtypes) + args, kwargs : sent directly to the Pandas read function + + Returns + ------- + df : pd.DataFrame + """ + if f.endswith("csv"): + df = pd.read_csv(f, *args, **kwargs) + else: + df = pd.read_excel(f, *args, **kwargs) + + if str_cols: + df.columns = [str(x) for x in df.columns] + + return df + + +def pd_write(df, f, *args, **kwargs): + """Try to write a file with pandas, supports CSV and XLSX""" + # guess whether to use index, unless we're told otherwise + index = kwargs.pop("index", isinstance(df.index, pd.MultiIndex)) + + if f.endswith("csv"): + df.to_csv(f, index=index, *args, **kwargs) + else: + writer = pd.ExcelWriter(f) + df.to_excel(writer, index=index, *args, **kwargs) + writer.save() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..c59836a --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,10 @@ +[tool.black] +line-length = 88 +target-version = ['py39'] +extend-exclude = ''' +( + _version.py | + versioneer.py | + ^/doc +) +''' \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 145e53f..0f41d6d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -10,7 +10,7 @@ tag_prefix = v parentdir_prefix = aneris- [flake8] -ignore = I002, F403, E402, E731, E203 +ignore = I002, F403, E402, E731, E203, W503 max-line-length = 88 exclude = doc diff --git a/setup.py b/setup.py index 9abad7b..6967954 100755 --- a/setup.py +++ b/setup.py @@ -1,4 +1,3 @@ - #!/usr/bin/env python from __future__ import print_function @@ -22,26 +21,27 @@ """ REQUIREMENTS = [ - 'numpy', - 'pandas>=1.1', - 'PyYAML', - 'xlrd>=2.0', - 'openpyxl', - 'matplotlib', - 'pyomo>=5' + "numpy", + "pandas>=1.1", + "PyYAML", + "xlrd>=2.0", + "openpyxl", + "matplotlib", + "pyomo>=5", ] EXTRA_REQUIREMENTS = { - 'tests': ['pytest', 'coverage', 'coveralls', 'pytest', 'pytest-cov'], - 'deploy': ['twine', 'setuptools', 'wheel'], - 'units': ['openscm-units'] + "tests": ["pytest", 
"coverage", "coveralls", "pytest", "pytest-cov", "black"], + "deploy": ["twine", "setuptools", "wheel"], + "units": ["openscm-units"], } # thank you https://stormpath.com/blog/building-simple-cli-interfaces-in-python class RunTests(Command): """Run all tests.""" - description = 'run tests' + + description = "run tests" user_options = [] def initialize_options(self): @@ -52,29 +52,29 @@ def finalize_options(self): def run(self): """Run all tests!""" - errno = call(['py.test', '--cov=skele', '--cov-report=term-missing']) + errno = call(["py.test", "--cov=skele", "--cov-report=term-missing"]) raise SystemExit(errno) CMDCLASS = versioneer.get_cmdclass() -CMDCLASS.update({'test': RunTests}) +CMDCLASS.update({"test": RunTests}) def main(): print(logo) classifiers = [ - 'License :: OSI Approved :: Apache Software License', + "License :: OSI Approved :: Apache Software License", ] packages = [ - 'aneris', + "aneris", ] pack_dir = { - 'aneris': 'aneris', + "aneris": "aneris", } entry_points = { - 'console_scripts': [ + "console_scripts": [ # list CLIs here - 'aneris=aneris.cli:main', + "aneris=aneris.cli:main", ], } package_data = { @@ -85,22 +85,22 @@ def main(): extra_requirements = EXTRA_REQUIREMENTS setup_kwargs = { "name": "aneris-iamc", - 'version': versioneer.get_version(), - "description": 'Harmonize Integrated Assessment Model Emissions ' - 'Trajectories', - "author": 'Matthew Gidden', - "author_email": 'matthew.gidden@gmail.com', - "url": 'http://github.com/iiasa/aneris', - 'cmdclass': CMDCLASS, - 'classifiers': classifiers, - 'license': 'Apache License 2.0', - 'packages': packages, - 'package_dir': pack_dir, - 'entry_points': entry_points, - 'package_data': package_data, - 'python_requires': '>=3.6', - 'install_requires': install_requirements, - 'extras_require': extra_requirements, + "version": versioneer.get_version(), + "description": "Harmonize Integrated Assessment Model Emissions " + "Trajectories", + "author": "Matthew Gidden", + "author_email": "matthew.gidden@gmail.com", + "url": "http://github.com/iiasa/aneris", + "cmdclass": CMDCLASS, + "classifiers": classifiers, + "license": "Apache License 2.0", + "packages": packages, + "package_dir": pack_dir, + "entry_points": entry_points, + "package_data": package_data, + "python_requires": ">=3.6", + "install_requires": install_requirements, + "extras_require": extra_requirements, } rtn = setup(**setup_kwargs) diff --git a/tests/ci/download_data.py b/tests/ci/download_data.py index 320b676..67ad0bc 100644 --- a/tests/ci/download_data.py +++ b/tests/ci/download_data.py @@ -2,30 +2,31 @@ import requests import tarfile -username = os.environ['ANERIS_CI_USER'] -password = os.environ['ANERIS_CI_PW'] +username = os.environ["ANERIS_CI_USER"] +password = os.environ["ANERIS_CI_PW"] -url = 'https://data.ene.iiasa.ac.at/continuous_integration/aneris/' +url = "https://data.ene.iiasa.ac.at/continuous_integration/aneris/" def download(filename): r = requests.get(url + filename, auth=(username, password)) if r.status_code == 200: - print('Downloading {} from {}'.format(filename, url)) - with open(filename, 'wb') as out: + print("Downloading {} from {}".format(filename, url)) + with open(filename, "wb") as out: for bits in r.iter_content(): out.write(bits) assert os.path.exists(filename) - print('Untarring {}'.format(filename)) + print("Untarring {}".format(filename)) tar = tarfile.open(filename, "r:gz") tar.extractall() tar.close() os.remove(filename) else: raise IOError( - 'Failed download with user/pass: {}/{}'.format(username, password)) + 
"Failed download with user/pass: {}/{}".format(username, password) + ) -download('data.tar.gz') -download('output.tar.gz') +download("data.tar.gz") +download("output.tar.gz") diff --git a/tests/test_default_decision_tree.py b/tests/test_default_decision_tree.py index 796c7ed..fde47e1 100644 --- a/tests/test_default_decision_tree.py +++ b/tests/test_default_decision_tree.py @@ -6,10 +6,10 @@ import pandas.testing as pdt -def make_index(length, gas='CH4', sector='Energy'): +def make_index(length, gas="CH4", sector="Energy"): return pd.MultiIndex.from_product( [["region_{i}" for i in range(length)], [gas], [sector]], - names=["region", "gas", "sector"] + names=["region", "gas", "sector"], ) @@ -20,153 +20,135 @@ def index1(): @pytest.fixture def index1_co2(): - return make_index(1, gas='CO2') + return make_index(1, gas="CO2") def test_hist_zero(index1): - hist = pd.DataFrame({'2015': [0]}, index1) - df = pd.DataFrame({'2015': [1.]}, index1) + hist = pd.DataFrame({"2015": [0]}, index1) + df = pd.DataFrame({"2015": [1.0]}, index1) - obs, diags = harmonize.default_methods(hist, df, '2015') + obs, diags = harmonize.default_methods(hist, df, "2015") - exp = pd.Series(['hist_zero'], index1, name='methods') + exp = pd.Series(["hist_zero"], index1, name="methods") pdt.assert_series_equal(exp, obs, check_names=False) def test_model_zero(index1): - hist = pd.DataFrame({'2015': [1.]}, index1) - df = pd.DataFrame({'2015': [0.]}, index1) + hist = pd.DataFrame({"2015": [1.0]}, index1) + df = pd.DataFrame({"2015": [0.0]}, index1) - obs, diags = harmonize.default_methods(hist, df, '2015') + obs, diags = harmonize.default_methods(hist, df, "2015") - exp = pd.Series(['model_zero'], index1, name='methods') + exp = pd.Series(["model_zero"], index1, name="methods") pdt.assert_series_equal(exp, obs, check_names=False) def test_branch1(index1): - hist = pd.DataFrame({'2015': [1.]}, index1) - df = pd.DataFrame( - {'2015': [0.], '2020': [-1.]}, - index1 - ) + hist = pd.DataFrame({"2015": [1.0]}, index1) + df = pd.DataFrame({"2015": [0.0], "2020": [-1.0]}, index1) - obs, diags = harmonize.default_methods(hist, df, '2015') - exp = pd.Series(['reduce_offset_2080'], index1, name='methods') + obs, diags = harmonize.default_methods(hist, df, "2015") + exp = pd.Series(["reduce_offset_2080"], index1, name="methods") pdt.assert_series_equal(exp, obs, check_names=False) - obs, diags = harmonize.default_methods(hist, df, '2015', - offset_method='reduce_offset_2050') - exp = pd.Series(['reduce_offset_2050'], index1, name='methods') + obs, diags = harmonize.default_methods( + hist, df, "2015", offset_method="reduce_offset_2050" + ) + exp = pd.Series(["reduce_offset_2050"], index1, name="methods") pdt.assert_series_equal(exp, obs, check_names=False) def test_branch2(index1): - hist = pd.DataFrame({'2015': [1.]}, index1) - df = pd.DataFrame( - {'2015': [0.], '2020': [1.]}, - index1 - ) + hist = pd.DataFrame({"2015": [1.0]}, index1) + df = pd.DataFrame({"2015": [0.0], "2020": [1.0]}, index1) - obs, diags = harmonize.default_methods(hist, df, '2015') - exp = pd.Series(['constant_offset'], index1, name='methods') + obs, diags = harmonize.default_methods(hist, df, "2015") + exp = pd.Series(["constant_offset"], index1, name="methods") pdt.assert_series_equal(exp, obs, check_names=False) def test_branch3(index1): - hist = pd.DataFrame( - {'2015': [1.]}, - index1 - ) - df = pd.DataFrame( - {'2015': [1.001], '2020': [-1.001]}, - index1 - ) + hist = pd.DataFrame({"2015": [1.0]}, index1) + df = pd.DataFrame({"2015": [1.001], "2020": 
[-1.001]}, index1) - obs, diags = harmonize.default_methods(hist, df, '2015') - exp = pd.Series(['reduce_ratio_2080'], index1, name='methods') + obs, diags = harmonize.default_methods(hist, df, "2015") + exp = pd.Series(["reduce_ratio_2080"], index1, name="methods") pdt.assert_series_equal(exp, obs, check_names=False) - obs, diags = harmonize.default_methods(hist, df, '2015', - ratio_method='reduce_ratio_2050') - exp = pd.Series(['reduce_ratio_2050'], index1, name='methods') + obs, diags = harmonize.default_methods( + hist, df, "2015", ratio_method="reduce_ratio_2050" + ) + exp = pd.Series(["reduce_ratio_2050"], index1, name="methods") pdt.assert_series_equal(exp, obs, check_names=False) def test_branch4(index1): - hist = pd.DataFrame({'2015': [1.]}, index1) - df = pd.DataFrame( - {'2015': [5.001], '2020': [-1.]}, - index1 - ) + hist = pd.DataFrame({"2015": [1.0]}, index1) + df = pd.DataFrame({"2015": [5.001], "2020": [-1.0]}, index1) - obs, diags = harmonize.default_methods(hist, df, '2015') + obs, diags = harmonize.default_methods(hist, df, "2015") - exp = pd.Series(['reduce_ratio_2100'], index1, name='methods') + exp = pd.Series(["reduce_ratio_2100"], index1, name="methods") pdt.assert_series_equal(exp, obs, check_names=False) def test_branch5(index1): - hist = pd.DataFrame({'2015': [1.]}, index1) - df = pd.DataFrame( - {'2015': [5.001], '2020': [1.]}, - index1 - ) + hist = pd.DataFrame({"2015": [1.0]}, index1) + df = pd.DataFrame({"2015": [5.001], "2020": [1.0]}, index1) - obs, diags = harmonize.default_methods(hist, df, '2015') + obs, diags = harmonize.default_methods(hist, df, "2015") - exp = pd.Series(['constant_ratio'], index1, name='methods') + exp = pd.Series(["constant_ratio"], index1, name="methods") pdt.assert_series_equal(exp, obs, check_names=False) def test_branch6(index1): hist = pd.DataFrame( { - '2000': [1.], - '2005': [1000.], - '2010': [1.], - '2015': [100.], + "2000": [1.0], + "2005": [1000.0], + "2010": [1.0], + "2015": [100.0], }, - index1 + index1, ) df = pd.DataFrame( { - '2015': [5.001], - '2020': [1.], + "2015": [5.001], + "2020": [1.0], }, - index1 + index1, ) - obs, diags = harmonize.default_methods(hist, df, '2015') + obs, diags = harmonize.default_methods(hist, df, "2015") print(diags) - exp = pd.Series(['reduce_offset_2150_cov'], index1, name='methods') + exp = pd.Series(["reduce_offset_2150_cov"], index1, name="methods") pdt.assert_series_equal(exp, obs, check_names=False) def test_custom_method_choice(index1, index1_co2): - def method_choice( - row, ratio_method, offset_method, luc_method, luc_cov_threshold - ): - return 'budget' if row.gas == 'CO2' else ratio_method + def method_choice(row, ratio_method, offset_method, luc_method, luc_cov_threshold): + return "budget" if row.gas == "CO2" else ratio_method # CH4 - hist_ch4 = pd.DataFrame({'2015': [1.]}, index1) - df_ch4 = pd.DataFrame({'2015': [1.]}, index1) + hist_ch4 = pd.DataFrame({"2015": [1.0]}, index1) + df_ch4 = pd.DataFrame({"2015": [1.0]}, index1) obs_ch4, _ = harmonize.default_methods( - hist_ch4, df_ch4, '2015', method_choice=method_choice + hist_ch4, df_ch4, "2015", method_choice=method_choice ) - exp_ch4 = pd.Series(['reduce_ratio_2080'], index1, name='methods') + exp_ch4 = pd.Series(["reduce_ratio_2080"], index1, name="methods") pdt.assert_series_equal(exp_ch4, obs_ch4, check_names=False) # CO2 - hist_co2 = pd.DataFrame({'2015': [1.]}, index1_co2) - df_co2 = pd.DataFrame({'2015': [1.]}, index1_co2) + hist_co2 = pd.DataFrame({"2015": [1.0]}, index1_co2) + df_co2 = pd.DataFrame({"2015": 
[1.0]}, index1_co2) obs_co2, _ = harmonize.default_methods( - hist_co2, df_co2, '2015', method_choice=method_choice + hist_co2, df_co2, "2015", method_choice=method_choice ) - exp_co2 = pd.Series(['budget'], index1_co2, name='methods') + exp_co2 = pd.Series(["budget"], index1_co2, name="methods") pdt.assert_series_equal(exp_co2, obs_co2, check_names=False) diff --git a/tests/test_harmonize.py b/tests/test_harmonize.py index a7b35ce..476a2b6 100644 --- a/tests/test_harmonize.py +++ b/tests/test_harmonize.py @@ -10,35 +10,53 @@ nvals = 6 -_df = pd.DataFrame({ - 'gas': ['BC'] * nvals, - 'region': ['a'] * nvals, - 'units': ['Mt'] * nvals, - 'sector': ['bar', 'foo'] + [str(x) for x in range(nvals - 2)], - '2010': [2, 1, 9000, 9000, 9000, 9000], - '2015': [3, 2, 0.51, 9000, 9000, -90], - '2040': [4.5, 1.5, 9000, 9000, 9000, 9000], - '2060': [6, 1, 9000, 9000, 9000, 9000], -}).set_index(utils.df_idx).sort_index() +_df = ( + pd.DataFrame( + { + "gas": ["BC"] * nvals, + "region": ["a"] * nvals, + "units": ["Mt"] * nvals, + "sector": ["bar", "foo"] + [str(x) for x in range(nvals - 2)], + "2010": [2, 1, 9000, 9000, 9000, 9000], + "2015": [3, 2, 0.51, 9000, 9000, -90], + "2040": [4.5, 1.5, 9000, 9000, 9000, 9000], + "2060": [6, 1, 9000, 9000, 9000, 9000], + } + ) + .set_index(utils.df_idx) + .sort_index() +) _t_frac = lambda tf: (2040 - 2015) / float(tf - 2015) -_hist = pd.DataFrame({ - 'gas': ['BC'] * nvals, - 'region': ['a'] * nvals, - 'units': ['Mt'] * nvals, - 'sector': ['bar', 'foo'] + [str(x) for x in range(nvals - 2)], - '2010': [1., 0.34, 9000, 9000, 9000, 9000], - '2015': [0.01, 1., 0.5, 2 * 8999. / 9, 3 * 8999., 8999.], -}).set_index(utils.df_idx).sort_index() - -_methods = pd.DataFrame({ - 'gas': _df.index.get_level_values('gas'), - 'sector': _df.index.get_level_values('sector'), - 'region': ['a'] * nvals, - 'units': ['Mt'] * nvals, - 'method': ['constant_offset'] * nvals, -}).set_index(utils.df_idx).sort_index() +_hist = ( + pd.DataFrame( + { + "gas": ["BC"] * nvals, + "region": ["a"] * nvals, + "units": ["Mt"] * nvals, + "sector": ["bar", "foo"] + [str(x) for x in range(nvals - 2)], + "2010": [1.0, 0.34, 9000, 9000, 9000, 9000], + "2015": [0.01, 1.0, 0.5, 2 * 8999.0 / 9, 3 * 8999.0, 8999.0], + } + ) + .set_index(utils.df_idx) + .sort_index() +) + +_methods = ( + pd.DataFrame( + { + "gas": _df.index.get_level_values("gas"), + "sector": _df.index.get_level_values("sector"), + "region": ["a"] * nvals, + "units": ["Mt"] * nvals, + "method": ["constant_offset"] * nvals, + } + ) + .set_index(utils.df_idx) + .sort_index() +) def test_factors(): @@ -46,7 +64,7 @@ def test_factors(): hist = _hist.copy() obsoffset, obsratio = harmonize.harmonize_factors(df.copy(), hist.copy()) # im lazy; test initially written when these were of length 2 - exp = np.array([0.01 - 3, -1.]) + exp = np.array([0.01 - 3, -1.0]) npt.assert_array_almost_equal(exp, obsoffset[-2:]) exp = np.array([0.01 / 3, 0.5]) npt.assert_array_almost_equal(exp, obsratio[-2:]) @@ -57,22 +75,22 @@ def test_harmonize_constant_offset(): hist = _hist.copy() methods = _methods.copy() h = harmonize.Harmonizer(df, hist) - res = h.harmonize(overrides=methods['method']) + res = h.harmonize(overrides=methods["method"]) # base year - obs = res['2015'] - exp = _hist['2015'] + obs = res["2015"] + exp = _hist["2015"] npt.assert_array_almost_equal(obs, exp) # future year - obs = res['2060'] - exp = _df['2060'] + (_hist['2015'] - _df['2015']) + obs = res["2060"] + exp = _df["2060"] + (_hist["2015"] - _df["2015"]) npt.assert_array_almost_equal(obs, exp) 
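# The base-year and future-year expectations above (and in the constant_ratio
# test further below) encode the core harmonization arithmetic directly. A
# minimal standalone sketch of it, with illustrative numbers mirroring the
# "foo" row of the fixtures (only pandas assumed, nothing from this module):
#
#     import pandas as pd
#
#     model = pd.Series({"2015": 2.0, "2060": 1.0})  # model trajectory
#     hist_base = 1.0                                # history in the base year
#
#     offset = hist_base - model["2015"]  # constant_offset factor (-1.0)
#     ratio = hist_base / model["2015"]   # constant_ratio factor (0.5)
#
#     # both methods reproduce history exactly in the base year:
#     assert (model + offset)["2015"] == hist_base  # shifts the trajectory
#     assert (model * ratio)["2015"] == hist_base   # scales the trajectory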
def test_no_model(): - df = pd.DataFrame({'2015': [0]}) - hist = pd.DataFrame({'2015': [1.5]}) + df = pd.DataFrame({"2015": [0]}) + hist = pd.DataFrame({"2015": [1.5]}) obsoffset, obsratio = harmonize.harmonize_factors(df.copy(), hist.copy()) exp = np.array([1.5]) npt.assert_array_almost_equal(exp, obsoffset) @@ -85,17 +103,17 @@ def test_harmonize_constant_ratio(): hist = _hist.copy() methods = _methods.copy() h = harmonize.Harmonizer(df, hist) - methods['method'] = ['constant_ratio'] * nvals - res = h.harmonize(overrides=methods['method']) + methods["method"] = ["constant_ratio"] * nvals + res = h.harmonize(overrides=methods["method"]) # base year - obs = res['2015'] - exp = _hist['2015'] + obs = res["2015"] + exp = _hist["2015"] npt.assert_array_almost_equal(obs, exp) # future year - obs = res['2060'] - exp = _df['2060'] * (_hist['2015'] / _df['2015']) + obs = res["2060"] + exp = _df["2060"] * (_hist["2015"] / _df["2015"]) npt.assert_array_almost_equal(obs, exp) @@ -108,24 +126,24 @@ def test_harmonize_reduce_offset(): # this is bad, there should be a test for each case for tf in [2050, 2100, 2150]: print(tf) - method = 'reduce_offset_{}'.format(tf) - methods['method'] = [method] * nvals - res = h.harmonize(overrides=methods['method']) + method = "reduce_offset_{}".format(tf) + methods["method"] = [method] * nvals + res = h.harmonize(overrides=methods["method"]) # base year - obs = res['2015'] - exp = _hist['2015'] + obs = res["2015"] + exp = _hist["2015"] npt.assert_array_almost_equal(obs, exp) # future year - obs = res['2040'] - exp = _df['2040'] + (1 - _t_frac(tf)) * (_hist['2015'] - _df['2015']) + obs = res["2040"] + exp = _df["2040"] + (1 - _t_frac(tf)) * (_hist["2015"] - _df["2015"]) npt.assert_array_almost_equal(obs, exp) # future year if tf < 2060: - obs = res['2060'] - exp = _df['2060'] + obs = res["2060"] + exp = _df["2060"] npt.assert_array_almost_equal(obs, exp) @@ -138,25 +156,25 @@ def test_harmonize_reduce_ratio(): # this is bad, there should be a test for each case for tf in [2050, 2100, 2150]: print(tf) - method = 'reduce_ratio_{}'.format(tf) - methods['method'] = [method] * nvals - res = h.harmonize(overrides=methods['method']) + method = "reduce_ratio_{}".format(tf) + methods["method"] = [method] * nvals + res = h.harmonize(overrides=methods["method"]) # base year - obs = res['2015'] - exp = _hist['2015'] + obs = res["2015"] + exp = _hist["2015"] npt.assert_array_almost_equal(obs, exp) # future year - obs = res['2040'] - ratio = _hist['2015'] / _df['2015'] - exp = _df['2040'] * (ratio + _t_frac(tf) * (1 - ratio)) + obs = res["2040"] + ratio = _hist["2015"] / _df["2015"] + exp = _df["2040"] * (ratio + _t_frac(tf) * (1 - ratio)) npt.assert_array_almost_equal(obs, exp) # future year if tf < 2060: - obs = res['2060'] - exp = _df['2060'] + obs = res["2060"] + exp = _df["2060"] npt.assert_array_almost_equal(obs, exp) @@ -172,13 +190,13 @@ def test_harmonize_reduce_ratio_different_units(): tf = 2050 - method = 'reduce_ratio_{}'.format(tf) - methods['method'] = [method] * nvals - res = h.harmonize(overrides=methods['method']) + method = "reduce_ratio_{}".format(tf) + methods["method"] = [method] * nvals + res = h.harmonize(overrides=methods["method"]) # base year - obs = res['2015'] - exp = hist['2015'] + obs = res["2015"] + exp = hist["2015"] # should come back with input units obs_units = obs.index.get_level_values("units") df_units = df.index.get_level_values("units") @@ -186,15 +204,15 @@ def test_harmonize_reduce_ratio_different_units(): 
npt.assert_array_almost_equal(obs, exp) # future year - obs = res['2040'] - ratio = _hist['2015'] / _df['2015'] - exp = _df['2040'] * (ratio + _t_frac(tf) * (1 - ratio)) + obs = res["2040"] + ratio = _hist["2015"] / _df["2015"] + exp = _df["2040"] * (ratio + _t_frac(tf) * (1 - ratio)) npt.assert_array_almost_equal(obs, exp) # future year if tf < 2060: - obs = res['2060'] - exp = _df['2060'] + obs = res["2060"] + exp = _df["2060"] npt.assert_array_almost_equal(obs, exp) @@ -203,18 +221,20 @@ def test_harmonize_mix(): hist = _hist.copy() methods = _methods.copy() h = harmonize.Harmonizer(df, hist) - methods['method'] = ['constant_offset'] * nvals - res = h.harmonize(overrides=methods['method']) + methods["method"] = ["constant_offset"] * nvals + res = h.harmonize(overrides=methods["method"]) # base year - obs = res['2015'] - exp = _hist['2015'] + obs = res["2015"] + exp = _hist["2015"] npt.assert_array_almost_equal(obs, exp) # future year - obs = res['2060'][:2] - exp = [_df['2060'][0] + (_hist['2015'][0] - _df['2015'][0]), - _df['2060'][1] * (_hist['2015'][1] / _df['2015'][1])] + obs = res["2060"][:2] + exp = [ + _df["2060"][0] + (_hist["2015"][0] - _df["2015"][0]), + _df["2060"][1] * (_hist["2015"][1] / _df["2015"][1]), + ] npt.assert_array_almost_equal(obs, exp) @@ -223,16 +243,16 @@ def test_harmonize_linear_interpolation(): hist = _hist.copy() methods = _methods.copy() h = harmonize.Harmonizer(df, hist) - methods['method'] = ['linear_interpolate_2060'] * nvals - res = h.harmonize(overrides=methods['method']) + methods["method"] = ["linear_interpolate_2060"] * nvals + res = h.harmonize(overrides=methods["method"]) # base year - obs = res['2015'] - exp = _hist['2015'] + obs = res["2015"] + exp = _hist["2015"] npt.assert_array_almost_equal(obs, exp) # future year - x1, x2, x = '2015', '2060', '2040' + x1, x2, x = "2015", "2060", "2040" y1, y2 = _hist[x1], _df[x2] m = (y2 - y1) / (float(x2) - float(x1)) b = y1 - m * float(x1) @@ -241,8 +261,8 @@ def test_harmonize_linear_interpolation(): npt.assert_array_almost_equal(obs, exp) # year after interp - obs = res['2060'] - exp = _df['2060'] + obs = res["2060"] + exp = _df["2060"] npt.assert_array_almost_equal(obs, exp) @@ -252,12 +272,12 @@ def test_harmonize_budget(): methods = _methods.copy() h = harmonize.Harmonizer(df, hist) - methods['method'] = 'budget' - res = h.harmonize(overrides=methods['method']) + methods["method"] = "budget" + res = h.harmonize(overrides=methods["method"]) # base year - obs = res['2015'] - exp = _hist['2015'] + obs = res["2015"] + exp = _hist["2015"] npt.assert_array_almost_equal(obs, exp) # carbon budget conserved @@ -272,5 +292,5 @@ def _carbon_budget(emissions): npt.assert_array_almost_equal( _carbon_budget(res), - _carbon_budget(df) - _carbon_budget(hist.loc[:, '2010':'2015']), + _carbon_budget(df) - _carbon_budget(hist.loc[:, "2010":"2015"]), ) diff --git a/tests/test_io.py b/tests/test_io.py index 568a88e..62134e4 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -4,18 +4,18 @@ from aneris import _io _defaults = { - 'config': { - 'default_luc_method': 'reduce_ratio_2150_cov', - 'default_offset_method': 'reduce_offset_2080', - 'default_ratio_method': 'reduce_ratio_2080', - 'cov_threshold': 20, - 'harmonize_year': 2015, - 'global_harmonization_only': False, - 'replace_suffix': 'Harmonized-DB', + "config": { + "default_luc_method": "reduce_ratio_2150_cov", + "default_offset_method": "reduce_offset_2080", + "default_ratio_method": "reduce_ratio_2080", + "cov_threshold": 20, + "harmonize_year": 2015, + 
"global_harmonization_only": False, + "replace_suffix": "Harmonized-DB", }, - 'prefix': 'CEDS+|9+ Sectors', - 'suffix': 'Unharmonized', - 'add_5regions': True, + "prefix": "CEDS+|9+ Sectors", + "suffix": "Unharmonized", + "add_5regions": True, } @@ -28,7 +28,7 @@ def test_default_rc(): def test_mutable(): obs = _io.RunControl() with pytest.raises(TypeError): - obs['foo'] = 'bar' + obs["foo"] = "bar" def test_nondefault_rc(): @@ -39,7 +39,7 @@ def test_nondefault_rc(): obs = _io.RunControl(rcstr) exp = _defaults - exp['config']['cov_threshold'] = 42 + exp["config"]["cov_threshold"] = 42 assert exp == obs @@ -53,18 +53,18 @@ def test_nondefault_rc_file_read(): f.flush() obs = _io.RunControl(f.name) exp = _defaults - exp['config']['cov_threshold'] = 42 + exp["config"]["cov_threshold"] = 42 assert exp == obs def test_recursive_update(): update = { - 'foo': 'bar', - 'cov_threshold': 42, + "foo": "bar", + "cov_threshold": 42, } exp = _defaults - exp['config'].update(update) + exp["config"].update(update) obs = _io.RunControl() - obs.recursive_update('config', update) + obs.recursive_update("config", update) assert obs == exp diff --git a/tests/test_regression.py b/tests/test_regression.py index 65d71f6..1dc83cf 100644 --- a/tests/test_regression.py +++ b/tests/test_regression.py @@ -18,29 +18,28 @@ # worry about this again. here = join(os.path.dirname(os.path.realpath(__file__))) -ci_path = join(here, 'ci') +ci_path = join(here, "ci") # check variables for if we are on CI (will then run regression tests) -ON_CI_REASON = 'No access to regression test credentials' +ON_CI_REASON = "No access to regression test credentials" try: - os.environ['ANERIS_CI_USER'] + os.environ["ANERIS_CI_USER"] ON_CI = True except KeyError: ON_CI = False FILE_SUFFIXES = [ - 'global_only', - 'regions_sectors', - 'global_sectors', - 'mock_pipeline_prototype', - 'pipeline_progress', - 'full_ar6', - 'global_ar6', + "global_only", + "regions_sectors", + "global_sectors", + "mock_pipeline_prototype", + "pipeline_progress", + "full_ar6", + "global_ar6", ] -class TestHarmonizeRegression(): - +class TestHarmonizeRegression: def _run(self, inf, checkf, hist, reg, rc, prefix, name): # path setup prefix = join(here, prefix) @@ -48,9 +47,9 @@ def _run(self, inf, checkf, hist, reg, rc, prefix, name): reg = join(prefix, reg) rc = join(prefix, rc) inf = join(prefix, inf) - outf = join(prefix, '{}_harmonized.xlsx'.format(name)) - outf_meta = join(prefix, '{}_metadata.xlsx'.format(name)) - outf_diag = join(prefix, '{}_diagnostics.xlsx'.format(name)) + outf = join(prefix, "{}_harmonized.xlsx".format(name)) + outf_meta = join(prefix, "{}_metadata.xlsx".format(name)) + outf_diag = join(prefix, "{}_diagnostics.xlsx".format(name)) clean = [outf, outf_meta, outf_diag] # make sure we're fresh @@ -61,21 +60,31 @@ def _run(self, inf, checkf, hist, reg, rc, prefix, name): # run print(inf, hist, reg, rc, name) cli.harmonize( - inf, hist, reg, rc, prefix, name, return_result=False, + inf, + hist, + reg, + rc, + prefix, + name, + return_result=False, ) # test ncols = 5 expfile = join(prefix, checkf) - exp = pd.read_excel(expfile, sheet_name='data', - index_col=list(range(ncols)), - engine='openpyxl', - ).sort_index() + exp = pd.read_excel( + expfile, + sheet_name="data", + index_col=list(range(ncols)), + engine="openpyxl", + ).sort_index() exp.columns = exp.columns.astype(str) - obs = pd.read_excel(outf, sheet_name='data', - index_col=list(range(ncols)), - engine='openpyxl', - ).sort_index() + obs = pd.read_excel( + outf, + sheet_name="data", + 
index_col=list(range(ncols)),
+            engine="openpyxl",
+        ).sort_index()
 
         assert_frame_equal(exp, obs, check_dtype=False)
 
         # tidy up after
@@ -86,25 +95,25 @@
     @pytest.mark.parametrize("file_suffix", FILE_SUFFIXES)
     def test_basic_run(self, file_suffix):
         # this is run no matter what
-        prefix = 'test_data'
-        checkf = 'test_{}.xlsx'.format(file_suffix)
-        hist = 'history_{}.xls'.format(file_suffix)
-        reg = 'regions_{}.csv'.format(file_suffix)
-        inf = 'model_{}.xls'.format(file_suffix)
-        rc = 'aneris_{}.yaml'.format(file_suffix)
+        prefix = "test_data"
+        checkf = "test_{}.xlsx".format(file_suffix)
+        hist = "history_{}.xls".format(file_suffix)
+        reg = "regions_{}.csv".format(file_suffix)
+        inf = "model_{}.xls".format(file_suffix)
+        rc = "aneris_{}.yaml".format(file_suffix)
 
         # get all arguments
         self._run(inf, checkf, hist, reg, rc, prefix, file_suffix)
 
     @pytest.mark.skipif(not ON_CI, reason=ON_CI_REASON)
-    @pytest.mark.parametrize("name", ['msg', 'gcam'])
+    @pytest.mark.parametrize("name", ["msg", "gcam"])
     def test_regression_ci(self, name):
-        prefix = join(ci_path, 'test-{}'.format(name))
-        checkf = '{}_harmonized.xlsx'.format(name)
-        hist = 'history.csv'
-        reg = 'regiondef.xlsx'
-        rc = 'rc.yaml'
-        inf = 'inputfile.xlsx'
+        prefix = join(ci_path, "test-{}".format(name))
+        checkf = "{}_harmonized.xlsx".format(name)
+        hist = "history.csv"
+        reg = "regiondef.xlsx"
+        rc = "rc.yaml"
+        inf = "inputfile.xlsx"
 
         # copy needed files
         for fname in [hist, rc, checkf]:
diff --git a/tests/test_tutorials.py b/tests/test_tutorials.py
index 1155d06..d6cb3c3 100644
--- a/tests/test_tutorials.py
+++ b/tests/test_tutorials.py
@@ -8,7 +8,7 @@
 import jupyter
 
 here = os.path.dirname(os.path.realpath(__file__))
-tut_path = os.path.join(here, '..', 'doc', 'source')
+tut_path = os.path.join(here, "..", "doc", "source")
 
 # taken from the excellent example here:
 # https://blog.thedataincubator.com/2016/06/testing-jupyter-notebooks/
@@ -20,27 +20,35 @@ def _notebook_run(path, kernel=None, capsys=None):
     """
     assert os.path.exists(path)
     major_version = sys.version_info[0]
-    kernel = kernel or 'python{}'.format(major_version)
+    kernel = kernel or "python{}".format(major_version)
     if capsys is not None:
         with capsys.disabled():
-            print('using py version {} with kernel {}'.format(
-                major_version, kernel))
+            print("using py version {} with kernel {}".format(major_version, kernel))
 
     dirname, __ = os.path.split(path)
     os.chdir(dirname)
 
     fname = os.path.join(here, 'test.ipynb')
     args = [
-        'jupyter', 'nbconvert', '--to', 'notebook', '--execute',
-        '--ExecutePreprocessor.timeout=60',
-        '--ExecutePreprocessor.kernel_name={}'.format(kernel),
-        "--output", fname, path]
+        "jupyter",
+        "nbconvert",
+        "--to",
+        "notebook",
+        "--execute",
+        "--ExecutePreprocessor.timeout=60",
+        "--ExecutePreprocessor.kernel_name={}".format(kernel),
+        "--output",
+        fname,
+        path,
+    ]
     subprocess.check_call(args)
 
-    nb = nbformat.read(io.open(fname, encoding='utf-8'),
-                       nbformat.current_nbformat)
+    nb = nbformat.read(io.open(fname, encoding="utf-8"), nbformat.current_nbformat)
     errors = [
-        output for cell in nb.cells if "outputs" in cell
-        for output in cell["outputs"] if output.output_type == "error"
+        output
+        for cell in nb.cells
+        if "outputs" in cell
+        for output in cell["outputs"]
+        if output.output_type == "error"
     ]
 
     os.remove(fname)
@@ -49,6 +57,6 @@
 
 def test_tutorial(capsys):
-    fname = os.path.join(tut_path, 
'tutorial.ipynb') + fname = os.path.join(tut_path, "tutorial.ipynb") nb, errors = _notebook_run(fname, capsys=capsys) assert errors == [] diff --git a/tests/test_utils.py b/tests/test_utils.py index 97e7a47..2fb8896 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -7,32 +7,41 @@ def test_remove_emissions_prefix(): - assert 'foo' == utils.remove_emissions_prefix('foo') - assert 'foo' == utils.remove_emissions_prefix('Emissions|XXX|foo') - assert 'Emissions|bar|foo' == \ - utils.remove_emissions_prefix('Emissions|bar|foo') - assert 'foo' == \ - utils.remove_emissions_prefix('Emissions|bar|foo', gas='bar') + assert "foo" == utils.remove_emissions_prefix("foo") + assert "foo" == utils.remove_emissions_prefix("Emissions|XXX|foo") + assert "Emissions|bar|foo" == utils.remove_emissions_prefix("Emissions|bar|foo") + assert "foo" == utils.remove_emissions_prefix("Emissions|bar|foo", gas="bar") def test_region_agg_funky_name(): - df = pd.DataFrame({ - 'sector': ['foo', 'foo'], - 'region': ['a', 'b'], - '2010': [1.0, 4.0], - 'units': ['Mt'] * 2, - 'gas': ['BC'] * 2, - }).set_index(utils.df_idx).sort_index() - mapping = pd.DataFrame( - [['fOO_Bar', 'a'], ['fOO_Bar', 'b']], columns=['x', 'y']) - exp = pd.DataFrame({ - 'sector': ['foo'], - 'region': ['fOO_Bar'], - '2010': [5.0], - 'units': ['Mt'], - 'gas': ['BC'], - }).set_index(utils.df_idx).sort_index() - obs = utils.agg_regions(df, rfrom='y', rto='x', mapping=mapping) + df = ( + pd.DataFrame( + { + "sector": ["foo", "foo"], + "region": ["a", "b"], + "2010": [1.0, 4.0], + "units": ["Mt"] * 2, + "gas": ["BC"] * 2, + } + ) + .set_index(utils.df_idx) + .sort_index() + ) + mapping = pd.DataFrame([["fOO_Bar", "a"], ["fOO_Bar", "b"]], columns=["x", "y"]) + exp = ( + pd.DataFrame( + { + "sector": ["foo"], + "region": ["fOO_Bar"], + "2010": [5.0], + "units": ["Mt"], + "gas": ["BC"], + } + ) + .set_index(utils.df_idx) + .sort_index() + ) + obs = utils.agg_regions(df, rfrom="y", rto="x", mapping=mapping) pdt.assert_frame_equal(obs, exp) @@ -42,65 +51,69 @@ def test_no_repeat_gases(): def test_gases(): - var_col = pd.Series(['foo|Emissions|CH4|bar', 'Emissions|N2O|baz|zing']) - exp = pd.Series(['CH4', 'N2O']) + var_col = pd.Series(["foo|Emissions|CH4|bar", "Emissions|N2O|baz|zing"]) + exp = pd.Series(["CH4", "N2O"]) obs = utils.gases(var_col) pdt.assert_series_equal(obs, exp) def test_units(): - var_col = pd.Series(['foo|Emissions|CH4|bar', 'Emissions|N2O|baz|zing']) - exp = pd.Series(['Mt CH4/yr', 'kt N2O/yr']) + var_col = pd.Series(["foo|Emissions|CH4|bar", "Emissions|N2O|baz|zing"]) + exp = pd.Series(["Mt CH4/yr", "kt N2O/yr"]) obs = utils.units(var_col) pdt.assert_series_equal(obs, exp) def test_formatter_to_std(): - df = pd.DataFrame({ - 'Variable': [ - 'CEDS+|9+ Sectors|Emissions|BC|foo|Unharmonized', - 'Emissions|BC|bar|baz', - ], - 'Region': ['a', 'b'], - '2010': [5.0, 2.0], - '2020': [-1.0, 3.0], - 'Unit': ['Mt foo/yr'] * 2, - 'Model': ['foo'] * 2, - 'Scenario': ['foo'] * 2, - }) + df = pd.DataFrame( + { + "Variable": [ + "CEDS+|9+ Sectors|Emissions|BC|foo|Unharmonized", + "Emissions|BC|bar|baz", + ], + "Region": ["a", "b"], + "2010": [5.0, 2.0], + "2020": [-1.0, 3.0], + "Unit": ["Mt foo/yr"] * 2, + "Model": ["foo"] * 2, + "Scenario": ["foo"] * 2, + } + ) fmt = utils.FormatTranslator(df.copy()) obs = fmt.to_std() - exp = pd.DataFrame({ - 'sector': [ - 'CEDS+|9+ Sectors|foo|Unharmonized', - 'bar|baz', - ], - 'region': ['a', 'b'], - '2010': [5000.0, 2000.0], - '2020': [-1000.0, 3000.0], - 'units': ['kt'] * 2, - 'gas': ['BC'] * 2, - }) - 
pdt.assert_frame_equal(obs.set_index(utils.df_idx), - exp.set_index(utils.df_idx)) + exp = pd.DataFrame( + { + "sector": [ + "CEDS+|9+ Sectors|foo|Unharmonized", + "bar|baz", + ], + "region": ["a", "b"], + "2010": [5000.0, 2000.0], + "2020": [-1000.0, 3000.0], + "units": ["kt"] * 2, + "gas": ["BC"] * 2, + } + ) + pdt.assert_frame_equal(obs.set_index(utils.df_idx), exp.set_index(utils.df_idx)) def test_formatter_to_template(): - df = pd.DataFrame({ - 'Variable': [ - 'CEDS+|9+ Sectors|Emissions|BC|foo|Unharmonized', - 'CEDS+|9+ Sectors|Emissions|BC|bar|Unharmonized', - ], - 'Region': ['a', 'b'], - '2010': [5.0, 2.0], - '2020': [-1.0, 3.0], - 'Unit': ['Mt BC/yr'] * 2, - 'Model': ['foo'] * 2, - 'Scenario': ['foo'] * 2, - }).set_index(utils.iamc_idx) - fmt = utils.FormatTranslator(df, prefix='CEDS+|9+ Sectors', - suffix='Unharmonized') + df = pd.DataFrame( + { + "Variable": [ + "CEDS+|9+ Sectors|Emissions|BC|foo|Unharmonized", + "CEDS+|9+ Sectors|Emissions|BC|bar|Unharmonized", + ], + "Region": ["a", "b"], + "2010": [5.0, 2.0], + "2020": [-1.0, 3.0], + "Unit": ["Mt BC/yr"] * 2, + "Model": ["foo"] * 2, + "Scenario": ["foo"] * 2, + } + ).set_index(utils.iamc_idx) + fmt = utils.FormatTranslator(df, prefix="CEDS+|9+ Sectors", suffix="Unharmonized") fmt.to_std() obs = fmt.to_template() exp = df.reindex(columns=obs.columns) @@ -108,39 +121,43 @@ def test_formatter_to_template(): def combine_rows_df(): - df = pd.DataFrame({ - 'sector': [ - 'sector1', - 'sector2', - 'sector1', - 'extra_b', - 'sector1', - ], - 'region': ['a', 'a', 'b', 'b', 'c'], - '2010': [1.0, 4.0, 2.0, 21, 42], - 'foo': [-1.0, -4.0, 2.0, 21, 42], - 'units': ['Mt'] * 5, - 'gas': ['BC'] * 5, - }).set_index(utils.df_idx) + df = pd.DataFrame( + { + "sector": [ + "sector1", + "sector2", + "sector1", + "extra_b", + "sector1", + ], + "region": ["a", "a", "b", "b", "c"], + "2010": [1.0, 4.0, 2.0, 21, 42], + "foo": [-1.0, -4.0, 2.0, 21, 42], + "units": ["Mt"] * 5, + "gas": ["BC"] * 5, + } + ).set_index(utils.df_idx) return df def test_combine_rows_default(): df = combine_rows_df() - exp = pd.DataFrame({ - 'sector': [ - 'sector1', - 'sector2', - 'extra_b', - 'sector1', - ], - 'region': ['a', 'a', 'a', 'c'], - '2010': [3.0, 4.0, 21, 42], - 'foo': [1.0, -4.0, 21, 42], - 'units': ['Mt'] * 4, - 'gas': ['BC'] * 4, - }).set_index(utils.df_idx) - obs = utils.combine_rows(df, 'region', 'a', ['b']) + exp = pd.DataFrame( + { + "sector": [ + "sector1", + "sector2", + "extra_b", + "sector1", + ], + "region": ["a", "a", "a", "c"], + "2010": [3.0, 4.0, 21, 42], + "foo": [1.0, -4.0, 21, 42], + "units": ["Mt"] * 4, + "gas": ["BC"] * 4, + } + ).set_index(utils.df_idx) + obs = utils.combine_rows(df, "region", "a", ["b"]) exp = exp.reindex(columns=obs.columns) clean = lambda df: df.sort_index().reset_index() @@ -149,22 +166,24 @@ def test_combine_rows_default(): def test_combine_rows_dropothers(): df = combine_rows_df() - exp = pd.DataFrame({ - 'sector': [ - 'sector1', - 'sector2', - 'extra_b', - 'sector1', - 'extra_b', - 'sector1', - ], - 'region': ['a', 'a', 'a', 'b', 'b', 'c'], - '2010': [3.0, 4.0, 21, 2.0, 21, 42], - 'foo': [1.0, -4.0, 21, 2.0, 21, 42], - 'units': ['Mt'] * 6, - 'gas': ['BC'] * 6, - }).set_index(utils.df_idx) - obs = utils.combine_rows(df, 'region', 'a', ['b'], dropothers=False) + exp = pd.DataFrame( + { + "sector": [ + "sector1", + "sector2", + "extra_b", + "sector1", + "extra_b", + "sector1", + ], + "region": ["a", "a", "a", "b", "b", "c"], + "2010": [3.0, 4.0, 21, 2.0, 21, 42], + "foo": [1.0, -4.0, 21, 2.0, 21, 42], + "units": ["Mt"] * 6, 
+ "gas": ["BC"] * 6, + } + ).set_index(utils.df_idx) + obs = utils.combine_rows(df, "region", "a", ["b"], dropothers=False) exp = exp.reindex(columns=obs.columns) clean = lambda df: df.sort_index().reset_index() @@ -173,19 +192,21 @@ def test_combine_rows_dropothers(): def test_combine_rows_sumall(): df = combine_rows_df() - exp = pd.DataFrame({ - 'sector': [ - 'sector1', - 'extra_b', - 'sector1', - ], - 'region': ['a', 'a', 'c'], - '2010': [2.0, 21, 42], - 'foo': [2.0, 21, 42], - 'units': ['Mt'] * 3, - 'gas': ['BC'] * 3, - }).set_index(utils.df_idx) - obs = utils.combine_rows(df, 'region', 'a', ['b'], sumall=False) + exp = pd.DataFrame( + { + "sector": [ + "sector1", + "extra_b", + "sector1", + ], + "region": ["a", "a", "c"], + "2010": [2.0, 21, 42], + "foo": [2.0, 21, 42], + "units": ["Mt"] * 3, + "gas": ["BC"] * 3, + } + ).set_index(utils.df_idx) + obs = utils.combine_rows(df, "region", "a", ["b"], sumall=False) exp = exp.reindex(columns=obs.columns) clean = lambda df: df.sort_index().reset_index() @@ -194,18 +215,20 @@ def test_combine_rows_sumall(): def test_isin(): df = combine_rows_df() - exp = pd.DataFrame({ - 'sector': [ - 'sector1', - 'sector2', - 'sector1', - ], - 'region': ['a', 'a', 'b'], - '2010': [1.0, 4.0, 2.0], - 'foo': [-1.0, -4.0, 2.0], - 'units': ['Mt'] * 3, - 'gas': ['BC'] * 3, - }).set_index(utils.df_idx) + exp = pd.DataFrame( + { + "sector": [ + "sector1", + "sector2", + "sector1", + ], + "region": ["a", "a", "b"], + "2010": [1.0, 4.0, 2.0], + "foo": [-1.0, -4.0, 2.0], + "units": ["Mt"] * 3, + "gas": ["BC"] * 3, + } + ).set_index(utils.df_idx) obs = exp.loc[ utils.isin(sector=["sector1", "sector2"], region=["a", "b", "non-existent"]) ]