Add black (#50)

* Add black to the test dependency
* Run black
* Ignore docs directory
* Fix stickler errors
* Fix stickler errors
* Rerun black
* Move pd_read/pd_write to utils to stop a circular dependency
* Needed to import pd_read from utils
iiasa · Aug 4, 2022 · ad6301e · ad6301e
1 parent c009308
commit ad6301e
Show file tree

Hide file tree

Showing 20 changed files with 1,036 additions and 916 deletions.
diff --git a/.gitignore b/.gitignore
@@ -6,4 +6,6 @@ build
 dist
 *.egg-info
 .cache
-.*
+.*
+
+venv
diff --git a/.stickler.yml b/.stickler.yml
@@ -3,10 +3,13 @@ linters:
     python: 3
     max-line-length: 88
     fixer: false
-    ignore: I002, F403, E402, E731, E203
+    ignore: I002, F403, E402, E731, E203, W503
     # stickler doesn't support 'exclude' for flake8 properly, so we disable it
     # below with files.ignore:
     # https://github.com/markstory/lint-review/issues/184
+  black:
+    config: ./pyproject.toml
+    fixer: false
 files:
   ignore:
     - doc/**/*.py
diff --git a/Makefile b/Makefile
@@ -68,6 +68,10 @@ publish-on-pypi: $(VENV_DIR)  ## publish release on PyPI
 		echo run git status --porcelain to find dirty files >&2; \
 	fi;
 
+.PHONY: black
+black: $(VENV_DIR)
+	black .
+
 .PHONY: ci_dl
 ci_dl: $(VENV_DIR)  ## run all the tests
 	cd tests/ci; python download_data.py

diff --git a/aneris/__init__.py b/aneris/__init__.py
@@ -1,8 +1,8 @@
-
 from aneris._io import *
 from aneris.harmonize import *
 from aneris.utils import *
 
 from ._version import get_versions
-__version__ = get_versions()['version']
+
+__version__ = get_versions()["version"]
 del get_versions
diff --git a/aneris/_io.py b/aneris/_io.py
@@ -8,7 +8,7 @@
 
 import pandas as pd
 
-from aneris.utils import isstr, isnum, iamc_idx
+from aneris.utils import isstr, isnum, iamc_idx, pd_read
 
 RC_DEFAULTS = """
 config:
@@ -26,7 +26,7 @@
 
 
 def _read_data(indfs):
-    datakeys = sorted([x for x in indfs if x.startswith('data')])
+    datakeys = sorted([x for x in indfs if x.startswith("data")])
     df = pd.concat([indfs[k] for k in datakeys])
     # don't know why reading from excel changes dtype and column types
     # but I have to reset them manually
@@ -50,46 +50,6 @@ def _recursive_update(d, u):
     return d
 
 
-def pd_read(f, str_cols=False, *args, **kwargs):
-    """Try to read a file with pandas, supports CSV and XLSX
-
-    Parameters
-    ----------
-    f : string
-        the file to read in
-    str_cols : bool, optional
-        turn all columns into strings (numerical column names are sometimes 
-        read in as numerical dtypes)
-    args, kwargs : sent directly to the Pandas read function
-
-    Returns
-    -------
-    df : pd.DataFrame
-    """
-    if f.endswith('csv'):
-        df = pd.read_csv(f, *args, **kwargs)
-    else:
-        df = pd.read_excel(f, *args, **kwargs)
-
-    if str_cols:
-        df.columns = [str(x) for x in df.columns]
-
-    return df
-
-
-def pd_write(df, f, *args, **kwargs):
-    """Try to write a file with pandas, supports CSV and XLSX"""
-    # guess whether to use index, unless we're told otherwise
-    index = kwargs.pop('index', isinstance(df.index, pd.MultiIndex))
-
-    if f.endswith('csv'):
-        df.to_csv(f, index=index, *args, **kwargs)
-    else:
-        writer = pd.ExcelWriter(f)
-        df.to_excel(writer, index=index, *args, **kwargs)
-        writer.save()
-
-
 def read_excel(f):
     """Read an excel-based input file for harmonization.
 
@@ -111,20 +71,23 @@ def read_excel(f):
     model = _read_data(indfs)
 
     # make an empty df which will be caught later
-    overrides = indfs['harmonization'] if 'harmonization' in indfs \
-        else pd.DataFrame([], columns=iamc_idx + ['Unit'])
+    overrides = (
+        indfs["harmonization"]
+        if "harmonization" in indfs
+        else pd.DataFrame([], columns=iamc_idx + ["Unit"])
+    )
 
     # get run control
     config = {}
-    if'Configuration' in overrides:
-        config = overrides[['Configuration', 'Value']].dropna()
-        config = config.set_index('Configuration').to_dict()['Value']
-        overrides = overrides.drop(['Configuration', 'Value'], axis=1)
+    if "Configuration" in overrides:
+        config = overrides[["Configuration", "Value"]].dropna()
+        config = config.set_index("Configuration").to_dict()["Value"]
+        overrides = overrides.drop(["Configuration", "Value"], axis=1)
 
     # a single row of nans implies only configs provided,
     # if so, only return the empty df
     if len(overrides) == 1 and overrides.isnull().values.all():
-        overrides = pd.DataFrame([], columns=iamc_idx + ['Unit'])
+        overrides = pd.DataFrame([], columns=iamc_idx + ["Unit"])
 
     return model, overrides, config
 
@@ -140,10 +103,10 @@ def __init__(self, rc=None, defaults=None):
         Parameters
         ----------
         rc : string, file, dictionary, optional
-            a path to a YAML file, a file handle for a YAML file, or a 
+            a path to a YAML file, a file handle for a YAML file, or a
             dictionary describing run control configuration
         defaults : string, file, dictionary, optional
-            a path to a YAML file, a file handle for a YAML file, or a 
+            a path to a YAML file, a file handle for a YAML file, or a
             dictionary describing **default** run control configuration
         """
         rc = rc or {}
@@ -171,22 +134,23 @@ def _get_path(self, key, fyaml, fname):
 
         _fname = os.path.join(os.path.dirname(fyaml), fname)
         if not os.path.exists(_fname):
-            msg = "YAML key '{}' in {}: {} is not a valid relative " + \
-                "or absolute path"
+            msg = (
+                "YAML key '{}' in {}: {} is not a valid relative " + "or absolute path"
+            )
             raise IOError(msg.format(key, fyaml, fname))
         return _fname
 
     def _fill_relative_paths(self, fyaml, d):
         file_keys = [
-            'exogenous',
+            "exogenous",
         ]
         for k in file_keys:
             if k in d:
                 d[k] = [self._get_path(k, fyaml, fname) for fname in d[k]]
 
     def _load_yaml(self, obj):
         check_rel_paths = False
-        if hasattr(obj, 'read'):  # it's a file
+        if hasattr(obj, "read"):  # it's a file
             obj = obj.read()
         if isstr(obj) and os.path.exists(obj):
             check_rel_paths = True

diff --git a/aneris/cli.py b/aneris/cli.py
@@ -18,46 +18,54 @@ def read_args():
     aneris input.xlsx --history history.csv --regions regions.csv
     """
     parser = argparse.ArgumentParser(
-        description=descr,
-        formatter_class=argparse.RawDescriptionHelpFormatter
+        description=descr, formatter_class=argparse.RawDescriptionHelpFormatter
     )
-    input_file = 'Input data file.'
-    parser.add_argument('input_file', help=input_file)
-    history = 'Historical emissions in the base year.'
-    parser.add_argument('--history', help=history,
-                        default=hist_path('history.csv'))
-    regions = 'Mapping of country iso-codes to native regions.'
-    parser.add_argument('--regions', help=regions,
-                        default=region_path('message.csv'))
-    rc = 'Runcontrol YAML file (see http://mattgidden.com/aneris/config.html for examples).'
-    parser.add_argument('--rc', help=rc, default=None)
-    output_path = 'Path to use for output file names.'
-    parser.add_argument('--output_path', help=output_path, default='.')
-    output_prefix = 'Prefix to use for output file names.'
-    parser.add_argument('--output_prefix', help=output_prefix, default=None)
+    input_file = "Input data file."
+    parser.add_argument("input_file", help=input_file)
+    history = "Historical emissions in the base year."
+    parser.add_argument("--history", help=history, default=hist_path("history.csv"))
+    regions = "Mapping of country iso-codes to native regions."
+    parser.add_argument("--regions", help=regions, default=region_path("message.csv"))
+    rc = (
+        "Runcontrol YAML file "
+        "(see http://mattgidden.com/aneris/config.html for examples)."
+    )
+    parser.add_argument("--rc", help=rc, default=None)
+    output_path = "Path to use for output file names."
+    parser.add_argument("--output_path", help=output_path, default=".")
+    output_prefix = "Prefix to use for output file names."
+    parser.add_argument("--output_prefix", help=output_prefix, default=None)
 
     args = parser.parse_args()
     return args
 
 
-def harmonize(inf, history, regions, rc, output_path, output_prefix,
-              return_result=False, write_output=True):
+def harmonize(
+    inf,
+    history,
+    regions,
+    rc,
+    output_path,
+    output_prefix,
+    return_result=False,
+    write_output=True,
+):
     # check files exist
     check = [inf, history, regions, rc]
     for f in check:
         if f and not os.path.exists(f):
-            raise IOError('{} does not exist on the filesystem.'.format(f))
+            raise IOError("{} does not exist on the filesystem.".format(f))
 
     # read input
     hist = aneris.pd_read(history, str_cols=True)
     if hist.empty:
-        raise ValueError('History file is empty')
+        raise ValueError("History file is empty")
     regions = aneris.pd_read(regions, str_cols=True)
     if regions.empty:
-        raise ValueError('Region definition is empty')
+        raise ValueError("Region definition is empty")
     model, overrides, config = aneris.read_excel(inf)
     rc = aneris.RunControl(rc=rc)
-    rc.recursive_update('config', config)
+    rc.recursive_update("config", config)
 
     # do core harmonization
     driver = aneris.HarmonizationDriver(rc, hist, model, overrides, regions)
@@ -67,37 +75,40 @@ def harmonize(inf, history, regions, rc, output_path, output_prefix,
 
     if write_output:
         # write to excel
-        prefix = output_prefix or inf.split('.')[0]
-        fname = os.path.join(output_path, '{}_harmonized.xlsx'.format(prefix))
-        logger().info('Writing result to: {}'.format(fname))
-        aneris.pd_write(model, fname, sheet_name='data')
+        prefix = output_prefix or inf.split(".")[0]
+        fname = os.path.join(output_path, "{}_harmonized.xlsx".format(prefix))
+        logger().info("Writing result to: {}".format(fname))
+        aneris.pd_write(model, fname, sheet_name="data")
 
         # save data about harmonization
-        fname = os.path.join(output_path, '{}_metadata.xlsx'.format(prefix))
-        logger().info('Writing metadata to: {}'.format(fname))
+        fname = os.path.join(output_path, "{}_metadata.xlsx".format(prefix))
+        logger().info("Writing metadata to: {}".format(fname))
         aneris.pd_write(metadata, fname)
 
         # save data about harmonization
         if not diagnostics.empty:
-            fname = os.path.join(output_path,
-                                 '{}_diagnostics.xlsx'.format(prefix))
-            logger().info('Writing diagnostics to: {}'.format(fname))
+            fname = os.path.join(output_path, "{}_diagnostics.xlsx".format(prefix))
+            logger().info("Writing diagnostics to: {}".format(fname))
             aneris.pd_write(diagnostics, fname)
 
     if return_result:
         return model, metadata, diagnostics
 
 
-
-
 def main():
     # parse cli
     args = read_args()
 
     # run program
-    harmonize(args.input_file, args.history, args.regions,
-              args.rc, args.output_path, args.output_prefix)
+    harmonize(
+        args.input_file,
+        args.history,
+        args.regions,
+        args.rc,
+        args.output_path,
+        args.output_prefix,
+    )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()