Skip to content

Commit

Permalink
Merge pull request #216 from SPF-OST/integrate-collect-jsons
Browse files Browse the repository at this point in the history
Ship `collectJsonsIntoCsv.py` as part of `pytrnsys`.
  • Loading branch information
zuckerruebe authored Nov 4, 2024
2 parents a0bbc7c + 25181af commit c819307
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 11 deletions.
4 changes: 2 additions & 2 deletions dev-tools/devTools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/python3.9
#!/usr/bin/python3.12

# Run from top-level directory

Expand All @@ -13,7 +13,7 @@

_SCRIPTS_DIR = pl.Path(sc.get_path("scripts"))

_SOURCE_DIRS = ["pytrnsys", "tests", "dev-tools"]
_SOURCE_DIRS = ["pytrnsys", "scripts", "tests", "dev-tools"]

_EXCLUDED_PATH_PATTERNS = [
"^tests/(.+/)?data/.*",
Expand Down
4 changes: 3 additions & 1 deletion requirements/test-3rd-party.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,6 @@ mypy
pylint
black

diff-pdf-visually
diff-pdf-visually

pandas-stubs
6 changes: 5 additions & 1 deletion requirements/test-3rd-party.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SHA1:001a7da4a0aebd29d560675676be789e6645088e
# SHA1:d28b606e5b85b97bcecaa3467bf174f6d9784aba
#
# This file is autogenerated by pip-compile-multi
# To update, run:
Expand Down Expand Up @@ -35,6 +35,8 @@ mypy-extensions==1.0.0
# via
# black
# mypy
pandas-stubs==2.2.3.241009
# via -r requirements\test-3rd-party.in
pathspec==0.12.1
# via black
platformdirs==4.2.0
Expand All @@ -59,6 +61,8 @@ pytest-metadata==3.0.0
# via pytest-html
tomlkit==0.12.3
# via pylint
types-pytz==2024.2.0.20241003
# via pandas-stubs
typing-extensions==4.9.0
# via mypy
wheel==0.42.0
Expand Down
100 changes: 100 additions & 0 deletions scripts/collectJsonsIntoCsv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import json
import pathlib as pl
import sys
import typing as tp

import pandas as pd


def main():
    """Collect all ``*-results.json`` files below a directory into one CSV file.

    Expects exactly two command-line arguments: the directory to search
    recursively for result files, and the path of the CSV file to write.
    Exits with a non-zero status and prints a usage line otherwise.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 2:
        scriptName = pl.Path(sys.argv[0]).name
        print(f"Usage: {scriptName} <path-to-directory-containing-json-files> <path-to-csv-file>")
        sys.exit(-1)

    dirPath = pl.Path(arguments[0])
    csvFilePath = pl.Path(arguments[1])

    # Column name -> list of values, one entry per row collected so far.
    allData = {}
    for rowNumber, jsonFilePath in enumerate(dirPath.glob("**/*-results.json"), start=1):
        flattenedRow = _getFlattenedData(dirPath, jsonFilePath)
        _addData(flattenedRow, rowNumber, allData)

    pd.DataFrame(allData).to_csv(csvFilePath, index=False)


def _addData(newRow, rowNumber, allRows):
theirColumnNames = set(allRows.keys())
myColumnNames = set(newRow.keys())

newColumnNames = myColumnNames - theirColumnNames
missingColumnNames = theirColumnNames - myColumnNames

emptyColumn = [None for _ in range(rowNumber - 1)]
for newColumnName in newColumnNames:
allRows[newColumnName] = emptyColumn.copy()

for missingColumnName in missingColumnNames:
newRow[missingColumnName] = None

for columnName, values in allRows.items():
newValue = newRow[columnName]
values.append(newValue)


def _getFlattenedData(dirPath, jsonFilePath):
    """Load *jsonFilePath* and flatten its top-level entries into one flat dict (a CSV row).

    Supported value types:
      * scalars (str, int, float) are copied as-is,
      * scalar lists of length 12 (monthly values) become ``<key>_1`` .. ``<key>_12``,
      * mean-with-bounds dicts become ``<key>_toLower`` / ``<key>_mean`` / ``<key>_toUpper``.

    Unsupported entries are reported on stdout and skipped.  The row always
    contains a ``FileName`` column holding the path relative to *dirPath*.
    """
    relativeJsonFilePath = jsonFilePath.relative_to(dirPath)

    # JSON is defined to be UTF-8: don't rely on the platform's locale
    # encoding (e.g. cp1252 on Windows), which would corrupt non-ASCII data.
    with jsonFilePath.open(encoding="utf-8") as jsonFile:
        data: tp.Mapping[str, tp.Any] = json.load(jsonFile)

    flattenedData: tp.Dict[str, tp.Any] = {"FileName": relativeJsonFilePath}
    for key, value in data.items():
        if _isScalar(value):
            flattenedData[key] = value
        elif isinstance(value, list):
            # Cheap length check first; only scan the elements for 12-element lists.
            if len(value) == 12 and all(_isScalar(e) for e in value):
                for month, subValue in enumerate(value, start=1):
                    subKey = f"{key}_{month}"
                    flattenedData[subKey] = subValue
            else:
                print(
                    f'Ignoring file entry "{key}" in file {relativeJsonFilePath}: '
                    f"only scalar lists of length 12 are supported."
                )
        elif _isMeanWithBounds(value):
            flattenedData[f"{key}_toLower"] = value["toLowerBound"]
            flattenedData[f"{key}_mean"] = value["mean"]
            flattenedData[f"{key}_toUpper"] = value["toUpperBound"]
        else:
            print(
                f'Ignoring file entry "{key}" of unsupported type `{type(value).__name__}` '
                f"in file {relativeJsonFilePath}."
            )

    return flattenedData


def _isScalar(value) -> bool:
return isinstance(value, (str, int, float))


def _isMeanWithBounds(value) -> bool:
if not isinstance(value, dict):
return False

if set(value.keys()) != {"mean", "toLowerBound", "toUpperBound"}:
return False

if not all(isinstance(v, (float, int)) for v in value.values()):
return False

return True


# Run the collector when executed as a script; importing the module has no side effects.
if __name__ == "__main__":
    main()
8 changes: 1 addition & 7 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,7 @@ def _getDataFilePairs():
"License :: OSI Approved :: MIT License",
"Operating System :: Microsoft :: Windows",
],
entry_points="""
[console_scripts]
pytrnsys-dll = pytrnsys.utils.copyDllFiles:dllCopy
pytrnsys-run = pytrnsys.rsim.runParallelTrnsys:run
pytrnsys-process = pytrnsys.psim.processParallelTrnsys:process
pytrnsys-load = pytrnsys.utils.loadExamplesAndDdcks:load
""",
scripts=["scripts/collectJsonsIntoCsv.py"],
setup_requires=["setuptools-git-versioning"],
python_requires=">=3.9",
)

0 comments on commit c819307

Please sign in to comment.