Big-Life-Lab · zargot · Dec 12, 2023 · Dec 12, 2023 · Dec 7, 2023
diff --git a/requirements.txt b/requirements.txt
@@ -3,3 +3,4 @@ PyYAML==6.0.1
 python-dateutil==2.8.2
 semver==3.0.0.dev3
 toml==0.10.2
+typing_extensions==4.8.0
diff --git a/src/odm_validation/reports.py b/src/odm_validation/reports.py
@@ -2,6 +2,7 @@
 from dataclasses import dataclass, field
 from enum import Enum
 from typing import Any, Dict, List, Optional, Set
+from typing_extensions import TypedDict
 # from pprint import pprint
 
 import part_tables as pt
@@ -38,8 +39,7 @@ class ErrorCtx:
     verbosity: int = 2
 
 
-@dataclass(frozen=True)
-class TableInfo:
+class TableInfo(TypedDict):
     columns: int
     rows: int
 

diff --git a/src/odm_validation/summarization.py b/src/odm_validation/summarization.py
@@ -214,8 +214,8 @@ def _gen_overview(report: ValidationReport,
     table_overviews = {}
     for table_id, info in report.table_info.items():
         table_overviews[table_id] = {
-            'columns': info.columns,
-            'rows': info.rows,
+            'columns': info['columns'],
+            'rows': info['rows'],
         }
 
     overview = {

diff --git a/tools/reportutils.py b/tools/reportutils.py
@@ -4,9 +4,8 @@
 from enum import Enum
 from os.path import join
 from typing import IO, Optional
-# from pprint import pprint
+from pprint import pprint
 
-import jsons
 import yaml
 
 root_dir = join(os.path.dirname(os.path.realpath(__file__)), '..')
@@ -54,19 +53,6 @@ def detect_report_format_from_content(data: str) -> Optional[ReportFormat]:
         return ReportFormat.YAML
 
 
-def serialize(obj) -> dict:
-    # serialization (with 'jsons') is needed to:
-    # - avoid writing serialization methods for our objects
-    # - avoid 'tags' when writing yaml
-    return jsons.dump(obj)
-
-
-def deserialize(data, cls) -> object:
-    # deserialization is needed to:
-    # - be able to safely load yaml (without tags specifying objects)
-    return jsons.load(data, cls)
-
-
 def write_txt_report(output: IO, report):
     # XXX: Make sure to start txt format output with '#' to be able to infer
     # the format later. '#' is chosen because it's how a text/markdown document
@@ -88,29 +74,32 @@ def get_msg(e) -> str:
 
 
 def write_json_report(output: IO, report: ValidationReport):
-    data = serialize(report)
-    json.dump(data, output)
+    json.dump(report, output)
 
 
 def write_yaml_report(output: IO, report: ValidationReport):
-    # XXX: serialize before dumping to avoid yaml-tags
-    data = serialize(report)
-    yaml.dump(data, output)
+    # XXX: dump dict to avoid yaml-tags from class types
+    yaml.dump(report.__dict__, output)
 
 
 def read_report_from_file(file) -> ValidationReport:
     # - data is normalized as text/json before being deserialized into obj
     # - must use yaml.safe_load to avoid running arbitrary python code on
     #   the user machine
-    data = file.read()
-    fmt = detect_report_format_from_content(data)  # only peeks
-    if not fmt:
-        quit('unable to detect report format')
-    if fmt == ReportFormat.TXT:
-        quit(f'report format {fmt} can\'t be summarized')
+    raw_data: str = file.read()
+    fmt = detect_report_format_from_content(raw_data)  # only peeks
+    report_obj = None
+    if fmt == ReportFormat.JSON:
+        report_obj = json.loads(raw_data)
     elif fmt == ReportFormat.YAML:
-        data = yaml.safe_load(data)
-    report = jsons.load(data, ValidationReport)
+        report_obj = yaml.safe_load(raw_data)
+    elif fmt == ReportFormat.TXT:
+        quit(f'report format {fmt} can\'t be summarized')
+    else:
+        quit('unable to detect report format')
+    assert type(report_obj) is not str, \
+           "report data should be dict/obj, but was loaded as string"
+    report = ValidationReport(**report_obj)
     return report
 
 

diff --git a/tools/requirements.txt b/tools/requirements.txt
@@ -1,3 +1,2 @@
-jsons==1.6.3
 typer==0.7.0
 xlsx2csv==0.7.8