Skip to content

Commit

Permalink
fix report serialization (#215)
Browse files Browse the repository at this point in the history
The TableInfo class, which appears in the validation report, had to be
made into a TypedDict to support normal JSON serialization in the tools
that use it (namely the web validation tool).
  • Loading branch information
zargot authored Dec 12, 2023
2 parents 72a685f + d5a3abc commit 9863f9a
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 33 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ PyYAML==6.0.1
python-dateutil==2.8.2
semver==3.0.0.dev3
toml==0.10.2
typing_extensions==4.8.0
4 changes: 2 additions & 2 deletions src/odm_validation/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Set
from typing_extensions import TypedDict
# from pprint import pprint

import part_tables as pt
Expand Down Expand Up @@ -38,8 +39,7 @@ class ErrorCtx:
verbosity: int = 2


@dataclass(frozen=True)
class TableInfo:
class TableInfo(TypedDict):
columns: int
rows: int

Expand Down
4 changes: 2 additions & 2 deletions src/odm_validation/summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,8 @@ def _gen_overview(report: ValidationReport,
table_overviews = {}
for table_id, info in report.table_info.items():
table_overviews[table_id] = {
'columns': info.columns,
'rows': info.rows,
'columns': info['columns'],
'rows': info['rows'],
}

overview = {
Expand Down
45 changes: 17 additions & 28 deletions tools/reportutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
from enum import Enum
from os.path import join
from typing import IO, Optional
# from pprint import pprint
from pprint import pprint

import jsons
import yaml

root_dir = join(os.path.dirname(os.path.realpath(__file__)), '..')
Expand Down Expand Up @@ -54,19 +53,6 @@ def detect_report_format_from_content(data: str) -> Optional[ReportFormat]:
return ReportFormat.YAML


def serialize(obj) -> dict:
# serialization (with 'jsons') is needed to:
# - avoid writing serialization methods for our objects
# - avoid 'tags' when writing yaml
return jsons.dump(obj)


def deserialize(data, cls) -> object:
# deserialization is needed to:
# - be able to safely load yaml (without tags specifying objects)
return jsons.load(data, cls)


def write_txt_report(output: IO, report):
# XXX: Make sure to start txt format output with '#' to be able to infer
# the format later. '#' is chosen because it's how a text/markdown document
Expand All @@ -88,29 +74,32 @@ def get_msg(e) -> str:


def write_json_report(output: IO, report: ValidationReport):
data = serialize(report)
json.dump(data, output)
json.dump(report, output)


def write_yaml_report(output: IO, report: ValidationReport):
# XXX: serialize before dumping to avoid yaml-tags
data = serialize(report)
yaml.dump(data, output)
# XXX: dump dict to avoid yaml-tags from class types
yaml.dump(report.__dict__, output)


def read_report_from_file(file) -> ValidationReport:
# - data is normalized as text/json before being deserialized into obj
# - must use yaml.safe_load to avoid running arbitrary python code on
# the user machine
data = file.read()
fmt = detect_report_format_from_content(data) # only peeks
if not fmt:
quit('unable to detect report format')
if fmt == ReportFormat.TXT:
quit(f'report format {fmt} can\'t be summarized')
raw_data: str = file.read()
fmt = detect_report_format_from_content(raw_data) # only peeks
report_obj = None
if fmt == ReportFormat.JSON:
report_obj = json.loads(raw_data)
elif fmt == ReportFormat.YAML:
data = yaml.safe_load(data)
report = jsons.load(data, ValidationReport)
report_obj = yaml.safe_load(raw_data)
elif fmt == ReportFormat.TXT:
quit(f'report format {fmt} can\'t be summarized')
else:
quit('unable to detect report format')
assert type(report_obj) is not str, \
"report data should be dict/obj, but was loaded as string"
report = ValidationReport(**report_obj)
return report


Expand Down
1 change: 0 additions & 1 deletion tools/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
jsons==1.6.3
typer==0.7.0
xlsx2csv==0.7.8

0 comments on commit 9863f9a

Please sign in to comment.