Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

fix report serialization #215

Merged
merged 2 commits into from
Dec 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ PyYAML==6.0.1
python-dateutil==2.8.2
semver==3.0.0.dev3
toml==0.10.2
typing_extensions==4.8.0
4 changes: 2 additions & 2 deletions src/odm_validation/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Set
from typing_extensions import TypedDict
# from pprint import pprint

import part_tables as pt
Expand Down Expand Up @@ -38,8 +39,7 @@ class ErrorCtx:
verbosity: int = 2


@dataclass(frozen=True)
class TableInfo:
class TableInfo(TypedDict):
columns: int
rows: int

Expand Down
4 changes: 2 additions & 2 deletions src/odm_validation/summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,8 +214,8 @@ def _gen_overview(report: ValidationReport,
table_overviews = {}
for table_id, info in report.table_info.items():
table_overviews[table_id] = {
'columns': info.columns,
'rows': info.rows,
'columns': info['columns'],
'rows': info['rows'],
}

overview = {
Expand Down
45 changes: 17 additions & 28 deletions tools/reportutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@
from enum import Enum
from os.path import join
from typing import IO, Optional
# from pprint import pprint
from pprint import pprint

import jsons
import yaml

root_dir = join(os.path.dirname(os.path.realpath(__file__)), '..')
Expand Down Expand Up @@ -54,19 +53,6 @@ def detect_report_format_from_content(data: str) -> Optional[ReportFormat]:
return ReportFormat.YAML


def serialize(obj) -> dict:
# serialization (with 'jsons') is needed to:
# - avoid writing serialization methods for our objects
# - avoid 'tags' when writing yaml
return jsons.dump(obj)


def deserialize(data, cls) -> object:
# deserialization is needed to:
# - be able to safely load yaml (without tags specifying objects)
return jsons.load(data, cls)


def write_txt_report(output: IO, report):
# XXX: Make sure to start txt format output with '#' to be able to infer
# the format later. '#' is chosen because it's how a text/markdown document
Expand All @@ -88,29 +74,32 @@ def get_msg(e) -> str:


def write_json_report(output: IO, report: ValidationReport):
data = serialize(report)
json.dump(data, output)
json.dump(report, output)


def write_yaml_report(output: IO, report: ValidationReport):
# XXX: serialize before dumping to avoid yaml-tags
data = serialize(report)
yaml.dump(data, output)
# XXX: dump dict to avoid yaml-tags from class types
yaml.dump(report.__dict__, output)


def read_report_from_file(file) -> ValidationReport:
# - data is normalized as text/json before being deserialized into obj
# - must use yaml.safe_load to avoid running arbitrary python code on
# the user machine
data = file.read()
fmt = detect_report_format_from_content(data) # only peeks
if not fmt:
quit('unable to detect report format')
if fmt == ReportFormat.TXT:
quit(f'report format {fmt} can\'t be summarized')
raw_data: str = file.read()
fmt = detect_report_format_from_content(raw_data) # only peeks
report_obj = None
if fmt == ReportFormat.JSON:
report_obj = json.loads(raw_data)
elif fmt == ReportFormat.YAML:
data = yaml.safe_load(data)
report = jsons.load(data, ValidationReport)
report_obj = yaml.safe_load(raw_data)
elif fmt == ReportFormat.TXT:
quit(f'report format {fmt} can\'t be summarized')
else:
quit('unable to detect report format')
assert type(report_obj) is not str, \
"report data should be dict/obj, but was loaded as string"
report = ValidationReport(**report_obj)
return report


Expand Down
1 change: 0 additions & 1 deletion tools/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
jsons==1.6.3
typer==0.7.0
xlsx2csv==0.7.8
Loading