Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cli: add odm-validate cmd #228

Merged
merged 11 commits into from
Nov 27, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-code.yml
Original file line number Diff line number Diff line change
@@ -13,6 +13,6 @@ jobs:
python-version: '3.10'
cache: 'pip' # caching pip dependencies
- run: pip install -r requirements.txt
- run: pip install -r tools/requirements.txt
- run: pip install -r tests/requirements.txt
- run: pip install .
- run: python -m unittest discover tests
2 changes: 1 addition & 1 deletion .github/workflows/test-docs.yml
Original file line number Diff line number Diff line change
@@ -27,7 +27,7 @@ jobs:
pip install -r requirements.txt
pip install -r docs/requirements.txt
- name: Render Quarto docs
run: python tools/render_quarto_docs.py
run: python src/odm_validation/tools/render_quarto_docs.py
- name: Build and Commit
uses: sphinx-notes/pages@2.1 # requires checkout with `fetch-depth: 0`
with:
2 changes: 1 addition & 1 deletion docs/specs/summarize-tool.qmd
Original file line number Diff line number Diff line change
@@ -46,7 +46,7 @@ summarized. (The report content is simplified for readability.)
```{shell}
> DIR=./example-directory
> REPORT=$DIR/report.yml
> python tools/validate.py data.xlsx --out=$REPORT
> odm-validate data.xlsx --out=$REPORT
> cat $REPORT

errors:
6 changes: 3 additions & 3 deletions docs/specs/validate-tool.qmd
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@ validation result to another program.

## Usage

` python tools/validate.py [OPTIONS] DATA_FILE...`
` odm-validate [OPTIONS] DATA_FILE...`

### Arguments

@@ -46,9 +46,9 @@ validation result to another program.
- Validate two CSV files with the latest ODM version, and print human readable
errors to the console:

`python tools/validate.py measures.csv samples.csv`
`odm-validate measures.csv samples.csv`

- Validate an Excel file with version 1.1.0 of the ODM, and write the result to
a YAML file:

`python tools/validate.py lab-data.xlsx --version=1.1.0 --out=./report.yaml`
`odm-validate lab-data.xlsx --version=1.1.0 --out=./report.yaml`
12 changes: 9 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -18,15 +18,21 @@ dynamic = ["dependencies"]
"Homepage" = "https://github.com/Big-Life-Lab/PHES-ODM-Validation"
"Bug Tracker" = "https://github.com/Big-Life-Lab/PHES-ODM-Validation/issues"

[project.scripts]
odm-validate = "odm_validation.tools.validate:main"

[build-system]
requires = ["hatchling", "hatch-requirements-txt"]
build-backend = "hatchling.build"

# https://stackoverflow.com/a/75397171
# specify package dir
[tool.hatch.build.targets.wheel]
packages = ["src/odm_validation"]

# include validation schemas
[tool.hatch.build.targets.wheel.force-include]
"assets/validation-schemas" = "odm_validation/assets/validation-schemas"

# This ensures that the dependencies are installed when the package is
# installed
# install dependencies automatically
[tool.hatch.metadata.hooks.requirements_txt]
files = ["requirements.txt"]
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -3,4 +3,6 @@ PyYAML==6.0.1
python-dateutil==2.8.2
semver==3.0.0.dev3
toml==0.10.2
typer==0.12.5
typing_extensions==4.8.0
xlsx2csv==0.8.3
Original file line number Diff line number Diff line change
@@ -9,7 +9,8 @@
from pathlib import Path
from semver import Version

root_dir = join(os.path.dirname(os.path.realpath(__file__)), '..')
tool_dir = Path(__file__).parent
root_dir = tool_dir.parent.parent.parent
sys.path.append(join(root_dir, 'src'))

import odm_validation.utils as utils # noqa:E402
@@ -57,8 +58,7 @@ def generate_schemas_from_odm_tables(odm_dir, schema_dir):


def main():
dir = os.path.dirname(os.path.realpath(__file__))
asset_dir = join(dir, '../assets')
asset_dir = join(root_dir, 'assets')
schema_dir = normpath(join(asset_dir, 'validation-schemas'))
dataset_dir = normpath(join(asset_dir, 'dictionary'))

Original file line number Diff line number Diff line change
@@ -8,7 +8,7 @@
from pathlib import Path

tool_dir = Path(__file__).parent
root_dir = tool_dir.parent
root_dir = tool_dir.parent.parent.parent
glob_expr = 'docs/**/*.qmd'

print(f'rendering {glob_expr}')
Original file line number Diff line number Diff line change
@@ -4,7 +4,6 @@
from enum import Enum
from os.path import join
from typing import IO, Optional
from pprint import pprint

import yaml

6 changes: 2 additions & 4 deletions tools/summarize.py → src/odm_validation/tools/summarize.py
Original file line number Diff line number Diff line change
@@ -5,17 +5,15 @@
import sys
from enum import Enum
from os.path import join
from typing import List, Set
from typing import List
# from pprint import pprint

import typer

root_dir = join(os.path.dirname(os.path.realpath(__file__)), '..')
sys.path.append(join(root_dir, 'src'))

from odm_validation import reports # noqa:E402

from reports import ErrorKind, ValidationReport # noqa:E402
from odm_validation.reports import ErrorKind # noqa:E402

from odm_validation.summarization import ( # noqa:E402
SummaryEntry,
58 changes: 40 additions & 18 deletions tools/validate.py → src/odm_validation/tools/validate.py
Original file line number Diff line number Diff line change
@@ -5,7 +5,7 @@
import tempfile
from enum import Enum
from math import ceil
from os.path import basename, join, normpath, splitext
from os.path import basename, join, splitext
from pathlib import Path
from typing import Dict, IO, List, Optional
# from pprint import pprint
@@ -18,7 +18,6 @@

import odm_validation.part_tables as pt # noqa:E402
import odm_validation.utils as utils # noqa:E402
import odm_validation.validation as validation # noqa:E402
from odm_validation.validation import _validate_data_ext, DataKind # noqa:E402

from odm_validation.reports import ( # noqa:E402
@@ -27,7 +26,7 @@
join_reports
)

from reportutils import ( # noqa:E402
from odm_validation.tools.reportutils import ( # noqa:E402
ReportFormat,
detect_report_format_from_path,
get_ext,
@@ -78,7 +77,7 @@ def import_xlsx(src_file, dst_dir) -> List[str]:
def get_sheet_table_id(schema, sheet_name) -> Optional[str]:
table_ids = list(schema['schema'].keys())
for table_id in table_ids:
if sheet_name.endswith(' ' + table_id):
if sheet_name.endswith(table_id):
return table_id


@@ -103,10 +102,26 @@ def detect_data_format(path: str) -> Optional[DataFormat]:
return


def get_pkg_dir() -> str:
    '''returns the package dir (aka src dir), which is the parent of the
    directory containing this file'''
    this_file = Path(__file__)
    containing_dir = this_file.parent
    return str(containing_dir.parent)


def get_asset_dir() -> str:
    '''returns the `assets` dir inside the package dir when it exists,
    otherwise the `assets` dir two levels above the package dir (the repo
    asset dir, used when not installed as a package)'''
    pkg_root = get_pkg_dir()
    packaged_assets = join(pkg_root, 'assets')
    if not os.path.exists(packaged_assets):
        # no assets next to the package code: fall back to the repo layout
        repo_root = join(pkg_root, '..', '..')
        return join(repo_root, 'assets')
    return packaged_assets


def get_schema_path(version: str) -> str:
script_dir = os.path.dirname(os.path.realpath(__file__))
asset_dir = join(script_dir, '../assets')
schema_dir = Path(normpath(join(asset_dir, 'validation-schemas')))
asset_dir = get_asset_dir()
schema_dir = join(asset_dir, 'validation-schemas')
schema_filename = f'schema-v{version}.yml'
return join(schema_dir, schema_filename)

@@ -170,18 +185,19 @@ def write_report(output: IO, report, fmt: ReportFormat):


# XXX: locals must be disabled to avoid `schema` being dumped to console on an
# exception (and makeing it unreadable)
# exception (which makes it unreadable)
app = typer.Typer(pretty_exceptions_show_locals=False)


@app.command()
def main(data_file: List[str] = typer.Argument(..., help=DATA_FILE_DESC),
version: str = typer.Option(default=DEF_VER, help=VERSION_DESC),
out: str = typer.Option(default="", help=OUT_DESC),
format: Optional[ReportFormat] = typer.Option(default=None,
help=FORMAT_DESC),
verbosity: int = typer.Option(default=2, help=VERB_DESC)):

def main_cli(
data_file: List[str] = typer.Argument(default=..., help=DATA_FILE_DESC),
version: str = typer.Option(default=DEF_VER, help=VERSION_DESC),
out: str = typer.Option(default="", help=OUT_DESC),
format: Optional[ReportFormat] = typer.Option(default=None,
help=FORMAT_DESC),
verbosity: int = typer.Option(default=2, help=VERB_DESC)
):
out_path = out
out_fmt = format
in_paths: list = data_file
@@ -219,7 +235,6 @@ def main(data_file: List[str] = typer.Argument(..., help=DATA_FILE_DESC),
try:
info(f'validating {in_paths}')
info(f'using schema "{os.path.basename(schema_path)}"')
validation._VERBOSITY = verbosity

if in_fmt == DataFormat.XLSX:
in_paths = convert_excel_to_csv(in_paths[0])
@@ -229,7 +244,8 @@ def main(data_file: List[str] = typer.Argument(..., help=DATA_FILE_DESC),
def validate(data):
report = _validate_data_ext(schema, data, DataKind.spreadsheet,
version, on_progress=on_progress,
with_metadata=False)
with_metadata=False,
verbosity=verbosity)
strip_report(report)
info() # newline after progressbar

@@ -273,5 +289,11 @@ def validate(data):
info('done!')


if __name__ == '__main__':
def main():
    '''runs the typer app, which in turn runs `main_cli`

    XXX: needed to make odm-validate work'''
    app()


if __name__ == '__main__':
main()
2 changes: 2 additions & 0 deletions tests/common.py
Original file line number Diff line number Diff line change
@@ -13,6 +13,8 @@
unused_import_dummy = 0

ASSET_DIR = ''
PKG_NAME = 'odm_validation'

_dir = os.path.dirname(os.path.realpath(__file__))
root_dir = join(_dir, '..')

7 changes: 4 additions & 3 deletions tests/test_summarize_tool.py
Original file line number Diff line number Diff line change
@@ -4,11 +4,12 @@
from os.path import join, relpath

import common
from common import PKG_NAME, root_dir

cwd = os.getcwd()
asset_dir = relpath(join(common.root_dir, join('assets', 'tools')), cwd)
tools_dir = relpath(join(common.root_dir, 'tools'), cwd)
validate_tool = relpath(join(tools_dir, 'validate.py'), cwd)
asset_dir = relpath(join(root_dir, 'assets', 'tools'), cwd)
tools_dir = relpath(join(root_dir, 'src', PKG_NAME, 'tools'), cwd)
validate_tool = 'odm-validate'
summarize_tool = relpath(join(tools_dir, 'summarize.py'), cwd)


2 changes: 0 additions & 2 deletions tools/requirements.txt

This file was deleted.