Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ORCA-175] Add CsvUpdater and update_csv command #39

Merged
merged 19 commits into from
Jun 14, 2023
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/dcqc/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from dcqc.suites.suite_abc import SuiteABC
from dcqc.target import SingleTarget
from dcqc.tests.base_test import BaseTest, ExternalTestMixin
from dcqc.updaters import CsvUpdater

# Make commands optional to allow for `dcqc --version`:
# `invoke_without_command=True` lets the app run even when no
# subcommand is supplied on the command line.
app = Typer(invoke_without_command=True)
Expand Down Expand Up @@ -204,3 +205,15 @@ def qc_file(
report = JsonReport()
suite_json = report.generate(suite)
json.dump(suite_json, sys.stdout, indent=2)


@app.command()
def update_csv(
    suites_file: Path = input_path_arg,
    input_file: Path = input_path_arg,
    output_file: Path = output_path_arg,
):
    """Update input CSV file with dcqc_status column"""
    # NOTE: the docstring above is kept verbatim because it doubles as the
    # command's help text.
    # Load the serialized suites, then let the updater write a copy of
    # `input_file` (with the extra status column) to `output_file`.
    parsed_suites = JsonParser.parse_objects(suites_file, SuiteABC)
    CsvUpdater(input_file, output_file).update(parsed_suites)
6 changes: 6 additions & 0 deletions src/dcqc/suites/suite_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,3 +291,9 @@ def from_dict(cls, dictionary: SerializedObject) -> SuiteABC:
def get_base_class(cls):
"""Retrieve base class."""
return SuiteABC

def get_status(self) -> SuiteStatus:
    """Return the suite status, computing it first if it is still unset.

    When ``_status`` is ``SuiteStatus.NONE``, the result of
    ``compute_status()`` is stored on ``_status`` before being returned;
    otherwise the stored value is returned as-is.
    """
    # Guard clause: a status has already been recorded, nothing to compute.
    if self._status != SuiteStatus.NONE:
        return self._status
    self._status = self.compute_status()
    return self._status
57 changes: 57 additions & 0 deletions src/dcqc/updaters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from collections import defaultdict
from csv import DictWriter
from dataclasses import dataclass
from pathlib import Path
from typing import List

from dcqc.parsers import CsvParser
from dcqc.suites.suite_abc import SuiteABC


@dataclass
class CsvUpdater:
    """Write a copy of an input CSV with an added ``dcqc_status`` column.

    Attributes:
        input_path: CSV file whose rows are read.
        output_path: Destination of the updated CSV.
    """

    # NOTE: only fields that are actually populated are declared. A declared
    # but never-assigned field makes the dataclass-generated ``__repr__`` and
    # ``__eq__`` raise ``AttributeError``. The generated ``__init__`` accepts
    # ``(input_path, output_path)`` positionally or by keyword.
    input_path: Path
    output_path: Path

    @staticmethod
    def _collapse_statuses(statuses: List[str]) -> str:
        """Return the most severe status value found in ``statuses``.

        Severity order is RED, then AMBER, then GREEN. ``"NONE"`` is
        returned when none of those values is present.
        """
        for level in ("RED", "AMBER", "GREEN"):
            if level in statuses:
                return level
        return "NONE"

    def update(self, suites: "List[SuiteABC]"):
        """Write the input CSV to ``output_path`` with a ``dcqc_status`` column.

        Args:
            suites: Suites whose statuses are looked up by the URL of the
                first file of each suite's target.

        Raises:
            KeyError: If a CSV row has no ``url`` column, or its URL does not
                match any of the given suites.
            ValueError: If the input CSV contains no rows (nothing is
                written in that case).
        """
        # {url: [status values]} -- several suites may share the same URL.
        # Only the first file of each target is used as the key.
        statuses_by_url = defaultdict(list)
        for suite in suites:
            url = suite.target.files[0].url
            status = suite.get_status()
            statuses_by_url[url].append(status.value)

        # Reduce each URL's statuses to a single, most severe value.
        status_by_url = {
            url: self._collapse_statuses(statuses)
            for url, statuses in statuses_by_url.items()
        }

        # Annotate every input row with the status of its URL.
        rows = []
        for _, row in CsvParser(self.input_path).list_rows():
            row["dcqc_status"] = status_by_url[row["url"]]
            rows.append(row)

        if not rows:
            raise ValueError("No rows found in input CSV")

        # Column order follows the first row, with the new column last.
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        with self.output_path.open(
            "w", newline="", encoding="utf-8"
        ) as output_file:
            writer = DictWriter(output_file, rows[0].keys())
            writer.writeheader()
            writer.writerows(rows)
35 changes: 34 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@
from datetime import datetime
from getpass import getuser
from pathlib import Path
from unittest.mock import MagicMock
from uuid import uuid4

import pytest

from dcqc.file import File
from dcqc.suites.suite_abc import SuiteABC
from dcqc.suites.suite_abc import SuiteABC, SuiteStatus
from dcqc.target import SingleTarget

CNFPATH = Path(__file__).resolve()
Expand Down Expand Up @@ -129,3 +130,35 @@ def _get_output(filename: str) -> Path:
return output

yield _get_output


@pytest.fixture
def mocked_suites_single_targets():
    """Provide one mocked suite per URL, each reporting a fixed status."""

    def _build_suite(url, status):
        # The mock exposes the URL on its first target file and returns
        # the given status from `get_status()`.
        mock_suite = MagicMock(cls=SuiteABC)
        mock_suite.target.files[0].url = url
        mock_suite.get_status.return_value = status
        return mock_suite

    status_by_url = {
        "syn://syn51585496": SuiteStatus.GREEN,
        "syn://syn51585494": SuiteStatus.RED,
        "syn://syn51585495": SuiteStatus.AMBER,
    }
    return [_build_suite(url, status) for url, status in status_by_url.items()]


@pytest.fixture
def mocked_suites_multi_targets():
    """Provide one mocked suite per URL, each reporting a fixed status."""

    def _build_suite(url, status):
        # The mock exposes the URL on its first target file and returns
        # the given status from `get_status()`.
        mock_suite = MagicMock(cls=SuiteABC)
        mock_suite.target.files[0].url = url
        mock_suite.get_status.return_value = status
        return mock_suite

    status_by_url = {
        "syn://syn51585496": SuiteStatus.GREEN,
        "syn://syn51585494": SuiteStatus.RED,
        "syn://syn51585495": SuiteStatus.AMBER,
    }
    return [_build_suite(url, status) for url, status in status_by_url.items()]
1 change: 1 addition & 0 deletions tests/data/empty_input.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
url,file_type,md5_checksum
13 changes: 13 additions & 0 deletions tests/data/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from dcqc import tests
from dcqc.file import File
from dcqc.mixins import SerializableMixin
from dcqc.parsers import JsonParser
from dcqc.reports import JsonReport
from dcqc.suites.suite_abc import SuiteABC
from dcqc.target import SingleTarget
Expand Down Expand Up @@ -60,3 +61,15 @@ def export(obj: SerializableMixin | Sequence[SerializableMixin], filename: str):
skipped_tests = ["LibTiffInfoTest"]
suite = SuiteABC.from_tests(suite_tests, required_tests, skipped_tests)
export(suite, "suite.json")

# suites.json
# Parse each per-suite JSON file into a SuiteABC object and export the
# resulting list as a single file.
input_jsons = list(
    map(
        Path,
        (
            "tests/data/suites_files/suites_1.json",
            "tests/data/suites_files/suites_2.json",
            "tests/data/suites_files/suites_3.json",
        ),
    )
)
suites = [
    JsonParser.parse_object(suite_path, SuiteABC) for suite_path in input_jsons
]
export(suites, "suites.json")
4 changes: 4 additions & 0 deletions tests/data/input.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
url,file_type,md5_checksum
syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9
syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff
syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9
4 changes: 4 additions & 0 deletions tests/data/output.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
url,file_type,md5_checksum,dcqc_status
syn://syn51585496,TXT,38b86a456d1f441008986c6f798d5ef9,GREEN
syn://syn51585494,TXT,a542e9b744bedcfd874129ab0f98c4ff,RED
syn://syn51585495,TIFF,38b86a456d1f441008986c6f798d5ef9,AMBER
179 changes: 179 additions & 0 deletions tests/data/suites.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
[
{
"type": "TiffSuite",
"target": {
"id": "0001",
"files": [
{
"url": "syn://syn51585496",
"metadata": {
"md5_checksum": "c7b08f6decb5e7572efbe6074926a843"
},
"type": "TIFF",
"name": "circuit.tif",
"local_path": "/tmp/dcqc-staged-7onezxv1/circuit.tif"
}
],
"type": "SingleTarget"
},
"suite_status": {
"required_tests": [
"Md5ChecksumTest",
"FileExtensionTest",
"LibTiffInfoTest"
],
"skipped_tests": [],
"status": "GREEN"
},
"tests": [
{
"type": "FileExtensionTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "GrepDateTest",
"tier": 4,
"is_external_test": true,
"status": "passed"
},
{
"type": "LibTiffInfoTest",
"tier": 2,
"is_external_test": true,
"status": "passed"
},
{
"type": "Md5ChecksumTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "TiffTag306DateTimeTest",
"tier": 4,
"is_external_test": true,
"status": "passed"
}
]
},
{
"type": "TiffSuite",
"target": {
"id": "0002",
"files": [
{
"url": "syn://syn51585494",
"metadata": {
"md5_checksum": "9cee1b0e8c4d051fabea82b62ae69404"
},
"type": "TIFF",
"name": "test_contains_word_date.tif",
"local_path": "/tmp/dcqc-staged-ddxo9fx2/test_contains_word_date.tif"
}
],
"type": "SingleTarget"
},
"suite_status": {
"required_tests": [
"Md5ChecksumTest",
"FileExtensionTest",
"LibTiffInfoTest"
],
"skipped_tests": [],
"status": "RED"
},
"tests": [
{
"type": "FileExtensionTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "GrepDateTest",
"tier": 4,
"is_external_test": true,
"status": "failed"
},
{
"type": "LibTiffInfoTest",
"tier": 2,
"is_external_test": true,
"status": "failed"
},
{
"type": "Md5ChecksumTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "TiffTag306DateTimeTest",
"tier": 4,
"is_external_test": true,
"status": "passed"
}
]
},
{
"type": "TiffSuite",
"target": {
"id": "0003",
"files": [
{
"url": "syn://syn51585495",
"metadata": {
"md5_checksum": "28a9ee7d0e994d494068ce8d6cda0268"
},
"type": "TIFF",
"name": "test_image_dirty_datetime.tif",
"local_path": "/tmp/dcqc-staged-5m6d8fdj/test_image_dirty_datetime.tif"
}
],
"type": "SingleTarget"
},
"suite_status": {
"required_tests": [
"Md5ChecksumTest",
"FileExtensionTest",
"LibTiffInfoTest"
],
"skipped_tests": [],
"status": "AMBER"
},
"tests": [
{
"type": "FileExtensionTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "GrepDateTest",
"tier": 4,
"is_external_test": true,
"status": "passed"
},
{
"type": "LibTiffInfoTest",
"tier": 2,
"is_external_test": true,
"status": "passed"
},
{
"type": "Md5ChecksumTest",
"tier": 1,
"is_external_test": false,
"status": "passed"
},
{
"type": "TiffTag306DateTimeTest",
"tier": 4,
"is_external_test": true,
"status": "failed"
}
]
}
]
Loading