Skip to content

Commit

Permalink
Start lightweight client-side validation of formats #666
Browse files Browse the repository at this point in the history
  • Loading branch information
mam10eks committed Sep 24, 2024
1 parent 860cf8c commit 23d835d
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 0 deletions.
6 changes: 6 additions & 0 deletions python-client/tests/format_check/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from pathlib import Path

# Shared fixture root: the "resources" directory two levels above this file.
RESOURCES = Path(__file__).parent.parent.joinpath("resources")

# Fixture directories used by the format-check tests. Per those tests, the
# first one passes the "run.txt" check, the second contains only a '.gitkeep'
# file and the third contains only 'queries.jsonl'.
VALID_RUN_OUTPUT = RESOURCES.joinpath("ranking-outputs")
EMPTY_OUTPUT = RESOURCES.joinpath("input-run-01", "1")
IR_QUERY_OUTPUT = RESOURCES.joinpath("query-processing-outputs", "query-segmentation")
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import unittest

from tira.check_format import check_format

from . import EMPTY_OUTPUT, IR_QUERY_OUTPUT, VALID_RUN_OUTPUT


class TestCheckFormatForNonExistingFormats(unittest.TestCase):
    """Requesting an unsupported format must be rejected for any run output.

    The assertions expect ``ValueError`` specifically instead of the generic
    ``Exception``: a blanket ``assertRaises(Exception)`` would also pass on an
    unrelated failure (e.g. a ``TypeError`` caused by a bug), hiding
    regressions.
    """

    # A single unsupported format name.

    def test_invalid_validator_on_empty_output(self):
        with self.assertRaises(ValueError):
            check_format(EMPTY_OUTPUT, "does-not-exist")

    def test_invalid_validator_on_query_output(self):
        with self.assertRaises(ValueError):
            check_format(IR_QUERY_OUTPUT, "does-not-exist")

    def test_invalid_validator_on_valid_run_output_output(self):
        with self.assertRaises(ValueError):
            check_format(VALID_RUN_OUTPUT, "does-not-exist")

    # A list in which every format name is unsupported.

    def test_multiple_invalid_validators_on_empty_output(self):
        with self.assertRaises(ValueError):
            check_format(EMPTY_OUTPUT, ["d1", "d2"])

    def test_multiple_invalid_validators_on_query_output(self):
        with self.assertRaises(ValueError):
            check_format(IR_QUERY_OUTPUT, ["d1", "d2"])

    def test_multiple_invalid_validators_on_valid_run_output_output(self):
        with self.assertRaises(ValueError):
            check_format(VALID_RUN_OUTPUT, ["d1", "d2"])
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import unittest

from tira.check_format import check_format

from . import EMPTY_OUTPUT, IR_QUERY_OUTPUT, VALID_RUN_OUTPUT


class TestCheckFormatForNonExistingFormats(unittest.TestCase):
    """Run the ``run.txt`` format check against the three fixture directories."""

    def _assert_run_txt_check(self, directory, status, message):
        # Shared assertion: the check must return exactly [status, message].
        self.assertEqual([status, message], check_format(directory, "run.txt"))

    def test_invalid_validator_on_empty_output(self):
        # The directory only holds a placeholder file, so run.txt is missing.
        self._assert_run_txt_check(
            EMPTY_OUTPUT,
            "ERROR",
            "No file run.txt was found, only the files ['.gitkeep'] were available.",
        )

    def test_invalid_validator_on_query_output(self):
        # The directory holds output of a different kind, so run.txt is missing.
        self._assert_run_txt_check(
            IR_QUERY_OUTPUT,
            "ERROR",
            "No file run.txt was found, only the files ['queries.jsonl'] were available.",
        )

    def test_invalid_validator_on_valid_run_output_output(self):
        # The directory contains run.txt, so the check succeeds.
        self._assert_run_txt_check(
            VALID_RUN_OUTPUT,
            "OK",
            "The run.txt file has the correct format.",
        )
28 changes: 28 additions & 0 deletions python-client/tira/check_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import os
from pathlib import Path
from typing import Sequence, Union


class RunFormat:
    """Checks if a given output is a valid run file."""

    def check_format(self, run_output: Path):
        """Report whether ``run_output`` contains a ``run.txt`` file.

        Only the presence of the file is verified; its content is not parsed.

        Args:
            run_output: Directory produced by a run. A plain string path is
                accepted as well and converted to a ``Path``.

        Returns:
            A two-element list ``[status, message]`` where ``status`` is
            either ``"OK"`` or ``"ERROR"`` and ``message`` is meant to be
            shown to the user.
        """
        run_output = Path(run_output)

        # is_file() rather than exists(): a directory that happens to be
        # called "run.txt" is not a run file.
        if (run_output / "run.txt").is_file():
            return ["OK", "The run.txt file has the correct format."]

        # Listing a missing path (or a regular file) would raise, so report
        # this as a normal validation error instead.
        if not run_output.is_dir():
            return ["ERROR", f"The run output {run_output} is not an existing directory."]

        # os.listdir returns entries in arbitrary order; sort them so the
        # message is deterministic across platforms and file systems.
        msg = "No file run.txt was found, only the files "
        msg += str(sorted(os.listdir(run_output))) + " were available."
        return ["ERROR", msg]


def check_format(run_output: Path, format: Union[str, Sequence[str]]):
    """Check if the provided run output is in the specified format.

    Provides debug messages intended for users.

    Args:
        run_output (Path): the output produced by some run that is to-be checked.
        format (Union[str, Sequence[str]]): The allowed format or a list of
            allowed formats. Currently only "run.txt" is implemented.

    Returns:
        A two-element list ``[status, message]`` as produced by the format
        checker, where ``status`` is "OK" or "ERROR".

    Raises:
        ValueError: If no format is given or any requested format is not
            implemented yet.
    """
    # Normalise the argument so that a single name and a sequence of names
    # are handled alike; "run.txt" and ["run.txt"] must behave the same.
    if isinstance(format, str):
        requested = [format]
    else:
        try:
            requested = list(format)
        except TypeError:
            # Not iterable (e.g. None): treated as "nothing supported" below.
            requested = []

    supported = ("run.txt",)
    if not requested or any(name not in supported for name in requested):
        raise ValueError("Not yet implemented.", run_output, format)

    # Every requested format is "run.txt" at this point, as it is the only
    # implemented one.
    return RunFormat().check_format(run_output)

0 comments on commit 23d835d

Please sign in to comment.