Skip to content

Commit

Permalink
refactor(validate-data): simplify mapping from violation to user faci…
Browse files Browse the repository at this point in the history
…ng problem (#1411)
  • Loading branch information
Nora-Olivia-Ammann authored Feb 7, 2025
1 parent ee1b39e commit 06c0bc7
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 104 deletions.
141 changes: 38 additions & 103 deletions src/dsp_tools/commands/validate_data/query_validation_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from dsp_tools.commands.validate_data.models.validation import ValidationResult
from dsp_tools.commands.validate_data.models.validation import ValidationResultBaseInfo
from dsp_tools.commands.validate_data.models.validation import ViolationType
from dsp_tools.commands.validate_data.utils import reformat_any_iri
from dsp_tools.commands.validate_data.utils import reformat_data_iri
from dsp_tools.commands.validate_data.utils import reformat_onto_iri
from dsp_tools.models.exceptions import BaseError
Expand Down Expand Up @@ -366,134 +367,68 @@ def _reformat_extracted_results(results: list[ValidationResult]) -> list[InputPr
return [_reformat_one_validation_result(x) for x in results]


def _reformat_one_validation_result(validation_result: ValidationResult) -> InputProblem: # noqa: PLR0911 Too many return statements
def _reformat_one_validation_result(validation_result: ValidationResult) -> InputProblem:
match validation_result.violation_type:
case ViolationType.MAX_CARD | ViolationType.MIN_CARD as violation:
case (
ViolationType.MAX_CARD
| ViolationType.MIN_CARD
| ViolationType.GENERIC
| ViolationType.NON_EXISTING_CARD
| ViolationType.PATTERN
| ViolationType.UNIQUE_VALUE
| ViolationType.VALUE_TYPE as violation
):
problem = RESULT_TO_PROBLEM_MAPPER[violation]
return _reformat_with_prop_and_message(result=validation_result, problem_type=problem)
case ViolationType.NON_EXISTING_CARD:
iris = _reformat_main_iris(validation_result)
return InputProblem(
problem_type=ProblemType.NON_EXISTING_CARD,
res_id=iris.res_id,
res_type=iris.res_type,
prop_name=iris.prop_name,
)
case ViolationType.FILEVALUE_PROHIBITED:
iris = _reformat_main_iris(validation_result)
return InputProblem(
problem_type=ProblemType.FILE_VALUE_PROHIBITED,
res_id=iris.res_id,
res_type=iris.res_type,
prop_name="bitstream / iiif-uri",
)
case ViolationType.GENERIC:
iris = _reformat_main_iris(validation_result)
return InputProblem(
problem_type=ProblemType.GENERIC,
res_id=iris.res_id,
res_type=iris.res_type,
prop_name=iris.prop_name,
message=_convert_rdflib_input_data_to_string(validation_result.message),
input_value=_convert_rdflib_input_data_to_string(validation_result.input_value),
)
return _reformat_generic(result=validation_result, problem_type=problem)
case ViolationType.FILEVALUE_PROHIBITED | ViolationType.FILE_VALUE as violation:
problem = RESULT_TO_PROBLEM_MAPPER[violation]
return _reformat_generic(result=validation_result, problem_type=problem, prop_string="bitstream / iiif-uri")
case ViolationType.SEQNUM_IS_PART_OF:
iris = _reformat_main_iris(validation_result)
return InputProblem(
problem_type=ProblemType.GENERIC,
res_id=iris.res_id,
res_type=iris.res_type,
prop_name="seqnum or isPartOf",
message=_convert_rdflib_input_data_to_string(validation_result.message),
return _reformat_generic(
result=validation_result, problem_type=ProblemType.GENERIC, prop_string="seqnum or isPartOf"
)
case ViolationType.VALUE_TYPE:
return _reformat_value_type_violation_result(validation_result)
case ViolationType.PATTERN:
return _reformat_pattern_violation_result(validation_result)
case ViolationType.LINK_TARGET:
return _reformat_link_target_violation_result(validation_result)
case ViolationType.UNIQUE_VALUE:
return _reformat_unique_value_violation_result(validation_result)
case ViolationType.FILE_VALUE:
iris = _reformat_main_iris(validation_result)
return InputProblem(
problem_type=ProblemType.FILE_VALUE,
res_id=iris.res_id,
res_type=iris.res_type,
prop_name="bitstream / iiif-uri",
expected=_convert_rdflib_input_data_to_string(validation_result.expected),
)
case _:
raise BaseError(f"An unknown violation result was found: {validation_result.__class__.__name__}")


def _reformat_generic(
    result: ValidationResult, problem_type: ProblemType, prop_string: str | None = None
) -> InputProblem:
    """
    Build an InputProblem from a validation result, copying over every optional detail.

    Args:
        result: extracted validation result
        problem_type: problem type that should be reported to the user
        prop_string: label to report instead of the property name of the result;
            if it is None or empty, the reformatted property name is used

    Returns:
        The reformatted problem
    """
    iris = _reformat_main_iris(result)
    # A non-empty override wins over the property name taken from the result.
    user_prop = prop_string or iris.prop_name
    return InputProblem(
        problem_type=problem_type,
        res_id=iris.res_id,
        res_type=iris.res_type,
        prop_name=user_prop,
        message=_convert_rdflib_input_to_string(result.message),
        input_value=_convert_rdflib_input_to_string(result.input_value),
        input_type=_convert_rdflib_input_to_string(result.input_type),
        expected=_convert_rdflib_input_to_string(result.expected),
    )


def _reformat_link_target_violation_result(result: ValidationResult) -> InputProblem:
    """
    Build the InputProblem for a violation concerning the target of a link.

    If the result carries an input type, the problem is a type mismatch of the target
    and both the found and the expected type are reported.
    Otherwise, the problem is reported as an inexistent linked resource.

    Args:
        result: extracted validation result

    Returns:
        The reformatted problem
    """
    iris = _reformat_main_iris(result)
    input_type = None
    expected = None
    problem_type = ProblemType.INEXISTENT_LINKED_RESOURCE

    if result.input_type:
        problem_type = ProblemType.LINK_TARGET_TYPE_MISMATCH
        input_type = reformat_onto_iri(str(result.input_type))
        expected = reformat_onto_iri(str(result.expected))

    # A missing value must stay None, str(None) would otherwise be reported as the text "None".
    input_value = None
    if result.input_value is not None:
        input_value = reformat_data_iri(str(result.input_value))

    return InputProblem(
        problem_type=problem_type,
        res_id=iris.res_id,
        res_type=iris.res_type,
        prop_name=iris.prop_name,
        input_value=input_value,
        input_type=input_type,
        expected=expected,
    )


Expand All @@ -504,9 +439,9 @@ def _reformat_main_iris(result: ValidationResult) -> ReformattedIRI:
return ReformattedIRI(res_id=subject_id, res_type=res_type, prop_name=prop_name)


def _convert_rdflib_input_data_to_string(input_val: SubjectObjectTypeAlias | None) -> str | None:
def _convert_rdflib_input_to_string(input_val: SubjectObjectTypeAlias | None) -> str | None:
if not input_val:
return None
if isinstance(input_val, URIRef):
return reformat_data_iri(input_val)
return reformat_any_iri(input_val)
return str(input_val)
19 changes: 18 additions & 1 deletion src/dsp_tools/commands/validate_data/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
from dsp_tools.commands.validate_data.constants import SubjectObjectTypeAlias


def reformat_any_iri(iri: SubjectObjectTypeAlias | str) -> str:
    """
    Reformats any kind of IRI.
    If it starts with the data namespace, it is treated like a data IRI,
    otherwise like an ontology IRI.

    Args:
        iri: Input IRI

    Returns:
        reformatted string
    """
    iri_str = str(iri)
    if iri_str.startswith("http://data/"):
        return reformat_data_iri(iri_str)
    return reformat_onto_iri(iri_str)


def reformat_onto_iri(iri: SubjectObjectTypeAlias | str) -> str:
"""Takes a rdflib Node and returns a prefixed IRI in string form."""
iri_str = str(iri)
Expand All @@ -15,4 +32,4 @@ def reformat_onto_iri(iri: SubjectObjectTypeAlias | str) -> str:

def reformat_data_iri(iri: SubjectObjectTypeAlias | str) -> str:
    """
    Takes a rdflib Node within the data namespace and returns only the suffix.

    Args:
        iri: Input IRI

    Returns:
        The IRI without the leading "http://data/";
        a string that does not start with the namespace is returned unchanged.
    """
    # Only a leading namespace is stripped, an occurrence further inside the string is part of the suffix.
    return str(iri).removeprefix("http://data/")

0 comments on commit 06c0bc7

Please sign in to comment.