Skip to content

Commit

Permalink
feat(xmlvalidate): send validation request to API (DEV-4177) (#1186)
Browse files Browse the repository at this point in the history
  • Loading branch information
Nora-Olivia-Ammann authored Oct 1, 2024
1 parent e05e215 commit 57099e4
Show file tree
Hide file tree
Showing 11 changed files with 275 additions and 33 deletions.
39 changes: 39 additions & 0 deletions src/dsp_tools/commands/xml_validate/api_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import requests
from loguru import logger
from rdflib import Graph
from requests import ReadTimeout
from requests import RequestException
from requests import Response
Expand Down Expand Up @@ -76,3 +77,41 @@ def _get_ontology_iris(self) -> list[str]:
def _get_one_ontology(self, ontology_iri: str) -> str:
response = self._get(ontology_iri, headers={"Accept": "text/turtle"})
return response.text


@dataclass
class ShaclValidator:
    """Client to validate RDF data against a given SHACL shape."""

    # Base URL of the API host; the path "/shacl/validate" is appended to it.
    dsp_api_url: str

    def validate(self, data_ttl: str, shacl_ttl: str) -> Graph:
        """
        Sends a multipart/form-data request with two turtle files (data.ttl and shacl.ttl) to the
        SHACL validation endpoint of the API and expects a response containing a single text/turtle body,
        which is loaded into an rdflib Graph.

        Exceptions raised by `requests.post` itself (e.g. on a timeout or a refused connection)
        are not caught here and propagate to the caller.

        Args:
            data_ttl (str): The turtle content for the data.ttl file (as a string).
            shacl_ttl (str): The turtle content for the shacl.ttl file (as a string).

        Returns:
            Graph: The rdflib Graph object loaded with the response turtle data.

        Raises:
            InternalError: in case of a non-ok response
        """
        files = {
            "data.ttl": ("data.ttl", data_ttl, "text/turtle"),
            "shacl.ttl": ("shacl.ttl", shacl_ttl, "text/turtle"),
        }
        timeout = 10
        # Strip trailing slashes so that a base URL such as "http://host/" does not
        # produce "http://host//shacl/validate".
        base_url = self.dsp_api_url.rstrip("/")
        request_url = f"{base_url}/shacl/validate"
        logger.debug(f"REQUEST: POST to {request_url}, timeout: {timeout}")
        response = requests.post(request_url, files=files, timeout=timeout)
        if not response.ok:
            msg = f"Failed to send request. Status code: {response.status_code}, Original Message:\n{response.text}"
            logger.error(msg)
            raise InternalError(msg)
        # The response body is the validation report serialised as turtle.
        graph = Graph()
        graph.parse(data=response.text, format="turtle")
        return graph
10 changes: 5 additions & 5 deletions src/dsp_tools/commands/xml_validate/sparql/resource_shacl.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ def _construct_resource_nodeshape(onto_graph: Graph) -> Graph:
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX api-shapes: <http://api.knora.org/ontology/knora-api/shapes/v2#>
PREFIX knora-api: <http://api.knora.org/ontology/knora-api/v2#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
CONSTRUCT {
?shapesIRI a sh:NodeShape ;
sh:targetClass ?class ;
sh:property api-shapes:RDFS_label ;
sh:ignoredProperties ( rdf:type ) ;
sh:ignoredProperties ( rdf:type rdfs:label ) ;
sh:closed true .
} WHERE {
Expand Down Expand Up @@ -72,7 +72,7 @@ def _construct_1_cardinality(onto_graph: Graph) -> Graph:
sh:minCount 1 ;
sh:maxCount 1 ;
sh:severity sh:Violation ;
sh:message "Cardinality: 1" ;
sh:message "1" ;
] .
} WHERE {
Expand Down Expand Up @@ -110,7 +110,7 @@ def _construct_0_1_cardinality(onto_graph: Graph) -> Graph:
sh:minCount 0 ;
sh:maxCount 1 ;
sh:severity sh:Violation ;
sh:message "Cardinality: 0-1" ;
sh:message "0-1" ;
] .
} WHERE {
Expand Down Expand Up @@ -147,7 +147,7 @@ def _construct_1_n_cardinality(onto_graph: Graph) -> Graph:
sh:path ?propRestriction ;
sh:minCount 1 ;
sh:severity sh:Violation ;
sh:message "Cardinality: 1-n" ;
sh:message "1-n" ;
] .
} WHERE {
Expand Down
42 changes: 39 additions & 3 deletions src/dsp_tools/commands/xml_validate/xml_validate.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,55 @@
from pathlib import Path

from lxml import etree
from rdflib import SH
from rdflib import Graph

from dsp_tools.commands.xml_validate.api_connection import OntologyConnection
from dsp_tools.commands.xml_validate.api_connection import ShaclValidator
from dsp_tools.commands.xml_validate.deserialise_input import deserialise_xml
from dsp_tools.commands.xml_validate.make_data_rdf import make_data_rdf
from dsp_tools.commands.xml_validate.models.data_deserialised import ProjectDeserialised
from dsp_tools.commands.xml_validate.models.data_deserialised import ProjectInformation
from dsp_tools.commands.xml_validate.models.data_rdf import DataRDF
from dsp_tools.commands.xml_validate.sparql.construct_shapes import construct_shapes_graph
from dsp_tools.utils.xml_utils import parse_xml_file
from dsp_tools.utils.xml_utils import remove_comments_from_element_tree
from dsp_tools.utils.xml_utils import transform_into_localnames
from dsp_tools.utils.xml_validation import validate_xml

LIST_SEPARATOR = "\n - "


def xml_validate(filepath: Path, shortcode: str, api_url: str) -> None:
    """
    Validate an XML data file against the project ontologies retrieved from the server
    and print whether the validation passed.

    Args:
        filepath: path to the xml data file
        shortcode: shortcode of the project
        api_url: url of the api host
    """
    ontology_client = OntologyConnection(api_url, shortcode)
    parsed_data = _get_data_info_from_file(filepath, api_url)
    onto_graph = _get_project_ontos(ontology_client)
    # The data is validated together with the ontologies it refers to.
    combined_graph = parsed_data.make_graph() + onto_graph
    # Only the conformance flag is needed here; the report graph is not used.
    conforms, _ = _validate(ShaclValidator(api_url), onto_graph, combined_graph)
    print("Validation passed!" if conforms else "Validation errors found!")


def _validate(validator: ShaclValidator, onto_graph: Graph, data_graph: Graph) -> tuple[bool, Graph]:
    """
    Build the SHACL shapes from the ontology graph, send shapes and data to the validator
    and read the `sh:conforms` value from the returned report.

    Args:
        validator: client that performs the validation request
        onto_graph: graph from which the SHACL shapes are constructed
        data_graph: graph with the data to validate

    Returns:
        The truth value of the first `sh:conforms` object in the report, and the report graph itself
    """
    announcement = LIST_SEPARATOR.join(
        ["The following information of your data is being validated:", "Cardinalities"]
    )
    print(announcement)
    shacl_turtle = construct_shapes_graph(onto_graph).serialize(format="ttl")
    data_turtle = data_graph.serialize(format="ttl")
    report = validator.validate(data_turtle, shacl_turtle)
    conforms_value = next(report.objects(None, SH.conforms))
    return bool(conforms_value), report


def _get_project_ontos(onto_con: OntologyConnection) -> Graph:
all_ontos = onto_con.get_ontologies()
Expand All @@ -25,11 +61,11 @@ def _get_project_ontos(onto_con: OntologyConnection) -> Graph:
return g


def _get_data_info_from_file(file: Path, api_url: str) -> tuple[ProjectInformation, DataRDF]:
def _get_data_info_from_file(file: Path, api_url: str) -> DataRDF:
cleaned_root = _parse_and_clean_file(file, api_url)
deserialised: ProjectDeserialised = deserialise_xml(cleaned_root)
rdf_data: DataRDF = make_data_rdf(deserialised.data)
return deserialised.info, rdf_data
return rdf_data


def _parse_and_clean_file(file: Path, api_url: str) -> etree._Element:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ class TestCheckTripleNumbersOnto:
def test_nodeshape(self, onto_graph: Graph) -> None:
result = _construct_resource_nodeshape(onto_graph)
number_of_resource_classes = 6
triples_collection_ignored_props = 2 * number_of_resource_classes
triples_cls_nodeshape = 5 * number_of_resource_classes
triples_collection_ignored_props = 4 * number_of_resource_classes
triples_cls_nodeshape = 4 * number_of_resource_classes
assert len(result) == triples_cls_nodeshape + triples_collection_ignored_props

def test_cardinality_1(self, onto_graph: Graph) -> None:
Expand Down Expand Up @@ -205,10 +205,9 @@ def test_construct_resource_nodeshape_one_res(one_res_one_prop: Graph) -> None:
subject_iri = subjects.pop()
assert subject_iri == ONTO.CardOneResource_Shape
node_triples = list(result.triples((subject_iri, None, None)))
num_triples = 5
num_triples = 4
assert len(node_triples) == num_triples
assert next(result.subjects(RDF.type, SH.NodeShape)) == subject_iri
assert next(result.subjects(SH.property, API_SHAPES.RDFS_label)) == subject_iri
assert next(result.subjects(SH.ignoredProperties)) == subject_iri
assert next(result.objects(subject_iri, SH.closed)) == Literal(True)

Expand All @@ -229,7 +228,7 @@ def test_good(self, card_1: Graph) -> None:
assert str(next(result.objects(bn, SH.maxCount))) == "1"
assert next(result.objects(bn, SH.path)) == ONTO.testBoolean
assert next(result.objects(bn, SH.severity)) == SH.Violation
assert str(next(result.objects(bn, SH.message))) == "Cardinality: 1"
assert str(next(result.objects(bn, SH.message))) == "1"

def test_empty_0_1(self, card_0_1: Graph) -> None:
result = _construct_1_cardinality(card_0_1)
Expand All @@ -255,7 +254,7 @@ def test_good(self, card_0_1: Graph) -> None:
assert str(next(result.objects(bn, SH.maxCount))) == "1"
assert next(result.objects(bn, SH.path)) == ONTO.testDecimalSimpleText
assert next(result.objects(bn, SH.severity)) == SH.Violation
assert str(next(result.objects(bn, SH.message))) == "Cardinality: 0-1"
assert str(next(result.objects(bn, SH.message))) == "0-1"

def test_empty_1(self, card_1: Graph) -> None:
result = _construct_0_1_cardinality(card_1)
Expand All @@ -280,7 +279,7 @@ def test_good(self, card_1_n: Graph) -> None:
assert str(next(result.objects(bn, SH.minCount))) == "1"
assert next(result.objects(bn, SH.path)) == ONTO.testGeoname
assert next(result.objects(bn, SH.severity)) == SH.Violation
assert str(next(result.objects(bn, SH.message))) == "Cardinality: 1-n"
assert str(next(result.objects(bn, SH.message))) == "1-n"

def test_empty_1(self, card_1: Graph) -> None:
result = _construct_1_n_cardinality(card_1)
Expand Down Expand Up @@ -327,7 +326,7 @@ def test_construct_all_cardinalities(one_res_one_prop: Graph) -> None:
assert str(next(result.objects(bn, SH.maxCount))) == "1"
assert next(result.objects(bn, SH.path)) == ONTO.testBoolean
assert next(result.objects(bn, SH.severity)) == SH.Violation
assert str(next(result.objects(bn, SH.message))) == "Cardinality: 1"
assert str(next(result.objects(bn, SH.message))) == "1"


if __name__ == "__main__":
Expand Down
14 changes: 5 additions & 9 deletions testdata/xml-validate/data/cardinality_correct.xml
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,17 @@
</boolean-prop>
</resource>

<resource label="Card Mixed" restype=":ClassInheritedCardinalityOverwriting" id="id_3">
<resource label="Card Mixed" restype=":ClassMixedCard" id="id_3">
<boolean-prop name=":testBoolean">
<boolean>true</boolean>
</boolean-prop>
<decimal-prop name=":testDecimalSimpleText">
<decimal>2.71</decimal>
</decimal-prop>
<integer-prop name=":testIntegerSimpleText">
<integer>1</integer>
<integer>2</integer>
</integer-prop>
<decimal-prop name=":testDecimalSpinbox">
<decimal>2.71</decimal>
<decimal>2.00</decimal>
</decimal-prop>
<geoname-prop name=":testGeoname">
<geoname>1111111</geoname>
<geoname>2222222</geoname>
</geoname-prop>
</resource>

</knora>
18 changes: 10 additions & 8 deletions testdata/xml-validate/data/cardinality_violation.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,32 +6,34 @@
shortcode="9999"
default-ontology="onto">

<!-- 4 Violations -->

<!-- 'testBoolean' cardinality 1 -->
<resource label="Bool Card 1" restype=":ClassInheritedCardinalityOverwriting" id="id_1"/>
<resource label="Bool Card 1" restype=":ClassInheritedCardinalityOverwriting" id="id_card_one"/>

<!-- This class does not have a cardinality for the property -->
<resource label="Int card does not exist" restype=":CardOneResource" id="id_2">
<resource label="Int card does not exist" restype=":CardOneResource" id="id_closed_constraint">
<integer-prop name=":testIntegerSimpleText">
<integer>1</integer>
</integer-prop>
</resource>

<!-- 'testDecimalSimpleText' max cardinality 1 -->
<resource label="Decimal Card 0-1" restype=":ClassWithEverything" id="id_3">
<resource label="Decimal Card 0-1" restype=":ClassMixedCard" id="id_max_card">
<boolean-prop name=":testBoolean">
<boolean>true</boolean>
</boolean-prop>
<decimal-prop name=":testDecimalSimpleText">
<decimal>2.71</decimal>
<decimal>2.00</decimal>
</decimal-prop>
<integer-prop name=":testIntegerSimpleText">
<integer>1</integer>
</integer-prop>
<geoname-prop name=":testGeoname">
<geoname>111111</geoname>
</geoname-prop>
</resource>

<!-- 'testIntegerSimpleText' cardinality 1-n -->
<resource label="Int Card 1-n" restype=":ClassWithEverything" id="id_4">
<!-- 'testGeoname' cardinality 1-n -->
<resource label="Geoname Card 1-n" restype=":ClassMixedCard" id="id_min_card">
<boolean-prop name=":testBoolean">
<boolean>true</boolean>
</boolean-prop>
Expand Down
2 changes: 2 additions & 0 deletions testdata/xml-validate/data/content_violation.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
shortcode="9999"
default-ontology="onto">

<!-- 10 Violations -->

<!-- This is an integer -->
<resource label="Decimal" restype=":ClassWithEverything" id="id_4">
<decimal-prop name=":testDecimalSimpleText">
Expand Down
2 changes: 2 additions & 0 deletions testdata/xml-validate/data/value_type_violation.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
shortcode="9999"
default-ontology="onto">

<!-- 12 Violations -->

<!--
In this file, the property type does not match with the ontology.
The content is valid (eg. a boolean is a valid boolean)
Expand Down
Loading

0 comments on commit 57099e4

Please sign in to comment.