From 30f3bac18a78d0c921a182d0329b3ff7e56881d1 Mon Sep 17 00:00:00 2001 From: Mark Janse Date: Wed, 18 Dec 2024 21:35:55 +0100 Subject: [PATCH] Move qualified relations to generic CKAN DCAT scheming class --- .../dcat/profiles/euro_dcat_ap_scheming.py | 83 ++++++++++++++++++- ckanext/dcat/profiles/euro_health_dcat_ap.py | 31 ------- 2 files changed, 82 insertions(+), 32 deletions(-) diff --git a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py index 3a2742a1..7a70bd77 100644 --- a/ckanext/dcat/profiles/euro_dcat_ap_scheming.py +++ b/ckanext/dcat/profiles/euro_dcat_ap_scheming.py @@ -1,6 +1,6 @@ import json -from rdflib import URIRef, BNode, Literal +from rdflib import URIRef, BNode, Literal, term from .base import RDFProfile, CleanedURIRef, URIRefOrLiteral from .base import ( RDF, @@ -118,6 +118,11 @@ def _parse_list_value(data_dict, field_name): if agents: dataset_dict[key] = agents + # Add any qualifiedRelations + qual_relations = self._relationship_details(dataset_ref, DCAT.qualifiedRelation) + if qual_relations: + dataset_dict["qualified_relation"] = qual_relations + # Repeating subfields: resources for schema_field in self._dataset_schema["resource_fields"]: if "repeating_subfields" in schema_field: @@ -227,6 +232,10 @@ def _graph_from_dataset_v2_scheming(self, dataset_dict, dataset_ref): spatial_ref, field[1], item[field[0]] ) + self._add_relationship( + dataset_ref, dataset_dict, "qualified_relation", DCAT.qualifiedRelation + ) + resources = dataset_dict.get("resources", []) for resource in resources: if resource.get("access_services"): @@ -292,6 +301,78 @@ def _add_agents( _type=URIRefOrLiteral, ) + def _relationship_details(self, subject, predicate): + """ + Returns a list of dicts with details about a dcat:Relationship property, e.g. 
+ dcat:qualifiedRelation + + Both subject and predicate must be rdflib URIRef or BNode objects + + Returns keys for uri, role, and relation with the values set to + an empty string if they could not be found. + """ + + relations = [] + for relation in self.g.objects(subject, predicate): + relation_details = {} + relation_details["uri"] = ( + str(relation) if isinstance(relation, term.URIRef) else "" + ) + relation_details["role"] = self._object_value(relation, DCAT.hadRole) + relation_details["relation"] = self._object_value(relation, DCT.relation) + relations.append(relation_details) + + return relations + + def _add_relationship( + self, + dataset_ref, + dataset_dict, + relation_key, + rdf_predicate, + ): + """ + Adds one or more Relationships to the RDF graph. + + :param dataset_ref: The RDF reference of the dataset + :param dataset_dict: The dataset dictionary containing relationship information + :param relation_key: field name in the CKAN dict (e.g. "qualified_relation") + :param rdf_predicate: The RDF predicate (DCAT.qualifiedRelation) + """ + relation = dataset_dict.get(relation_key) + if ( + isinstance(relation, list) + and len(relation) + and self._not_empty_dict(relation[0]) + ): + relations = relation + + for relation in relations: + + agent_uri = relation.get("uri") + if agent_uri: + agent_ref = CleanedURIRef(agent_uri) + else: + agent_ref = BNode() + + self.g.add((agent_ref, DCT.type, DCAT.Relationship)) + self.g.add((dataset_ref, rdf_predicate, agent_ref)) + + self._add_triple_from_dict( + relation, + agent_ref, + DCT.relation, + "relation", + _type=URIRefOrLiteral, + ) + self._add_triple_from_dict( + relation, + agent_ref, + DCAT.hadRole, + "role", + _type=URIRefOrLiteral, + ) + @staticmethod def _not_empty_dict(data_dict): return any(data_dict.values()) diff --git a/ckanext/dcat/profiles/euro_health_dcat_ap.py b/ckanext/dcat/profiles/euro_health_dcat_ap.py index 2144f097..6e560b8c 100644 --- a/ckanext/dcat/profiles/euro_health_dcat_ap.py +++ 
b/ckanext/dcat/profiles/euro_health_dcat_ap.py @@ -42,11 +42,6 @@ def _parse_health_fields(self, dataset_dict, dataset_ref): if agents: dataset_dict["hdab"] = agents - # Add any qualifiedRelations - qual_relations = self._relationship_details(dataset_ref, DCAT.qualifiedRelation) - if qual_relations: - dataset_dict["qualified_relation"] = qual_relations - # Retention period retention_start, retention_end = self._time_interval( dataset_ref, HEALTHDCATAP.retentionPeriod, dcat_ap_version=2 @@ -130,9 +125,6 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): self._add_nonneg_integer_triple(dataset_dict, dataset_ref, key, predicate) self._add_agents(dataset_ref, dataset_dict, "hdab", HEALTHDCATAP.hdab) - self._add_relationship( - dataset_ref, dataset_dict, "qualified_relation", DCAT.qualifiedRelation - ) def _add_nonneg_integer_triple(self, dataset_dict, dataset_ref, key, predicate): """ @@ -174,29 +166,6 @@ def _add_timeframe_triple(self, dataset_dict, dataset_ref): self._add_date_triple(temporal_ref, DCAT.endDate, item["end"]) self.g.add((dataset_ref, DCT.temporal, temporal_ref)) - def _relationship_details(self, subject, predicate): - """ - Returns a list of dicts with details about a dcat:Relationship property, e.g. - dcat:qualifiedRelation - - Both subject and predicate must be rdflib URIRef or BNode objects - - Returns keys for uri, role, and relation with the values set to - an empty string if they could not be found. - """ - - relations = [] - for relation in self.g.objects(subject, predicate): - relation_details = {} - relation_details["uri"] = ( - str(relation) if isinstance(relation, term.URIRef) else "" - ) - relation_details["role"] = self._object_value(relation, DCAT.hadRole) - relation_details["relation"] = self._object_value(relation, DCT.relation) - relations.append(relation_details) - - return relations - def _add_relationship( self, dataset_ref,