From ec4d5bfef1e3ac821fe6c730e494cbea008748ca Mon Sep 17 00:00:00 2001 From: Matt Goldberg Date: Wed, 25 Jan 2023 15:59:30 -0500 Subject: [PATCH 1/2] Add SPARQL DESCRIBE query implementation --- rdflib/plugins/sparql/algebra.py | 24 +++++++- rdflib/plugins/sparql/evaluate.py | 35 +++++++++++- rdflib/plugins/sparql/parser.py | 2 +- test/test_sparql/test_sparql.py | 92 +++++++++++++++++++++++++++++++ 4 files changed, 148 insertions(+), 5 deletions(-) diff --git a/rdflib/plugins/sparql/algebra.py b/rdflib/plugins/sparql/algebra.py index 1429012b7..5ac9177d7 100644 --- a/rdflib/plugins/sparql/algebra.py +++ b/rdflib/plugins/sparql/algebra.py @@ -623,7 +623,7 @@ def translateValues( return Values(res) -def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]: +def translate(q: CompValue) -> Tuple[Optional[CompValue], List[Variable]]: """ http://www.w3.org/TR/sparql11-query/#convertSolMod @@ -635,9 +635,27 @@ def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]: # TODO: Var scope test VS: Set[Variable] = set() - traverse(q.where, functools.partial(_findVars, res=VS)) - # all query types have a where part + # All query types have a WHERE clause EXCEPT some DESCRIBE queries + # where only explicit IRIs are provided. + if q.name == "DescribeQuery": + # For DESCRIBE queries, use the vars provided in q.var. + # If there is no WHERE clause, vars should be explicit IRIs to describe. + # If there is a WHERE clause, vars can be any combination of explicit IRIs + # and variables. + VS = set(q.var) + + # If there is no WHERE clause, just return the vars projected + if q.where is None: + return None, list(VS) + + # Otherwise, evaluate the WHERE clause like SELECT DISTINCT + else: + q.modifier = "DISTINCT" + + else: + traverse(q.where, functools.partial(_findVars, res=VS)) + # depth-first recursive generation of mapped query tree M = translateGroupGraphPattern(q.where) diff --git a/rdflib/plugins/sparql/evaluate.py b/rdflib/plugins/sparql/evaluate.py index aafd0fe7b..e3e04ba0c 100644 --- a/rdflib/plugins/sparql/evaluate.py +++ b/rdflib/plugins/sparql/evaluate.py @@ -309,7 +309,7 @@ def evalPart(ctx: QueryContext, part: CompValue): return evalServiceQuery(ctx, part) elif part.name == "DescribeQuery": - raise Exception("DESCRIBE not implemented") + return evalDescribeQuery(ctx, part) else: raise Exception("I dont know: %s" % part.name) @@ -585,6 +585,39 @@ def evalConstructQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]] return res +def evalDescribeQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]]: + # Create a result graph and bind namespaces from the graph being queried + graph = Graph() + for pfx, ns in ctx.graph.namespaces(): + graph.bind(pfx, ns) + + to_describe = set() + + # Explicit IRIs may be provided to a DESCRIBE query. + # If there is a WHERE clause, explicit IRIs may be provided in + # addition to projected variables. Find those explicit IRIs and + # prepare to describe them. + for iri in query.PV: + if isinstance(iri, URIRef): + to_describe.add(iri) + + # If there is a WHERE clause, evaluate it then find the unique set of + # resources to describe across all bindings and projected variables + if query.p is not None: + bindings = evalPart(ctx, query.p) + to_describe.update(*(set(binding.values()) for binding in bindings)) + + # Get a CBD for all resources identified to describe + for resource in to_describe: + graph += ctx.graph.cbd(resource) + + res: Dict[str, Union[str, Graph]] = {} + res["type_"] = "DESCRIBE" + res["graph"] = graph + + return res + + def evalQuery(graph: Graph, query: Query, initBindings, base=None): initBindings = dict((Variable(k), v) for k, v in initBindings.items()) diff --git a/rdflib/plugins/sparql/parser.py b/rdflib/plugins/sparql/parser.py index 2035b4f08..2a897f822 100644 --- a/rdflib/plugins/sparql/parser.py +++ b/rdflib/plugins/sparql/parser.py @@ -1479,7 +1479,7 @@ def expandCollection(terms): "DescribeQuery", Keyword("DESCRIBE") + (OneOrMore(ParamList("var", VarOrIri)) | "*") - + Param("datasetClause", ZeroOrMore(DatasetClause)) + + ZeroOrMore(ParamList("datasetClause", DatasetClause)) + Optional(WhereClause) + SolutionModifier + ValuesClause, diff --git a/test/test_sparql/test_sparql.py b/test/test_sparql/test_sparql.py index 32cc82d72..80510f185 100644 --- a/test/test_sparql/test_sparql.py +++ b/test/test_sparql/test_sparql.py @@ -867,3 +867,95 @@ def test_queries( result = rdfs_graph.query(query) logging.debug("result = %s", result) assert expected_bindings == result.bindings + + +@pytest.mark.parametrize( + ["query_string", "expected_subjects", "expected_size"], + [ + pytest.param( + """ + DESCRIBE rdfs:Class + """, + {RDFS.Class}, + 5, + id="1-explicit", + ), + pytest.param( + """ + DESCRIBE rdfs:Class rdfs:subClassOf + """, + {RDFS.Class, RDFS.subClassOf}, + 11, + id="2-explict", + ), + pytest.param( + """ + DESCRIBE rdfs:Class rdfs:subClassOf owl:Class + """, + {RDFS.Class, RDFS.subClassOf}, + 11, + id="3-explict-1-missing", + ), + pytest.param( + """ + DESCRIBE ?prop + WHERE { + ?prop a rdf:Property + } + """, + { + RDFS.seeAlso, + RDFS.member, + RDFS.subPropertyOf, + RDFS.subClassOf, + RDFS.domain, + RDFS.range, + RDFS.label, + RDFS.comment, + RDFS.isDefinedBy, + }, + 55, + id="1-var", + ), + pytest.param( + """ + DESCRIBE ?s + WHERE { + ?s a ?type ; + rdfs:subClassOf rdfs:Class . + } + """, + {RDFS.Datatype}, + 5, + id="2-var-1-projected", + ), + pytest.param( + """ + DESCRIBE ?s rdfs:Class + WHERE { + ?s a ?type ; + rdfs:subClassOf rdfs:Class . + } + """, + {RDFS.Datatype, RDFS.Class}, + 10, + id="2-var-1-projected-1-explicit", + ), + pytest.param("DESCRIBE ?s", set(), 0, id="empty"), + ], +) +def test_sparql_describe( + query_string: str, + expected_subjects: set[Identifier], + expected_size: int, + rdfs_graph: Graph, +) -> None: + """ + Check results of DESCRIBE queries against rdfs.ttl to ensure + the subjects described and the number of triples returned are correct. + """ + r = rdfs_graph.query(query_string) + assert r.graph is not None + subjects = {s for s in r.graph.subjects() if not isinstance(s, BNode)} + assert subjects == expected_subjects + assert len(r.graph) == expected_size From 63755b8330cd6ce7650130732cb13a4712131944 Mon Sep 17 00:00:00 2001 From: Matt Goldberg Date: Thu, 2 Feb 2023 10:42:10 -0500 Subject: [PATCH 2/2] chore: Tweaks to satisfy mypy --- rdflib/plugins/sparql/algebra.py | 6 +++++- rdflib/plugins/sparql/evaluate.py | 6 ++++-- test/test_sparql/test_sparql.py | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/rdflib/plugins/sparql/algebra.py b/rdflib/plugins/sparql/algebra.py index 5ac9177d7..5f6a774aa 100644 --- a/rdflib/plugins/sparql/algebra.py +++ b/rdflib/plugins/sparql/algebra.py @@ -335,7 +335,11 @@ def translateGroupGraphPattern(graphPattern: CompValue) -> CompValue: """ if graphPattern.name == "SubSelect": - return ToMultiSet(translate(graphPattern)[0]) + # The first output from translate cannot be None for a subselect query + # as it can only be None for certain DESCRIBE queries. + # type error: Argument 1 to "ToMultiSet" has incompatible type "Optional[CompValue]"; + # expected "Union[List[Dict[Variable, str]], CompValue]" + return ToMultiSet(translate(graphPattern)[0]) # type: ignore[arg-type] if not graphPattern.part: graphPattern.part = [] # empty { } diff --git a/rdflib/plugins/sparql/evaluate.py b/rdflib/plugins/sparql/evaluate.py index e3e04ba0c..06fc170d1 100644 --- a/rdflib/plugins/sparql/evaluate.py +++ b/rdflib/plugins/sparql/evaluate.py @@ -588,7 +588,8 @@ def evalConstructQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]] def evalDescribeQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]]: # Create a result graph and bind namespaces from the graph being queried graph = Graph() - for pfx, ns in ctx.graph.namespaces(): + # type error: Item "None" of "Optional[Graph]" has no attribute "namespaces" + for pfx, ns in ctx.graph.namespaces(): # type: ignore[union-attr] graph.bind(pfx, ns) to_describe = set() @@ -609,7 +610,8 @@ def evalDescribeQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]]: # Get a CBD for all resources identified to describe for resource in to_describe: - graph += ctx.graph.cbd(resource) + # type error: Item "None" of "Optional[Graph]" has no attribute "cbd" + graph += ctx.graph.cbd(resource) # type: ignore[union-attr] res: Dict[str, Union[str, Graph]] = {} res["type_"] = "DESCRIBE" diff --git a/test/test_sparql/test_sparql.py b/test/test_sparql/test_sparql.py index 80510f185..7d66f2c1f 100644 --- a/test/test_sparql/test_sparql.py +++ b/test/test_sparql/test_sparql.py @@ -946,7 +946,7 @@ def test_queries( ) def test_sparql_describe( query_string: str, - expected_subjects: set[Identifier], + expected_subjects: set, expected_size: int, rdfs_graph: Graph, ) -> None: