Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add SPARQL DESCRIBE query implementation #2221

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions rdflib/plugins/sparql/algebra.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,11 @@ def translateGroupGraphPattern(graphPattern: CompValue) -> CompValue:
"""

if graphPattern.name == "SubSelect":
return ToMultiSet(translate(graphPattern)[0])
# The first output from translate cannot be None for a subselect query
# as it can only be None for certain DESCRIBE queries.
# type error: Argument 1 to "ToMultiSet" has incompatible type "Optional[CompValue]";
# expected "Union[List[Dict[Variable, str]], CompValue]"
return ToMultiSet(translate(graphPattern)[0]) # type: ignore[arg-type]

if not graphPattern.part:
graphPattern.part = [] # empty { }
Expand Down Expand Up @@ -623,7 +627,7 @@ def translateValues(
return Values(res)


def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]:
def translate(q: CompValue) -> Tuple[Optional[CompValue], List[Variable]]:
"""
http://www.w3.org/TR/sparql11-query/#convertSolMod

Expand All @@ -635,9 +639,27 @@ def translate(q: CompValue) -> Tuple[CompValue, List[Variable]]:

# TODO: Var scope test
VS: Set[Variable] = set()
traverse(q.where, functools.partial(_findVars, res=VS))

# all query types have a where part
# All query types have a WHERE clause EXCEPT some DESCRIBE queries
# where only explicit IRIs are provided.
if q.name == "DescribeQuery":
# For DESCRIBE queries, use the vars provided in q.var.
# If there is no WHERE clause, vars should be explicit IRIs to describe.
# If there is a WHERE clause, vars can be any combination of explicit IRIs
# and variables.
VS = set(q.var)

# If there is no WHERE clause, just return the vars projected
if q.where is None:
return None, list(VS)

# Otherwise, evaluate the WHERE clause like SELECT DISTINCT
else:
q.modifier = "DISTINCT"

else:
traverse(q.where, functools.partial(_findVars, res=VS))

# depth-first recursive generation of mapped query tree
M = translateGroupGraphPattern(q.where)

Expand Down
37 changes: 36 additions & 1 deletion rdflib/plugins/sparql/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def evalPart(ctx: QueryContext, part: CompValue):
return evalServiceQuery(ctx, part)

elif part.name == "DescribeQuery":
raise Exception("DESCRIBE not implemented")
return evalDescribeQuery(ctx, part)

else:
raise Exception("I dont know: %s" % part.name)
Expand Down Expand Up @@ -585,6 +585,41 @@ def evalConstructQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]]
return res


def evalDescribeQuery(ctx: QueryContext, query) -> Dict[str, Union[str, Graph]]:
    """
    Evaluate a DESCRIBE query and return the resulting description graph.

    The resources to describe are the ``URIRef`` entries of ``query.PV``
    plus, when ``query.p`` is not ``None``, every value bound in the
    solutions produced by evaluating ``query.p``. For each of those
    resources the graph returned by ``ctx.graph.cbd(resource)`` is added to
    the result graph.

    :param ctx: query context; ``ctx.graph`` supplies the namespace bindings
        and the per-resource CBD graphs.
    :param query: translated DESCRIBE query exposing ``PV`` and ``p``.
    :return: a dict with ``"type_"`` set to ``"DESCRIBE"`` and ``"graph"``
        set to the assembled result graph.
    """
    # Start from an empty graph that carries the queried graph's prefixes.
    described = Graph()
    # type error: Item "None" of "Optional[Graph]" has no attribute "namespaces"
    for prefix, namespace in ctx.graph.namespaces():  # type: ignore[union-attr]
        described.bind(prefix, namespace)

    # Explicit IRIs can be listed with or without a WHERE clause, possibly
    # next to projected variables; only the IRIs are picked up here.
    targets = {term for term in query.PV if isinstance(term, URIRef)}

    # With a WHERE clause, every value bound in any solution is described
    # as well; the set keeps each resource only once.
    if query.p is not None:
        for solution in evalPart(ctx, query.p):
            targets.update(solution.values())

    # Merge the CBD of each collected resource into the result graph.
    for target in targets:
        # type error: Item "None" of "Optional[Graph]" has no attribute "cbd"
        described += ctx.graph.cbd(target)  # type: ignore[union-attr]

    return {"type_": "DESCRIBE", "graph": described}


def evalQuery(graph: Graph, query: Query, initBindings, base=None):

initBindings = dict((Variable(k), v) for k, v in initBindings.items())
Expand Down
2 changes: 1 addition & 1 deletion rdflib/plugins/sparql/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1479,7 +1479,7 @@ def expandCollection(terms):
"DescribeQuery",
Keyword("DESCRIBE")
+ (OneOrMore(ParamList("var", VarOrIri)) | "*")
+ Param("datasetClause", ZeroOrMore(DatasetClause))
+ ZeroOrMore(ParamList("datasetClause", DatasetClause))
aucampia marked this conversation as resolved.
Show resolved Hide resolved
+ Optional(WhereClause)
+ SolutionModifier
+ ValuesClause,
Expand Down
92 changes: 92 additions & 0 deletions test/test_sparql/test_sparql.py
Original file line number Diff line number Diff line change
Expand Up @@ -867,3 +867,95 @@ def test_queries(
result = rdfs_graph.query(query)
logging.debug("result = %s", result)
assert expected_bindings == result.bindings


@pytest.mark.parametrize(
    ["query_string", "expected_subjects", "expected_size"],
    [
        # Explicit IRIs only, no WHERE clause.
        pytest.param(
            "\nDESCRIBE rdfs:Class\n",
            {RDFS.Class},
            5,
            id="1-explicit",
        ),
        pytest.param(
            "\nDESCRIBE rdfs:Class rdfs:subClassOf\n",
            {RDFS.Class, RDFS.subClassOf},
            11,
            id="2-explict",
        ),
        # owl:Class is requested too, but the expected subjects and size are
        # the same as for the two-IRI case above.
        pytest.param(
            "\nDESCRIBE rdfs:Class rdfs:subClassOf owl:Class\n",
            {RDFS.Class, RDFS.subClassOf},
            11,
            id="3-explict-1-missing",
        ),
        # Resources selected through a WHERE clause.
        pytest.param(
            (
                "\n"
                "DESCRIBE ?prop\n"
                "WHERE {\n"
                "?prop a rdf:Property\n"
                "}\n"
            ),
            {
                RDFS.seeAlso,
                RDFS.member,
                RDFS.subPropertyOf,
                RDFS.subClassOf,
                RDFS.domain,
                RDFS.range,
                RDFS.label,
                RDFS.comment,
                RDFS.isDefinedBy,
            },
            55,
            id="1-var",
        ),
        # Two variables in the pattern, only ?s is projected.
        pytest.param(
            (
                "\n"
                "DESCRIBE ?s\n"
                "WHERE {\n"
                "?s a ?type ;\n"
                "rdfs:subClassOf rdfs:Class .\n"
                "}\n"
            ),
            {RDFS.Datatype},
            5,
            id="2-var-1-projected",
        ),
        # A projected variable combined with an explicit IRI.
        pytest.param(
            (
                "\n"
                "DESCRIBE ?s rdfs:Class\n"
                "WHERE {\n"
                "?s a ?type ;\n"
                "rdfs:subClassOf rdfs:Class .\n"
                "}\n"
            ),
            {RDFS.Datatype, RDFS.Class},
            10,
            id="2-var-1-projected-1-explicit",
        ),
        # A variable with no WHERE clause: an empty result graph is expected.
        pytest.param("DESCRIBE ?s", set(), 0, id="empty"),
    ],
)
def test_sparql_describe(
    query_string: str,
    expected_subjects: set,
    expected_size: int,
    rdfs_graph: Graph,
) -> None:
    """
    Run a DESCRIBE query against rdfs.ttl and verify both the set of
    described (non-blank-node) subjects and the total number of triples
    in the returned graph.
    """
    described = rdfs_graph.query(query_string).graph
    assert described is not None

    # Blank-node subjects are left out of the subject comparison.
    named_subjects = set()
    for subject in described.subjects():
        if not isinstance(subject, BNode):
            named_subjects.add(subject)

    assert named_subjects == expected_subjects
    assert len(described) == expected_size