From 4fdeca111ef941366ac2b08cdb03d94085fb7592 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 27 May 2024 17:15:30 +0200 Subject: [PATCH 1/8] Improved the handling of different return types from the query() method --- pyproject.toml | 2 +- tests/test_sparql.py | 2 +- tripper/backends/rdflib.py | 111 ++++++++++++++++++++++++++++--------- tripper/interface.py | 5 +- tripper/triplestore.py | 18 ++++-- 5 files changed, 103 insertions(+), 35 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 98daf0d3..451f59cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -93,7 +93,7 @@ line_length = 79 # PEP8 line-length = 79 [tool.mypy] -python_version = "3.7" +python_version = "3.11" ignore_missing_imports = true scripts_are_modules = true warn_unused_configs = true diff --git a/tests/test_sparql.py b/tests/test_sparql.py index 465904b9..a9b65e35 100644 --- a/tests/test_sparql.py +++ b/tests/test_sparql.py @@ -76,7 +76,7 @@ def test_sparql_construct(): ts.parse(data=data) VCARD = ts.bind("vcard", "http://www.w3.org/2001/vcard-rdf/3.0#") - r = ts.query(query) + r = list(ts.query(query)) assert len(r) == 6 assert len([s for s, p, o in r if p == VCARD.givenName]) == 2 diff --git a/tripper/backends/rdflib.py b/tripper/backends/rdflib.py index 08dbf9d9..ba769a2c 100644 --- a/tripper/backends/rdflib.py +++ b/tripper/backends/rdflib.py @@ -6,7 +6,7 @@ # pylint: disable=line-too-long import warnings -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Generator try: import rdflib # pylint: disable=unused-import @@ -26,7 +26,7 @@ if TYPE_CHECKING: # pragma: no cover from collections.abc import Sequence - from typing import Generator, List, Optional, Tuple, Union + from typing import List, Optional, Tuple, Union from tripper.triplestore import Triple @@ -71,7 +71,7 @@ def __init__( database: "Optional[str]" = None, triplestore_url: "Optional[str]" = None, format: "Optional[str]" = None, # pylint: disable=redefined-builtin - graph: "Graph" = None, + 
graph: "Optional[Graph]" = None, ) -> None: # Note that although `base_iri` is unused in this backend, it may # still be used by calling Triplestore object. @@ -88,26 +88,30 @@ def __init__( def triples(self, triple: "Triple") -> "Generator[Triple, None, None]": """Returns a generator over matching triples.""" - for s, p, o in self.graph.triples( # pylint: disable=not-an-iterable - astriple(triple) - ): - yield ( - ( - f"_:{s}" - if isinstance(s, BNode) and not s.startswith("_:") - else str(s) - ), - str(p), - ( - parse_literal(o) - if isinstance(o, rdflibLiteral) - else ( - f"_:{o}" - if isinstance(o, BNode) and not o.startswith("_:") - else str(o) - ) - ), - ) + return _convert_triples_to_tripper( + self.graph.triples(astriple(triple)) + ) + + # for s, p, o in self.graph.triples( # pylint: disable=not-an-iterable + # astriple(triple) + # ): + # yield ( + # ( + # f"_:{s}" + # if isinstance(s, BNode) and not s.startswith("_:") + # else str(s) + # ), + # str(p), + # ( + # parse_literal(o) + # if isinstance(o, rdflibLiteral) + # else ( + # f"_:{o}" + # if isinstance(o, BNode) and not o.startswith("_:") + # else str(o) + # ) + # ), + # ) def add_triples(self, triples: "Sequence[Triple]"): """Add a sequence of triples.""" @@ -182,7 +186,9 @@ def serialize( return result if isinstance(result, str) else result.decode() return None - def query(self, query_object, **kwargs) -> "List[Tuple[str, ...]]": + def query( + self, query_object, **kwargs + ) -> "Union[List[Tuple[str, ...]], bool, Generator[Triple, None, None]]": """SPARQL query. Parameters: @@ -190,11 +196,39 @@ def query(self, query_object, **kwargs) -> "List[Tuple[str, ...]]": kwargs: Keyword arguments passed to rdflib.Graph.query(). Returns: - List of tuples of IRIs for each matching row. 
+ The return type depends on type of query: + - SELECT: list of tuples of IRIs for each matching row + - ASK: bool + - CONSTRUCT, DESCRIBE: generator over triples + For more info, see + https://rdflib.readthedocs.io/en/stable/apidocs/rdflib.html#rdflib.query.Result """ - rows = self.graph.query(query_object=query_object, **kwargs) - return [tuple(str(v) for v in row) for row in rows] + result = self.graph.query(query_object=query_object, **kwargs) + + # The type of the result object depends not only on the type of query, + # but also on the version of rdflib... We try to be general here. + if hasattr(result, "type"): + resulttype = result.type + elif result.__class__.__name__ == "ResultRow": + resulttype = "SELECT" + elif isinstance(result, bool): + resulttype = "ASK" + elif isinstance(result, Generator): + resulttype = "CONSTRUCT" # also DESCRIBE + else: + warnings.warn( + "Unknown return type from rdflib.query(). Return it unprocessed." + ) + return result # type: ignore + + if resulttype == "SELECT": + return [tuple(str(v) for v in row) for row in result] # type: ignore + if resulttype == "ASK": + return bool(result) + if resulttype in ("CONSTRUCT", "DESCRIBE"): + return _convert_triples_to_tripper(result) + assert False, "should never be reached" # nosec def update(self, update_object, **kwargs) -> None: """Update triplestore with SPARQL. 
@@ -234,3 +268,26 @@ def namespaces(self) -> dict: prefix: str(namespace) for prefix, namespace in self.graph.namespaces() } + + +def _convert_triples_to_tripper(triples) -> "Generator[Triple, None, None]": + """Helper function that converts an iterator/generator of rdflib triples + to tripper triples.""" + for s, p, o in triples: ### p ylint: disable=not-an-iterable + yield ( + ( + f"_:{s}" + if isinstance(s, BNode) and not s.startswith("_:") + else str(s) + ), + str(p), + ( + parse_literal(o) + if isinstance(o, rdflibLiteral) + else ( + f"_:{o}" + if isinstance(o, BNode) and not o.startswith("_:") + else str(o) + ) + ), + ) diff --git a/tripper/interface.py b/tripper/interface.py index 6e15cc81..7c1da86e 100644 --- a/tripper/interface.py +++ b/tripper/interface.py @@ -73,7 +73,10 @@ def query(self, query_object: str, **kwargs) -> List[Tuple[str, ...]]: kwargs: Additional backend-specific keyword arguments. Returns: - List of tuples of IRIs for each matching row. + The return type depends on type of query: + - SELECT: list of tuples of IRIs for each matching row + - ASK: bool + - CONSTRUCT, DESCRIBE: generator over triples """ def update(self, update_object: str, **kwargs): diff --git a/tripper/triplestore.py b/tripper/triplestore.py index c1407c8e..c7907051 100644 --- a/tripper/triplestore.py +++ b/tripper/triplestore.py @@ -386,7 +386,9 @@ def serialize( ts.bind(prefix, iri) return ts.serialize(destination=destination, format=format, **kwargs) - def query(self, query_object, **kwargs) -> "List[Tuple[str, ...]]": + def query( + self, query_object, **kwargs + ) -> "Union[List[Tuple[str, ...]], bool, Generator[Triple, None, None]]": """SPARQL query. Parameters: @@ -394,11 +396,17 @@ def query(self, query_object, **kwargs) -> "List[Tuple[str, ...]]": kwargs: Keyword arguments passed to the backend query() method. Returns: - List of tuples of IRIs for each matching row. 
+ The return type depends on type of query: + - SELECT: list of tuples of IRIs for each matching row + - ASK: bool + - CONSTRUCT, DESCRIBE: generator over triples Note: - This method is intended for SELECT queries. Use - the update() method for INSERT and DELETE queries. + This method is intended for SELECT, ASK, CONSTRUCT and + DESCRIBE queries. Use the update() method for INSERT and + DELETE queries. + + Not all backends may support all types of queries. """ self._check_method("query") @@ -413,7 +421,7 @@ def update(self, update_object, **kwargs) -> None: Note: This method is intended for INSERT and DELETE queries. Use - the query() method for SELECT queries. + the query() method for SELECT, ASK, CONSTRUCT and DESCRIBE queries. """ self._check_method("update") From 6b03c210128f7372e01ad2bf9adf873d90b4bfc7 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Mon, 27 May 2024 22:42:02 +0200 Subject: [PATCH 2/8] Fixed test failure --- tests/test_literals.py | 9 ++++++++- tests/test_triplestore.py | 3 +-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/test_literals.py b/tests/test_literals.py index a2e13bac..1c9ae63b 100644 --- a/tests/test_literals.py +++ b/tests/test_literals.py @@ -5,6 +5,8 @@ def test_untyped() -> None: """Test creating a untyped literal.""" + import pytest + from tripper.literal import RDF, XSD, Literal literal = Literal("Hello world!") @@ -19,9 +21,14 @@ def test_untyped() -> None: assert literal == Literal("Hello world!", datatype=XSD.string) assert literal == Literal("Hello world!", datatype=XSD.token) assert literal == Literal("Hello world!", datatype=RDF.JSON) - assert literal != Literal("Hello world!", datatype=XSD.ENTITY) assert literal == Literal("Hello world!", lang="en") + # Check two things here: + # 1) that a plain literal compares false to a non-string literal + # 2) that we get a warning about unknown XSD.ENTITY datatype + with pytest.warns(UserWarning): + assert literal != Literal("Hello world!", 
datatype=XSD.ENTITY) + def test_string() -> None: """Test creating a string literal.""" diff --git a/tests/test_triplestore.py b/tests/test_triplestore.py index 33910e60..5a51c88f 100644 --- a/tests/test_triplestore.py +++ b/tests/test_triplestore.py @@ -415,12 +415,11 @@ def test_find_literal_triples() -> None: ts.triples(predicate=FAM.hasName, object=Literal("Per")) ) == set( [ - (FAM.Per, FAM.hasName, Literal("Per", datatype=XSD.string)), + (FAM.Per, FAM.hasName, Literal("Per")), ] ) -# if True: def test_bind_errors(): """Test for errors in Triplestore.bind().""" pytest.importorskip("rdflib") From 0f4e2d2efe27354f0cd34170e0e5e2d8343affb8 Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 21 Jun 2024 12:44:37 +0200 Subject: [PATCH 3/8] Added test for CONSTRUCT query --- tests/test_sparql.py | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/tests/test_sparql.py b/tests/test_sparql.py index a9b65e35..812ea137 100644 --- a/tests/test_sparql.py +++ b/tests/test_sparql.py @@ -6,8 +6,8 @@ # if True: -def test_sparql(): - """Test SPARQL query.""" +def test_sparql_select(): + """Test SPARQL SELECT query.""" pytest.importorskip("rdflib") from tripper import Triplestore @@ -86,3 +86,43 @@ def test_sparql_construct(): assert ( len([s for s, p, o in r if p == VCARD.givenName and o == "Cyril"]) == 0 ) + + +#if True: +def test_sparql_construct(): + """Test SPARQL CONSTRUCT query.""" + # From https://www.w3.org/TR/rdf-sparql-query/#construct + pytest.importorskip("rdflib") + from textwrap import dedent + from tripper import Literal, Triplestore + + # Load pre-inferred EMMO + ts = Triplestore("rdflib") + + data = dedent( + """ + @prefix foaf: . + + _:a foaf:name "Alice" . + _:a foaf:mbox . 
+ """ + ) + query = dedent( + """ + PREFIX foaf: + PREFIX vcard: + CONSTRUCT { vcard:FN ?name } + WHERE { ?x foaf:name ?name } + """ + ) + ts = Triplestore("rdflib") + ts.parse(data=data) + r = ts.query(query) + + assert set(r) == { + ( + 'http://example.org/person#Alice', + 'http://www.w3.org/2001/vcard-rdf/3.0#FN', + Literal('Alice') + ) + } From 6c511e695483720e301c04f472ddaf723f42b0a8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 10:46:11 +0000 Subject: [PATCH 4/8] [pre-commit.ci] auto fixes from pre-commit hooks For more information, see https://pre-commit.ci --- tests/test_sparql.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_sparql.py b/tests/test_sparql.py index 812ea137..627e59e2 100644 --- a/tests/test_sparql.py +++ b/tests/test_sparql.py @@ -88,12 +88,13 @@ def test_sparql_construct(): ) -#if True: +# if True: def test_sparql_construct(): """Test SPARQL CONSTRUCT query.""" # From https://www.w3.org/TR/rdf-sparql-query/#construct pytest.importorskip("rdflib") from textwrap import dedent + from tripper import Literal, Triplestore # Load pre-inferred EMMO @@ -121,8 +122,8 @@ def test_sparql_construct(): assert set(r) == { ( - 'http://example.org/person#Alice', - 'http://www.w3.org/2001/vcard-rdf/3.0#FN', - Literal('Alice') + "http://example.org/person#Alice", + "http://www.w3.org/2001/vcard-rdf/3.0#FN", + Literal("Alice"), ) } From 7bed1a332437be8e46634395cc07fcf99ebf3e1d Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Fri, 21 Jun 2024 14:48:08 +0200 Subject: [PATCH 5/8] Added more sparql tests --- tests/test_sparql.py | 133 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 131 insertions(+), 2 deletions(-) diff --git a/tests/test_sparql.py b/tests/test_sparql.py index 812ea137..209e700f 100644 --- a/tests/test_sparql.py +++ b/tests/test_sparql.py @@ -5,7 +5,7 @@ import pytest -# if True: +#if True: def 
test_sparql_select(): """Test SPARQL SELECT query.""" pytest.importorskip("rdflib") @@ -88,8 +88,50 @@ def test_sparql_construct(): ) +# if True: +def test_sparql_select2(): + """Test SPARQL SELECT query.""" + # From https://www.w3.org/TR/rdf-sparql-query/#construct + pytest.importorskip("rdflib") + from textwrap import dedent + from tripper import Literal, Triplestore + + data = dedent( + """ + @prefix : . + @prefix foaf: . + + :allice foaf:name "Alice" . + :allice foaf:knows :bob . + :allice foaf:knows :clare . + + :bob foaf:name "Bob" . + + :clare foaf:name "Clare" . + :clare foaf:nick "CT" . + """ + ) + query = dedent( + """ + PREFIX foaf: + SELECT ?nameX ?nameY ?nickY + WHERE + { ?x foaf:knows ?y ; + foaf:name ?nameX . + ?y foaf:name ?nameY . + OPTIONAL { ?y foaf:nick ?nickY } + } + """ + ) + ts = Triplestore("rdflib") + ts.parse(data=data) + r = ts.query(query) + + assert set(r) == {('Alice', 'Bob', 'None'), ('Alice', 'Clare', 'CT')} + + #if True: -def test_sparql_construct(): +def test_sparql_construct2(): """Test SPARQL CONSTRUCT query.""" # From https://www.w3.org/TR/rdf-sparql-query/#construct pytest.importorskip("rdflib") @@ -126,3 +168,90 @@ def test_sparql_construct(): Literal('Alice') ) } + + +#if True: +def test_sparql_ask(): + """Test SPARQL ASK query.""" + # From https://www.w3.org/TR/rdf-sparql-query/#construct + pytest.importorskip("rdflib") + from textwrap import dedent + from tripper import Literal, Triplestore + + # Load pre-inferred EMMO + ts = Triplestore("rdflib") + + data = dedent( + """ + @prefix foaf: . + + _:a foaf:name "Alice" . + _:a foaf:homepage . + + _:b foaf:name "Bob" . + _:b foaf:mbox . 
+ """ + ) + query = dedent( + """ + PREFIX foaf: + ASK { ?x foaf:name "Alice" } + """ + ) + ts = Triplestore("rdflib") + ts.parse(data=data) + r = ts.query(query) + assert r == True + + +#if True: +def test_sparql_describe(): + """Test SPARQL DESCRIBE query.""" + # From https://www.w3.org/TR/rdf-sparql-query/#construct + pytest.importorskip("rdflib") + from textwrap import dedent + from tripper import Literal, Triplestore + + # Load pre-inferred EMMO + ts = Triplestore("rdflib") + + data = dedent( + """ + @prefix foaf: . + @prefix vcard: . + @prefix exOrg: . + @prefix rdf: . + @prefix owl: . + + exOrg:Allice + exOrg:employeeId "1234" ; + foaf:mbox_sha1sum "ABCD1234" . + + foaf:mbox_sha1sum rdf:type owl:InverseFunctionalProperty . + """ + ) + query = dedent( + """ + PREFIX foaf: + DESCRIBE ?x + WHERE { ?x foaf:mbox_sha1sum "ABCD1234" } + """ + ) + ts = Triplestore("rdflib") + ts.parse(data=data) + r = ts.query(query) + + assert set(r) == set( + [ + ( + 'http://org.example.com/employees#Allice', + 'http://xmlns.com/foaf/0.1/mbox_sha1sum', + Literal('ABCD1234') + ), + ( + 'http://org.example.com/employees#Allice', + 'http://org.example.com/employees#employeeId', + Literal('1234') + ), + ] + ) From 6e43b24221e07115edd2188d15f55998be31be66 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 21 Jun 2024 22:30:27 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit hooks For more information, see https://pre-commit.ci --- tests/test_sparql.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/tests/test_sparql.py b/tests/test_sparql.py index 9f0682db..dcb1ea71 100644 --- a/tests/test_sparql.py +++ b/tests/test_sparql.py @@ -5,7 +5,7 @@ import pytest -#if True: +# if True: def test_sparql_select(): """Test SPARQL SELECT query.""" pytest.importorskip("rdflib") @@ -94,6 +94,7 @@ def test_sparql_select2(): # From 
https://www.w3.org/TR/rdf-sparql-query/#construct pytest.importorskip("rdflib") from textwrap import dedent + from tripper import Literal, Triplestore data = dedent( @@ -127,10 +128,10 @@ def test_sparql_select2(): ts.parse(data=data) r = ts.query(query) - assert set(r) == {('Alice', 'Bob', 'None'), ('Alice', 'Clare', 'CT')} + assert set(r) == {("Alice", "Bob", "None"), ("Alice", "Clare", "CT")} -#if True: +# if True: def test_sparql_construct2(): """Test SPARQL CONSTRUCT query.""" # From https://www.w3.org/TR/rdf-sparql-query/#construct @@ -171,12 +172,13 @@ def test_sparql_construct2(): } -#if True: +# if True: def test_sparql_ask(): """Test SPARQL ASK query.""" # From https://www.w3.org/TR/rdf-sparql-query/#construct pytest.importorskip("rdflib") from textwrap import dedent + from tripper import Literal, Triplestore # Load pre-inferred EMMO @@ -205,12 +207,13 @@ def test_sparql_ask(): assert r == True -#if True: +# if True: def test_sparql_describe(): """Test SPARQL DESCRIBE query.""" # From https://www.w3.org/TR/rdf-sparql-query/#construct pytest.importorskip("rdflib") from textwrap import dedent + from tripper import Literal, Triplestore # Load pre-inferred EMMO @@ -245,14 +248,14 @@ def test_sparql_describe(): assert set(r) == set( [ ( - 'http://org.example.com/employees#Allice', - 'http://xmlns.com/foaf/0.1/mbox_sha1sum', - Literal('ABCD1234') + "http://org.example.com/employees#Allice", + "http://xmlns.com/foaf/0.1/mbox_sha1sum", + Literal("ABCD1234"), ), ( - 'http://org.example.com/employees#Allice', - 'http://org.example.com/employees#employeeId', - Literal('1234') + "http://org.example.com/employees#Allice", + "http://org.example.com/employees#employeeId", + Literal("1234"), ), ] ) From 06ed9722eaccdd2c6fb1f132955409adfb0c7ddd Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 22 Jun 2024 00:35:17 +0200 Subject: [PATCH 7/8] Fixed some pre-commit warnings --- tests/test_sparql.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/tests/test_sparql.py b/tests/test_sparql.py index dcb1ea71..88fe21e4 100644 --- a/tests/test_sparql.py +++ b/tests/test_sparql.py @@ -95,7 +95,7 @@ def test_sparql_select2(): pytest.importorskip("rdflib") from textwrap import dedent - from tripper import Literal, Triplestore + from tripper import Triplestore data = dedent( """ @@ -179,7 +179,7 @@ def test_sparql_ask(): pytest.importorskip("rdflib") from textwrap import dedent - from tripper import Literal, Triplestore + from tripper import Triplestore # Load pre-inferred EMMO ts = Triplestore("rdflib") @@ -204,7 +204,7 @@ def test_sparql_ask(): ts = Triplestore("rdflib") ts.parse(data=data) r = ts.query(query) - assert r == True + assert r is True # if True: From c6811b351806e28aadce7536805e5272dd68ea5d Mon Sep 17 00:00:00 2001 From: Jesper Friis Date: Sat, 22 Jun 2024 08:55:41 +0200 Subject: [PATCH 8/8] Corrected references to source for SPARQL examples --- tests/test_sparql.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_sparql.py b/tests/test_sparql.py index 88fe21e4..f64c25e7 100644 --- a/tests/test_sparql.py +++ b/tests/test_sparql.py @@ -91,7 +91,7 @@ def test_sparql_construct(): # if True: def test_sparql_select2(): """Test SPARQL SELECT query.""" - # From https://www.w3.org/TR/rdf-sparql-query/#construct + # From https://www.w3.org/TR/rdf-sparql-query/#select pytest.importorskip("rdflib") from textwrap import dedent @@ -175,7 +175,7 @@ def test_sparql_construct2(): # if True: def test_sparql_ask(): """Test SPARQL ASK query.""" - # From https://www.w3.org/TR/rdf-sparql-query/#construct + # From https://www.w3.org/TR/rdf-sparql-query/#ask pytest.importorskip("rdflib") from textwrap import dedent @@ -210,7 +210,7 @@ def test_sparql_ask(): # if True: def test_sparql_describe(): """Test SPARQL DESCRIBE query.""" - # From https://www.w3.org/TR/rdf-sparql-query/#construct + # From https://www.w3.org/TR/rdf-sparql-query/#describe pytest.importorskip("rdflib") from textwrap 
import dedent