From 34f2c10079ef2c075aff7c2ca0ef3aa770690cf5 Mon Sep 17 00:00:00 2001
From: Lukas Plank
Date: Wed, 30 Oct 2024 20:17:35 +0100
Subject: [PATCH] feat: implement query sanity checking

SPARQLModelAdapter now passes its query through check_query before storing
it. check_query chains three checks, applied left to right: the query is
handed to rdflib's parseQuery, it must be a SELECT query, and it must not
end in a solution modifier. The latter two raise UnsupportedQueryException.

The solution-modifier predicate tolerates whitespace after the modifier, so
an indented multi-line query that ends in e.g. "LIMIT 10" followed by a
newline and spaces is still detected.

---
 poetry.lock                     | 43 +++++++--------------------------
 pyproject.toml                  |  1 +
 rdfproxy/adapter.py             |  3 ++-
 rdfproxy/checks/checkers.py     | 34 ++++++++++++++++++++++++++
 rdfproxy/checks/query_checks.py | 29 ++++++++++++++++++++++
 rdfproxy/utils/_exceptions.py   |  4 +++
 rdfproxy/utils/predicates.py    | 18 ++++++++++++++
 7 files changed, 97 insertions(+), 35 deletions(-)
 create mode 100644 rdfproxy/checks/checkers.py
 create mode 100644 rdfproxy/checks/query_checks.py
 create mode 100644 rdfproxy/utils/predicates.py

diff --git a/poetry.lock b/poetry.lock
index d3bd3a7..7324f0f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -361,20 +361,6 @@ files = [
     {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"},
 ]
 
-[[package]]
-name = "isodate"
-version = "0.6.1"
-description = "An ISO 8601 date/time/duration parser and formatter"
-optional = false
-python-versions = "*"
-files = [
-    {file = "isodate-0.6.1-py2.py3-none-any.whl", hash = "sha256:0751eece944162659049d35f4f549ed815792b38793f07cf73381c1c87cbed96"},
-    {file = "isodate-0.6.1.tar.gz", hash = "sha256:48c5881de7e8b0a0d648cb024c8062dc84e7b840ed81e864c7614fd3c127bde9"},
-]
-
-[package.dependencies]
-six = "*"
-
 [[package]]
 name = "jinja2"
 version = "3.1.4"
@@ -818,24 +804,24 @@ files = [
 
 [[package]]
 name = "rdflib"
-version = "7.0.0"
+version = "7.1.1"
 description = "RDFLib is a Python library for working with RDF, a simple yet powerful language for representing information."
 optional = false
-python-versions = ">=3.8.1,<4.0.0"
+python-versions = "<4.0.0,>=3.8.1"
 files = [
-    {file = "rdflib-7.0.0-py3-none-any.whl", hash = "sha256:0438920912a642c866a513de6fe8a0001bd86ef975057d6962c79ce4771687cd"},
-    {file = "rdflib-7.0.0.tar.gz", hash = "sha256:9995eb8569428059b8c1affd26b25eac510d64f5043d9ce8c84e0d0036e995ae"},
+    {file = "rdflib-7.1.1-py3-none-any.whl", hash = "sha256:e590fa9a2c34ba33a667818b5a84be3fb8a4d85868f8038f17912ec84f912a25"},
+    {file = "rdflib-7.1.1.tar.gz", hash = "sha256:164de86bd3564558802ca983d84f6616a4a1a420c7a17a8152f5016076b2913e"},
 ]
 
 [package.dependencies]
-isodate = ">=0.6.0,<0.7.0"
 pyparsing = ">=2.1.0,<4"
 
 [package.extras]
 berkeleydb = ["berkeleydb (>=18.1.0,<19.0.0)"]
-html = ["html5lib (>=1.0,<2.0)"]
-lxml = ["lxml (>=4.3.0,<5.0.0)"]
-networkx = ["networkx (>=2.0.0,<3.0.0)"]
+html = ["html5rdf (>=1.2,<2)"]
+lxml = ["lxml (>=4.3,<6.0)"]
+networkx = ["networkx (>=2,<4)"]
+orjson = ["orjson (>=3.9.14,<4)"]
 
 [[package]]
 name = "rich"
@@ -893,17 +879,6 @@ files = [
     {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"},
 ]
 
-[[package]]
-name = "six"
-version = "1.16.0"
-description = "Python 2 and 3 compatibility utilities"
-optional = false
-python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
-files = [
-    {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"},
-    {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"},
-]
-
 [[package]]
 name = "sniffio"
 version = "1.3.1"
@@ -1246,4 +1221,4 @@ files = [
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "4e42db8b57c13c2a19b9920a69d9d853bb73bd0c7d34fcf6fb9fbfacc93c5d1d"
+content-hash = "e861223dfdf4c46c1ff3049e4790727200d30b195f2563c936418af16a919482"
diff --git a/pyproject.toml b/pyproject.toml
index ee5a8e4..037f6a7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,6 +14,7 @@ sparqlwrapper = "^2.0.0"
 pydantic = "^2.9.2"
 
 
+rdflib = "^7.1.1"
 [tool.poetry.group.dev.dependencies]
 ruff = "^0.7.0"
 deptry = "^0.20.0"
diff --git a/rdfproxy/adapter.py b/rdfproxy/adapter.py
index 0b3fe48..fe9629c 100644
--- a/rdfproxy/adapter.py
+++ b/rdfproxy/adapter.py
@@ -5,6 +5,7 @@ from typing import Generic
 
 
 from SPARQLWrapper import JSON, SPARQLWrapper
+from rdfproxy.checks.checkers import check_query
 from rdfproxy.mapper import ModelBindingsMapper
 from rdfproxy.utils._types import _TModelInstance
 from rdfproxy.utils.models import Page
@@ -34,7 +35,7 @@ class SPARQLModelAdapter(Generic[_TModelInstance]):
     def __init__(
         self, target: str | SPARQLWrapper, query: str, model: type[_TModelInstance]
     ) -> None:
-        self._query = query
+        self._query = check_query(query)
         self._model = model
 
         self.sparql_wrapper: SPARQLWrapper = (
diff --git a/rdfproxy/checks/checkers.py b/rdfproxy/checks/checkers.py
new file mode 100644
index 0000000..3899a1b
--- /dev/null
+++ b/rdfproxy/checks/checkers.py
@@ -0,0 +1,34 @@
+"""RDFProxy check runners."""
+
+from collections.abc import Callable
+from typing import Annotated, NoReturn, TypeVar
+
+from rdfproxy.checks.query_checks import (
+    check_parse_query,
+    check_select_query,
+    check_solution_modifiers,
+)
+
+
+T = TypeVar("T")
+_TCheck = Callable[[T], T | NoReturn]
+
+
+def compose_left(*fns: Callable) -> Callable:
+    def _left_wrapper(*fns):
+        fn, *rest_fns = fns
+
+        if rest_fns:
+            return lambda *args, **kwargs: fn(_left_wrapper(*rest_fns)(*args, **kwargs))
+        return fn
+
+    return _left_wrapper(*reversed(fns))
+
+
+def compose_checker(*checkers: _TCheck) -> _TCheck:
+    return compose_left(*checkers)
+
+
+check_query: Annotated[
+    _TCheck, "Run a series of checks on a query and return the query."
+] = compose_checker(check_parse_query, check_select_query, check_solution_modifiers)
diff --git a/rdfproxy/checks/query_checks.py b/rdfproxy/checks/query_checks.py
new file mode 100644
index 0000000..49003a0
--- /dev/null
+++ b/rdfproxy/checks/query_checks.py
@@ -0,0 +1,29 @@
+"""Query checks definitions."""
+
+from typing import NoReturn, TypeVar
+
+from rdflib.plugins.sparql.parser import parseQuery
+from rdfproxy.utils._exceptions import UnsupportedQueryException
+from rdfproxy.utils.predicates import (
+    query_has_solution_modifiers,
+    query_is_select_query,
+)
+
+TQuery = TypeVar("TQuery", bound=str)
+
+
+def check_parse_query(query: TQuery) -> TQuery | NoReturn:
+    parseQuery(query)
+    return query
+
+
+def check_select_query(query: TQuery) -> TQuery | NoReturn:
+    if not query_is_select_query(query):
+        raise UnsupportedQueryException("Only SELECT queries are applicable.")
+    return query
+
+
+def check_solution_modifiers(query: TQuery) -> TQuery | NoReturn:
+    if query_has_solution_modifiers(query):
+        raise UnsupportedQueryException("SPARQL solution modifiers are not supported.")
+    return query
diff --git a/rdfproxy/utils/_exceptions.py b/rdfproxy/utils/_exceptions.py
index 612214b..72a8f43 100644
--- a/rdfproxy/utils/_exceptions.py
+++ b/rdfproxy/utils/_exceptions.py
@@ -7,3 +7,7 @@ class MissingModelConfigException(Exception):
 
 class UnboundGroupingKeyException(Exception):
     """Exception for indicating that no SPARQL binding corresponds to the requested grouping key."""
+
+
+class UnsupportedQueryException(Exception):
+    """Exception for indicating that a given SPARQL query is not supported."""
diff --git a/rdfproxy/utils/predicates.py b/rdfproxy/utils/predicates.py
new file mode 100644
index 0000000..fa1af00
--- /dev/null
+++ b/rdfproxy/utils/predicates.py
@@ -0,0 +1,18 @@
+"""RDFProxy predicate functions."""
+
+import re
+
+from rdflib.plugins.sparql.parser import parseQuery
+
+
+def query_is_select_query(query: str) -> bool:
+    """Check if a SPARQL query is a SELECT query."""
+    _, query_type = parseQuery(query)
+    return query_type.name == "SelectQuery"
+
+
+def query_has_solution_modifiers(query: str) -> bool:
+    """Predicate for checking if a SPARQL query has a solution modifier."""
+    pattern = r"}[^}]*\w+\s*$"
+    result = re.search(pattern, query)
+    return bool(result)