Skip to content

Commit

Permalink
feat: implement strategies for SPARQL query functionality
Browse files Browse the repository at this point in the history
Currently, rdfproxy relies on SPARQLWrapper for querying triplestores;
this is not ideal since SPARQLWrapper occasionally gets
blacklisted, e.g. by Wikidata, and has caused severe performance issues in
the past.

This change introduces SPARQL query strategies for better control over
which SPARQL query backend RDFProxy uses. The
SPARQLWrapperStrategy implements exactly the previous SPARQLWrapper behavior.
The HttpxStrategy (the default) implements the query functionality required by
RDFProxy using raw httpx instead of SPARQLWrapper.

Closes #169.
  • Loading branch information
lu-pl committed Dec 11, 2024
1 parent 705077a commit 8e32a05
Show file tree
Hide file tree
Showing 2 changed files with 67 additions and 13 deletions.
22 changes: 9 additions & 13 deletions rdfproxy/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@
import math
from typing import Generic

from SPARQLWrapper import JSON, SPARQLWrapper
from rdfproxy.mapper import ModelBindingsMapper
from rdfproxy.sparql_strategies import HttpxStrategy, SPARQLStrategy
from rdfproxy.utils._types import _TModelInstance
from rdfproxy.utils.models import Page, QueryParameters
from rdfproxy.utils.sparql_utils import (
calculate_offset,
construct_count_query,
construct_items_query,
query_with_wrapper,
)


Expand All @@ -32,15 +31,16 @@ class SPARQLModelAdapter(Generic[_TModelInstance]):
"""

def __init__(
self, target: str | SPARQLWrapper, query: str, model: type[_TModelInstance]
self,
target: str,
query: str,
model: type[_TModelInstance],
sparql_strategy: type[SPARQLStrategy] = HttpxStrategy,
) -> None:
self._query = query
self._model = model

self.sparql_wrapper: SPARQLWrapper = (
SPARQLWrapper(target) if isinstance(target, str) else target
)
self.sparql_wrapper.setReturnFormat(JSON)
self.sparql_strategy = sparql_strategy(target)

def query(self, query_parameters: QueryParameters) -> Page[_TModelInstance]:
"""Run a query against an endpoint and return a Page model object."""
Expand All @@ -52,9 +52,7 @@ def query(self, query_parameters: QueryParameters) -> Page[_TModelInstance]:
offset=calculate_offset(query_parameters.page, query_parameters.size),
)

items_query_bindings: Iterator[dict] = query_with_wrapper(
query=items_query, sparql_wrapper=self.sparql_wrapper
)
items_query_bindings: Iterator[dict] = self.sparql_strategy.query(items_query)

mapper = ModelBindingsMapper(self._model, *items_query_bindings)

Expand All @@ -75,7 +73,5 @@ def _get_count(self, query: str) -> int:
Helper for SPARQLModelAdapter.query.
"""
result: Iterator[dict] = query_with_wrapper(
query=query, sparql_wrapper=self.sparql_wrapper
)
result: Iterator[dict] = self.sparql_strategy.query(query)
return int(next(result)["cnt"])
58 changes: 58 additions & 0 deletions rdfproxy/sparql_strategies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""Strategy classes for SPARQL query functionality."""

import abc
from collections.abc import Iterator

from SPARQLWrapper import JSON, QueryResult, SPARQLWrapper
import httpx


class SPARQLStrategy(abc.ABC):
    """Abstract base class for SPARQL query strategies.

    A strategy is constructed with an endpoint and must implement ``query``,
    which takes a SPARQL query string and returns an iterator of binding
    dicts.
    """

    def __init__(self, endpoint: str):
        """Store the endpoint on the instance as ``self.endpoint``."""
        self.endpoint = endpoint

    @abc.abstractmethod
    def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
        """Run ``sparql_query`` and return an iterator of binding dicts.

        Concrete strategies must override this method.
        """
        raise NotImplementedError

    @staticmethod
    def _get_bindings_from_bindings_dict(
        bindings_dict: dict,
    ) -> Iterator[dict[str, str]]:
        """Flatten a result dict into an iterator of ``{key: value}`` dicts.

        Reads ``bindings_dict["results"]["bindings"]`` and, for every binding
        in it, maps each key to the ``"value"`` entry of the associated dict.

        Raises:
            KeyError: Immediately if ``"results"`` or ``"bindings"`` is
                missing; lazily (while iterating) if an entry of a binding
                has no ``"value"`` key.
        """
        # Look the list up eagerly so a malformed result dict fails at call
        # time, not on first iteration.
        raw_bindings = bindings_dict["results"]["bindings"]
        return (
            {variable: term["value"] for variable, term in binding.items()}
            for binding in raw_bindings
        )


class SPARQLWrapperStrategy(SPARQLStrategy):
    """Query strategy that runs queries through a ``SPARQLWrapper`` object.

    The wrapper is created once for ``self.endpoint`` with its return format
    set to ``JSON`` and is reused for every call to ``query``.
    """

    def __init__(self, *args, **kwargs):
        """Initialise the base strategy, then build the wrapper object."""
        super().__init__(*args, **kwargs)

        self._sparql_wrapper = wrapper = SPARQLWrapper(self.endpoint)
        wrapper.setReturnFormat(JSON)

    def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
        """Run ``sparql_query`` via the wrapper and return flat binding dicts."""
        wrapper = self._sparql_wrapper
        # The query is set on the stored wrapper before it is executed.
        wrapper.setQuery(sparql_query)

        response: QueryResult = wrapper.query()
        bindings_dict = response.convert()
        return self._get_bindings_from_bindings_dict(bindings_dict)


class HttpxStrategy(SPARQLStrategy):
    """Query strategy that sends SPARQL queries to the endpoint with httpx."""

    def query(self, sparql_query: str) -> Iterator[dict[str, str]]:
        """Send ``sparql_query`` to the endpoint and return flat binding dicts.

        The response body is decoded as JSON and passed to
        ``_get_bindings_from_bindings_dict``.

        Raises:
            httpx.HTTPStatusError: If the endpoint responds with a
                non-success (non-2xx) status code.
        """
        result: httpx.Response = self._httpx_run_sparql_query(sparql_query)
        return self._get_bindings_from_bindings_dict(result.json())

    def _httpx_run_sparql_query(self, query: str) -> httpx.Response:
        """POST ``query`` to ``self.endpoint`` and return the response.

        The query is sent as form data together with ``output=json``, and the
        request asks for ``application/sparql-results+json`` via ``Accept``.

        Raises:
            httpx.HTTPStatusError: If the response status is not 2xx.
        """
        data = {"output": "json", "query": query}
        headers = {
            "Accept": "application/sparql-results+json",
        }

        response = httpx.post(
            self.endpoint,
            headers=headers,
            data=data,
        )
        # Fail here with a clear HTTP error; otherwise an error page only
        # surfaces later as a JSON decode error or a KeyError on "results".
        response.raise_for_status()

        return response

0 comments on commit 8e32a05

Please sign in to comment.