Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: ungrouped ordering #140

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 9 additions & 34 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ sparqlwrapper = "^2.0.0"
pydantic = "^2.9.2"


rdflib = "^7.1.1"
[tool.poetry.group.dev.dependencies]
ruff = "^0.7.0"
deptry = "^0.20.0"
Expand Down
69 changes: 65 additions & 4 deletions rdfproxy/utils/sparql_utils.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,80 @@
"""Functionality for dynamic SPARQL query modifcation."""

from collections.abc import Iterator
from collections.abc import Iterable, Iterator
from contextlib import contextmanager
from functools import partial
from itertools import chain
import re
from typing import cast
from typing import cast, overload

from SPARQLWrapper import QueryResult, SPARQLWrapper
from rdflib import Variable
from rdflib.plugins.sparql.parser import parseQuery
from rdflib.plugins.sparql.parserutils import CompValue, ParseResults
from rdfproxy.utils._exceptions import QueryConstructionException
from rdfproxy.utils._types import ItemsQueryConstructor, SPARQLBinding, _TModelInstance
from rdfproxy.utils.utils import _is_iterable_of_str


def construct_ungrouped_pagination_query(query: str, limit: int, offset: int) -> str:
@overload
def _compvalue_to_dict(comp_value: dict | CompValue) -> dict: ...


@overload
def _compvalue_to_dict(comp_value: list | ParseResults) -> list: ...


def _compvalue_to_dict(comp_value: CompValue):
    """Recursively turn a parsed value into plain Python dicts and lists.

    dict instances are rebuilt as dicts, list and ParseResults instances are
    rebuilt as lists, and every nested value is converted the same way.
    Any other value is handed back unchanged.
    """
    if isinstance(comp_value, dict):
        return {
            name: _compvalue_to_dict(child) for name, child in comp_value.items()
        }

    if isinstance(comp_value, (list, ParseResults)):
        return list(map(_compvalue_to_dict, comp_value))

    # Leaf value: nothing to unwrap.
    return comp_value


def get_query_projection(query: str) -> list[str]:
"""Parse a SPARQL SELECT query and extract the ordered bindings projection.

The first case handles explicit/literal binding projections.
The second case handles implicit/* binding projections.
The third case handles implicit/* binding projections with VALUES.
"""
_parse_result: CompValue = parseQuery(query)[1]
parsed_query: dict = _compvalue_to_dict(_parse_result)

match parsed_query:
case {"projection": projection}:
return [i["var"] for i in projection]
case {"where": {"part": [{"triples": triples}]}}:
projection = dict.fromkeys(
i for i in chain.from_iterable(triples) if isinstance(i, Variable)
)
return list(projection)
case {"where": {"part": [{"var": var}]}}:
return var
case _:
raise Exception("Unable to obtain query projection.")


def construct_ungrouped_pagination_query(
query: str, limit: int, offset: int, order_by: str | Iterable[str] | None = None
) -> str:
"""Construct an ungrouped pagination query."""
return f"{query} limit {limit} offset {offset}"
match order_by:
case None:
order_by_variables = get_query_projection(query)
case str():
order_by_variables = [order_by]
case order_by if _is_iterable_of_str(order_by):
order_by_variables = order_by
case _:
raise TypeError(
"order_by value must be of type str | Iterable[str] | None."
)

return f"{query} order by {' '.join(map(lambda x: f'?{x}', order_by_variables))} limit {limit} offset {offset}"


def replace_query_select_clause(query: str, repl: str) -> str:
Expand Down
72 changes: 72 additions & 0 deletions tests/tests_adapter/test_adapter_ungrouped_pagionation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Basic tests for rdfproxy.SPARQLModelAdapter pagination with ungrouped models."""

from collections.abc import Iterator
from itertools import permutations
from string import Template

import pytest

from pydantic import BaseModel
from rdfproxy import Page, QueryParameters, SPARQLModelAdapter


def _generate_queries() -> Iterator[str]:
"""Generate static queries using permuations of a VALUES data block."""
_values = [
"('z' 'a' 'foo')",
"('y' 'b' UNDEF)",
"('y' 'a' UNDEF)",
"('x' UNDEF UNDEF)",
]

values_permutations = permutations(_values, r=4)
query_template = Template(
"""
select ?parent ?child ?name
where {
values (?parent ?child ?name)
{ $values }
}
"""
)

for values in values_permutations:
values = " ".join(values)
yield query_template.substitute(values=values)


class Model(BaseModel):
    """Row model passed to the adapter in the ungrouped pagination test."""

    # Only `parent` is required; `child` and `name` fall back to None.
    parent: str
    child: str | None = None
    name: str | None = None


@pytest.mark.remote
@pytest.mark.parametrize("query", _generate_queries())
def test_ungrouped_pagination(query):
    """Run SPARQLModelAdapter.query with a test query and check the result ordering.

    Every parametrized query is expected to yield the same page.
    The duplicated parent 'y' rows are expected to be ordered by 'child',
    which requires ordering by all bindings of the projection.
    """
    expected_items = [
        {"parent": "x", "child": None, "name": None},
        {"parent": "y", "child": "a", "name": None},
        {"parent": "y", "child": "b", "name": None},
        {"parent": "z", "child": "a", "name": "foo"},
    ]
    expected = Page[Model](items=expected_items, page=1, size=100, total=4, pages=1)

    adapter = SPARQLModelAdapter(
        target="https://graphdb.r11.eu/repositories/RELEVEN",
        query=query,
        model=Model,
    )

    result = adapter.query(query_parameters=QueryParameters(page=1, size=100))
    assert result == expected
86 changes: 86 additions & 0 deletions tests/unit/test_construct_ungrouped_pagination_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""Unit tests for rdfproxy.utils.sparql_utils.construct_ungrouped_pagination_query."""

from collections.abc import Iterable
from typing import NamedTuple

import pytest

from rdfproxy.utils.sparql_utils import construct_ungrouped_pagination_query


class UngroupedPaginationQueryParameter(NamedTuple):
query: str
expected: str
order_by: str | Iterable[str] | None = None


# Test cases for construct_ungrouped_pagination_query.
# Every expected string ends in "limit 1 offset 2".
# Cases without order_by expect ordering by the query's own variables.
ungrouped_pagination_query_parameters = [
    # --- explicit projection over a triple pattern ---
    UngroupedPaginationQueryParameter(
        query="select ?s ?p ?o where {?s ?p ?o .}",
        expected="select ?s ?p ?o where {?s ?p ?o .} order by ?s ?p ?o limit 1 offset 2",
    ),
    UngroupedPaginationQueryParameter(
        query="select ?s ?p ?o where {?s ?p ?o .}",
        expected="select ?s ?p ?o where {?s ?p ?o .} order by ?test limit 1 offset 2",
        order_by="test",
    ),
    UngroupedPaginationQueryParameter(
        query="select ?s ?p ?o where {?s ?p ?o .}",
        expected="select ?s ?p ?o where {?s ?p ?o .} order by ?test limit 1 offset 2",
        order_by=["test"],
    ),
    UngroupedPaginationQueryParameter(
        query="select ?s ?p ?o where {?s ?p ?o .}",
        expected="select ?s ?p ?o where {?s ?p ?o .} order by ?test ?another_test limit 1 offset 2",
        order_by=["test", "another_test"],
    ),
    # --- wildcard projection over a triple pattern ---
    UngroupedPaginationQueryParameter(
        query="select * where {?s ?p ?o .}",
        expected="select * where {?s ?p ?o .} order by ?s ?p ?o limit 1 offset 2",
    ),
    UngroupedPaginationQueryParameter(
        query="select * where {?s ?p ?o .}",
        expected="select * where {?s ?p ?o .} order by ?test limit 1 offset 2",
        order_by="test",
    ),
    UngroupedPaginationQueryParameter(
        query="select * where {?s ?p ?o .}",
        expected="select * where {?s ?p ?o .} order by ?test limit 1 offset 2",
        order_by=["test"],
    ),
    UngroupedPaginationQueryParameter(
        query="select * where {?s ?p ?o .}",
        expected="select * where {?s ?p ?o .} order by ?test ?another_test limit 1 offset 2",
        order_by=["test", "another_test"],
    ),
    # --- VALUES block, explicit and wildcard projection (explicit order_by only) ---
    UngroupedPaginationQueryParameter(
        query="select ?s ?p ?o where {values (?s ?p ?o) {(1 2 3)}}",
        expected="select ?s ?p ?o where {values (?s ?p ?o) {(1 2 3)}} order by ?test limit 1 offset 2",
        order_by="test",
    ),
    UngroupedPaginationQueryParameter(
        query="select * where {values (?s ?p ?o) {(1 2 3)}}",
        expected="select * where {values (?s ?p ?o) {(1 2 3)}} order by ?test limit 1 offset 2",
        order_by="test",
    ),
    UngroupedPaginationQueryParameter(
        query="select * where {values (?s ?p ?o) {(1 2 3)}}",
        expected="select * where {values (?s ?p ?o) {(1 2 3)}} order by ?test limit 1 offset 2",
        order_by=["test"],
    ),
    UngroupedPaginationQueryParameter(
        query="select * where {values (?s ?p ?o) {(1 2 3)}}",
        expected="select * where {values (?s ?p ?o) {(1 2 3)}} order by ?test ?another_test limit 1 offset 2",
        order_by=["test", "another_test"],
    ),
]


@pytest.mark.parametrize(
    ["query", "expected", "order_by"], ungrouped_pagination_query_parameters
)
def test_basic_construct_ungrouped_pagination_query_default(query, expected, order_by):
    """Compare the query constructed with limit 1 and offset 2 to the expected string."""
    assert (
        construct_ungrouped_pagination_query(query, 1, 2, order_by=order_by)
        == expected
    )
Loading
Loading