Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support generic sorting #3373

Merged
merged 3 commits into from
Apr 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion docs/API/V1/list.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ can only list datasets with access `public`.)

The collection of datasets may be filtered using any combination of a number
of query parameters, including `owner`, `access`, `name` substring, date range,
and arbitrary metadata filter expressions.
and arbitrary metadata filter expressions. The selected datasets may be sorted
by any metadata key value in either ascending or descending order. Multiple
sort parameters will be processed in order.

Large collections can be paginated for efficiency using the `limit` and `offset`
query parameters.
Expand Down Expand Up @@ -113,6 +115,24 @@ with a paginated display or to limit data transfer requirements.
Select only datasets owned by the specified username. Unless the username
matches the authenticated user, only "public" datasets can be selected.

`sort` sort expression \
Sort the returned datasets by one or more sort expressions. You can separate
multiple expressions using comma lists, or across separate `sort` query
parameters, which will be processed in order. Any Metadata namespace key can
be specified.

Specify a sort order using the keywords `asc` (ascending) or `desc`
(descending), separated from the key name with a colon (`:`). For example,
`dataset.name:asc` or `dataset.metalog.pbench.script:desc`. The default is
"ascending" if no order is specified. If no sort expressions are specified,
datasets are returned sorted by `dataset.resource_id`.

For example, `GET /api/v1/datasets?sort=global.dashboard.seen:desc,dataset.name`
will return the selected datasets sorted first in descending order based on whether
the dataset has been marked "seen" by the dashboard, and then, secondarily, by the
dataset name. The Pbench Dashboard stores `global.dashboard.seen` as a `boolean`
value, so in this case `true` values will appear before `false` values.

`start` date/time \
Select only datasets created on or after the specified time. Time should be
specified in ISO standard format, as `YYYY-MM-DDThh:mm:ss.ffffff[+|-]HH:MM`.
Expand Down
67 changes: 62 additions & 5 deletions lib/pbench/server/api/resources/datasets_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from flask import current_app
from flask.json import jsonify
from flask.wrappers import Request, Response
from sqlalchemy import and_, cast, func, or_, String
from sqlalchemy import and_, asc, cast, desc, func, or_, String
from sqlalchemy.exc import ProgrammingError, StatementError
from sqlalchemy.orm import aliased, Query
from sqlalchemy.sql.expression import Alias
Expand Down Expand Up @@ -80,6 +80,12 @@ def __init__(self, config: PbenchServerConfig):
string_list=",",
metalog_ok=True,
),
Parameter(
"sort",
ParamType.LIST,
element_type=ParamType.STRING,
string_list=",",
),
),
authorization=ApiAuthorizationType.USER_ACCESS,
),
Expand All @@ -105,7 +111,7 @@ def get_paginated_obj(
start to narrow down the result.
"""
paginated_result = {}
query = query.order_by(Dataset.resource_id).distinct()
query = query.distinct()
total_count = query.count()

# Shift the query search by user specified offset value,
Expand Down Expand Up @@ -222,7 +228,7 @@ def filter_query(
k, v = kw.split(":", maxsplit=1)
except ValueError:
raise APIAbort(
HTTPStatus.BAD_REQUEST, f"filter {kw!r} must have the form 'k=v'"
HTTPStatus.BAD_REQUEST, f"filter {kw!r} must have the form 'k:v'"
)
if k.startswith("^"):
combine_or = True
Expand Down Expand Up @@ -372,20 +378,71 @@ def daterange(self, query: Query) -> JSONOBJECT:
else:
return {}

def datasets(self, request: Request, json: JSONOBJECT, query: Query) -> JSONOBJECT:
def datasets(
self, request: Request, aliases: dict[str, Any], json: JSONOBJECT, query: Query
) -> JSONOBJECT:
"""Gather and paginate the selected datasets

Run the query we've compiled, with pagination limits applied; collect
results into a list of JSON objects including selected metadata keys.

Args:
request: The HTTP Request object
aliases: Map of join column aliases for each Metadata namespace
json: The JSON query parameters
query: The basic filtered SQLAlchemy query object

Returns:
The paginated dataset listing
"""

# Process a possible list of sort terms. By default, we sort by the
# dataset resource_id.
sorters = []
for sort in json.get("sort", ["dataset.resource_id"]):
if ":" not in sort:
k = sort
order = asc
else:
k, o = sort.split(":", maxsplit=1)
if o.lower() == "asc":
order = asc
elif o.lower() == "desc":
order = desc
else:
raise APIAbort(
HTTPStatus.BAD_REQUEST,
f"The sort order {o!r} for key {k!r} must be 'asc' or 'desc'",
)

if not Metadata.is_key_path(k, Metadata.METADATA_KEYS, metalog_key_ok=True):
raise APIAbort(HTTPStatus.BAD_REQUEST, str(MetadataBadKey(k)))
keys = k.split(".")
native_key = keys.pop(0).lower()
sorter = None
if native_key == Metadata.DATASET:
second = keys[0].lower()
# The dataset namespace requires special handling because
# "dataset.metalog" is really a special native key space
# named "metalog", while other "dataset" sub-keys are primary
# columns in the Dataset table.
if second == Metadata.METALOG:
native_key = keys.pop(0).lower()
else:
try:
c = getattr(Dataset, second)
except AttributeError as e:
raise APIAbort(
HTTPStatus.BAD_REQUEST, str(MetadataBadKey(k))
) from e
sorter = order(c)
if sorter is None:
sorter = order(aliases[native_key].value[keys])
sorters.append(sorter)

# Apply our list of sort terms
query = query.order_by(*sorters)

try:
datasets, paginated_result = self.get_paginated_obj(
query=query, json=json, url=request.url
Expand Down Expand Up @@ -534,5 +591,5 @@ def _get(
result.update(self.daterange(query))
done = True
if not done:
result = self.datasets(request, json, query)
result = self.datasets(request, aliases, json, query)
return jsonify(result)
88 changes: 46 additions & 42 deletions lib/pbench/test/unit/server/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,10 +386,10 @@ def more_datasets(
test 20 private 1970-01-01:00:42
fio_1 3 public 1978-06-26:08:00
fio_2 20 public 2022-01-01:00:00
uperf_1 20 private 1978-06-26:08:00
uperf_2 20 private 1978-06-26:08:00
uperf_3 20 private 1978-06-26:08:00
uperf_4 20 private 1978-06-26:08:00
uperf_1 20 private 1978-06-26:08:01
uperf_2 20 private 1978-06-26:09:00
uperf_3 20 private 1978-06-26:09:30
uperf_4 20 private 1978-06-26:10:00

Args:
client: Provide a Flask API client
Expand All @@ -399,44 +399,48 @@ def more_datasets(
attach_dataset: Provide some datasets
create_user: Create the "test" user
"""
with freeze_time("1978-06-26 08:00:00"):
Dataset(
owner=create_drb_user,
name="fio_1",
access="public",
resource_id="random_md5_string3",
).add()
Dataset(
owner=create_user,
uploaded=datetime.datetime(2022, 1, 1),
name="fio_2",
access="public",
resource_id="random_md5_string4",
).add()
Dataset(
owner=create_user,
name="uperf_1",
access="private",
resource_id="random_md5_string5",
).add()
Dataset(
owner=create_user,
name="uperf_2",
access="private",
resource_id="random_md5_string6",
).add()
Dataset(
owner=create_user,
name="uperf_3",
access="private",
resource_id="random_md5_string7",
).add()
Dataset(
owner=create_user,
name="uperf_4",
access="private",
resource_id="random_md5_string8",
).add()
Dataset(
owner=create_drb_user,
uploaded=datetime.datetime(1978, 6, 26, 8, 0, 0, 0),
name="fio_1",
access="public",
resource_id="random_md5_string3",
).add()
Dataset(
owner=create_user,
uploaded=datetime.datetime(2022, 1, 1),
name="fio_2",
access="public",
resource_id="random_md5_string4",
).add()
Dataset(
owner=create_user,
uploaded=datetime.datetime(1978, 6, 26, 8, 1, 0, 0),
name="uperf_1",
access="private",
resource_id="random_md5_string5",
).add()
Dataset(
owner=create_user,
uploaded=datetime.datetime(1978, 6, 26, 9, 0, 0, 0),
name="uperf_2",
access="private",
resource_id="random_md5_string6",
).add()
Dataset(
owner=create_user,
uploaded=datetime.datetime(1978, 6, 26, 9, 30, 0, 0),
name="uperf_3",
access="private",
resource_id="random_md5_string7",
).add()
Dataset(
owner=create_user,
uploaded=datetime.datetime(1978, 6, 26, 10, 0, 0, 0),
name="uperf_4",
access="private",
resource_id="random_md5_string8",
).add()


@pytest.fixture()
Expand Down
Loading