Compare datasets - Integrate Quisby into Pbench Server API #3470

Merged: 7 commits, merged Jun 26, 2023
1 change: 1 addition & 0 deletions lib/pbench/client/__init__.py
@@ -39,6 +39,7 @@ class API(Enum):
    """

    DATASETS = "datasets"
    DATASETS_COMPARE = "datasets_compare"
    DATASETS_CONTENTS = "datasets_contents"
    DATASETS_DETAIL = "datasets_detail"
    DATASETS_INVENTORY = "datasets_inventory"
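The new member gives client code a symbolic name for the compare endpoint; its value, "datasets_compare", is the endpoint name registered on the server side below. A one-line sanity check, using nothing beyond what the diff itself establishes:

from pbench.client import API

# The enum's value is the endpoint name under which the server registers /compare.
assert API.DATASETS_COMPARE.value == "datasets_compare"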
7 changes: 7 additions & 0 deletions lib/pbench/server/api/__init__.py
@@ -14,6 +14,7 @@
from pbench.common.logger import get_pbench_logger
from pbench.server import PbenchServerConfig
from pbench.server.api.resources.api_key import APIKeyManage
from pbench.server.api.resources.datasets_compare import DatasetsCompare
from pbench.server.api.resources.datasets_inventory import DatasetsInventory
from pbench.server.api.resources.datasets_list import DatasetsList
from pbench.server.api.resources.datasets_metadata import DatasetsMetadata
@@ -63,6 +64,12 @@ def register_endpoints(api: Api, app: Flask, config: PbenchServerConfig):
endpoint="datasets",
resource_class_args=(config,),
)
api.add_resource(
DatasetsCompare,
f"{base_uri}/compare",
endpoint="datasets_compare",
resource_class_args=(config,),
)
api.add_resource(
DatasetsContents,
f"{base_uri}/datasets/<string:dataset>/contents/",
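With this registration in place, the comparison API is a simple GET against {base_uri}/compare. A minimal sketch of the call using plain requests; the host, token, and dataset resource IDs are placeholders, and the bearer token is only needed for private datasets:

import requests

SERVER = "https://pbench.example.com"  # placeholder host
TOKEN = "<api-key-or-bearer-token>"    # placeholder credential
DATASETS = "<id1>,<id2>"               # comma-separated dataset resource IDs

response = requests.get(
    f"{SERVER}/api/v1/compare",
    params={"datasets": DATASETS},
    headers={"Authorization": f"Bearer {TOKEN}"},
)
response.raise_for_status()
print(response.json()["status"])  # "success" when Quisby produced comparison data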
125 changes: 125 additions & 0 deletions lib/pbench/server/api/resources/datasets_compare.py
@@ -0,0 +1,125 @@
from http import HTTPStatus
from urllib.request import Request

from flask import current_app, jsonify
from flask.wrappers import Response
from pquisby.lib.post_processing import BenchmarkName, InputType, QuisbyProcessing

from pbench.server import OperationCode, PbenchServerConfig
from pbench.server.api.resources import (
    APIAbort,
    ApiAuthorization,
    ApiAuthorizationType,
    ApiBase,
    ApiContext,
    APIInternalError,
    ApiMethod,
    ApiParams,
    ApiSchema,
    Parameter,
    ParamType,
    Schema,
)
from pbench.server.cache_manager import (
    CacheManager,
    TarballNotFound,
    TarballUnpackError,
)
from pbench.server.database.models.datasets import Metadata


class DatasetsCompare(ApiBase):
    """
    This class implements the Server API used to retrieve comparison data for visualization.
    """

    def __init__(self, config: PbenchServerConfig):
        super().__init__(
            config,
            ApiSchema(
                ApiMethod.GET,
                OperationCode.READ,
                query_schema=Schema(
                    Parameter(
                        "datasets",
                        ParamType.LIST,
                        element_type=ParamType.DATASET,
                        string_list=",",
                        required=True,
                    ),
                ),
                authorization=ApiAuthorizationType.NONE,
            ),
        )

    def _get(
        self, params: ApiParams, request: Request, context: ApiContext
    ) -> Response:
        """
        This function uses Quisby to compare the selected datasets, producing data in a form that supports visualization.

        Args:
            params: includes the uri parameters, which provide the list of datasets.
            request: Original incoming Request object
            context: API context dictionary

        Raises:
            UnauthorizedAccess: The user isn't authorized for the requested access.
            APIAbort, reporting "NOT_FOUND" and "INTERNAL_SERVER_ERROR"
            APIInternalError, reporting the failure message

        GET /api/v1/compare?datasets=d1,d2,d3
        """

        datasets = params.query.get("datasets")
        benchmark_choice = None
        for dataset in datasets:
            benchmark = Metadata.getvalue(dataset, "dataset.metalog.pbench.script")
            # Validate that all the selected datasets are of the same benchmark
            if not benchmark_choice:
                benchmark_choice = benchmark
            elif benchmark != benchmark_choice:
                raise APIAbort(
                    HTTPStatus.BAD_REQUEST,
                    f"Selected dataset benchmarks must match: {benchmark_choice} and {benchmark} cannot be compared.",
                )

            # Validate that the user is authorized to access each selected dataset
            self._check_authorization(
                ApiAuthorization(
                    ApiAuthorizationType.USER_ACCESS,
                    OperationCode.READ,
                    dataset.owner_id,
                    dataset.access,
                )
            )
        cache_m = CacheManager(self.config, current_app.logger)
        stream_file = {}
        for dataset in datasets:
            try:
                tarball = cache_m.find_dataset(dataset.resource_id)
            except TarballNotFound as e:
                raise APIInternalError(
                    f"Expected dataset with ID '{dataset.resource_id}' is missing from the cache manager."
                ) from e
            try:
                file = tarball.extract(
                    tarball.tarball_path, f"{tarball.name}/result.csv"
                )
            except TarballUnpackError as e:
                raise APIInternalError(str(e)) from e
            stream_file[dataset.name] = file

        benchmark_type = BenchmarkName.__members__.get(benchmark.upper())
        if not benchmark_type:
            raise APIAbort(
                HTTPStatus.UNSUPPORTED_MEDIA_TYPE, f"Unsupported Benchmark: {benchmark}"
            )
        get_quisby_data = QuisbyProcessing().compare_csv_to_json(
            benchmark_type, InputType.STREAM, stream_file
        )
        if get_quisby_data["status"] != "success":
            raise APIInternalError(
                f"Quisby processing failure. Exception: {get_quisby_data['exception']}"
            )
        return jsonify(get_quisby_data)
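For debugging outside the server, the same pquisby call the resource makes can be driven directly from a pair of result.csv files. A hedged sketch: the file paths are hypothetical, and BenchmarkName.UPERF is assumed to exist because the resource looks benchmarks up by upper-cased name (the unit tests below exercise uperf):

from pquisby.lib.post_processing import BenchmarkName, InputType, QuisbyProcessing

# Hypothetical result.csv files extracted from two uperf dataset tarballs.
stream_file = {
    "uperf_1": open("uperf_1/result.csv").read(),
    "uperf_2": open("uperf_2/result.csv").read(),
}
result = QuisbyProcessing().compare_csv_to_json(
    BenchmarkName.UPERF, InputType.STREAM, stream_file
)
if result["status"] != "success":
    raise RuntimeError(result.get("exception"))
print(result["json_data"])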
166 changes: 166 additions & 0 deletions lib/pbench/test/unit/server/test_datasets_compare.py
@@ -0,0 +1,166 @@
from http import HTTPStatus
from pathlib import Path
from typing import Optional

from pquisby.lib.post_processing import QuisbyProcessing
import pytest
import requests

from pbench.server import JSON
from pbench.server.cache_manager import CacheManager, TarballUnpackError
from pbench.server.database.models.datasets import Dataset, DatasetNotFound, Metadata
from pbench.server.database.models.users import User


def mock_get_value(dataset: Dataset, key: str, user: Optional[User] = None) -> str:
    if dataset.name == "uperf_3" or dataset.name == "uperf_4":
        return "hammerDB"
return "uperf"


class TestCompareDatasets:
    @pytest.fixture()
    def query_get_as(self, client, server_config, more_datasets, get_token_func):
        """
        Helper fixture to perform the API query and validate an expected
        return status.

        Args:
            client: Flask test API client fixture
            server_config: Pbench config fixture
            more_datasets: Dataset construction fixture
            get_token_func: Pbench token fixture
        """

        def query_api(
            datasets: list, user: str, expected_status: HTTPStatus
        ) -> requests.Response:
            ds_list = []
            for dataset in datasets:
                try:
                    dataset_id = Dataset.query(name=dataset).resource_id
                    ds_list.append(dataset_id)
                except DatasetNotFound:
                    ds_list.append(dataset)  # Allow passing deliberately bad value
            headers = None
            if user:
                headers = {"authorization": f"bearer {get_token_func(user)}"}
            response = client.get(
                f"{server_config.rest_uri}/compare",
                query_string={"datasets": ds_list},
                headers=headers,
            )
            assert response.status_code == expected_status
            return response

        return query_api

    class MockTarball:
        tarball_path = Path("/dataset/tarball.tar.xz")
        name = "tarball"

        @staticmethod
        def extract(_tarball_path: Path, _path: str) -> str:
            return "CSV_file_as_a_string"

    def mock_find_dataset(self, dataset) -> MockTarball:
        # Validate the resource_id
        Dataset.query(resource_id=dataset)
        return self.MockTarball()

    def test_dataset_not_present(self, query_get_as, monkeypatch):
        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)

        query_get_as(["fio_2"], "drb", HTTPStatus.INTERNAL_SERVER_ERROR)

    def test_unsuccessful_get_with_incorrect_data(self, query_get_as, monkeypatch):
        @staticmethod
        def mock_extract(_tarball_path: Path, _path: str) -> str:
            return "IncorrectData"

        def mock_compare_csv_to_json(
            self, benchmark_name, input_type, data_stream
        ) -> JSON:
            return {"status": "failed", "exception": "Unsupported Media Type"}

        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(self.MockTarball, "extract", mock_extract)
        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
        monkeypatch.setattr(
            QuisbyProcessing, "compare_csv_to_json", mock_compare_csv_to_json
        )
        query_get_as(["uperf_1", "uperf_2"], "test", HTTPStatus.INTERNAL_SERVER_ERROR)

    def test_tarball_unpack_exception(self, query_get_as, monkeypatch):
        @staticmethod
        def mock_extract(_tarball_path: Path, _path: str):
            raise TarballUnpackError(
                _tarball_path, f"Testing unpack exception for path {_path}"
            )

        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(self.MockTarball, "extract", mock_extract)
        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
        query_get_as(["uperf_1", "uperf_2"], "test", HTTPStatus.INTERNAL_SERVER_ERROR)

    @pytest.mark.parametrize(
        "user,datasets,exp_status,exp_message",
        (
            (
                "drb",
                ["uperf_1", "nonexistent-dataset"],
                HTTPStatus.BAD_REQUEST,
                "Unrecognized list value ['nonexistent-dataset'] given for parameter datasets; expected Dataset",
            ),
            (
                "drb",
                ["uperf_1", "uperf_2"],
                HTTPStatus.FORBIDDEN,
                "User drb is not authorized to READ a resource owned by test with private access",
            ),
            (
                "test",
                ["uperf_1", "uperf_2"],
                HTTPStatus.OK,
                None,
            ),
            (
                None,
                ["fio_1", "fio_2"],
                HTTPStatus.OK,
                None,
            ),
            (
                "test",
                ["fio_1", "uperf_3"],
                HTTPStatus.BAD_REQUEST,
                "Selected dataset benchmarks must match: uperf and hammerDB cannot be compared.",
            ),
            (
                "test",
                ["uperf_3", "uperf_4"],
                HTTPStatus.UNSUPPORTED_MEDIA_TYPE,
                "Unsupported Benchmark: hammerDB",
            ),
        ),
    )
    def test_datasets_with_different_benchmark(
        self, user, datasets, exp_status, exp_message, query_get_as, monkeypatch
    ):
        def mock_compare_csv_to_json(
            self, benchmark_name, input_type, data_stream
        ) -> JSON:
            return {"status": "success", "json_data": "quisby_data"}

        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
        monkeypatch.setattr(Metadata, "getvalue", mock_get_value)
        monkeypatch.setattr(
            QuisbyProcessing, "compare_csv_to_json", mock_compare_csv_to_json
        )

        response = query_get_as(datasets, user, exp_status)
        if exp_status == HTTPStatus.OK:
            assert response.json["status"] == "success"
            assert response.json["json_data"] == "quisby_data"
        else:
            assert response.json["message"] == exp_message
1 change: 1 addition & 0 deletions lib/pbench/test/unit/server/test_endpoint_configure.py
@@ -59,6 +59,7 @@ def check_config(self, client, server_config, host, my_headers={}):
"template": f"{uri}/datasets/{{dataset}}",
"params": {"dataset": {"type": "string"}},
},
"datasets_compare": {"template": f"{uri}/compare", "params": {}},
"datasets_contents": {
"template": f"{uri}/datasets/{{dataset}}/contents/{{target}}",
"params": {
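Because the new route is also advertised by the endpoints API, as the updated expectation above shows, a client can discover it at runtime rather than hard-coding the path. A hedged sketch; the nesting of the payload under a top-level "uri" key is an assumption drawn from the shape of check_config's expected dictionary, not something this diff shows:

import requests

SERVER = "https://pbench.example.com"  # placeholder host
endpoints = requests.get(f"{SERVER}/api/v1/endpoints").json()

# Assumed payload shape: {"uri": {"datasets_compare": {"template": ..., "params": {}}, ...}}
template = endpoints["uri"]["datasets_compare"]["template"]
print(template)  # e.g. https://pbench.example.com/api/v1/compare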