Skip to content

Commit

Permalink
PBENCH-1127 Implementation of Quisby API (#3463)
Browse files Browse the repository at this point in the history
Integrated the `pquisby` package into the Pbench server. The first pass of this API implementation will be used to retrieve Quisby data for single-dataset visualization.

Currently, `pquisby` supports only the `uperf` benchmark; we will eventually add support for other benchmarks as well.

We fetch the benchmark type from `dataset.metalog.pbench.script`. Unfortunately, this can't recognize a `pbench-user-benchmark` run with a `uperf` command. Solving this issue will require migrating Agent postprocessing into the server, as `pbench-user-benchmark` won't create the `result.csv` file.

`GET /api/v1/datasets/{dataset}/visualize`

Co-authored-by: siddardh <sira@redhat27!>
  • Loading branch information
siddardh-ra and siddardh authored Jun 21, 2023
1 parent d8e6b81 commit 68d543d
Show file tree
Hide file tree
Showing 9 changed files with 245 additions and 13 deletions.
1 change: 1 addition & 0 deletions lib/pbench/client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class API(Enum):
DATASETS_NAMESPACE = "datasets_namespace"
DATASETS_SEARCH = "datasets_search"
DATASETS_VALUES = "datasets_values"
DATASETS_VISUALIZE = "datasets_visualize"
ENDPOINTS = "endpoints"
KEY = "key"
RELAY = "relay"
Expand Down
7 changes: 7 additions & 0 deletions lib/pbench/server/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pbench.server.api.resources.datasets_inventory import DatasetsInventory
from pbench.server.api.resources.datasets_list import DatasetsList
from pbench.server.api.resources.datasets_metadata import DatasetsMetadata
from pbench.server.api.resources.datasets_visualize import DatasetsVisualize
from pbench.server.api.resources.endpoint_configure import EndpointConfig
from pbench.server.api.resources.query_apis.dataset import Datasets
from pbench.server.api.resources.query_apis.datasets.datasets_contents import (
Expand Down Expand Up @@ -119,6 +120,12 @@ def register_endpoints(api: Api, app: Flask, config: PbenchServerConfig):
endpoint="datasets_search",
resource_class_args=(config,),
)
api.add_resource(
DatasetsVisualize,
f"{base_uri}/datasets/<string:dataset>/visualize",
endpoint="datasets_visualize",
resource_class_args=(config,),
)
api.add_resource(
EndpointConfig,
f"{base_uri}/endpoints",
Expand Down
99 changes: 99 additions & 0 deletions lib/pbench/server/api/resources/datasets_visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from http import HTTPStatus
from urllib.request import Request

from flask import current_app, jsonify
from flask.wrappers import Response
from pquisby.lib.post_processing import BenchmarkName, InputType, QuisbyProcessing

from pbench.server import OperationCode, PbenchServerConfig
from pbench.server.api.resources import (
APIAbort,
ApiAuthorizationType,
ApiBase,
ApiContext,
APIInternalError,
ApiMethod,
ApiParams,
ApiSchema,
Parameter,
ParamType,
Schema,
)
from pbench.server.cache_manager import (
CacheManager,
TarballNotFound,
TarballUnpackError,
)
from pbench.server.database import Dataset


class DatasetsVisualize(ApiBase):
    """
    This class implements the Server API used to retrieve data for visualization.

    GET /api/v1/datasets/{dataset}/visualize
    """

    def __init__(self, config: PbenchServerConfig):
        """
        Register a single GET (READ) operation whose only URI parameter is
        the required dataset, authorized against that dataset.

        Args:
            config: the Pbench server configuration object
        """
        super().__init__(
            config,
            ApiSchema(
                ApiMethod.GET,
                OperationCode.READ,
                uri_schema=Schema(
                    Parameter("dataset", ParamType.DATASET, required=True),
                ),
                authorization=ApiAuthorizationType.DATASET,
            ),
        )

    def _get(
        self, params: ApiParams, request: Request, context: ApiContext
    ) -> Response:
        """
        Use Quisby to process a dataset's results into a form that supports
        visualization.

        The dataset's tarball is located through the cache manager, the
        benchmark type is derived from the "dataset.metalog.pbench.script"
        metadata value, and the tarball's "<name>/result.csv" member is
        handed to Quisby as a stream.

        Args:
            params: includes the uri parameters, which provide the dataset.
            request: Original incoming Request object
            context: API context dictionary

        Raises:
            APIAbort: "NOT_FOUND" when the cache manager has no tarball for
                the dataset; "UNSUPPORTED_MEDIA_TYPE" when the benchmark
                script is missing or is not a member of BenchmarkName.
            APIInternalError: when "result.csv" can't be extracted from the
                tarball, or when Quisby doesn't report "success".

        Returns:
            The Quisby result dictionary as a JSON response.

        GET /api/v1/datasets/{dataset}/visualize
        """

        dataset = params.uri["dataset"]
        cache_m = CacheManager(self.config, current_app.logger)

        try:
            tarball = cache_m.find_dataset(dataset.resource_id)
        except TarballNotFound as e:
            raise APIAbort(
                HTTPStatus.NOT_FOUND, f"No dataset with ID '{e.tarball}' found"
            ) from e

        script_key = "dataset.metalog.pbench.script"
        metadata = self._get_dataset_metadata(dataset, [script_key])
        script = metadata[script_key]

        # The script metadata may be absent (presumably reported as None --
        # TODO confirm against _get_dataset_metadata); treat anything that
        # isn't a string as an unsupported benchmark rather than failing on
        # the ".upper()" call with an AttributeError.
        if isinstance(script, str):
            benchmark = script.upper()
            benchmark_type = BenchmarkName.__members__.get(benchmark)
        else:
            benchmark = script
            benchmark_type = None

        if not benchmark_type:
            raise APIAbort(
                HTTPStatus.UNSUPPORTED_MEDIA_TYPE, f"Unsupported Benchmark: {benchmark}"
            )

        name = Dataset.stem(tarball.tarball_path)
        try:
            file = tarball.extract(tarball.tarball_path, f"{name}/result.csv")
        except TarballUnpackError as e:
            raise APIInternalError(str(e)) from e

        get_quisby_data = QuisbyProcessing().extract_data(
            benchmark_type, dataset.name, InputType.STREAM, file
        )

        if get_quisby_data["status"] != "success":
            raise APIInternalError(
                f"Quisby processing failure. Exception: {get_quisby_data['exception']}"
            )
        return jsonify(get_quisby_data)
4 changes: 2 additions & 2 deletions lib/pbench/server/cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(self, tarball: str):
self.tarball = tarball

def __str__(self) -> str:
return f"The dataset tarball named {self.tarball!r} is not present in the cache manager"
return f"The dataset tarball named {self.tarball!r} is not found"


class DuplicateTarball(CacheManagerError):
Expand All @@ -59,7 +59,7 @@ def __init__(self, tarball: str):
self.tarball = tarball

def __str__(self) -> str:
return f"A dataset tarball named {self.tarball!r} is already present in the cache manager"
return f"A dataset tarball named {self.tarball!r} is already present"


class MetadataError(CacheManagerError):
Expand Down
12 changes: 3 additions & 9 deletions lib/pbench/test/unit/server/test_cache_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,7 @@ def test_create_bad(
cm.create(tarball.tarball_path)
assert (
str(exc.value)
== "A dataset tarball named 'pbench-user-benchmark_some + config_2021.05.01T12.42.42' is already present in the cache manager"
== "A dataset tarball named 'pbench-user-benchmark_some + config_2021.05.01T12.42.42' is already present"
)
assert tarball.metadata == fake_get_metadata(tarball.tarball_path)
assert exc.value.tarball == tarball.name
Expand Down Expand Up @@ -924,10 +924,7 @@ def mock_run(args, **kwargs):
assert tarball == cm[md5]
with pytest.raises(TarballNotFound) as exc:
cm["foobar"]
assert (
str(exc.value)
== "The dataset tarball named 'foobar' is not present in the cache manager"
)
assert str(exc.value) == "The dataset tarball named 'foobar' is not found"

# Test __contains__
assert md5 in cm
Expand All @@ -946,10 +943,7 @@ def mock_run(args, **kwargs):
# Try to find a dataset that doesn't exist
with pytest.raises(TarballNotFound) as exc:
cm.find_dataset("foobar")
assert (
str(exc.value)
== "The dataset tarball named 'foobar' is not present in the cache manager"
)
assert str(exc.value) == "The dataset tarball named 'foobar' is not found"
assert exc.value.tarball == "foobar"

# Unpack the dataset, creating INCOMING and RESULTS links
Expand Down
2 changes: 1 addition & 1 deletion lib/pbench/test/unit/server/test_datasets_inventory.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def test_get_no_dataset(self, query_get_as):
def test_dataset_not_present(self, query_get_as):
response = query_get_as("fio_2", "metadata.log", HTTPStatus.NOT_FOUND)
assert response.json == {
"message": "The dataset tarball named 'random_md5_string4' is not present in the cache manager"
"message": "The dataset tarball named 'random_md5_string4' is not found"
}

def test_unauthorized_access(self, query_get_as):
Expand Down
127 changes: 127 additions & 0 deletions lib/pbench/test/unit/server/test_datasets_visualize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
from http import HTTPStatus
from pathlib import Path

from pquisby.lib.post_processing import QuisbyProcessing
import pytest
import requests

from pbench.server import JSON
from pbench.server.api.resources import ApiBase
from pbench.server.cache_manager import CacheManager, Tarball
from pbench.server.database.models.datasets import Dataset, DatasetNotFound


class TestVisualize:
    """Unit tests for the `GET .../datasets/{dataset}/visualize` API."""

    @pytest.fixture()
    def query_get_as(self, client, server_config, more_datasets, get_token_func):
        """
        Helper fixture to perform the API query and validate an expected
        return status.

        Args:
            client: Flask test API client fixture
            server_config: Pbench config fixture
            more_datasets: Dataset construction fixture
            get_token_func: Pbench token fixture
        """

        def query_api(
            dataset: str, user, expected_status: HTTPStatus
        ) -> requests.Response:
            # Translate the dataset name into its resource ID; an unknown
            # name is passed through unchanged so that tests can probe the
            # API with a deliberately bad value.
            try:
                found = Dataset.query(name=dataset)
                dataset_id = found.resource_id
            except DatasetNotFound:
                dataset_id = dataset
            token = get_token_func(user)
            uri = f"{server_config.rest_uri}/datasets/{dataset_id}/visualize"
            response = client.get(
                uri,
                headers={"authorization": f"bearer {token}"},
            )
            assert response.status_code == expected_status
            return response

        return query_api

    def mock_find_dataset(self, _dataset: str) -> Tarball:
        """Stand-in for CacheManager.find_dataset returning a fake tarball."""

        class Tarball(object):
            tarball_path = Path("/dataset/tarball.tar.xz")

            # Invoked on the class itself, so there is no "self" argument.
            def extract(_tarball_path: Path, _path: str) -> str:
                return "CSV_file_as_a_byte_stream"

        return Tarball

    def mock_get_dataset_metadata(self, _dataset, _key) -> JSON:
        """Stand-in for ApiBase._get_dataset_metadata reporting `uperf`."""
        return {"dataset.metalog.pbench.script": "uperf"}

    def test_get_no_dataset(self, query_get_as):
        """A dataset name that doesn't exist yields NOT_FOUND."""
        response = query_get_as("nonexistent-dataset", "drb", HTTPStatus.NOT_FOUND)
        expected = {"message": "Dataset 'nonexistent-dataset' not found"}
        assert response.json == expected

    def test_dataset_not_present(self, query_get_as):
        """A known dataset whose tarball can't be found yields NOT_FOUND."""
        response = query_get_as("fio_2", "drb", HTTPStatus.NOT_FOUND)
        expected = {"message": "No dataset with ID 'random_md5_string4' found"}
        assert response.json == expected

    def test_unauthorized_access(self, query_get_as):
        """A user without READ access to the dataset gets FORBIDDEN."""
        response = query_get_as("test", "drb", HTTPStatus.FORBIDDEN)
        expected = {
            "message": "User drb is not authorized to READ a resource owned by test with private access"
        }
        assert response.json == expected

    def test_successful_get(self, query_get_as, monkeypatch):
        """A successful Quisby result is returned to the caller as JSON."""

        def mock_extract_data(self, test_name, dataset_name, input_type, data) -> JSON:
            return {"status": "success", "json_data": "quisby_data"}

        monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data)
        monkeypatch.setattr(
            ApiBase, "_get_dataset_metadata", self.mock_get_dataset_metadata
        )
        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)

        response = query_get_as("uperf_1", "test", HTTPStatus.OK)
        payload = response.json
        assert payload["status"] == "success"
        assert payload["json_data"] == "quisby_data"

    def test_unsuccessful_get_with_incorrect_data(self, query_get_as, monkeypatch):
        """A Quisby "failed" status is reported as an internal server error."""

        def mock_find_dataset_with_incorrect_data(self, dataset) -> Tarball:
            class Tarball(object):
                tarball_path = Path("/dataset/tarball.tar.xz")

                # Invoked on the class itself, so there is no "self" argument.
                def extract(tarball_path, path) -> str:
                    return "IncorrectData"

            return Tarball

        def mock_extract_data(self, test_name, dataset_name, input_type, data) -> JSON:
            return {"status": "failed", "exception": "Unsupported Media Type"}

        monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data)
        monkeypatch.setattr(
            ApiBase, "_get_dataset_metadata", self.mock_get_dataset_metadata
        )
        monkeypatch.setattr(
            CacheManager, "find_dataset", mock_find_dataset_with_incorrect_data
        )

        response = query_get_as("uperf_1", "test", HTTPStatus.INTERNAL_SERVER_ERROR)
        message = response.json["message"]
        assert message.startswith("Internal Pbench Server Error: log reference ")

    def test_unsupported_benchmark(self, query_get_as, monkeypatch):
        """An unsupported benchmark is rejected before Quisby is called."""
        # Remains True for as long as the Quisby mock is never invoked.
        quisby_not_called = True

        def mock_extract_data(*args, **kwargs) -> JSON:
            nonlocal quisby_not_called
            quisby_not_called = False

        def mock_get_metadata(self, dataset, key) -> JSON:
            return {"dataset.metalog.pbench.script": "hammerDB"}

        monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data)
        monkeypatch.setattr(ApiBase, "_get_dataset_metadata", mock_get_metadata)
        monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)

        response = query_get_as("uperf_1", "test", HTTPStatus.UNSUPPORTED_MEDIA_TYPE)
        assert response.json["message"] == "Unsupported Benchmark: HAMMERDB"
        assert quisby_not_called is True
4 changes: 4 additions & 0 deletions lib/pbench/test/unit/server/test_endpoint_configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ def check_config(self, client, server_config, host, my_headers={}):
"dataset_view": {"type": "string"},
},
},
"datasets_visualize": {
"template": f"{uri}/datasets/{{dataset}}/visualize",
"params": {"dataset": {"type": "string"}},
},
"endpoints": {"template": f"{uri}/endpoints", "params": {}},
"key": {
"template": f"{uri}/key/{{key}}",
Expand Down
2 changes: 1 addition & 1 deletion server/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ python-dateutil
requests # TODO CVE-2023-32681 (>=2.31.0)
sdnotify
sqlalchemy>=1.4.23
sqlalchemy_utils>=0.37.6
sqlalchemy_utils>=0.37.6

0 comments on commit 68d543d

Please sign in to comment.