From 68d543d944a620033dfcfff2dc4d6f3842ecf282 Mon Sep 17 00:00:00 2001 From: Siddardh Ramesh <30310381+siddardh-ra@users.noreply.github.com> Date: Thu, 22 Jun 2023 02:08:37 +0530 Subject: [PATCH] PBENCH-1127 Implementation of Quisby API (#3463) Integrated pquisby package into the Pbench server. The first pass of this API implementation will be used for retrieving quisby data for single dataset visualization. Currently, `pquisby` supports only the `uperf` benchmark; eventually, we will increase the support for other benchmarks too. We fetch the benchmark type from `dataset.metalog.pbench.script`. Unfortunately this can't recognize a `pbench-user-benchmark` with a `uperf` command. Solving this issue will require migrating Agent postprocessing into the server as `pbench-user-benchmark` won't create the `result.csv` file. `GET /api/v1/datasets/{dataset}/visualize` Co-authored-by: siddardh --- lib/pbench/client/__init__.py | 1 + lib/pbench/server/api/__init__.py | 7 + .../api/resources/datasets_visualize.py | 99 ++++++++++++++ lib/pbench/server/cache_manager.py | 4 +- .../test/unit/server/test_cache_manager.py | 12 +- .../unit/server/test_datasets_inventory.py | 2 +- .../unit/server/test_datasets_visualize.py | 127 ++++++++++++++++++ .../unit/server/test_endpoint_configure.py | 4 + server/requirements.txt | 2 +- 9 files changed, 245 insertions(+), 13 deletions(-) create mode 100644 lib/pbench/server/api/resources/datasets_visualize.py create mode 100644 lib/pbench/test/unit/server/test_datasets_visualize.py diff --git a/lib/pbench/client/__init__.py b/lib/pbench/client/__init__.py index 8870dd812f..c7dd0426e7 100644 --- a/lib/pbench/client/__init__.py +++ b/lib/pbench/client/__init__.py @@ -47,6 +47,7 @@ class API(Enum): DATASETS_NAMESPACE = "datasets_namespace" DATASETS_SEARCH = "datasets_search" DATASETS_VALUES = "datasets_values" + DATASETS_VISUALIZE = "datasets_visualize" ENDPOINTS = "endpoints" KEY = "key" RELAY = "relay" diff --git 
a/lib/pbench/server/api/__init__.py b/lib/pbench/server/api/__init__.py index 7942e06e55..32ebe56065 100644 --- a/lib/pbench/server/api/__init__.py +++ b/lib/pbench/server/api/__init__.py @@ -17,6 +17,7 @@ from pbench.server.api.resources.datasets_inventory import DatasetsInventory from pbench.server.api.resources.datasets_list import DatasetsList from pbench.server.api.resources.datasets_metadata import DatasetsMetadata +from pbench.server.api.resources.datasets_visualize import DatasetsVisualize from pbench.server.api.resources.endpoint_configure import EndpointConfig from pbench.server.api.resources.query_apis.dataset import Datasets from pbench.server.api.resources.query_apis.datasets.datasets_contents import ( @@ -119,6 +120,12 @@ def register_endpoints(api: Api, app: Flask, config: PbenchServerConfig): endpoint="datasets_search", resource_class_args=(config,), ) + api.add_resource( + DatasetsVisualize, + f"{base_uri}/datasets/<string:dataset>/visualize", + endpoint="datasets_visualize", + resource_class_args=(config,), + ) api.add_resource( EndpointConfig, f"{base_uri}/endpoints", diff --git a/lib/pbench/server/api/resources/datasets_visualize.py b/lib/pbench/server/api/resources/datasets_visualize.py new file mode 100644 index 0000000000..87673ada79 --- /dev/null +++ b/lib/pbench/server/api/resources/datasets_visualize.py @@ -0,0 +1,99 @@ +from http import HTTPStatus +from urllib.request import Request + +from flask import current_app, jsonify +from flask.wrappers import Response +from pquisby.lib.post_processing import BenchmarkName, InputType, QuisbyProcessing + +from pbench.server import OperationCode, PbenchServerConfig +from pbench.server.api.resources import ( + APIAbort, + ApiAuthorizationType, + ApiBase, + ApiContext, + APIInternalError, + ApiMethod, + ApiParams, + ApiSchema, + Parameter, + ParamType, + Schema, +) +from pbench.server.cache_manager import ( + CacheManager, + TarballNotFound, + TarballUnpackError, +) +from pbench.server.database import Dataset + + 
+class DatasetsVisualize(ApiBase): + """ + This class implements the Server API used to retrieve data for visualization. + """ + + def __init__(self, config: PbenchServerConfig): + super().__init__( + config, + ApiSchema( + ApiMethod.GET, + OperationCode.READ, + uri_schema=Schema( + Parameter("dataset", ParamType.DATASET, required=True), + ), + authorization=ApiAuthorizationType.DATASET, + ), + ) + + def _get( + self, params: ApiParams, request: Request, context: ApiContext + ) -> Response: + """ + This function is using Quisby to process results into a form that supports visualization + + Args: + params: includes the uri parameters, which provide the dataset. + request: Original incoming Request object + context: API context dictionary + + Raises: + APIAbort, reporting "NOT_FOUND", "UNSUPPORTED_MEDIA_TYPE" and "INTERNAL_SERVER_ERROR" + + GET /api/v1/datasets/{dataset}/visualize + """ + + dataset = params.uri["dataset"] + cache_m = CacheManager(self.config, current_app.logger) + + try: + tarball = cache_m.find_dataset(dataset.resource_id) + except TarballNotFound as e: + raise APIAbort( + HTTPStatus.NOT_FOUND, f"No dataset with ID '{e.tarball}' found" + ) from e + + metadata = self._get_dataset_metadata( + dataset, ["dataset.metalog.pbench.script"] + ) + benchmark = metadata["dataset.metalog.pbench.script"].upper() + benchmark_type = BenchmarkName.__members__.get(benchmark) + if not benchmark_type: + raise APIAbort( + HTTPStatus.UNSUPPORTED_MEDIA_TYPE, f"Unsupported Benchmark: {benchmark}" + ) + + name = Dataset.stem(tarball.tarball_path) + try: + file = tarball.extract(tarball.tarball_path, f"{name}/result.csv") + except TarballUnpackError as e: + raise APIInternalError(str(e)) from e + + get_quisby_data = QuisbyProcessing().extract_data( + benchmark_type, dataset.name, InputType.STREAM, file + ) + + if get_quisby_data["status"] != "success": + raise APIInternalError( + f"Quisby processing failure. 
Exception: {get_quisby_data['exception']}" + ) + return jsonify(get_quisby_data) diff --git a/lib/pbench/server/cache_manager.py b/lib/pbench/server/cache_manager.py index aea677f0b9..d594a49efd 100644 --- a/lib/pbench/server/cache_manager.py +++ b/lib/pbench/server/cache_manager.py @@ -49,7 +49,7 @@ def __init__(self, tarball: str): self.tarball = tarball def __str__(self) -> str: - return f"The dataset tarball named {self.tarball!r} is not present in the cache manager" + return f"The dataset tarball named {self.tarball!r} is not found" class DuplicateTarball(CacheManagerError): @@ -59,7 +59,7 @@ def __init__(self, tarball: str): self.tarball = tarball def __str__(self) -> str: - return f"A dataset tarball named {self.tarball!r} is already present in the cache manager" + return f"A dataset tarball named {self.tarball!r} is already present" class MetadataError(CacheManagerError): diff --git a/lib/pbench/test/unit/server/test_cache_manager.py b/lib/pbench/test/unit/server/test_cache_manager.py index fe096db460..93333e1ea6 100644 --- a/lib/pbench/test/unit/server/test_cache_manager.py +++ b/lib/pbench/test/unit/server/test_cache_manager.py @@ -236,7 +236,7 @@ def test_create_bad( cm.create(tarball.tarball_path) assert ( str(exc.value) - == "A dataset tarball named 'pbench-user-benchmark_some + config_2021.05.01T12.42.42' is already present in the cache manager" + == "A dataset tarball named 'pbench-user-benchmark_some + config_2021.05.01T12.42.42' is already present" ) assert tarball.metadata == fake_get_metadata(tarball.tarball_path) assert exc.value.tarball == tarball.name @@ -924,10 +924,7 @@ def mock_run(args, **kwargs): assert tarball == cm[md5] with pytest.raises(TarballNotFound) as exc: cm["foobar"] - assert ( - str(exc.value) - == "The dataset tarball named 'foobar' is not present in the cache manager" - ) + assert str(exc.value) == "The dataset tarball named 'foobar' is not found" # Test __contains__ assert md5 in cm @@ -946,10 +943,7 @@ def mock_run(args, 
**kwargs): # Try to find a dataset that doesn't exist with pytest.raises(TarballNotFound) as exc: cm.find_dataset("foobar") - assert ( - str(exc.value) - == "The dataset tarball named 'foobar' is not present in the cache manager" - ) + assert str(exc.value) == "The dataset tarball named 'foobar' is not found" assert exc.value.tarball == "foobar" # Unpack the dataset, creating INCOMING and RESULTS links diff --git a/lib/pbench/test/unit/server/test_datasets_inventory.py b/lib/pbench/test/unit/server/test_datasets_inventory.py index c48319eb22..8f046b2c0d 100644 --- a/lib/pbench/test/unit/server/test_datasets_inventory.py +++ b/lib/pbench/test/unit/server/test_datasets_inventory.py @@ -59,7 +59,7 @@ def test_get_no_dataset(self, query_get_as): def test_dataset_not_present(self, query_get_as): response = query_get_as("fio_2", "metadata.log", HTTPStatus.NOT_FOUND) assert response.json == { - "message": "The dataset tarball named 'random_md5_string4' is not present in the cache manager" + "message": "The dataset tarball named 'random_md5_string4' is not found" } def test_unauthorized_access(self, query_get_as): diff --git a/lib/pbench/test/unit/server/test_datasets_visualize.py b/lib/pbench/test/unit/server/test_datasets_visualize.py new file mode 100644 index 0000000000..92e14d1a9d --- /dev/null +++ b/lib/pbench/test/unit/server/test_datasets_visualize.py @@ -0,0 +1,127 @@ +from http import HTTPStatus +from pathlib import Path + +from pquisby.lib.post_processing import QuisbyProcessing +import pytest +import requests + +from pbench.server import JSON +from pbench.server.api.resources import ApiBase +from pbench.server.cache_manager import CacheManager, Tarball +from pbench.server.database.models.datasets import Dataset, DatasetNotFound + + +class TestVisualize: + @pytest.fixture() + def query_get_as(self, client, server_config, more_datasets, get_token_func): + """ + Helper fixture to perform the API query and validate an expected + return status. 
+ + Args: + client: Flask test API client fixture + server_config: Pbench config fixture + more_datasets: Dataset construction fixture + get_token_func: Pbench token fixture + """ + + def query_api( + dataset: str, user, expected_status: HTTPStatus + ) -> requests.Response: + try: + dataset_id = Dataset.query(name=dataset).resource_id + except DatasetNotFound: + dataset_id = dataset # Allow passing deliberately bad value + headers = {"authorization": f"bearer {get_token_func(user)}"} + response = client.get( + f"{server_config.rest_uri}/datasets/{dataset_id}/visualize", + headers=headers, + ) + assert response.status_code == expected_status + return response + + return query_api + + def mock_find_dataset(self, _dataset: str) -> Tarball: + class Tarball(object): + tarball_path = Path("/dataset/tarball.tar.xz") + + def extract(_tarball_path: Path, _path: str) -> str: + return "CSV_file_as_a_byte_stream" + + return Tarball + + def mock_get_dataset_metadata(self, _dataset, _key) -> JSON: + return {"dataset.metalog.pbench.script": "uperf"} + + def test_get_no_dataset(self, query_get_as): + response = query_get_as("nonexistent-dataset", "drb", HTTPStatus.NOT_FOUND) + assert response.json == {"message": "Dataset 'nonexistent-dataset' not found"} + + def test_dataset_not_present(self, query_get_as): + response = query_get_as("fio_2", "drb", HTTPStatus.NOT_FOUND) + assert response.json == { + "message": "No dataset with ID 'random_md5_string4' found" + } + + def test_unauthorized_access(self, query_get_as): + response = query_get_as("test", "drb", HTTPStatus.FORBIDDEN) + assert response.json == { + "message": "User drb is not authorized to READ a resource owned by test with private access" + } + + def test_successful_get(self, query_get_as, monkeypatch): + def mock_extract_data(self, test_name, dataset_name, input_type, data) -> JSON: + return {"status": "success", "json_data": "quisby_data"} + + monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset) + 
monkeypatch.setattr( + ApiBase, "_get_dataset_metadata", self.mock_get_dataset_metadata + ) + monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data) + + response = query_get_as("uperf_1", "test", HTTPStatus.OK) + assert response.json["status"] == "success" + assert response.json["json_data"] == "quisby_data" + + def test_unsuccessful_get_with_incorrect_data(self, query_get_as, monkeypatch): + def mock_find_dataset_with_incorrect_data(self, dataset) -> Tarball: + class Tarball(object): + tarball_path = Path("/dataset/tarball.tar.xz") + + def extract(tarball_path, path) -> str: + return "IncorrectData" + + return Tarball + + def mock_extract_data(self, test_name, dataset_name, input_type, data) -> JSON: + return {"status": "failed", "exception": "Unsupported Media Type"} + + monkeypatch.setattr( + CacheManager, "find_dataset", mock_find_dataset_with_incorrect_data + ) + monkeypatch.setattr( + ApiBase, "_get_dataset_metadata", self.mock_get_dataset_metadata + ) + monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data) + response = query_get_as("uperf_1", "test", HTTPStatus.INTERNAL_SERVER_ERROR) + assert response.json["message"].startswith( + "Internal Pbench Server Error: log reference " + ) + + def test_unsupported_benchmark(self, query_get_as, monkeypatch): + flag = True + + def mock_extract_data(*args, **kwargs) -> JSON: + nonlocal flag + flag = False + + def mock_get_metadata(self, dataset, key) -> JSON: + return {"dataset.metalog.pbench.script": "hammerDB"} + + monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset) + monkeypatch.setattr(ApiBase, "_get_dataset_metadata", mock_get_metadata) + monkeypatch.setattr(QuisbyProcessing, "extract_data", mock_extract_data) + response = query_get_as("uperf_1", "test", HTTPStatus.UNSUPPORTED_MEDIA_TYPE) + assert response.json["message"] == "Unsupported Benchmark: HAMMERDB" + assert flag is True diff --git a/lib/pbench/test/unit/server/test_endpoint_configure.py 
b/lib/pbench/test/unit/server/test_endpoint_configure.py index ac167d1e45..a1c14cb61d 100644 --- a/lib/pbench/test/unit/server/test_endpoint_configure.py +++ b/lib/pbench/test/unit/server/test_endpoint_configure.py @@ -101,6 +101,10 @@ def check_config(self, client, server_config, host, my_headers={}): "dataset_view": {"type": "string"}, }, }, + "datasets_visualize": { + "template": f"{uri}/datasets/{{dataset}}/visualize", + "params": {"dataset": {"type": "string"}}, + }, "endpoints": {"template": f"{uri}/endpoints", "params": {}}, "key": { "template": f"{uri}/key/{{key}}", diff --git a/server/requirements.txt b/server/requirements.txt index b433249642..5d0b62843e 100644 --- a/server/requirements.txt +++ b/server/requirements.txt @@ -20,4 +20,4 @@ python-dateutil requests # TODO CVE-2023-32681 (>=2.31.0) sdnotify sqlalchemy>=1.4.23 -sqlalchemy_utils>=0.37.6 +sqlalchemy_utils>=0.37.6 \ No newline at end of file