
Commit

Using Tarball.extract for extracting files from tarball
riya-17 committed Dec 6, 2022
1 parent b4a053b commit 781c8cd
Showing 3 changed files with 53 additions and 57 deletions.
30 changes: 11 additions & 19 deletions lib/pbench/server/api/resources/datasets_inventory.py
@@ -1,5 +1,6 @@
from http import HTTPStatus
from logging import Logger
import os

from flask import send_file
from flask.wrappers import Response
@@ -17,7 +18,7 @@
ParamType,
Schema,
)
from pbench.server.cache_manager import CacheManager, TarballNotFound
from pbench.server.cache_manager import CacheManager, Tarball, TarballNotFound


class DatasetsInventory(ApiBase):
@@ -63,22 +64,13 @@ def _get(self, params: ApiParams, _) -> Response:
except TarballNotFound as e:
raise APIAbort(HTTPStatus.NOT_FOUND, str(e))

if target is None:
file_path = tarball.tarball_path
else:
dataset_location = tarball.unpacked
if dataset_location is None:
raise APIAbort(HTTPStatus.NOT_FOUND, "The dataset is not unpacked")
file_path = dataset_location / target
tarball_name = tarball.tarball_path.name
target_path = (
tarball_name if target is None else (os.path.join(tarball_name, target))
)
try:
file_path = Tarball.extract(str(tarball.tarball_path), target_path)
except Exception as exc:
raise APIAbort(HTTPStatus.NOT_FOUND, str(exc))

if file_path.is_file():
return send_file(file_path)
elif file_path.exists():
raise APIAbort(
HTTPStatus.UNSUPPORTED_MEDIA_TYPE,
"The specified path does not refer to a regular file",
)
else:
raise APIAbort(
HTTPStatus.NOT_FOUND, "The specified path does not refer to a file"
)
return send_file(file_path)
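
Taken together, the endpoint changes above drop the requirement that a dataset be unpacked on disk: the handler joins the tarball's name with the requested target path and asks the new static Tarball.extract to read that member straight out of the archive, turning any failure into a 404. A minimal self-contained sketch of that read; the tarball path and member names below are invented for illustration:

import os
import tarfile

def extract(tarball_path: str, path: str) -> str:
    """Read one member out of a tarball and return its contents as text,
    mirroring the static Tarball.extract introduced by this commit."""
    # "r:*" lets tarfile pick the compression (xz, gz, ...) automatically.
    return tarfile.open(tarball_path, "r:*").extractfile(path).read().decode()

# Invented stand-ins for tarball.tarball_path and the requested target.
tarball_path = "/srv/pbench/archive/fio_2.tar.xz"
tarball_name = "fio_2"                    # top-level directory inside the archive
target = "1-default/default.csv"

# The same join the endpoint performs before calling extract(); a missing
# member surfaces as an exception, which the endpoint maps to HTTP 404.
target_path = tarball_name if target is None else os.path.join(tarball_name, target)
print(extract(tarball_path, target_path))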
17 changes: 9 additions & 8 deletions lib/pbench/server/cache_manager.py
@@ -60,15 +60,16 @@ def __str__(self) -> str:

class MetadataError(CacheManagerError):
"""
A problem was found locating or processing a tarball's metadata.log file.
A problem was found locating or processing a file within a tarball.
"""

def __init__(self, tarball: Path, error: Exception):
def __init__(self, tarball: str, file_name: str, error: Exception):
self.tarball = tarball
self.file_name = file_name
self.error = str(error)

def __str__(self) -> str:
return f"A problem occurred processing metadata.log from {self.tarball!s}: {self.error!r}"
return f"A problem occurred processing `{Path(self.file_name).name}` from {self.tarball!s}: {self.error!r}"


class TarballUnpackError(CacheManagerError):
@@ -266,11 +267,13 @@ def create(cls, tarball: Path, controller: "Controller") -> "Tarball":

return cls(destination, controller)

def extract(self, path: str) -> str:
@staticmethod
def extract(tarball_path: str, path: str) -> str:
"""
Extract a file from the tarball and return it as a string
Args:
tarball_path: path for the tarball
path: relative path within the tarball of a file
Raises:
@@ -280,11 +283,9 @@ def extract(self, path: str) -> str:
The named file as a string
"""
try:
return (
tarfile.open(self.tarball_path, "r:*").extractfile(path).read().decode()
)
return tarfile.open(tarball_path, "r:*").extractfile(path).read().decode()
except Exception as exc:
raise MetadataError(self.tarball_path, exc)
raise MetadataError(tarball_path, path, exc)

def get_metadata(self) -> JSONOBJECT:
"""
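
The hunk above also reworks MetadataError so it carries the member name alongside the tarball and reports only the member's basename. A small self-contained sketch of that behaviour, with invented paths and plain Exception standing in for CacheManagerError:

from pathlib import Path

class MetadataError(Exception):  # stands in for the CacheManagerError subclass
    """Sketch of the reworked error: it now names the member as well as the tarball."""

    def __init__(self, tarball: str, file_name: str, error: Exception):
        self.tarball = tarball
        self.file_name = file_name
        self.error = str(error)

    def __str__(self) -> str:
        # Only the basename of the failing member appears in the message.
        return (
            f"A problem occurred processing `{Path(self.file_name).name}` "
            f"from {self.tarball!s}: {self.error!r}"
        )

err = MetadataError("/srv/pbench/archive/fio_2.tar.xz", "fio_2/metadata.log", ValueError("bad header"))
print(err)  # A problem occurred processing `metadata.log` from /srv/pbench/archive/fio_2.tar.xz: 'bad header'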
63 changes: 33 additions & 30 deletions lib/pbench/test/unit/server/test_datasets_inventory.py
@@ -5,7 +5,7 @@
import requests
import werkzeug.utils

from pbench.server.cache_manager import CacheManager
from pbench.server.cache_manager import CacheManager, Tarball, MetadataError
from pbench.server.database.models.datasets import Dataset, DatasetNotFound


@@ -68,69 +68,72 @@ def test_unauthorized_access(self, query_get_as):
"message": "User drb is not authorized to READ a resource owned by test with private access"
}

def test_dataset_is_not_unpacked(self, query_get_as, monkeypatch):
def mock_find_not_unpacked(self, dataset):
class Tarball(object):
unpacked = None
def test_path_is_file(self, query_get_as, monkeypatch):
def mock_extract(tarball_path, path):
raise MetadataError(tarball_path, path, "AttributeError: 'NoneType' object has no attribute 'read'")

# Validate the resource_id
Dataset.query(resource_id=dataset)
return Tarball

monkeypatch.setattr(CacheManager, "find_dataset", mock_find_not_unpacked)

response = query_get_as("fio_2", "1-default", HTTPStatus.NOT_FOUND)
assert response.json == {"message": "The dataset is not unpacked"}

def test_path_is_directory(self, query_get_as, monkeypatch):
monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
monkeypatch.setattr(Tarball, "extract", mock_extract)
monkeypatch.setattr(Path, "is_file", lambda self: False)
monkeypatch.setattr(Path, "exists", lambda self: True)

response = query_get_as("fio_2", "1-default", HTTPStatus.UNSUPPORTED_MEDIA_TYPE)
response = query_get_as("fio_2", "1-default", HTTPStatus.NOT_FOUND)
assert response.json == {
"message": "The specified path does not refer to a regular file"
"message": "A problem occurred processing `1-default` from /dataset_tarball: \"AttributeError: 'NoneType' object has no attribute 'read'\""
}

def test_not_a_file(self, query_get_as, monkeypatch):
def test_path_exist(self, query_get_as, monkeypatch):
def mock_extract(tarball_path, path):
raise MetadataError(tarball_path, path, f'KeyError: "filename \'{path}\' not found"')

monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
monkeypatch.setattr(Tarball, "extract", mock_extract)
monkeypatch.setattr(Path, "is_file", lambda self: False)
monkeypatch.setattr(Path, "exists", lambda self: False)
monkeypatch.setattr(Path, "exists", lambda self: True)

response = query_get_as("fio_2", "1-default", HTTPStatus.NOT_FOUND)
response = query_get_as("fio_2", "metadata.log", HTTPStatus.NOT_FOUND)
assert response.json == {
"message": "The specified path does not refer to a file"
}
"message": 'A problem occurred processing `metadata.log` from /dataset_tarball: \'KeyError: "filename \\\'dataset_tarball/metadata.log\\\' not found"\''}

def test_dataset_in_given_path(self, query_get_as, monkeypatch):
@pytest.mark.parametrize("key", (None, ""))
def test_empty_target_value(self, query_get_as, monkeypatch, key):
file_sent = None

def mock_extract(tarball_path, path):
raise MetadataError(tarball_path, path, "AttributeError: 'NoneType' object has no attribute 'read'")

def mock_send_file(path_or_file, *args, **kwargs):
nonlocal file_sent
file_sent = path_or_file
return {"status": "OK"}

monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
monkeypatch.setattr(Tarball, "extract", mock_extract)
monkeypatch.setattr(Path, "is_file", lambda self: True)
monkeypatch.setattr(werkzeug.utils, "send_file", mock_send_file)

response = query_get_as("fio_2", "1-default/default.csv", HTTPStatus.OK)
assert response.status_code == HTTPStatus.OK
assert str(file_sent) == "/dataset/1-default/default.csv"
response = query_get_as("fio_2", key, HTTPStatus.NOT_FOUND)
assert response.status_code == HTTPStatus.NOT_FOUND
assert response.json == {
"message": "A problem occurred processing `dataset_tarball` from /dataset_tarball: \"AttributeError: 'NoneType' object has no attribute 'read'\""
}

@pytest.mark.parametrize("key", (None, ""))
def test_get_result_tarball(self, query_get_as, monkeypatch, key):
def test_dataset_in_given_path(self, query_get_as, monkeypatch):
file_sent = None

def mock_extract(tarball_path, path):
return path

def mock_send_file(path_or_file, *args, **kwargs):
nonlocal file_sent
file_sent = path_or_file
return {"status": "OK"}

monkeypatch.setattr(CacheManager, "find_dataset", self.mock_find_dataset)
monkeypatch.setattr(Tarball, "extract", mock_extract)
monkeypatch.setattr(Path, "is_file", lambda self: True)
monkeypatch.setattr(werkzeug.utils, "send_file", mock_send_file)

response = query_get_as("fio_2", key, HTTPStatus.OK)
response = query_get_as("fio_2", "1-default/default.csv", HTTPStatus.OK)
assert response.status_code == HTTPStatus.OK
assert str(file_sent) == "/dataset_tarball"
assert str(file_sent) == "dataset_tarball/1-default/default.csv"
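
The rewritten tests drive the endpoint by monkeypatching the static Tarball.extract instead of faking an unpacked directory tree. A cut-down pytest illustration of that pattern; the Tarball class here is a toy stand-in rather than the real cache manager class, and the test relies on pytest's built-in monkeypatch fixture:

class Tarball:
    @staticmethod
    def extract(tarball_path: str, path: str) -> str:
        raise NotImplementedError("the real method reads the archive")

def test_extract_is_stubbed(monkeypatch):
    # Replace the staticmethod with a stub, the same pattern the new unit tests
    # use to simulate extraction succeeding or raising MetadataError.
    def mock_extract(tarball_path, path):
        return f"contents of {path}"

    monkeypatch.setattr(Tarball, "extract", mock_extract)
    assert Tarball.extract("/fake.tar.xz", "fio_2/metadata.log") == "contents of fio_2/metadata.log"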
