From 3bdd430652a51a9aa8f87b5887e54619887c2fb2 Mon Sep 17 00:00:00 2001
From: Mike Cantelon
Date: Sat, 30 Sep 2023 16:36:29 -0700
Subject: [PATCH] Add test data generation tool. (#217)

Add tools/generate-test-data.py, which fills the configured AIPscan
database with Faker-generated storage services, fetch jobs, storage
locations, AIPs and files. The record-building helpers live in
tools/helpers/data.py and are covered by tools/tests/test_data.py.

---
 requirements/test.txt       |   1 +
 tools/__init__.py           |   0
 tools/app/__init__.py       |   0
 tools/app/tool.py           |   5 ++
 tools/generate-test-data.py |  64 +++++++++++++++++++++
 tools/helpers/__init__.py   |   0
 tools/helpers/data.py       | 111 ++++++++++++++++++++++++++++++++++++
 tools/tests/test_data.py    |  81 ++++++++++++++++++++++++++
 8 files changed, 262 insertions(+)
 create mode 100644 tools/__init__.py
 create mode 100644 tools/app/__init__.py
 create mode 100644 tools/app/tool.py
 create mode 100644 tools/generate-test-data.py
 create mode 100644 tools/helpers/__init__.py
 create mode 100644 tools/helpers/data.py
 create mode 100644 tools/tests/test_data.py

diff --git a/requirements/test.txt b/requirements/test.txt
index 4b9b2679..2b4ed645 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -1,5 +1,6 @@
 -r base.txt
 
+faker
 flake8==5.0.4
 pytest==5.4.3
 pytest_cov==2.11.1
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tools/app/__init__.py b/tools/app/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tools/app/tool.py b/tools/app/tool.py
new file mode 100644
index 00000000..28dc39f8
--- /dev/null
+++ b/tools/app/tool.py
@@ -0,0 +1,5 @@
+import sys
+
+sys.path.append("../AIPscan")
+
+config_name = "default"
diff --git a/tools/generate-test-data.py b/tools/generate-test-data.py
new file mode 100644
--- /dev/null
+++ b/tools/generate-test-data.py
@@ -0,0 +1,64 @@
+"""Populate the AIPscan database with randomly generated example data.
+
+Creates storage services (each with a fetch job), storage locations for
+each storage service, and a random number of AIPs with files per location.
+"""
+from app import tool
+from faker import Faker
+from flask import Flask
+from helpers import data
+
+from AIPscan import db
+from config import CONFIGS
+
+app = Flask(__name__)
+app.config.from_object(CONFIGS[tool.config_name])
+
+db.init_app(app)
+
+fake = Faker()
+randint = fake.random.randint
+
+with app.app_context():
+    # Add example storage services
+    ss_to_create = 2
+
+    print(f"Creating {ss_to_create} storage services...")
+
+    ss_ids = []
+    fetch_jobs = {}
+
+    for _ in range(ss_to_create):
+        # Only the first storage service created is flagged as the default.
+        is_default = not ss_ids
+
+        ss = data.create_storage_service(is_default)
+        ss_ids.append(ss.id)
+
+        fetch_job = data.create_fetch_job(ss)
+        fetch_jobs[ss.id] = fetch_job.id
+
+    # Add example storage locations
+    storage_locations_per_ss = 2
+    ss_locations_to_create = ss_to_create * storage_locations_per_ss
+
+    print(f"Creating {ss_locations_to_create} storage service locations...")
+
+    aip_batches_created = 0
+    total_aip_batches = len(ss_ids) * storage_locations_per_ss
+    for ss_id in ss_ids:
+        for _ in range(storage_locations_per_ss):
+            # create_location() commits the new row itself.
+            sl = data.create_location(ss_id)
+
+            # Add AIPs
+            aip_batches_created += 1
+
+            print(f"Creating AIPs ({aip_batches_created}/{total_aip_batches})...")
+
+            for _ in range(randint(100, 300)):
+                # Each AIP belongs to the fetch job of its own storage service.
+                aip = data.create_aip(ss_id, sl.id, fetch_jobs[ss_id])
+                data.create_aip_files(100, 300, aip)
+
+    print("Done.")
diff --git a/tools/helpers/__init__.py b/tools/helpers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tools/helpers/data.py b/tools/helpers/data.py
new file mode 100644
--- /dev/null
+++ b/tools/helpers/data.py
@@ -0,0 +1,111 @@
+"""Helpers that create AIPscan database records filled with fake data."""
+from datetime import date
+
+from faker import Faker
+
+from AIPscan import db
+from AIPscan.models import AIP, FetchJob, File, StorageLocation, StorageService
+
+fake = Faker()
+randint = fake.random.randint
+
+
+def create_storage_service(default):
+    """Create, commit and return a StorageService with fake values.
+
+    The "default" argument is stored as the service's default flag.
+    """
+    ss = StorageService(
+        # [:-1] drops the last character of the generated text (presumably
+        # the trailing period Faker adds).
+        name=fake.text(20)[:-1],
+        url=fake.url(),
+        user_name=fake.profile()["username"],
+        api_key=fake.password(),
+        download_limit=0,
+        download_offset=0,
+        default=default,
+    )
+    db.session.add(ss)
+    db.session.commit()
+
+    return ss
+
+
+def create_fetch_job(storage_service):
+    """Create, commit and return a FetchJob for the given storage service.
+
+    Package/AIP totals are zero and both download dates are today's date.
+    """
+    fetch_job = FetchJob(
+        total_packages=0,
+        total_aips=0,
+        total_deleted_aips=0,
+        download_start=date.today(),
+        download_end=date.today(),
+        download_directory=fake.file_path(),
+        storage_service_id=storage_service.id,
+    )
+    db.session.add(fetch_job)
+    db.session.commit()
+
+    return fetch_job
+
+
+def create_location(storage_service_id):
+    """Create, commit and return a StorageLocation for a storage service."""
+    location = StorageLocation(
+        current_location=fake.file_path(),
+        description=fake.text(20)[:-1],
+        storage_service_id=storage_service_id,
+    )
+    db.session.add(location)
+    db.session.commit()
+
+    return location
+
+
+def create_aip(storage_service_id, storage_location_id, fetch_job_id):
+    """Create, commit and return an AIP linked to the given record IDs."""
+    aip = AIP(
+        uuid=fake.uuid4(),
+        transfer_name=fake.text(20)[:-1],
+        create_date=date.today(),
+        mets_sha256=fake.sha256(),
+        size=randint(10000, 100_000_000),
+        storage_service_id=storage_service_id,
+        storage_location_id=storage_location_id,
+        fetch_job_id=fetch_job_id,
+        origin_pipeline_id=1,
+    )
+    db.session.add(aip)
+    db.session.commit()
+
+    return aip
+
+
+def create_aip_files(min, max, aip):
+    """Create and commit a random number of File records for an AIP.
+
+    The number of files is picked at random from min to max, inclusive.
+    """
+    for _ in range(randint(min, max)):
+        aipfile = File(
+            aip_id=aip.id,
+            name=fake.text(20)[:-1],
+            filepath=fake.file_path(),
+            uuid=fake.uuid4(),
+            file_type="original",
+            size=randint(1000, 1_000_000),
+            date_created=date.today(),
+            puid=fake.text(20)[:-1],
+            file_format=fake.text(20)[:-1],
+            format_version=fake.text(20)[:-1],
+            checksum_type=fake.text(20)[:-1],
+            checksum_value=fake.text(20)[:-1],
+            premis_object="",
+        )
+        db.session.add(aipfile)
+
+    # A single commit covers the whole batch of files.
+    db.session.commit()
diff --git a/tools/tests/test_data.py b/tools/tests/test_data.py
new file mode 100644
--- /dev/null
+++ b/tools/tests/test_data.py
@@ -0,0 +1,81 @@
+"""Tests for the fake data helpers in tools/helpers/data.py."""
+import datetime
+
+import pytest
+
+from tools.helpers import data
+
+
+@pytest.fixture
+def mock_db_add(mocker):
+    """Replace the database session's add() and commit() with mocks."""
+    mocker.patch("AIPscan.db.session.add")
+    mocker.patch("AIPscan.db.session.commit")
+
+
+def test_create_storage_service(mock_db_add):
+    ss = data.create_storage_service(True)
+
+    assert ss.name
+    assert isinstance(ss.name, str)
+    assert ss.url
+    assert isinstance(ss.url, str)
+    assert ss.user_name
+    assert isinstance(ss.user_name, str)
+    assert ss.api_key
+    assert isinstance(ss.api_key, str)
+    assert ss.default
+    assert isinstance(ss.default, bool)
+
+    ss = data.create_storage_service(False)
+    assert not ss.default
+
+
+def test_create_fetch_job(mock_db_add):
+    ss = data.create_storage_service(True)
+    ss.id = 1
+    fetch_job = data.create_fetch_job(ss)
+
+    assert fetch_job.download_start
+    assert isinstance(fetch_job.download_start, datetime.date)
+    assert fetch_job.download_end
+    assert isinstance(fetch_job.download_end, datetime.date)
+    assert fetch_job.download_directory
+    assert isinstance(fetch_job.download_directory, str)
+    assert fetch_job.storage_service_id == ss.id
+
+
+def test_create_location(mock_db_add):
+    location = data.create_location(1)
+
+    assert location.current_location
+    assert location.description
+    assert location.storage_service_id == 1
+
+
+def test_create_aip(mock_db_add):
+    aip = data.create_aip(1, 2, 3)
+
+    assert aip.uuid
+    assert aip.transfer_name
+    assert aip.create_date
+    assert aip.mets_sha256
+    assert aip.size
+    assert aip.storage_service_id == 1
+    assert aip.storage_location_id == 2
+    assert aip.fetch_job_id == 3
+    assert aip.origin_pipeline_id == 1
+
+
+def test_create_aip_files(mocker):
+    mock_add = mocker.patch("AIPscan.db.session.add")
+    mocker.patch("AIPscan.db.session.commit")
+
+    aip = data.create_aip(1, 2, 3)
+    aip.id = 1
+    mock_add.reset_mock()
+
+    # With min == max, exactly that many files must be created.
+    data.create_aip_files(2, 2, aip)
+
+    assert mock_add.call_count == 2