From 7769e88c37e111624513d4c0db7d16781e40af05 Mon Sep 17 00:00:00 2001
From: Mike Cantelon
Date: Sat, 30 Sep 2023 16:36:29 -0700
Subject: [PATCH] Add test data generation tool. (#217)

---
 requirements/test.txt       |   1 +
 tools/__init__.py           |   0
 tools/generate-test-data.py |  74 +++++++++++++++++++++++
 tools/helpers/__init__.py   |   0
 tools/helpers/data.py       | 116 ++++++++++++++++++++++++++++++++++++
 tools/tests/test_data.py    |  69 +++++++++++++++++++++
 6 files changed, 260 insertions(+)
 create mode 100644 tools/__init__.py
 create mode 100644 tools/generate-test-data.py
 create mode 100644 tools/helpers/__init__.py
 create mode 100644 tools/helpers/data.py
 create mode 100644 tools/tests/test_data.py

diff --git a/requirements/test.txt b/requirements/test.txt
index 4b9b2679..2b4ed645 100644
--- a/requirements/test.txt
+++ b/requirements/test.txt
@@ -1,5 +1,6 @@
 -r base.txt
 
+faker
 flake8==5.0.4
 pytest==5.4.3
 pytest_cov==2.11.1
diff --git a/tools/__init__.py b/tools/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tools/generate-test-data.py b/tools/generate-test-data.py
new file mode 100644
index 00000000..58a09490
--- /dev/null
+++ b/tools/generate-test-data.py
@@ -0,0 +1,74 @@
+"""Populate the AIPscan database with randomly generated test data.
+
+Using the "default" configuration, this creates storage services (each with
+a fetch job), storage locations for each service, and a random number of
+AIPs (each with a random number of files) for each location.
+"""
+from datetime import date
+import sys
+
+sys.path.append("../AIPscan")
+
+from faker import Faker
+from flask import Flask
+from flask_sqlalchemy import SQLAlchemy
+
+from config import CONFIGS
+from AIPscan import db
+from AIPscan.models import AIP, FetchJob, File, StorageLocation, StorageService
+from helpers import data
+
+app = Flask(__name__)
+app.config.from_object(CONFIGS["default"])
+
+db.init_app(app)
+
+fake = Faker()
+randint = fake.random.randint
+
+with app.app_context():
+    # Add example storage services
+    ss_to_create = 2
+
+    print(f"Creating {ss_to_create} storage services...")
+
+    ss_ids = []
+    fetch_jobs = {}  # Maps storage service ID to the ID of its fetch job
+
+    for _ in range(ss_to_create):
+        # Only the first storage service created is flagged as the default
+        is_default = len(ss_ids) == 0
+
+        ss = data.create_storage_service(is_default)
+        ss_ids.append(ss.id)
+
+        fetch_job = data.create_fetch_job(ss)
+        fetch_jobs[ss.id] = fetch_job.id
+
+    # Add example storage locations
+    storage_locations_per_ss = 2
+    ss_locations_to_create = ss_to_create * storage_locations_per_ss
+
+    print(f"Creating {ss_locations_to_create} storage service locations...")
+
+    aip_batches_created = 0
+    total_aip_batches = len(ss_ids) * storage_locations_per_ss
+    for ss_id in ss_ids:
+        for _ in range(storage_locations_per_ss):
+            sl = data.create_location(ss_id)
+
+            db.session.add(sl)
+            db.session.commit()
+
+            # Add AIPs
+            aip_batches_created += 1
+
+            print(f"Creating AIPs ({aip_batches_created}/{total_aip_batches})...")
+
+            # Look up the fetch job by the storage service being processed
+            # (not by whichever service the creation loop above ended on)
+            for _ in range(randint(100, 300)):
+                aip = data.create_aip(ss_id, sl.id, fetch_jobs[ss_id])
+                data.create_aip_files(100, 300, aip)
+
+    print("Done.")
diff --git a/tools/helpers/__init__.py b/tools/helpers/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tools/helpers/data.py b/tools/helpers/data.py
new file mode 100644
index 00000000..b21b99fc
--- /dev/null
+++ b/tools/helpers/data.py
@@ -0,0 +1,116 @@
+"""Helpers for generating randomly populated AIPscan model instances."""
+from datetime import date
+import sys
+
+sys.path.append("../AIPscan")
+
+from faker import Faker
+
+from config import CONFIGS
+from AIPscan import db
+from AIPscan.models import AIP, FetchJob, File, StorageLocation, StorageService
+
+fake = Faker()
+randint = fake.random.randint
+
+
+def create_storage_service(default):
+    """Create, commit, and return a storage service with fake details.
+
+    ``default`` is stored as the service's ``default`` flag.
+    """
+    ss = StorageService(
+        name=fake.text(20)[:-1],
+        url=fake.url(),
+        user_name=fake.profile()["username"],
+        api_key=fake.password(),
+        download_limit=0,
+        download_offset=0,
+        default=default,
+    )
+    db.session.add(ss)
+    db.session.commit()
+
+    return ss
+
+
+def create_fetch_job(storage_service):
+    """Create, commit, and return an empty fetch job for a storage service.
+
+    All totals are zero and both download dates are set to today.
+    """
+    fetch_job = FetchJob(
+        total_packages=0,
+        total_aips=0,
+        total_deleted_aips=0,
+        download_start=date.today(),
+        download_end=date.today(),
+        download_directory=fake.file_path(),
+        storage_service_id=storage_service.id,
+    )
+    db.session.add(fetch_job)
+    db.session.commit()
+
+    return fetch_job
+
+
+def create_location(storage_service_id):
+    """Return a new storage location for the given storage service.
+
+    Unlike the other helpers, this does not add the object to the database
+    session; the caller must add and commit it.
+    """
+    return StorageLocation(
+        current_location=fake.file_path(),
+        description=fake.text(20)[:-1],
+        storage_service_id=storage_service_id,
+    )
+
+
+def create_aip(storage_service_id, storage_location_id, fetch_job_id):
+    """Create, commit, and return an AIP with fake metadata.
+
+    The AIP is linked to the given storage service, storage location, and
+    fetch job, and uses an origin pipeline ID of 1.
+    """
+    aip = AIP(
+        uuid=fake.uuid4(),
+        transfer_name=fake.text(20)[:-1],
+        create_date=date.today(),
+        mets_sha256=fake.sha256(),
+        size=randint(10000, 100_000_000),
+        storage_service_id=storage_service_id,
+        storage_location_id=storage_location_id,
+        fetch_job_id=fetch_job_id,
+        origin_pipeline_id=1,
+    )
+    db.session.add(aip)
+    db.session.commit()
+
+    return aip
+
+
+def create_aip_files(min, max, aip):
+    """Create and commit a random number of "original" files for an AIP.
+
+    Between ``min`` and ``max`` files (inclusive) are created.
+    """
+    # randint includes both bounds, so iterate exactly that many times
+    for _ in range(randint(min, max)):
+        aipfile = File(
+            aip_id=aip.id,
+            name=fake.text(20)[:-1],
+            filepath=fake.file_path(),
+            uuid=fake.uuid4(),
+            file_type="original",
+            size=randint(1000, 1_000_000),
+            date_created=date.today(),
+            puid=fake.text(20)[:-1],
+            file_format=fake.text(20)[:-1],
+            format_version=fake.text(20)[:-1],
+            checksum_type=fake.text(20)[:-1],
+            checksum_value=fake.text(20)[:-1],
+            premis_object="",
+        )
+        db.session.add(aipfile)
+        db.session.commit()
diff --git a/tools/tests/test_data.py b/tools/tests/test_data.py
new file mode 100644
index 00000000..6ea58c68
--- /dev/null
+++ b/tools/tests/test_data.py
@@ -0,0 +1,69 @@
+import pytest
+
+from tools.helpers import data
+
+
+@pytest.fixture
+def mock_db_add(mocker):
+    mocker.patch("AIPscan.db.session.add")
+    mocker.patch("AIPscan.db.session.commit")
+
+
+def test_create_storage_service(mock_db_add):
+    ss = data.create_storage_service(True)
+
+    assert ss.name
+    assert ss.url
+    assert ss.user_name
+    assert ss.api_key
+    assert ss.default
+
+    ss = data.create_storage_service(False)
+    assert not ss.default
+
+
+def test_create_fetch_job(mock_db_add):
+    ss = data.create_storage_service(True)
+    ss.id = 1
+    fetch_job = data.create_fetch_job(ss)
+
+    assert fetch_job.download_start
+    assert fetch_job.download_end
+    assert fetch_job.download_directory
+    assert fetch_job.storage_service_id == ss.id
+
+
+def test_create_location(mock_db_add):
+    location = data.create_location(1)
+
+    assert location.current_location
+    assert location.description
+    assert location.storage_service_id == 1
+
+
+def test_create_aip(mock_db_add):
+    aip = data.create_aip(1, 2, 3)
+
+    assert aip.uuid
+    assert aip.transfer_name
+    assert aip.create_date
+    assert aip.mets_sha256
+    assert aip.size
+    assert aip.storage_service_id == 1
+    assert aip.storage_location_id == 2
+    assert aip.fetch_job_id == 3
+    assert aip.origin_pipeline_id == 1
+
+
+def test_create_aip_files(mocker):
+    mock_add = mocker.patch("AIPscan.db.session.add")
+    mocker.patch("AIPscan.db.session.commit")
+
+    aip = data.create_aip(1, 2, 3)
+    aip.id = 1
+    mock_add.reset_mock()
+
+    # Equal bounds make the number of files created deterministic
+    data.create_aip_files(3, 3, aip)
+
+    assert mock_add.call_count == 3