Skip to content

Commit

Permalink
Add test data generation tool. (#217)
Browse files Browse the repository at this point in the history
  • Loading branch information
mcantelon committed Oct 4, 2023
1 parent 40b01a3 commit 7769e88
Show file tree
Hide file tree
Showing 6 changed files with 214 additions and 0 deletions.
1 change: 1 addition & 0 deletions requirements/test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
-r base.txt

faker
flake8==5.0.4
pytest==5.4.3
pytest_cov==2.11.1
Expand Down
Empty file added tools/__init__.py
Empty file.
66 changes: 66 additions & 0 deletions tools/generate-test-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
from datetime import date
import sys

sys.path.append("../AIPscan")

from faker import Faker
from flask import Flask
from flask_sqlalchemy import SQLAlchemy

from config import CONFIGS
from AIPscan import db
from AIPscan.models import AIP, FetchJob, File, StorageLocation, StorageService
from helpers import data

# Create a Flask app configured from the default config and bind the shared
# SQLAlchemy ``db`` object to it.
app = Flask(__name__)
app.config.from_object(CONFIGS["default"])

db.init_app(app)

fake = Faker()
randint = fake.random.randint

# Everything below touches the database, so it runs inside the app context.
with app.app_context():
    # Add example storage services
    ss_to_create = 2

    print(f"Creating {ss_to_create} storage services...")

    ss_ids = []
    fetch_jobs = {}  # storage service ID -> ID of the fetch job created for it

    for _ in range(ss_to_create):
        # Only the first storage service created is flagged as the default.
        is_default = not ss_ids

        ss = data.create_storage_service(is_default)
        ss_ids.append(ss.id)

        fetch_job = data.create_fetch_job(ss)
        fetch_jobs[ss.id] = fetch_job.id

    # Add example storage locations
    storage_locations_per_ss = 2
    ss_locations_to_create = ss_to_create * storage_locations_per_ss

    print(f"Creating {ss_locations_to_create} storage service locations...")

    # One batch of AIPs is generated per storage location.
    aip_batches_created = 0
    total_aip_batches = len(ss_ids) * storage_locations_per_ss
    for ss_id in ss_ids:
        for _ in range(storage_locations_per_ss):
            sl = data.create_location(ss_id)

            # create_location() only builds the object; persist it here so
            # that sl.id is populated before AIPs reference it.
            db.session.add(sl)
            db.session.commit()

            # Add AIPs
            aip_batches_created += 1

            print(f"Creating AIPs ({aip_batches_created}/{total_aip_batches})...")

            for _ in range(1, randint(100, 300)):
                # Use the fetch job belonging to the storage service being
                # populated (ss_id), not the leftover ``ss`` variable from
                # the creation loop above, which always refers to the last
                # storage service created.
                aip = data.create_aip(ss_id, sl.id, fetch_jobs[ss_id])
                data.create_aip_files(100, 300, aip)

    print("Done.")
Empty file added tools/helpers/__init__.py
Empty file.
92 changes: 92 additions & 0 deletions tools/helpers/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from datetime import date
import sys

sys.path.append("../AIPscan")

from faker import Faker

from config import CONFIGS
from AIPscan import db
from AIPscan.models import AIP, FetchJob, File, StorageLocation, StorageService

# Shared Faker instance used by every helper in this module.
fake = Faker()
# Shorthand for the randint method of the Faker instance's random generator.
randint = fake.random.randint


def create_storage_service(default):
    """Create a StorageService with fake details, commit it and return it.

    ``default`` is stored as the record's ``default`` value. Download limit
    and offset are both set to zero.
    """
    attributes = {
        # The last character of the generated text is dropped.
        "name": fake.text(20)[:-1],
        "url": fake.url(),
        "user_name": fake.profile()["username"],
        "api_key": fake.password(),
        "download_limit": 0,
        "download_offset": 0,
        "default": default,
    }
    storage_service = StorageService(**attributes)

    session = db.session
    session.add(storage_service)
    session.commit()

    return storage_service


def create_fetch_job(storage_service):
    """Create a FetchJob for ``storage_service``, commit it and return it.

    All package/AIP counters start at zero, the download start and end are
    both today's date, and the download directory is a fake file path.
    """
    job = FetchJob(
        total_packages=0,
        total_aips=0,
        total_deleted_aips=0,
        download_start=date.today(),
        download_end=date.today(),
        download_directory=fake.file_path(),
        storage_service_id=storage_service.id,
    )

    session = db.session
    session.add(job)
    session.commit()

    return job


def create_location(storage_service_id):
    """Build a StorageLocation with fake details for a storage service.

    The object is only constructed and returned; this function does not add
    it to the database session or commit it.
    """
    location_path = fake.file_path()
    # The last character of the generated text is dropped.
    summary = fake.text(20)[:-1]

    location = StorageLocation(
        current_location=location_path,
        description=summary,
        storage_service_id=storage_service_id,
    )
    return location


def create_aip(storage_service_id, storage_location_id, fetch_job_id):
    """Create an AIP with fake details, commit it and return it.

    The three ID arguments are stored on the record unchanged. The origin
    pipeline ID is always 1 and the creation date is today's date.
    """
    attributes = {
        "uuid": fake.uuid4(),
        # The last character of the generated text is dropped.
        "transfer_name": fake.text(20)[:-1],
        "create_date": date.today(),
        "mets_sha256": fake.sha256(),
        "size": randint(10000, 100_000_000),
        "storage_service_id": storage_service_id,
        "storage_location_id": storage_location_id,
        "fetch_job_id": fetch_job_id,
        "origin_pipeline_id": 1,
    }
    package = AIP(**attributes)

    session = db.session
    session.add(package)
    session.commit()

    return package


def create_aip_files(min, max, aip):
    """Create a random number of fake File records belonging to ``aip``.

    ``randint(min, max)`` picks an upper bound and the loop runs over
    ``range(1, bound)``, so between ``min - 1`` and ``max - 1`` files are
    created. All files are staged in the session and committed together
    once, rather than issuing one commit per file.

    NOTE: the ``min`` and ``max`` parameter names shadow the builtins inside
    this function; they are kept so existing callers continue to work.
    """
    for _ in range(1, randint(min, max)):
        aipfile = File(
            aip_id=aip.id,
            # Each text value has its last character dropped.
            name=fake.text(20)[:-1],
            filepath=fake.file_path(),
            uuid=fake.uuid4(),
            file_type="original",
            size=randint(1000, 1_000_000),
            date_created=date.today(),
            puid=fake.text(20)[:-1],
            file_format=fake.text(20)[:-1],
            format_version=fake.text(20)[:-1],
            checksum_type=fake.text(20)[:-1],
            checksum_value=fake.text(20)[:-1],
            premis_object="",
        )
        db.session.add(aipfile)

    # A single commit persists every file staged above.
    db.session.commit()
55 changes: 55 additions & 0 deletions tools/tests/test_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import pytest

from tools.helpers import data


@pytest.fixture
def mock_db_add(mocker):
    """Patch the session's add and commit so tests never touch a database."""
    for target in ("AIPscan.db.session.add", "AIPscan.db.session.commit"):
        mocker.patch(target)


def test_create_storage_service(mock_db_add):
    """Fake storage services get populated fields and honour ``default``."""
    default_service = data.create_storage_service(True)

    # Every generated field, and the default flag, must be truthy.
    for field in ("name", "url", "user_name", "api_key", "default"):
        assert getattr(default_service, field)

    other_service = data.create_storage_service(False)
    assert not other_service.default


def test_create_fetch_job(mock_db_add):
    """A fake fetch job has download details and points at its service."""
    storage_service = data.create_storage_service(True)
    # The commit is mocked, so an ID has to be assigned by hand.
    storage_service.id = 1

    job = data.create_fetch_job(storage_service)

    for field in ("download_start", "download_end", "download_directory"):
        assert getattr(job, field)
    assert job.storage_service_id == storage_service.id


def test_create_location(mock_db_add):
    """A fake storage location is populated and keeps the service ID."""
    storage_service_id = 1
    storage_location = data.create_location(storage_service_id)

    for field in ("current_location", "description"):
        assert getattr(storage_location, field)
    assert storage_location.storage_service_id == 1


def test_create_aip(mock_db_add):
    """A fake AIP has generated fields and the IDs it was given."""
    package = data.create_aip(1, 2, 3)

    # Generated values only need to be present (truthy).
    generated_fields = (
        "uuid",
        "transfer_name",
        "create_date",
        "mets_sha256",
        "size",
    )
    for field in generated_fields:
        assert getattr(package, field)

    # IDs must match what was passed in; the pipeline ID is fixed at 1.
    assert package.storage_service_id == 1
    assert package.storage_location_id == 2
    assert package.fetch_job_id == 3
    assert package.origin_pipeline_id == 1

0 comments on commit 7769e88

Please sign in to comment.