Added a tool to populate AIPscan with randomly generated example data.
Showing 9 changed files with 360 additions and 0 deletions.
@@ -1,5 +1,6 @@
-r base.txt

faker==14.2.1
flake8==5.0.4
pytest==6.2.5
pytest_cov==2.11.1
Empty file.
Empty file.
@@ -0,0 +1,19 @@
import os
import sys

from flask import Flask

# Alter path so tools can import AIPscan's modules
relpath = f"{os.path.dirname(__file__)}/../../../AIPscan"
sys.path.append(os.path.abspath(relpath))

config_name = "default"


def create_app_instance(configuration, db):
    app = Flask(__name__)
    app.config.from_object(configuration)

    db.init_app(app)

    return app
@@ -0,0 +1,99 @@
#!/usr/bin/env python3
import sys

import click
from app import init
from helpers import data

from AIPscan import db
from AIPscan.models import FetchJob
from config import CONFIGS


@click.command()
@click.option("--storage-services-to-create", default=2)
@click.option("--locations-per-storage-service", default=2)
@click.option("--locations-min-aip-count", default=10)
@click.option("--locations-max-aip-count", default=30)
@click.option("--aip-min-file-count", default=10)
@click.option("--aip-max-file-count", default=30)
@click.option("--seed", default=0)
def main(
    storage_services_to_create,
    locations_per_storage_service,
    locations_min_aip_count,
    locations_max_aip_count,
    aip_min_file_count,
    aip_max_file_count,
    seed,
):
    # Initialize Flask app context
    app = init.create_app_instance(CONFIGS[init.config_name], db)

    # Change seed
    if seed > 0:
        data.seed(seed)

    with app.app_context():
        # Add example pipeline and storage services
        print(
            f"Creating/fetching pipeline and creating {storage_services_to_create} storage services..."
        )
        pipeline = data.create_or_fetch_fake_pipeline()

        ss_ids = []
        fetch_jobs = {}

        for _ in range(storage_services_to_create):
            is_default = len(ss_ids) == 0

            ss = data.create_fake_storage_service(is_default)
            ss_ids.append(ss.id)

            fetch_job = data.create_fake_fetch_job(ss.id)
            fetch_jobs[ss.id] = fetch_job.id

        # Populate storage service locations
        ss_locations_to_create = (
            storage_services_to_create * locations_per_storage_service
        )

        print(
            f"Creating {ss_locations_to_create} storage service locations (and their AIPs)..."
        )

        aip_batches_created = 0
        total_aip_batches = len(ss_ids) * locations_per_storage_service
        for ss_id in ss_ids:
            for _ in range(locations_per_storage_service):
                # Add location
                sl = data.create_fake_location(ss_id)

                # Add AIPs and AIP files
                aip_batches_created += 1

                print(f"Creating AIPs ({aip_batches_created}/{total_aip_batches})...")

                aipcount = 0
                for _ in range(
                    1, data.randint(locations_min_aip_count, locations_max_aip_count)
                ):
                    aip = data.create_fake_aip(
                        pipeline.id, ss_id, sl.id, fetch_jobs[ss_id]
                    )
                    data.create_fake_aip_files(
                        aip_min_file_count, aip_max_file_count, aip.id
                    )
                    aipcount += 1

                # Update package/AIP counts in fetch job
                fetch_job = FetchJob.query.get(fetch_jobs[ss_id])
                fetch_job.total_packages += aipcount
                fetch_job.total_aips += aipcount
                db.session.commit()

        print("Done.")


if __name__ == "__main__":
    sys.exit(main())
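A minimal sketch of exercising the command above in-process with click's CliRunner; the `populate` module name is a placeholder (the diff view does not preserve filenames), while the option names come from the `@click.option` declarations above.

    from click.testing import CliRunner

    from populate import main  # hypothetical module name for the script above

    runner = CliRunner()
    # One storage service, fixed seed, so repeated runs produce identical example data
    result = runner.invoke(main, ["--storage-services-to-create", "1", "--seed", "42"])
    print(result.output)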
Empty file.
@@ -0,0 +1,130 @@
import os
from datetime import date

from faker import Faker

from AIPscan import db
from AIPscan.models import (
    AIP,
    FetchJob,
    File,
    Pipeline,
    StorageLocation,
    StorageService,
)

# Initialize Faker instance
fake = Faker()


def seed(seed):
    fake.seed_instance(seed)


def randint(start, end):
    return fake.random.randint(start, end)


def create_or_fetch_fake_pipeline():
    pipeline = db.session.query(Pipeline).first()

    if pipeline is None:
        pipeline = Pipeline(origin_pipeline=fake.uuid4(), dashboard_url=fake.url())

        db.session.add(pipeline)
        db.session.commit()

    return pipeline


def create_fake_storage_service(default):
    ss = StorageService(
        name=fake.text(20)[:-1],
        url=fake.url(),
        user_name=fake.profile()["username"],
        api_key=fake.password(),
        download_limit=0,
        download_offset=0,
        default=default,
    )

    db.session.add(ss)
    db.session.commit()

    return ss


def create_fake_fetch_job(storage_service_id):
    fetch_job = FetchJob(
        total_packages=0,
        total_aips=0,
        total_deleted_aips=0,
        download_start=date.today(),
        download_end=date.today(),
        download_directory=fake.file_path(),
        storage_service_id=storage_service_id,
    )
    fetch_job.total_dips = 0
    fetch_job.total_sips = 0
    fetch_job.total_replicas = 0

    db.session.add(fetch_job)
    db.session.commit()

    return fetch_job


def create_fake_location(storage_service_id):
    current_location = os.path.join(os.path.dirname(fake.file_path(3)), fake.uuid4())

    location = StorageLocation(
        current_location=current_location,
        description=fake.text(20)[:-1],
        storage_service_id=storage_service_id,
    )

    db.session.add(location)
    db.session.commit()

    return location


def create_fake_aip(pipeline_id, storage_service_id, storage_location_id, fetch_job_id):
    aip = AIP(
        uuid=fake.uuid4(),
        transfer_name=fake.text(20)[:-1],
        create_date=date.today(),
        mets_sha256=fake.sha256(),
        size=randint(10000, 100_000_000),
        storage_service_id=storage_service_id,
        storage_location_id=storage_location_id,
        fetch_job_id=fetch_job_id,
        origin_pipeline_id=pipeline_id,
    )

    db.session.add(aip)
    db.session.commit()

    return aip


def create_fake_aip_files(min, max, aip_id):
    for _ in range(1, randint(min, max)):
        aipfile = File(
            aip_id=aip_id,
            name=fake.text(20)[:-1],
            filepath=fake.file_path(),
            uuid=fake.uuid4(),
            file_type="original",
            size=randint(1000, 1_000_000),
            date_created=date.today(),
            puid=fake.text(20)[:-1],
            file_format=fake.text(20)[:-1],
            format_version=fake.text(20)[:-1],
            checksum_type=fake.text(20)[:-1],
            checksum_value=fake.text(20)[:-1],
            premis_object="",
        )

        db.session.add(aipfile)
        db.session.commit()
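The Faker-backed helpers above can also be driven deterministically on their own; a small sketch, reusing the `tools.helpers` import path that the tests below use (helpers that write to the database still need the Flask app context from `create_app_instance`).

    from tools.helpers import data

    # Seeding the module-level Faker instance makes subsequent values reproducible
    data.seed(42)
    print(data.randint(10, 30))  # same value on every run with the same seed
    print(data.fake.uuid4())     # same UUID on every run with the same seed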
@@ -0,0 +1,88 @@
import datetime

import pytest

from tools.helpers import data


@pytest.fixture
def mock_db_add(mocker):
    mocker.patch("AIPscan.db.session.add")
    mocker.patch("AIPscan.db.session.commit")


def test_create_fake_storage_service(mock_db_add):
    ss = data.create_fake_storage_service(True)

    assert ss.name
    assert type(ss.name) is str

    assert ss.url
    assert type(ss.url) is str

    assert ss.user_name
    assert type(ss.user_name) is str

    assert ss.api_key
    assert type(ss.api_key) is str

    assert ss.default
    assert type(ss.default) is bool

    ss = data.create_fake_storage_service(False)
    assert not ss.default


def test_create_fake_fetch_job(mock_db_add):
    ss = data.create_fake_storage_service(True)
    ss.id = 1

    fetch_job = data.create_fake_fetch_job(ss.id)

    assert fetch_job.download_start
    assert type(fetch_job.download_start) is datetime.date

    assert fetch_job.download_end
    assert type(fetch_job.download_end) is datetime.date

    assert fetch_job.download_directory
    assert type(fetch_job.download_directory) is str

    assert fetch_job.storage_service_id == ss.id


def test_create_fake_location(mock_db_add):
    location = data.create_fake_location(1)

    assert location.current_location
    assert type(location.current_location) is str

    assert location.description
    assert type(location.description) is str

    assert location.storage_service_id == 1


def test_create_fake_aip(mock_db_add):
    aip = data.create_fake_aip(1, 2, 3, 4)

    assert aip.uuid
    assert type(aip.uuid) is str

    assert aip.transfer_name
    assert type(aip.transfer_name) is str

    assert aip.create_date
    assert type(aip.create_date) is datetime.date

    assert aip.mets_sha256
    assert type(aip.mets_sha256) is str

    assert aip.size
    assert type(aip.size) is int

    assert aip.origin_pipeline_id == 1
    assert aip.storage_service_id == 2
    assert aip.storage_location_id == 3
    assert aip.fetch_job_id == 4