From e1c53cab4828950d10b3b584427c74e52e0a2eae Mon Sep 17 00:00:00 2001 From: Francois Date: Tue, 25 Feb 2025 16:24:54 +0000 Subject: [PATCH] added test for bia agent and updated some ingest test wiring and settings using --- bia-ingest/bia_ingest/api_client.py | 4 +- .../bia_ingest/biostudies/find_bia_studies.py | 4 +- bia-ingest/bia_ingest/persistence_strategy.py | 8 +- bia-ingest/bia_ingest/settings.py | 4 +- bia-ingest/test/conftest.py | 28 +- .../10259a72-db54-4277-928b-6f291d1b9ca1.json | 19 ++ .../2dbf5050-5704-484c-94ca-98e8e1d08e11.json | 23 ++ .../2d124551-c51a-4d8a-817e-2804f558e059.json | 25 ++ .../44500cd8-1ad9-4340-ba55-3427a2782486.json | 58 ++++ .../0d4d7ee6-d288-4ed2-a4d6-d1dbcde23d80.json | 25 ++ .../119b42c3-8ee0-486a-8b3d-9dfff40d09a2.json | 25 ++ .../22550bb0-037b-49b6-946f-7c72f6aa2594.json | 25 ++ .../22e3a91e-c02c-4b49-8b9c-7c0a9e657b60.json | 25 ++ .../2b181c80-841d-47c0-a57a-5b0aa60e658c.json | 25 ++ .../62f651ec-282c-4cd3-8105-fba457455326.json | 25 ++ .../7633f093-d365-49c9-9440-7febbe3e1680.json | 25 ++ .../97ad64e0-026c-4ccd-8e8a-0373c40cf7d4.json | 25 ++ .../99b6e5e9-9565-45e6-a007-279b3bcc3201.json | 25 ++ .../be76820d-9cf4-4bc8-96c1-692b02ded6ca.json | 25 ++ .../c5602e72-e46d-4e6a-bfef-fe8d84cded38.json | 25 ++ .../dbc54b19-98e3-4d25-9841-a241ef60a499.json | 25 ++ .../f57b3316-c0e7-44d6-a56d-40e776be5c35.json | 25 ++ .../5f4095ba-8bd3-4efa-9c7e-18bc35ddf416.json | 16 ++ .../c27b92d9-1340-455f-9278-c8663326214e.json | 12 + .../869ff676-bf68-4e2d-869e-7e644cbbec42.json | 127 +++++++++ .../input/S-BIAD1492.json | 255 ++++++++++++++++++ .../input/bbc024_images.json | 107 ++++++++ .../input/bbc024_masks.json | 92 +++++++ bia-ingest/test/test_bia_agent_study.py | 125 +++++++++ bia-ingest/test/test_bia_ingest_cli.py | 33 +-- .../test/test_order_of_processing_datasets.py | 46 ++-- 31 files changed, 1254 insertions(+), 57 deletions(-) create mode 100644 
bia-ingest/test/data/example_bia_agent_study/expected_output/annotation_method/10259a72-db54-4277-928b-6f291d1b9ca1.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/bio_sample/S-BIAD1492/2dbf5050-5704-484c-94ca-98e8e1d08e11.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/dataset/S-BIAD1492/2d124551-c51a-4d8a-817e-2804f558e059.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/dataset/S-BIAD1492/44500cd8-1ad9-4340-ba55-3427a2782486.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/0d4d7ee6-d288-4ed2-a4d6-d1dbcde23d80.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/119b42c3-8ee0-486a-8b3d-9dfff40d09a2.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/22550bb0-037b-49b6-946f-7c72f6aa2594.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/22e3a91e-c02c-4b49-8b9c-7c0a9e657b60.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/2b181c80-841d-47c0-a57a-5b0aa60e658c.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/62f651ec-282c-4cd3-8105-fba457455326.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/7633f093-d365-49c9-9440-7febbe3e1680.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/97ad64e0-026c-4ccd-8e8a-0373c40cf7d4.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/99b6e5e9-9565-45e6-a007-279b3bcc3201.json create mode 100644 
bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/be76820d-9cf4-4bc8-96c1-692b02ded6ca.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/c5602e72-e46d-4e6a-bfef-fe8d84cded38.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/dbc54b19-98e3-4d25-9841-a241ef60a499.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/f57b3316-c0e7-44d6-a56d-40e776be5c35.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/image_acquisition_protocol/S-BIAD1492/5f4095ba-8bd3-4efa-9c7e-18bc35ddf416.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/specimen_imaging_preparation_protocol/S-BIAD1492/c27b92d9-1340-455f-9278-c8663326214e.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/expected_output/study/S-BIAD1492/869ff676-bf68-4e2d-869e-7e644cbbec42.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/input/S-BIAD1492.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/input/bbc024_images.json create mode 100644 bia-ingest/test/data/example_bia_agent_study/input/bbc024_masks.json create mode 100644 bia-ingest/test/test_bia_agent_study.py diff --git a/bia-ingest/bia_ingest/api_client.py b/bia-ingest/bia_ingest/api_client.py index 1656f15a..5849feee 100644 --- a/bia-ingest/bia_ingest/api_client.py +++ b/bia-ingest/bia_ingest/api_client.py @@ -3,12 +3,13 @@ from bia_integrator_api.api import PrivateApi import bia_integrator_api.models as api_models import logging -from bia_ingest.settings import settings +from bia_ingest.settings import get_settings logger = logging.getLogger("__main__." 
+ __name__) def get_bia_api_client(): + settings = get_settings() private_api_client = get_client_private( username=settings.bia_api_username, password=settings.bia_api_password, @@ -18,6 +19,7 @@ def get_bia_api_client(): def get_local_bia_api_client(): + settings = get_settings() api_config = Configuration(host=settings.local_bia_api_basepath) private_api = PrivateApi(ApiClient(configuration=api_config)) try: diff --git a/bia-ingest/bia_ingest/biostudies/find_bia_studies.py b/bia-ingest/bia_ingest/biostudies/find_bia_studies.py index 68c8752a..d2ceecb4 100644 --- a/bia-ingest/bia_ingest/biostudies/find_bia_studies.py +++ b/bia-ingest/bia_ingest/biostudies/find_bia_studies.py @@ -6,7 +6,7 @@ from typing import Optional from bia_integrator_api.util import get_client from bia_integrator_api.models import Study -from bia_ingest.settings import settings +from bia_ingest.settings import get_settings import re logger = logging.getLogger("__main__." + __name__) @@ -115,7 +115,7 @@ def get_accno(acc_id): ) acc_id_of_interest = [result.accession for result in studies_of_interest] logging.info("Fetching all studies from bia api") - api_client = get_client(settings.bia_api_basepath) + api_client = get_client(get_settings().bia_api_basepath) bia_existing_studies = fetch_studies_from_api(api_client, page_size) processed_acc_ids = [str(study.accession_id) for study in bia_existing_studies] unprocessed_acc_ids = sorted(list(set(acc_id_of_interest) - set(processed_acc_ids)), key=lambda acc_id : get_accno(acc_id)) diff --git a/bia-ingest/bia_ingest/persistence_strategy.py b/bia-ingest/bia_ingest/persistence_strategy.py index aa5e14c6..482ed474 100644 --- a/bia-ingest/bia_ingest/persistence_strategy.py +++ b/bia-ingest/bia_ingest/persistence_strategy.py @@ -12,7 +12,7 @@ from bia_integrator_api.exceptions import NotFoundException import bia_integrator_api.models as api_models from bia_ingest.api_client import get_bia_api_client, get_local_bia_api_client -from bia_ingest.settings 
import settings +from bia_ingest.settings import get_settings logger = logging.getLogger("__main__." + __name__) @@ -84,8 +84,8 @@ def fetch_by_uuid( # Persist using API class ApiPersister(PersistenceStrategy): def __init__(self, api_client: PrivateApi) -> None: - assert ( - isinstance(api_client, PrivateApi) or isinstance(api_client, PublicApi) + assert isinstance(api_client, PrivateApi) or isinstance( + api_client, PublicApi ), f"ApiPersister cannot be created. Expected valid instance of or . Got : {type(api_client)} - are your API credentials valid and/or is the API server online?" self.api_client = api_client @@ -145,7 +145,7 @@ def persistence_strategy_factory(persistence_mode: PersistenceMode, **kwargs): return ApiPersister(api_client=get_local_bia_api_client()) elif persistence_mode == PersistenceMode.disk: return DiskPersister( - output_dir_base=settings.bia_data_dir, + output_dir_base=get_settings().bia_data_dir, accession_id=kwargs["accession_id"], ) else: diff --git a/bia-ingest/bia_ingest/settings.py b/bia-ingest/bia_ingest/settings.py index 0ad2ad27..b9fe59a5 100644 --- a/bia-ingest/bia_ingest/settings.py +++ b/bia-ingest/bia_ingest/settings.py @@ -38,4 +38,6 @@ class Settings(BaseSettings): bia_api_password: str = Field("") -settings = Settings() + +def get_settings(): + return Settings() diff --git a/bia-ingest/test/conftest.py b/bia-ingest/test/conftest.py index 1a712e55..c273c370 100644 --- a/bia-ingest/test/conftest.py +++ b/bia-ingest/test/conftest.py @@ -16,7 +16,16 @@ BioStudiesProcessingVersion, ) from bia_integrator_api.util import get_client -from bia_ingest.settings import settings +import os +from dotenv.main import dotenv_values + + +def pytest_configure(config: pytest.Config): + env_settings = dotenv_values(str(Path(__file__).parents[1] / ".env_template")) + os.environ["bia_api_basepath"] = env_settings["local_bia_api_basepath"] + os.environ["bia_api_username"] = env_settings["local_bia_api_username"] + os.environ["bia_api_password"] = 
env_settings["local_bia_api_password"] + @pytest.fixture def test_submission() -> Submission: @@ -76,7 +85,7 @@ def _mock_request_get(flist_url: str) -> Dict[str, str]: @pytest.fixture -def mock_search_result(monkeypatch): +def mock_search_result(): """Requests.get mocked to read file from disk""" mock_result = { @@ -104,16 +113,15 @@ def mock_search_result(monkeypatch): } search_result = SearchPage(**mock_result) - def _mock_search_result(url, headers) -> Dict[str, str]: + return search_result - return_value = Mock() - return_value.status_code = 200 - return_value.content = search_result.model_dump_json() - return return_value - monkeypatch.setattr(requests, "get", _mock_search_result) +@pytest.fixture() +def get_bia_api_client(): + return get_client(os.environ.get("bia_api_basepath")) @pytest.fixture() -def get_bia_api_client(): - return get_client(settings.local_bia_api_basepath) +def tmp_bia_data_dir(tmp_path): + os.environ["bia_data_dir"] = str(tmp_path) + return tmp_path diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/annotation_method/10259a72-db54-4277-928b-6f291d1b9ca1.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/annotation_method/10259a72-db54-4277-928b-6f291d1b9ca1.json new file mode 100644 index 00000000..fb2cecef --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/annotation_method/10259a72-db54-4277-928b-6f291d1b9ca1.json @@ -0,0 +1,19 @@ +{ + "title_id": "Ground truth segmentation masks", + "uuid": "10259a72-db54-4277-928b-6f291d1b9ca1", + "version": 0, + "model": { + "type_name": "AnnotationMethod", + "version": 3 + }, + "attribute": [], + "protocol_description": "Each image contains exactly 20 masks; this is the ground truth for counting. 
Ground truth for foreground/background segmentation are available as labeled 16bit grayscale images", + "annotation_criteria": null, + "annotation_coverage": "All data has been annotated.", + "transformation_description": null, + "spatial_information": null, + "method_type": [ + "other" + ], + "annotation_source_indicator": null +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/bio_sample/S-BIAD1492/2dbf5050-5704-484c-94ca-98e8e1d08e11.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/bio_sample/S-BIAD1492/2dbf5050-5704-484c-94ca-98e8e1d08e11.json new file mode 100644 index 00000000..c1f8b48f --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/bio_sample/S-BIAD1492/2dbf5050-5704-484c-94ca-98e8e1d08e11.json @@ -0,0 +1,23 @@ +{ + "title_id": "Simulated HL-60 cells", + "uuid": "2dbf5050-5704-484c-94ca-98e8e1d08e11", + "version": 0, + "model": { + "type_name": "BioSample", + "version": 3 + }, + "attribute": [], + "organism_classification": [ + { + "attribute": [], + "common_name": null, + "scientific_name": "simulated data", + "ncbi_id": null + } + ], + "biological_entity_description": "simulated human promyelocytic leukemia cells (HL-60) stained with DAPI", + "experimental_variable_description": [], + "extrinsic_variable_description": [], + "intrinsic_variable_description": [], + "growth_protocol_uuid": null +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/dataset/S-BIAD1492/2d124551-c51a-4d8a-817e-2804f558e059.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/dataset/S-BIAD1492/2d124551-c51a-4d8a-817e-2804f558e059.json new file mode 100644 index 00000000..6e8a885f --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/dataset/S-BIAD1492/2d124551-c51a-4d8a-817e-2804f558e059.json @@ -0,0 +1,25 @@ +{ + "title_id": "Ground truth segmentation masks", + "uuid": 
"2d124551-c51a-4d8a-817e-2804f558e059", + "version": 0, + "model": { + "type_name": "Dataset", + "version": 1 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "annotation_method_uuid", + "value": { + "annotation_method_uuid": [ + "10259a72-db54-4277-928b-6f291d1b9ca1" + ] + } + } + ], + "description": null, + "analysis_method": [], + "correlation_method": [], + "example_image_uri": [], + "submitted_in_study_uuid": "869ff676-bf68-4e2d-869e-7e644cbbec42" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/dataset/S-BIAD1492/44500cd8-1ad9-4340-ba55-3427a2782486.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/dataset/S-BIAD1492/44500cd8-1ad9-4340-ba55-3427a2782486.json new file mode 100644 index 00000000..4ada4357 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/dataset/S-BIAD1492/44500cd8-1ad9-4340-ba55-3427a2782486.json @@ -0,0 +1,58 @@ +{ + "title_id": "Simulated fluorescence images", + "uuid": "44500cd8-1ad9-4340-ba55-3427a2782486", + "version": 0, + "model": { + "type_name": "Dataset", + "version": 1 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "associations", + "value": { + "associations": [ + { + "image_analysis": null, + "image_correlation": null, + "biosample": "Simulated HL-60 cells", + "image_acquisition": "Simulated fluorescence microscopy", + "specimen": "Digital Phantom Generation" + } + ] + } + }, + { + "provenance": "bia_ingest", + "name": "image_acquisition_protocol_uuid", + "value": { + "image_acquisition_protocol_uuid": [ + "5f4095ba-8bd3-4efa-9c7e-18bc35ddf416" + ] + } + }, + { + "provenance": "bia_ingest", + "name": "specimen_imaging_preparation_protocol_uuid", + "value": { + "specimen_imaging_preparation_protocol_uuid": [ + "c27b92d9-1340-455f-9278-c8663326214e" + ] + } + }, + { + "provenance": "bia_ingest", + "name": "bio_sample_uuid", + "value": { + "bio_sample_uuid": [ + 
"2dbf5050-5704-484c-94ca-98e8e1d08e11" + ] + } + } + ], + "description": "Four subsets (each in high and low signal-to-noise ratio variant) of 30 images each are provided. Each image contains 20 HL-60 cell nuclei, but the nuclei cluster with different probabilities (0%, 25%, 50%, and 75%) in the four subsets.", + "analysis_method": [], + "correlation_method": [], + "example_image_uri": [], + "submitted_in_study_uuid": "869ff676-bf68-4e2d-869e-7e644cbbec42" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/0d4d7ee6-d288-4ed2-a4d6-d1dbcde23d80.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/0d4d7ee6-d288-4ed2-a4d6-d1dbcde23d80.json new file mode 100644 index 00000000..58276dbe --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/0d4d7ee6-d288-4ed2-a4d6-d1dbcde23d80.json @@ -0,0 +1,25 @@ +{ + "uuid": "0d4d7ee6-d288-4ed2-a4d6-d1dbcde23d80", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "Annotation type": "Segmentation masks", + "Source image": "BBBC024/BBBC024_v1_c50_lowSNR_images_TIFF/image-final_0012.tif" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c50_lowSNR_images_TIFF/image-labels_0012.tif", + "format": "file", + "size_in_bytes": 3195094, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c50_lowSNR_images_TIFF/image-labels_0012.tif", + "submission_dataset_uuid": "2d124551-c51a-4d8a-817e-2804f558e059" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/119b42c3-8ee0-486a-8b3d-9dfff40d09a2.json 
b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/119b42c3-8ee0-486a-8b3d-9dfff40d09a2.json new file mode 100644 index 00000000..1e8152da --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/119b42c3-8ee0-486a-8b3d-9dfff40d09a2.json @@ -0,0 +1,25 @@ +{ + "uuid": "119b42c3-8ee0-486a-8b3d-9dfff40d09a2", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "clustering probability": "0%", + "SNR": "high" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c00_highSNR_images_TIFF/image-final_0012.tif", + "format": "file", + "size_in_bytes": 61795818, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c00_highSNR_images_TIFF/image-final_0012.tif", + "submission_dataset_uuid": "44500cd8-1ad9-4340-ba55-3427a2782486" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/22550bb0-037b-49b6-946f-7c72f6aa2594.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/22550bb0-037b-49b6-946f-7c72f6aa2594.json new file mode 100644 index 00000000..56d6ddb5 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/22550bb0-037b-49b6-946f-7c72f6aa2594.json @@ -0,0 +1,25 @@ +{ + "uuid": "22550bb0-037b-49b6-946f-7c72f6aa2594", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "clustering probability": "75%", + "SNR": "low" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c75_lowSNR_images_TIFF/image-final_0012.tif", + "format": "file", + "size_in_bytes": 50872008, + "uri": 
"https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c75_lowSNR_images_TIFF/image-final_0012.tif", + "submission_dataset_uuid": "44500cd8-1ad9-4340-ba55-3427a2782486" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/22e3a91e-c02c-4b49-8b9c-7c0a9e657b60.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/22e3a91e-c02c-4b49-8b9c-7c0a9e657b60.json new file mode 100644 index 00000000..32447931 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/22e3a91e-c02c-4b49-8b9c-7c0a9e657b60.json @@ -0,0 +1,25 @@ +{ + "uuid": "22e3a91e-c02c-4b49-8b9c-7c0a9e657b60", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "Annotation type": "Segmentation masks", + "Source image": "BBBC024/BBBC024_v1_c75_lowSNR_images_TIFF/image-final_0012.tif" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c75_lowSNR_images_TIFF/image-labels_0012.tif", + "format": "file", + "size_in_bytes": 3217536, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c75_lowSNR_images_TIFF/image-labels_0012.tif", + "submission_dataset_uuid": "2d124551-c51a-4d8a-817e-2804f558e059" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/2b181c80-841d-47c0-a57a-5b0aa60e658c.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/2b181c80-841d-47c0-a57a-5b0aa60e658c.json new file mode 100644 index 00000000..95e4cce7 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/2b181c80-841d-47c0-a57a-5b0aa60e658c.json @@ -0,0 +1,25 @@ +{ + "uuid": "2b181c80-841d-47c0-a57a-5b0aa60e658c", + 
"version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "Annotation type": "Segmentation masks", + "Source image": "BBBC024/BBBC024_v1_c25_highSNR_images_TIFF/image-final_0012.tif" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c25_highSNR_images_TIFF/image-labels_0012.tif", + "format": "file", + "size_in_bytes": 3207776, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c25_highSNR_images_TIFF/image-labels_0012.tif", + "submission_dataset_uuid": "2d124551-c51a-4d8a-817e-2804f558e059" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/62f651ec-282c-4cd3-8105-fba457455326.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/62f651ec-282c-4cd3-8105-fba457455326.json new file mode 100644 index 00000000..cb4020ac --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/62f651ec-282c-4cd3-8105-fba457455326.json @@ -0,0 +1,25 @@ +{ + "uuid": "62f651ec-282c-4cd3-8105-fba457455326", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "clustering probability": "50%", + "SNR": "high" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c50_highSNR_images_TIFF/image-final_0012.tif", + "format": "file", + "size_in_bytes": 61303490, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c50_highSNR_images_TIFF/image-final_0012.tif", + "submission_dataset_uuid": "44500cd8-1ad9-4340-ba55-3427a2782486" +} \ No newline at end of file diff --git 
a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/7633f093-d365-49c9-9440-7febbe3e1680.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/7633f093-d365-49c9-9440-7febbe3e1680.json new file mode 100644 index 00000000..c9fd21ef --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/7633f093-d365-49c9-9440-7febbe3e1680.json @@ -0,0 +1,25 @@ +{ + "uuid": "7633f093-d365-49c9-9440-7febbe3e1680", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "Annotation type": "Segmentation masks", + "Source image": "BBBC024/BBBC024_v1_c00_highSNR_images_TIFF/image-final_0012.tif" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c00_highSNR_images_TIFF/image-labels_0012.tif", + "format": "file", + "size_in_bytes": 3182548, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c00_highSNR_images_TIFF/image-labels_0012.tif", + "submission_dataset_uuid": "2d124551-c51a-4d8a-817e-2804f558e059" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/97ad64e0-026c-4ccd-8e8a-0373c40cf7d4.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/97ad64e0-026c-4ccd-8e8a-0373c40cf7d4.json new file mode 100644 index 00000000..94793b5f --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/97ad64e0-026c-4ccd-8e8a-0373c40cf7d4.json @@ -0,0 +1,25 @@ +{ + "uuid": "97ad64e0-026c-4ccd-8e8a-0373c40cf7d4", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "clustering 
probability": "50%", + "SNR": "low" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c50_lowSNR_images_TIFF/image-final_0012.tif", + "format": "file", + "size_in_bytes": 50845142, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c50_lowSNR_images_TIFF/image-final_0012.tif", + "submission_dataset_uuid": "44500cd8-1ad9-4340-ba55-3427a2782486" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/99b6e5e9-9565-45e6-a007-279b3bcc3201.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/99b6e5e9-9565-45e6-a007-279b3bcc3201.json new file mode 100644 index 00000000..13563147 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/99b6e5e9-9565-45e6-a007-279b3bcc3201.json @@ -0,0 +1,25 @@ +{ + "uuid": "99b6e5e9-9565-45e6-a007-279b3bcc3201", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "clustering probability": "25%", + "SNR": "high" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c25_highSNR_images_TIFF/image-final_0012.tif", + "format": "file", + "size_in_bytes": 61405626, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c25_highSNR_images_TIFF/image-final_0012.tif", + "submission_dataset_uuid": "44500cd8-1ad9-4340-ba55-3427a2782486" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/be76820d-9cf4-4bc8-96c1-692b02ded6ca.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/be76820d-9cf4-4bc8-96c1-692b02ded6ca.json new file mode 100644 index 00000000..69c338a5 --- /dev/null +++ 
b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/be76820d-9cf4-4bc8-96c1-692b02ded6ca.json @@ -0,0 +1,25 @@ +{ + "uuid": "be76820d-9cf4-4bc8-96c1-692b02ded6ca", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "Annotation type": "Segmentation masks", + "Source image": "BBBC024/BBBC024_v1_c00_lowSNR_images_TIFF/image-final_0012.tif" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c00_lowSNR_images_TIFF/image-labels_0012.tif", + "format": "file", + "size_in_bytes": 3178098, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c00_lowSNR_images_TIFF/image-labels_0012.tif", + "submission_dataset_uuid": "2d124551-c51a-4d8a-817e-2804f558e059" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/c5602e72-e46d-4e6a-bfef-fe8d84cded38.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/c5602e72-e46d-4e6a-bfef-fe8d84cded38.json new file mode 100644 index 00000000..07ce19a8 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/c5602e72-e46d-4e6a-bfef-fe8d84cded38.json @@ -0,0 +1,25 @@ +{ + "uuid": "c5602e72-e46d-4e6a-bfef-fe8d84cded38", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "Annotation type": "Segmentation masks", + "Source image": "BBBC024/BBBC024_v1_c50_highSNR_images_TIFF/image-final_0012.tif" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c50_highSNR_images_TIFF/image-labels_0012.tif", + "format": "file", + "size_in_bytes": 3177868, + "uri": 
"https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c50_highSNR_images_TIFF/image-labels_0012.tif", + "submission_dataset_uuid": "2d124551-c51a-4d8a-817e-2804f558e059" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/dbc54b19-98e3-4d25-9841-a241ef60a499.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/dbc54b19-98e3-4d25-9841-a241ef60a499.json new file mode 100644 index 00000000..8cb588c5 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/dbc54b19-98e3-4d25-9841-a241ef60a499.json @@ -0,0 +1,25 @@ +{ + "uuid": "dbc54b19-98e3-4d25-9841-a241ef60a499", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "clustering probability": "75%", + "SNR": "high" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c75_highSNR_images_TIFF/image-final_0012.tif", + "format": "file", + "size_in_bytes": 61715488, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c75_highSNR_images_TIFF/image-final_0012.tif", + "submission_dataset_uuid": "44500cd8-1ad9-4340-ba55-3427a2782486" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/f57b3316-c0e7-44d6-a56d-40e776be5c35.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/f57b3316-c0e7-44d6-a56d-40e776be5c35.json new file mode 100644 index 00000000..4e0064f8 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/file_reference/S-BIAD1492/f57b3316-c0e7-44d6-a56d-40e776be5c35.json @@ -0,0 +1,25 @@ +{ + "uuid": "f57b3316-c0e7-44d6-a56d-40e776be5c35", + "version": 0, + "model": { + "type_name": "FileReference", + "version": 
2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "attributes_from_biostudies.File", + "value": { + "attributes": { + "clustering probability": "0%", + "SNR": "low" + } + } + } + ], + "file_path": "BBBC024/BBBC024_v1_c00_lowSNR_images_TIFF/image-final_0012.tif", + "format": "file", + "size_in_bytes": 50966860, + "uri": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/BBBC024/BBBC024_v1_c00_lowSNR_images_TIFF/image-final_0012.tif", + "submission_dataset_uuid": "44500cd8-1ad9-4340-ba55-3427a2782486" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/image_acquisition_protocol/S-BIAD1492/5f4095ba-8bd3-4efa-9c7e-18bc35ddf416.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/image_acquisition_protocol/S-BIAD1492/5f4095ba-8bd3-4efa-9c7e-18bc35ddf416.json new file mode 100644 index 00000000..48080501 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/image_acquisition_protocol/S-BIAD1492/5f4095ba-8bd3-4efa-9c7e-18bc35ddf416.json @@ -0,0 +1,16 @@ +{ + "title_id": "Simulated fluorescence microscopy", + "uuid": "5f4095ba-8bd3-4efa-9c7e-18bc35ddf416", + "version": 0, + "model": { + "type_name": "ImageAcquisitionProtocol", + "version": 2 + }, + "attribute": [], + "protocol_description": "N/A", + "imaging_instrument_description": "Virtual microscope imitating the microscope Zeiss S100 (objective Zeiss 63x/1.40 Oil DIC) attached to confocal unit Atto CARV and CCD camera Micromax 1300-YHS.", + "fbbi_id": [], + "imaging_method_name": [ + "simulated fluorescence microscopy" + ] +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/specimen_imaging_preparation_protocol/S-BIAD1492/c27b92d9-1340-455f-9278-c8663326214e.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/specimen_imaging_preparation_protocol/S-BIAD1492/c27b92d9-1340-455f-9278-c8663326214e.json new file mode 100644 index 
00000000..f3362048 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/specimen_imaging_preparation_protocol/S-BIAD1492/c27b92d9-1340-455f-9278-c8663326214e.json @@ -0,0 +1,12 @@ +{ + "title_id": "Digital Phantom Generation", + "uuid": "c27b92d9-1340-455f-9278-c8663326214e", + "version": 0, + "model": { + "type_name": "SpecimenImagingPreparationProtocol", + "version": 2 + }, + "attribute": [], + "protocol_description": "In the experiments using the HL-60 cell line, investigators are usually interested in studying the cell nucleus that occupies most of the cell volume. Therefore, the following text will focus on modeling nucleus shape and texture. When simulating the appearance of the HL-60 standard cell line, we can presume the shape of the initial object to be spherical, as this object type is topologically equivalent to a sphere. However, the basic objects like spheres or ellipsoids are too simple and regular. Because the aim is to simulate real objects, a certain amount of irregularity is required. For this purpose, we used the PDE-based method to distort the object shape. The idea is based on viewing the object boundary as a deformable surface. The deformation is realized with fast level set methods using artificial noise as a speed function. Besides the shape, the texture of the nucleus image profile reveals important information about the cell activity. In each stage of the cell cycle, chromatin has different properties, and hence when stained, it looks different. For these purposes, the study and the measurement of heterogeneity of chromatin is an important task. Essentially, there are two ways to generate synthetic texture: algorithms for texture synthesis and methods for procedural texture modeling. Here, we decided to use the latter one. The texture function is defined as a sum of several Perlin's noise function. However, certain nucleus parts may not contain chromatin and hence may remain unstained. 
These locations are either left blank (without any texture) or defined as very dark. The latter case corresponds to an unwanted staining effect. The nucleoli might be an example of such an object type that typically appears as a dark (not stained) place in the image of a nucleus. It was discovered empirically that there is only one nucleolus per healthy nucleus in human cells. As for cancerous cells, there might be more than one nucleolus. The shape of such a nucleolus is mostly spherical or slightly deformed. Because of this property, its generation follows the same idea as the generation of the whole HL-60 nucleus.", + "signal_channel_information": [] +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/expected_output/study/S-BIAD1492/869ff676-bf68-4e2d-869e-7e644cbbec42.json b/bia-ingest/test/data/example_bia_agent_study/expected_output/study/S-BIAD1492/869ff676-bf68-4e2d-869e-7e644cbbec42.json new file mode 100644 index 00000000..76396279 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/expected_output/study/S-BIAD1492/869ff676-bf68-4e2d-869e-7e644cbbec42.json @@ -0,0 +1,127 @@ +{ + "uuid": "869ff676-bf68-4e2d-869e-7e644cbbec42", + "version": 0, + "model": { + "type_name": "Study", + "version": 2 + }, + "attribute": [ + { + "provenance": "bia_ingest", + "name": "Extras from biostudies.Submission.attributes", + "value": { + "Keyword": [ + "segmentation", + "synthetic data", + "nucleus" + ] + } + }, + { + "provenance": "bia_ingest", + "name": "biostudies json/pagetab entry", + "value": { + "json": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/S-BIAD1492.json", + "pagetab": "https://www.ebi.ac.uk/biostudies/files/S-BIAD1492/S-BIAD1492.tsv" + } + } + ], + "accession_id": "S-BIAD1492", + "licence": "CC_BY_3.0", + "author": [ + { + "rorid": null, + "address": null, + "website": null, + "orcid": null, + "display_name": "David Svoboda", + "affiliation": [ + { + "rorid": null, + "address": "Botanická 68a, Faculty 
of Informatics, Masaryk University, 602 00 Brno, Czech Republic", + "website": null, + "display_name": "Centre for Biomedical Image Analysis" + } + ], + "contact_email": null, + "role": "dataset creator" + }, + { + "rorid": null, + "address": null, + "website": null, + "orcid": null, + "display_name": "Michal Kozubek", + "affiliation": [ + { + "rorid": null, + "address": "Botanická 68a, Faculty of Informatics, Masaryk University, 602 00 Brno, Czech Republic", + "website": null, + "display_name": "Centre for Biomedical Image Analysis" + } + ], + "contact_email": null, + "role": "dataset creator" + }, + { + "rorid": null, + "address": null, + "website": null, + "orcid": null, + "display_name": "Stanislav Stejskal", + "affiliation": [ + { + "rorid": null, + "address": "Botanická 68a, Faculty of Informatics, Masaryk University, 602 00 Brno, Czech Republic", + "website": null, + "display_name": "Centre for Biomedical Image Analysis" + } + ], + "contact_email": null, + "role": "dataset creator" + }, + { + "rorid": null, + "address": null, + "website": null, + "orcid": null, + "display_name": "Broad Institute's Imaging Platform", + "affiliation": [ + { + "rorid": null, + "address": null, + "website": null, + "display_name": "Broad Institute of Massachusetts Institute of Technology and Harvard, Cambridge, Massachusetts, USA" + } + ], + "contact_email": null, + "role": "data curation" + }, + { + "rorid": null, + "address": null, + "website": null, + "orcid": "0000-0002-0456-6912", + "display_name": "Teresa Zulueta-Coarasa", + "affiliation": [ + { + "rorid": null, + "address": null, + "website": null, + "display_name": "European Bioinformatics Institute" + } + ], + "contact_email": null, + "role": "data curation, submitter" + } + ], + "title": "Synthetic images and segmentation masks simulating HL-60 cell nucleus in 3D", + "release_date": "2024-11-26", + "description": "One of the principal challenges in counting or segmenting nuclei is dealing with clustered nuclei. 
To help assess algorithms' performance in this regard, this synthetic image set consists of four subsets with increasing degree of clustering. Each subset is also provided in two different levels of quality: high SNR and low SNR.", + "keyword": [], + "acknowledgement": "", + "see_also": [], + "related_publication": [], + "grant": [], + "funding_statement": "" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/input/S-BIAD1492.json b/bia-ingest/test/data/example_bia_agent_study/input/S-BIAD1492.json new file mode 100644 index 00000000..9f6f6314 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/input/S-BIAD1492.json @@ -0,0 +1,255 @@ +{ + "accno" : "S-BIAD1492", + "attributes" : [ { + "name" : "REMBI_PageTab Conversion Script Version", + "value" : "1.0.0" + }, { + "name" : "Title", + "value" : "Synthetic images and segmentation masks simulating HL-60 cell nucleus in 3D" + }, { + "name" : "ReleaseDate", + "value" : "2024-11-26" + }, { + "name" : "AttachTo", + "value" : "BioImages" + } ], + "section" : { + "type" : "Study", + "attributes" : [ { + "name" : "Description", + "value" : "One of the principal challenges in counting or segmenting nuclei is dealing with clustered nuclei. To help assess algorithms' performance in this regard, this synthetic image set consists of four subsets with increasing degree of clustering. Each subset is also provided in two different levels of quality: high SNR and low SNR." 
+ }, { + "name" : "License", + "value" : "CC BY 3.0" + }, { + "name" : "Keyword", + "value" : "segmentation" + }, { + "name" : "Keyword", + "value" : "synthetic data" + }, { + "name" : "Keyword", + "value" : "nucleus" + } ], + "links" : [ { + "url" : "https://bbbc.broadinstitute.org/BBBC024", + "attributes" : [ { + "name" : "Description", + "value" : "Original submission on the Broad Bioimage Benchmark Collection" + } ] + } ], + "subsections" : [ { + "type" : "author", + "attributes" : [ { + "name" : "Name", + "value" : "David Svoboda" + }, { + "name" : "Role", + "value" : "dataset creator" + }, { + "name" : "affiliation", + "value" : "o2", + "reference" : true + } ] + }, { + "type" : "author", + "attributes" : [ { + "name" : "Name", + "value" : "Michal Kozubek" + }, { + "name" : "Role", + "value" : "dataset creator" + }, { + "name" : "affiliation", + "value" : "o2", + "reference" : true + } ] + }, { + "type" : "author", + "attributes" : [ { + "name" : "Name", + "value" : "Stanislav Stejskal" + }, { + "name" : "Role", + "value" : "dataset creator" + }, { + "name" : "affiliation", + "value" : "o2", + "reference" : true + } ] + }, { + "type" : "author", + "attributes" : [ { + "name" : "Name", + "value" : "Broad Institute's Imaging Platform" + }, { + "name" : "Role", + "value" : "data curation" + }, { + "name" : "affiliation", + "value" : "o1", + "reference" : true + } ] + }, { + "type" : "author", + "attributes" : [ { + "name" : "Name", + "value" : "Teresa Zulueta-Coarasa" + }, { + "name" : "Email", + "value" : "teresaz@ebi.ac.uk" + }, { + "name" : "Role", + "value" : "data curation, submitter" + }, { + "name" : "ORCID", + "value" : "0000-0002-0456-6912" + }, { + "name" : "affiliation", + "value" : "o3", + "reference" : true + } ] + }, { + "accno" : "o1", + "type" : "organization", + "attributes" : [ { + "name" : "Name", + "value" : "Broad Institute of Massachusetts Institute of Technology and Harvard, Cambridge, Massachusetts, USA" + }, { + "name" : "Address" + } ] 
+ }, { + "accno" : "o2", + "type" : "organization", + "attributes" : [ { + "name" : "Name", + "value" : "Centre for Biomedical Image Analysis" + }, { + "name" : "Address", + "value" : "Botanická 68a, Faculty of Informatics, Masaryk University, 602 00 Brno, Czech Republic" + } ] + }, { + "accno" : "o3", + "type" : "organization", + "attributes" : [ { + "name" : "Name", + "value" : "European Bioinformatics Institute" + }, { + "name" : "Address" + } ] + }, { + "type" : "Publication", + "attributes" : [ { + "name" : "Title", + "value" : "Generation of digital phantoms of cell nuclei and simulation of image formation in 3D image cytometry" + }, { + "name" : "Year", + "value" : "2009" + }, { + "name" : "Authors", + "value" : "Svoboda David, Kozubkek Michal, Stejskal Stanislav." + }, { + "name" : "DOI", + "value" : "https://doi.org/10.1002/cyto.a.20714" + } ] + }, { + "accno" : "Biosample-1", + "type" : "Biosample", + "attributes" : [ { + "name" : "Title", + "value" : "Simulated HL-60 cells" + }, { + "name" : "Biological entity", + "value" : "simulated human promyelocytic leukemia cells (HL-60) stained with DAPI" + } ], + "subsections" : [ { + "accno" : "Organism-1", + "type" : "Organism", + "attributes" : [ { + "name" : "Scientific name", + "value" : "simulated data" + } ] + } ] + }, { + "accno" : "Specimen-1", + "type" : "Specimen", + "attributes" : [ { + "name" : "Title", + "value" : "Digital Phantom Generation" + }, { + "name" : "Sample Preparation Protocol", + "value" : "In the experiments using the HL-60 cell line, investigators are usually interested in studying the cell nucleus that occupies most of the cell volume. Therefore, the following text will focus on modeling nucleus shape and texture. When simulating the appearance of the HL-60 standard cell line, we can presume the shape of the initial object to be spherical, as this object type is topologically equivalent to a sphere. However, the basic objects like spheres or ellipsoids are too simple and regular. 
Because the aim is to simulate real objects, a certain amount of irregularity is required. For this purpose, we used the PDE-based method to distort the object shape. The idea is based on viewing the object boundary as a deformable surface. The deformation is realized with fast level set methods using artificial noise as a speed function. Besides the shape, the texture of the nucleus image profile reveals important information about the cell activity. In each stage of the cell cycle, chromatin has different properties, and hence when stained, it looks different. For these purposes, the study and the measurement of heterogeneity of chromatin is an important task. Essentially, there are two ways to generate synthetic texture: algorithms for texture synthesis and methods for procedural texture modeling. Here, we decided to use the latter one. The texture function is defined as a sum of several Perlin's noise function. However, certain nucleus parts may not contain chromatin and hence may remain unstained. These locations are either left blank (without any texture) or defined as very dark. The latter case corresponds to an unwanted staining effect. The nucleoli might be an example of such an object type that typically appears as a dark (not stained) place in the image of a nucleus. It was discovered empirically that there is only one nucleolus per healthy nucleus in human cells. As for cancerous cells, there might be more than one nucleolus. The shape of such a nucleolus is mostly spherical or slightly deformed. Because of this property, its generation follows the same idea as the generation of the whole HL-60 nucleus." 
+ } ] + }, { + "accno" : "Image Acquisition-1", + "type" : "Image Acquisition", + "attributes" : [ { + "name" : "Title", + "value" : "Simulated fluorescence microscopy" + }, { + "name" : "Imaging Instrument", + "value" : "Virtual microscope imitating the microscope Zeiss S100 (objective Zeiss 63x/1.40 Oil DIC) attached to confocal unit Atto CARV and CCD camera Micromax 1300-YHS." + }, { + "name" : "Image Acquisition Parameters", + "value" : "N/A" + } ], + "subsections" : [ { + "accno" : "Imaging Method-1", + "type" : "Imaging Method", + "attributes" : [ { + "name" : "Ontology Value", + "value" : "simulated fluorescence microscopy" + } ] + } ] + }, { + "accno" : "Annotations-1", + "type" : "Annotations", + "attributes" : [ { + "name" : "Title", + "value" : "Ground truth segmentation masks" + }, { + "name" : "Annotation overview", + "value" : "Perfect segmentation masks were computer-generated for the simulated images." + }, { + "name" : "Annotation method", + "value" : "Each image contains exactly 20 masks; this is the ground truth for counting. Ground truth for foreground/background segmentation are available as labeled 16bit grayscale images" + }, { + "name" : "Annotation confidence level", + "value" : "High confidence masks were created from synthetic images ." + }, { + "name" : "Annotation coverage", + "value" : "All data has been annotated." + }, { + "name" : "File List", + "value" : "bbc024_masks.json" + } ] + }, { + "accno" : "Study Component-1", + "type" : "Study Component", + "attributes" : [ { + "name" : "Name", + "value" : "Simulated fluorescence images" + }, { + "name" : "Description", + "value" : "Four subsets (each in high and low signal-to-noise ratio variant) of 30 images each are provided. Each image contains 20 HL-60 cell nuclei, but the nuclei cluster with different probabilities (0%, 25%, 50%, and 75%) in the four subsets." 
+ }, { + "name" : "File List", + "value" : "bbc024_images.json" + } ], + "subsections" : [ { + "type" : "Associations", + "attributes" : [ { + "name" : "Biosample", + "value" : "Simulated HL-60 cells" + }, { + "name" : "Specimen", + "value" : "Digital Phantom Generation" + }, { + "name" : "Image acquisition", + "value" : "Simulated fluorescence microscopy" + } ] + } ] + } ] + }, + "type" : "submission" +} \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/input/bbc024_images.json b/bia-ingest/test/data/example_bia_agent_study/input/bbc024_images.json new file mode 100644 index 00000000..5fc4b28f --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/input/bbc024_images.json @@ -0,0 +1,107 @@ +[ + { + "path": "BBBC024/BBBC024_v1_c75_lowSNR_images_TIFF/image-final_0012.tif", + "size": 50872008, + "attributes": [ + { + "name": "clustering probability", + "value": "75%" + }, + { + "name": "SNR", + "value": "low" + } + ], + "type": "file" + }, + { + "path": "BBBC024/BBBC024_v1_c50_lowSNR_images_TIFF/image-final_0012.tif", + "size": 50845142, + "attributes": [ + { + "name": "clustering probability", + "value": "50%" + }, + { + "name": "SNR", + "value": "low" + } + ], + "type": "file" + }, + { + "path": "BBBC024/BBBC024_v1_c25_highSNR_images_TIFF/image-final_0012.tif", + "size": 61405626, + "attributes": [ + { + "name": "clustering probability", + "value": "25%" + }, + { + "name": "SNR", + "value": "high" + } + ], + "type": "file" + }, + { + "path": "BBBC024/BBBC024_v1_c75_highSNR_images_TIFF/image-final_0012.tif", + "size": 61715488, + "attributes": [ + { + "name": "clustering probability", + "value": "75%" + }, + { + "name": "SNR", + "value": "high" + } + ], + "type": "file" + }, + { + "path": "BBBC024/BBBC024_v1_c00_lowSNR_images_TIFF/image-final_0012.tif", + "size": 50966860, + "attributes": [ + { + "name": "clustering probability", + "value": "0%" + }, + { + "name": "SNR", + "value": "low" + } + ], + "type": "file" + }, + 
{ + "path": "BBBC024/BBBC024_v1_c50_highSNR_images_TIFF/image-final_0012.tif", + "size": 61303490, + "attributes": [ + { + "name": "clustering probability", + "value": "50%" + }, + { + "name": "SNR", + "value": "high" + } + ], + "type": "file" + }, + { + "path": "BBBC024/BBBC024_v1_c00_highSNR_images_TIFF/image-final_0012.tif", + "size": 61795818, + "attributes": [ + { + "name": "clustering probability", + "value": "0%" + }, + { + "name": "SNR", + "value": "high" + } + ], + "type": "file" + } +] \ No newline at end of file diff --git a/bia-ingest/test/data/example_bia_agent_study/input/bbc024_masks.json b/bia-ingest/test/data/example_bia_agent_study/input/bbc024_masks.json new file mode 100644 index 00000000..019c2912 --- /dev/null +++ b/bia-ingest/test/data/example_bia_agent_study/input/bbc024_masks.json @@ -0,0 +1,92 @@ +[ + { + "path": "BBBC024/BBBC024_v1_c75_lowSNR_images_TIFF/image-labels_0012.tif", + "size": 3217536, + "attributes": [ + { + "name": "Annotation type", + "value": "Segmentation masks" + }, + { + "name": "Source image", + "value": "BBBC024/BBBC024_v1_c75_lowSNR_images_TIFF/image-final_0012.tif" + } + ], + "type": "file" + }, + { + "path": "BBBC024/BBBC024_v1_c50_lowSNR_images_TIFF/image-labels_0012.tif", + "size": 3195094, + "attributes": [ + { + "name": "Annotation type", + "value": "Segmentation masks" + }, + { + "name": "Source image", + "value": "BBBC024/BBBC024_v1_c50_lowSNR_images_TIFF/image-final_0012.tif" + } + ], + "type": "file" + }, + { + "path": "BBBC024/BBBC024_v1_c25_highSNR_images_TIFF/image-labels_0012.tif", + "size": 3207776, + "attributes": [ + { + "name": "Annotation type", + "value": "Segmentation masks" + }, + { + "name": "Source image", + "value": "BBBC024/BBBC024_v1_c25_highSNR_images_TIFF/image-final_0012.tif" + } + ], + "type": "file" + }, + { + "path": "BBBC024/BBBC024_v1_c00_lowSNR_images_TIFF/image-labels_0012.tif", + "size": 3178098, + "attributes": [ + { + "name": "Annotation type", + "value": "Segmentation masks" + 
}, + { + "name": "Source image", + "value": "BBBC024/BBBC024_v1_c00_lowSNR_images_TIFF/image-final_0012.tif" + } + ], + "type": "file" + }, + { + "path": "BBBC024/BBBC024_v1_c50_highSNR_images_TIFF/image-labels_0012.tif", + "size": 3177868, + "attributes": [ + { + "name": "Annotation type", + "value": "Segmentation masks" + }, + { + "name": "Source image", + "value": "BBBC024/BBBC024_v1_c50_highSNR_images_TIFF/image-final_0012.tif" + } + ], + "type": "file" + }, + { + "path": "BBBC024/BBBC024_v1_c00_highSNR_images_TIFF/image-labels_0012.tif", + "size": 3182548, + "attributes": [ + { + "name": "Annotation type", + "value": "Segmentation masks" + }, + { + "name": "Source image", + "value": "BBBC024/BBBC024_v1_c00_highSNR_images_TIFF/image-final_0012.tif" + } + ], + "type": "file" + } +] \ No newline at end of file diff --git a/bia-ingest/test/test_bia_agent_study.py b/bia-ingest/test/test_bia_agent_study.py new file mode 100644 index 00000000..903b1d0a --- /dev/null +++ b/bia-ingest/test/test_bia_agent_study.py @@ -0,0 +1,125 @@ +from typer.testing import CliRunner +from pathlib import Path +from bia_ingest import cli +from bia_ingest.biostudies.api import requests, Submission, SubmissionTable +import json +from unittest.mock import Mock +from glob import glob +from bia_shared_datamodels import bia_data_model +import pytest +from pydantic import BaseModel +from pydantic.alias_generators import to_snake +from typing import Type + + +@pytest.fixture +def expected_bia_agent_objects() -> tuple[dict, int]: + + path_to_load = Path(__file__).parent / "data" / "example_bia_agent_study" / "expected_output" + + file_paths = glob(f"{path_to_load}/**/*.json", recursive=True) + n_expected_objects = len(file_paths) + expected_objects_dict = {} + + for file_name in file_paths: + + data_dict = json.loads(Path(file_name).read_text()) + + object_type = data_dict["model"]["type_name"] + bia_type: Type[BaseModel] = getattr(bia_data_model, object_type) + + if to_snake(object_type) not in 
expected_objects_dict: + expected_objects_dict[to_snake(object_type)] = [] + + expected_objects_dict[to_snake(object_type)].append( + bia_type.model_validate(data_dict) + ) + + return expected_objects_dict, n_expected_objects + + +def test_cli_writes_expected_files( + monkeypatch, + tmp_bia_data_dir, + test_submission_table, + expected_bia_agent_objects, +): + """ + Test checking a study created by the BIA Agent is ingestable. + The two file lists have been shortened as we do not need to test every file list. + """ + + expected_objects_dict, n_expected_objects = expected_bia_agent_objects + + path_to_load = Path(__file__).parent / "data" / "example_bia_agent_study" / "input" + + def _mock_filelist_get(flist_url: str) -> dict[str, str]: + path = path_to_load / Path(flist_url).name + return_value = Mock() + return_value.status_code = 200 + return_value.content = path.read_text() + return return_value + + def _load_submission(accession_id: str) -> Submission: + submission_path = path_to_load / "S-BIAD1492.json" + json_data = json.loads(submission_path.read_text()) + submission = Submission.model_validate(json_data) + return submission + + def _load_submission_table_info(accession_id: str): + return test_submission_table + + # def _disk_persistance_settings(path): + # return Settings(bia_data_dir=str(path)) + + monkeypatch.setattr(cli, "load_submission", _load_submission) + monkeypatch.setattr(cli, "load_submission_table_info", _load_submission_table_info) + # monkeypatch.setattr( + # persistence_strategy, "settings", _disk_persistance_settings(tmp_path) + # ) + monkeypatch.setattr(requests, "get", _mock_filelist_get) + + runner = CliRunner() + result = runner.invoke( + cli.app, + [ + "ingest", + "S-BIAD1492", + "--persistence-mode", + "disk", + "--process-filelist", + "always", + ], + ) + assert result.exit_code == 0 + + files_written = [f for f in tmp_bia_data_dir.rglob("*.json")] + assert len(files_written) == n_expected_objects + + files_written_by_type = {k: [] 
for k in expected_objects_dict.keys()} + file: Path + for file in files_written: + for key in files_written_by_type.keys(): + if file.parts[-3] == key: + files_written_by_type[key].append(file) + break + + for key in expected_objects_dict: + assert len(expected_objects_dict[key]) == len(files_written_by_type[key]) + + for dir_name, expected_objects in expected_objects_dict.items(): + dir_path = tmp_bia_data_dir / dir_name / "S-BIAD1492" + + if not isinstance(expected_objects, list): + expected_objects = [ + expected_objects, + ] + for expected_object in expected_objects: + created_object_path = dir_path / f"{expected_object.uuid}.json" + created_object_type = getattr( + bia_data_model, expected_object.model.type_name + ) + created_object = created_object_type.model_validate_json( + created_object_path.read_text() + ) + assert created_object == expected_object diff --git a/bia-ingest/test/test_bia_ingest_cli.py b/bia-ingest/test/test_bia_ingest_cli.py index 88106ce6..802bb334 100644 --- a/bia-ingest/test/test_bia_ingest_cli.py +++ b/bia-ingest/test/test_bia_ingest_cli.py @@ -1,11 +1,10 @@ from typer.testing import CliRunner from pathlib import Path from bia_ingest import cli -from bia_ingest import persistence_strategy -from bia_ingest.biostudies import api from bia_shared_datamodels import bia_data_model import pytest -from bia_ingest.settings import Settings +from unittest.mock import Mock +from bia_ingest.biostudies import api from bia_test_data.mock_objects import ( mock_growth_protocol, mock_study, @@ -46,13 +45,13 @@ def expected_objects() -> tuple[dict, int]: return expected_objects_dict, n_expected_objects +@pytest.mark.usefixtures("mock_request_get") def test_cli_writes_expected_files( monkeypatch, - tmp_path, test_submission, test_submission_table, - mock_request_get, expected_objects, + tmp_bia_data_dir, ): expected_objects_dict, n_expected_objects = expected_objects @@ -63,14 +62,8 @@ def _load_submission(accession_id: str) -> api.Submission: def 
_load_submission_table_info(accession_id: str): return test_submission_table - def _disk_persistance_settings(path): - return Settings(bia_data_dir=str(path)) - monkeypatch.setattr(cli, "load_submission", _load_submission) monkeypatch.setattr(cli, "load_submission_table_info", _load_submission_table_info) - monkeypatch.setattr( - persistence_strategy, "settings", _disk_persistance_settings(tmp_path) - ) result = runner.invoke( cli.app, @@ -85,7 +78,7 @@ def _disk_persistance_settings(path): ) assert result.exit_code == 0 - files_written = [f for f in tmp_path.rglob("*.json")] + files_written = [f for f in tmp_bia_data_dir.rglob("*.json")] assert len(files_written) == n_expected_objects files_written_by_type = {k: [] for k in expected_objects_dict.keys()} @@ -100,7 +93,7 @@ def _disk_persistance_settings(path): assert len(expected_objects_dict[key]) == len(files_written_by_type[key]) for dir_name, expected_objects in expected_objects_dict.items(): - dir_path = tmp_path / dir_name / test_submission.accno + dir_path = tmp_bia_data_dir / dir_name / test_submission.accno if not isinstance(expected_objects, list): expected_objects = [ @@ -117,11 +110,11 @@ def _disk_persistance_settings(path): assert created_object == expected_object +@pytest.mark.usefixtures("mock_request_get") def test_cli_persists_expected_documents( monkeypatch, test_submission, test_submission_table, - mock_request_get, expected_objects, get_bia_api_client, ): @@ -158,8 +151,8 @@ def _load_submission_table_info(accession_id: str): get_func = get_bia_api_client.__getattribute__(f"get_{class_name}") for expected_object in expected_objects: persisted_object = get_func(str(expected_object.uuid)) - # Using the model_dump_json instead of direct comparison because the expected objects - # are instances of the bia_shared_models and not api client models + # Using the model_dump_json instead of direct comparison because the expected objects + # are instances of the bia_shared_models and not api client models 
assert ( persisted_object.model_dump_json() == expected_object.model_dump_json() ) @@ -172,6 +165,14 @@ def test_cli_find_test_study( ): outfile = tmp_path.absolute() / "find_output" + def _mock_search_result(url, headers) -> dict[str, str]: + return_value = Mock() + return_value.status_code = 200 + return_value.content = mock_search_result.model_dump_json() + return return_value + + monkeypatch.setattr(api.requests, "get", _mock_search_result) + result = runner.invoke( cli.app, ["find", "new-biostudies-studies", "-o", outfile], diff --git a/bia-ingest/test/test_order_of_processing_datasets.py b/bia-ingest/test/test_order_of_processing_datasets.py index e1f95be7..29553587 100644 --- a/bia-ingest/test/test_order_of_processing_datasets.py +++ b/bia-ingest/test/test_order_of_processing_datasets.py @@ -7,7 +7,6 @@ mock_file_reference, ) from bia_ingest import persistence_strategy -from bia_ingest.settings import Settings def _modify_annotation_file_list( @@ -26,15 +25,10 @@ def _modify_annotation_file_list( @pytest.fixture(scope="function") -def persister(test_submission, tmp_path, monkeypatch) -> persistence_strategy.PersistenceStrategy: - - def _disk_persistance_settings(path): - return Settings( - bia_data_dir=str(path) - ) - - monkeypatch.setattr(persistence_strategy, "settings", _disk_persistance_settings(tmp_path)) - +def persister( + test_submission, + tmp_bia_data_dir, # Note this fixture is requested explicitly to ensure correct ordering of fixtures (i.e. after settings are configured for tests.) 
+) -> persistence_strategy.PersistenceStrategy: persister = persistence_strategy.persistence_strategy_factory( persistence_mode="disk", accession_id=test_submission.accno, @@ -62,10 +56,7 @@ def submission_with_file_lists_with_some_identical_file_paths( @pytest.fixture -def study_component_file_references_only( - test_submission, - mock_request_get, -) -> dict: +def study_component_file_references_only() -> dict: datasets = mock_dataset.get_dataset() ds_study_component_1 = datasets[0] @@ -90,8 +81,6 @@ def study_component_file_references_only( @pytest.fixture def study_component_and_unique_annotation_file_references( - test_submission, - mock_request_get, study_component_file_references_only, ) -> dict: datasets = mock_dataset.get_dataset() @@ -109,17 +98,26 @@ def study_component_and_unique_annotation_file_references( return file_references -@pytest.mark.parametrize("submission_fixture, expected_file_references_fixture", [ - (submission_with_reused_file_list, study_component_file_references_only), - (submission_with_file_lists_with_some_identical_file_paths, study_component_and_unique_annotation_file_references) -]) + +@pytest.mark.usefixtures("mock_request_get") +@pytest.mark.parametrize( + "submission_fixture, expected_file_references_fixture", + [ + (submission_with_reused_file_list, study_component_file_references_only), + ( + submission_with_file_lists_with_some_identical_file_paths, + study_component_and_unique_annotation_file_references, + ), + ], +) def test_process_submission_v4_prefers_study_component_dataset_when_creating_file_references( submission_fixture, expected_file_references_fixture, request, ingestion_result_summary, + tmp_bia_data_dir, persister, - tmp_path, + ): """ Tests that when file references with the same file path (i.e. 
with the same uuid) are created @@ -130,7 +128,9 @@ def test_process_submission_v4_prefers_study_component_dataset_when_creating_fil """ submission = request.getfixturevalue(submission_fixture.__name__) - expected_file_references = request.getfixturevalue(expected_file_references_fixture.__name__) + expected_file_references = request.getfixturevalue( + expected_file_references_fixture.__name__ + ) process_submission_v4( submission=submission, @@ -140,7 +140,7 @@ def test_process_submission_v4_prefers_study_component_dataset_when_creating_fil ) # Check expected number of file references written - file_reference_base_path = tmp_path / "file_reference" / submission.accno + file_reference_base_path = tmp_bia_data_dir / "file_reference" / submission.accno created_file_references = [ bia_data_model.FileReference.model_validate_json(f.read_text()) for f in file_reference_base_path.glob("*.json")