IGVF-2378-gersbach-additions (#1336)

ian-whaling · web-flow · commit 1aea14b40136 · 2025-02-25T16:15:46.000-08:00
diff --git a/src/igvfd/audit/construct_library_set.py b/src/igvfd/audit/construct_library_set.py
@@ -9,6 +9,19 @@
 )
 
 
+def get_assay_terms(value, system):
+    assay_terms = set()
+    for sample in value.get('applied_to_samples', []):
+        sample_object = system.get('request').embed(
+            sample + '@@object_with_select_calculated_properties?field=file_sets')
+        file_sets = sample_object.get('file_sets', [])
+        for file_set in file_sets:
+            if file_set.startswith('/measurement-sets/'):
+                input_file_set_object = system.get('request').embed(file_set + '@@object?skip_calculated=true')
+                assay_terms.add(input_file_set_object.get('assay_term'))
+    return list(assay_terms)
+
+
 @audit_checker('ConstructLibrarySet', frame='object')
 def audit_construct_library_set_associated_phenotypes(value, system):
     '''
@@ -97,26 +110,35 @@ def audit_integrated_content_files(value, system):
     '''
     [
         {
-            "audit_description": "Guide libraries are expected to link to an integrated content file of guide RNA sequences.",
+            "audit_description": "Guide libraries used in CRISPR assays are expected to link to an integrated content file of guide RNA sequences.",
             "audit_category": "missing guide RNA sequences",
             "audit_level": "NOT_COMPLIANT"
         },
         {
-            "audit_description": "Reporter libraries are expected to link to an integrated content file of MPRA sequence designs.",
+            "audit_description": "Reporter libraries used in MPRA assays are expected to link to an integrated content file of MPRA sequence designs.",
             "audit_category": "missing MPRA sequence designs",
             "audit_level": "NOT_COMPLIANT"
         }
     ]
     '''
     audit_message_guide = get_audit_message(audit_integrated_content_files, index=0)
     audit_message_reporter = get_audit_message(audit_integrated_content_files, index=1)
+    assay_terms = get_assay_terms(value, system)
+    CRISPR_assays = [
+        '/assay-terms/OBI_0003659/',  # in vitro CRISPR screen assay
+        '/assay-terms/OBI_0003660/',  # in vitro CRISPR screen using single-cell RNA-seq
+        '/assay-terms/OBI_0003661/'  # in vitro CRISPR screen using flow cytometry
+    ]
+    MPRA_assays = [
+        '/assay-terms/OBI_0002675/'  # massively parallel reporter assay
+    ]
     library_expectation = {
-        'guide library': ('guide RNA sequences', audit_message_guide),
-        'reporter library': ('MPRA sequence designs', audit_message_reporter),
+        'guide library': ('guide RNA sequences', audit_message_guide, CRISPR_assays),
+        'reporter library': ('MPRA sequence designs', audit_message_reporter, MPRA_assays),
     }
     integrated_content_files = value.get('integrated_content_files', '')
     library_type = value.get('file_set_type', '')
-    if library_type in library_expectation:
+    if library_type in library_expectation and any(assay_term in library_expectation[library_type][2] for assay_term in assay_terms):
         file_expectation = library_expectation[library_type][0]
         audit_message = library_expectation[library_type][1]
         if integrated_content_files:
diff --git a/src/igvfd/mappings/construct_library_set.json b/src/igvfd/mappings/construct_library_set.json
@@ -1,6 +1,6 @@
 {
-    "hash": "57e1e31b2b9efad29507e894d9483c27",
-    "index_name": "construct_library_set_57e1e31b",
+    "hash": "6aedb30a9960e4ac9f08e080d9c028d6",
+    "index_name": "construct_library_set_6aedb30a",
     "item_type": "construct_library_set",
     "mapping": {
         "dynamic_templates": [
diff --git a/src/igvfd/schemas/changelogs/analysis_step.md b/src/igvfd/schemas/changelogs/analysis_step.md
@@ -2,6 +2,8 @@
 
 ### Minor changes since schema version 5
 
+* Extend `input_content_types` enum list to include `differential peak quantifications`.
+* Extend `output_content_types` enum list to include `differential peak quantifications`.
 * Extend `input_content_types` enum list to include `protein language model`.
 * Extend `output_content_types` enum list to include `protein language model`.
 * Extend `input_content_types` enum list to include `genome index`.
diff --git a/src/igvfd/schemas/changelogs/biosample.md b/src/igvfd/schemas/changelogs/biosample.md
@@ -1,5 +1,6 @@
 ## Changelog for *`biosample.json`*
 
+* Extend `nucleic_acid_delivery` enum list to include `nucleofection`. (02/27/2025)
 * Extend `status` enum list to include `preview`. (11/22/2024)
 * Add `publication_identifiers`. (07/31/2024)
 * Restrict `publication_identifiers` to submission by admins only. (07/17/2024)
diff --git a/src/igvfd/schemas/changelogs/in_vitro_system.md b/src/igvfd/schemas/changelogs/in_vitro_system.md
@@ -2,6 +2,7 @@
 
 ### Minor changes since schema version 25
 
+* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
 * Extend `collections` enum list to include `ACMG73`.
 * Extend `collections` enum list to include `Morphic`.
 * Extend `collections` enum list to include `StanfordFCC`.
diff --git a/src/igvfd/schemas/changelogs/multiplexed_sample.md b/src/igvfd/schemas/changelogs/multiplexed_sample.md
@@ -2,6 +2,7 @@
 
 ### Minor changes since schema version 10
 
+* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
 * Extend `collections` enum list to include `ACMG73`.
 * Extend `collections` enum list to include `Morphic`.
 * Extend `collections` enum list to include `StanfordFCC`.
diff --git a/src/igvfd/schemas/changelogs/primary_cell.md b/src/igvfd/schemas/changelogs/primary_cell.md
@@ -2,6 +2,7 @@
 
 ### Minor changes since schema version 22
 
+* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
 * Extend `collections` enum list to include `ACMG73`.
 * Extend `collections` enum list to include `Morphic`.
 * Extend `collections` enum list to include `StanfordFCC`.
diff --git a/src/igvfd/schemas/changelogs/sample.md b/src/igvfd/schemas/changelogs/sample.md
@@ -1,5 +1,6 @@
 ## Changelog for *`sample.json`*
 
+* Extend `nucleic_acid_delivery` enum list to include `nucleofection`. (02/27/2025)
 * Extend `nucleic_acid_delivery` enum list to include `lipofectamine` and `electroporation`. (02/03/2025)
 * Extend `status` enum list to include `preview`. (11/22/2024)
 * Remove `publication_identifiers`. (07/31/2024)
diff --git a/src/igvfd/schemas/changelogs/tabular_file.md b/src/igvfd/schemas/changelogs/tabular_file.md
@@ -2,6 +2,7 @@
 
 ### Minor changes since schema version 13
 
+* Extend `content_type` enum list to include `differential peak quantifications`.
 * Extend `transcriptome_annotation` enum list to include `GENCODE 22`.
 * Extend `collections` enum list to include `ACMG73`.
 * Extend `collections` enum list to include `Morphic`.
diff --git a/src/igvfd/schemas/changelogs/technical_sample.md b/src/igvfd/schemas/changelogs/technical_sample.md
@@ -2,6 +2,7 @@
 
 ### Minor changes since schema version 14
 
+* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
 * Extend `collections` enum list to include `ACMG73`.
 * Extend `collections` enum list to include `Morphic`.
 * Extend `collections` enum list to include `StanfordFCC`.
diff --git a/src/igvfd/schemas/changelogs/tissue.md b/src/igvfd/schemas/changelogs/tissue.md
@@ -2,6 +2,7 @@
 
 ### Minor changes since schema version 21
 
+* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
 * Extend `collections` enum list to include `ACMG73`.
 * Extend `collections` enum list to include `Morphic`.
 * Extend `collections` enum list to include `StanfordFCC`.
diff --git a/src/igvfd/schemas/changelogs/whole_organism.md b/src/igvfd/schemas/changelogs/whole_organism.md
@@ -2,6 +2,7 @@
 
 ### Minor changes since schema version 24
 
+* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
 * Extend `collections` enum list to include `ACMG73`.
 * Extend `collections` enum list to include `Morphic`.
 * Extend `collections` enum list to include `StanfordFCC`.
diff --git a/src/igvfd/schemas/sample.json b/src/igvfd/schemas/sample.json
@@ -221,7 +221,8 @@
                 "lipofectamine",
                 "electroporation",
                 "lentiviral transduction",
-                "transfection"
+                "transfection",
+                "nucleofection"
             ],
             "submissionExample": {
                 "appscript": "transfection",
diff --git a/src/igvfd/schemas/tabular_file.json b/src/igvfd/schemas/tabular_file.json
@@ -124,6 +124,7 @@
                 "differential chromatin contact quantifications",
                 "differential element quantifications",
                 "differential gene expression quantifications",
+                "differential peak quantifications",
                 "differential transcript expression quantifications",
                 "DNA footprint scores",
                 "editing templates",
@@ -173,6 +174,7 @@
                 "differential chromatin contact quantifications": "Details differences in chromatin interactions between experimental conditions.",
                 "differential element quantifications": "A file that details the change in effect sizes for a specific non-coding element between two experimental conditions.",
                 "differential gene expression quantifications": "The quantified changes in gene expression levels between different conditions or groups.",
+                "differential peak quantifications": "The quantified changes in peaks detected between experimental conditions.",
                 "differential transcript expression quantifications": "The quantified changes in expression levels of transcripts between conditions or groups.",
                 "DNA footprint scores": "A file containing DNA footprint scores, which represents the binding sites between DNA and proteins called from assays like ATAC-seq data.",
                 "editing templates": "The homology-directed DNA repair (HDR) templates containing SNVs in the library of saturation genome editing assays.",
diff --git a/src/igvfd/tests/data/inserts/tabular_file.json b/src/igvfd/tests/data/inserts/tabular_file.json
@@ -483,5 +483,23 @@
         "upload_status": "validated",
         "file_set": "j-michael-cherry:barcodes_curated_set",
         "controlled_access": false
+    },
+    {
+        "uuid": "858449fd-5f24-4a51-a4ab-e8aba826b94f",
+        "lab": "j-michael-cherry",
+        "award": "HG012012",
+        "aliases": [
+            "igvf:differential_peak_quantifications"
+        ],
+        "status": "released",
+        "release_timestamp": "2025-02-01T21:29:45Z",
+        "md5sum": "08dc4cb2eee35c7a2c7cb200cb17282a",
+        "file_format": "tsv",
+        "content_type": "differential peak quantifications",
+        "submitted_file_name": "/Users/igvf/igvf_files/differential_peak_quantifications.tsv.gz",
+        "file_size": 145360919,
+        "upload_status": "validated",
+        "file_set": "igvf:analysis_set_with_input",
+        "controlled_access": false
     }
 ]
diff --git a/src/igvfd/tests/data/inserts/whole_organism.json b/src/igvfd/tests/data/inserts/whole_organism.json
@@ -25,7 +25,11 @@
         "virtual": false,
         "protocols": [
             "https://www.protocols.io/test-protocols-url-12345"
-        ]
+        ],
+        "construct_library_sets": [
+            "igvf:basic_construct_library_set_1"
+        ],
+        "nucleic_acid_delivery": "nucleofection"
     },
     {
         "uuid": "d4c46526-0307-11ed-b939-0242ac120002",
diff --git a/src/igvfd/tests/test_audit_construct_library_set.py b/src/igvfd/tests/test_audit_construct_library_set.py
@@ -184,8 +184,20 @@ def test_audit_construct_library_set_with_invalid_chroms(
 def test_audit_construct_library_set_guide_library_guide_rna_sequences(
     testapp,
     construct_library_set_genome_wide,
-    tabular_file
+    tabular_file,
+    assay_term_crispr,
+    measurement_set,
+    tissue
 ):
+    testapp.patch_json(
+        measurement_set['@id'],
+        {'assay_term': assay_term_crispr['@id'],
+         'samples': [tissue['@id']]}
+    )
+    testapp.patch_json(
+        tissue['@id'],
+        {'construct_library_sets': [construct_library_set_genome_wide['@id']]}
+    )
     res = testapp.get(construct_library_set_genome_wide['@id'] + '@@audit')
     assert any(
         error['category'] == 'missing guide RNA sequences'
@@ -213,8 +225,21 @@ def test_audit_construct_library_set_guide_library_guide_rna_sequences(
 def test_audit_construct_library_set_mpra_sequence_designs(
     testapp,
     construct_library_set_reporter,
-    tabular_file
+    tabular_file,
+    assay_term_mpra,
+    measurement_set,
+    tissue,
+    assay_term_starr
 ):
+    testapp.patch_json(
+        measurement_set['@id'],
+        {'assay_term': assay_term_mpra['@id'],
+         'samples': [tissue['@id']]}
+    )
+    testapp.patch_json(
+        tissue['@id'],
+        {'construct_library_sets': [construct_library_set_reporter['@id']]}
+    )
     res = testapp.get(construct_library_set_reporter['@id'] + '@@audit')
     assert any(
         error['category'] == 'missing MPRA sequence designs'
@@ -229,6 +254,19 @@ def test_audit_construct_library_set_mpra_sequence_designs(
         error['category'] == 'missing MPRA sequence designs'
         for error in res.json['audit'].get('NOT_COMPLIANT', [])
     )
+    testapp.patch_json(
+        measurement_set['@id'],
+        {'assay_term': assay_term_starr['@id']}
+    )
+    res = testapp.get(construct_library_set_reporter['@id'] + '@@audit')
+    assert all(
+        error['category'] != 'missing MPRA sequence designs'
+        for error in res.json['audit'].get('NOT_COMPLIANT', [])
+    )
+    testapp.patch_json(
+        measurement_set['@id'],
+        {'assay_term': assay_term_mpra['@id']}
+    )
     testapp.patch_json(
         tabular_file['@id'],
         {'content_type': 'MPRA sequence designs'}

Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`		`- "hash": "57e1e31b2b9efad29507e894d9483c27",`
`3`		`- "index_name": "construct_library_set_57e1e31b",`
	`2`	`+ "hash": "6aedb30a9960e4ac9f08e080d9c028d6",`
	`3`	`+ "index_name": "construct_library_set_6aedb30a",`
`4`	`4`	`"item_type": "construct_library_set",`
`5`	`5`	`"mapping": {`
`6`	`6`	`"dynamic_templates": [`