Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IGVF-2378-gersbach-additions #1336

Merged
merged 10 commits into from
Feb 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions src/igvfd/audit/construct_library_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,19 @@
)


def get_assay_terms(value, system):
    '''
    Collect the assay terms of the measurement sets linked to the samples
    this object was applied to.

    Each entry of value['applied_to_samples'] is embedded with its calculated
    `file_sets` property. Every file set whose path starts with
    '/measurement-sets/' is then embedded (without calculated properties)
    and its `assay_term` is collected.

    :param value: the audited object as a dict; `applied_to_samples` is
        optional and treated as empty when absent.
    :param system: the audit system dict; system['request'] must provide
        an `embed(path)` method returning a dict.
    :return: a sorted list of the unique assay terms found (presumably
        '/assay-terms/...' path strings). Measurement sets without an
        `assay_term` contribute nothing. Empty list when no sample links
        to a measurement set.
    '''
    # Look the request up once instead of on every loop iteration.
    request = system.get('request')
    assay_terms = set()
    for sample in value.get('applied_to_samples', []):
        sample_object = request.embed(
            sample + '@@object_with_select_calculated_properties?field=file_sets')
        for file_set in sample_object.get('file_sets', []):
            # Only measurement sets carry an assay term; skip other file set types.
            if not file_set.startswith('/measurement-sets/'):
                continue
            measurement_set_object = request.embed(file_set + '@@object?skip_calculated=true')
            assay_term = measurement_set_object.get('assay_term')
            # A missing assay_term would otherwise leak None into the result.
            if assay_term is not None:
                assay_terms.add(assay_term)
    # Sorted so the result does not depend on set iteration order.
    return sorted(assay_terms)


@audit_checker('ConstructLibrarySet', frame='object')
def audit_construct_library_set_associated_phenotypes(value, system):
'''
Expand Down Expand Up @@ -97,26 +110,35 @@ def audit_integrated_content_files(value, system):
'''
[
{
"audit_description": "Guide libraries are expected to link to an integrated content file of guide RNA sequences.",
"audit_description": "Guide libraries used in CRISPR assays are expected to link to an integrated content file of guide RNA sequences.",
"audit_category": "missing guide RNA sequences",
"audit_level": "NOT_COMPLIANT"
},
{
"audit_description": "Reporter libraries are expected to link to an integrated content file of MPRA sequence designs.",
"audit_description": "Reporter libraries used in MPRA assays are expected to link to an integrated content file of MPRA sequence designs.",
"audit_category": "missing MPRA sequence designs",
"audit_level": "NOT_COMPLIANT"
}
]
'''
audit_message_guide = get_audit_message(audit_integrated_content_files, index=0)
audit_message_reporter = get_audit_message(audit_integrated_content_files, index=1)
assay_terms = get_assay_terms(value, system)
CRISPR_assays = [
'/assay-terms/OBI_0003659/', # in vitro CRISPR screen assay
'/assay-terms/OBI_0003660/', # in vitro CRISPR screen using single-cell RNA-seq
'/assay-terms/OBI_0003661/' # in vitro CRISPR screen using flow cytometry
]
MPRA_assays = [
'/assay-terms/OBI_0002675/' # massively parallel reporter assay
]
library_expectation = {
'guide library': ('guide RNA sequences', audit_message_guide),
'reporter library': ('MPRA sequence designs', audit_message_reporter),
'guide library': ('guide RNA sequences', audit_message_guide, CRISPR_assays),
'reporter library': ('MPRA sequence designs', audit_message_reporter, MPRA_assays),
}
integrated_content_files = value.get('integrated_content_files', '')
library_type = value.get('file_set_type', '')
if library_type in library_expectation:
if library_type in library_expectation and any(assay_term in library_expectation[library_type][2] for assay_term in assay_terms):
file_expectation = library_expectation[library_type][0]
audit_message = library_expectation[library_type][1]
if integrated_content_files:
Expand Down
4 changes: 2 additions & 2 deletions src/igvfd/mappings/construct_library_set.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"hash": "57e1e31b2b9efad29507e894d9483c27",
"index_name": "construct_library_set_57e1e31b",
"hash": "6aedb30a9960e4ac9f08e080d9c028d6",
"index_name": "construct_library_set_6aedb30a",
"item_type": "construct_library_set",
"mapping": {
"dynamic_templates": [
Expand Down
2 changes: 2 additions & 0 deletions src/igvfd/schemas/changelogs/analysis_step.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

### Minor changes since schema version 5

* Extend `input_content_types` enum list to include `differential peak quantifications`.
* Extend `output_content_types` enum list to include `differential peak quantifications`.
* Extend `input_content_types` enum list to include `protein language model`.
* Extend `output_content_types` enum list to include `protein language model`.
* Extend `input_content_types` enum list to include `genome index`.
Expand Down
1 change: 1 addition & 0 deletions src/igvfd/schemas/changelogs/biosample.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## Changelog for *`biosample.json`*

* Extend `nucleic_acid_delivery` enum list to include `nucleofection`. (02/27/2025)
* Extend `status` enum list to include `preview`. (11/22/2024)
* Add `publication_identifiers`. (07/31/2024)
* Restrict `publication_identifiers` to submission by admins only. (07/17/2024)
Expand Down
1 change: 1 addition & 0 deletions src/igvfd/schemas/changelogs/in_vitro_system.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Minor changes since schema version 25

* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
* Extend `collections` enum list to include `ACMG73`.
* Extend `collections` enum list to include `Morphic`.
* Extend `collections` enum list to include `StanfordFCC`.
Expand Down
1 change: 1 addition & 0 deletions src/igvfd/schemas/changelogs/multiplexed_sample.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Minor changes since schema version 10

* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
* Extend `collections` enum list to include `ACMG73`.
* Extend `collections` enum list to include `Morphic`.
* Extend `collections` enum list to include `StanfordFCC`.
Expand Down
1 change: 1 addition & 0 deletions src/igvfd/schemas/changelogs/primary_cell.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Minor changes since schema version 22

* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
* Extend `collections` enum list to include `ACMG73`.
* Extend `collections` enum list to include `Morphic`.
* Extend `collections` enum list to include `StanfordFCC`.
Expand Down
1 change: 1 addition & 0 deletions src/igvfd/schemas/changelogs/sample.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
## Changelog for *`sample.json`*

* Extend `nucleic_acid_delivery` enum list to include `nucleofection`. (02/27/2025)
* Extend `nucleic_acid_delivery` enum list to include `lipofectamine` and `electroporation`. (02/03/2025)
* Extend `status` enum list to include `preview`. (11/22/2024)
* Remove `publication_identifiers`. (07/31/2024)
Expand Down
1 change: 1 addition & 0 deletions src/igvfd/schemas/changelogs/tabular_file.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Minor changes since schema version 13

* Extend `content_type` enum list to include `differential peak quantifications`.
* Extend `transcriptome_annotation` enum list to include `GENCODE 22`.
* Extend `collections` enum list to include `ACMG73`.
* Extend `collections` enum list to include `Morphic`.
Expand Down
1 change: 1 addition & 0 deletions src/igvfd/schemas/changelogs/technical_sample.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Minor changes since schema version 14

* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
* Extend `collections` enum list to include `ACMG73`.
* Extend `collections` enum list to include `Morphic`.
* Extend `collections` enum list to include `StanfordFCC`.
Expand Down
1 change: 1 addition & 0 deletions src/igvfd/schemas/changelogs/tissue.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Minor changes since schema version 21

* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
* Extend `collections` enum list to include `ACMG73`.
* Extend `collections` enum list to include `Morphic`.
* Extend `collections` enum list to include `StanfordFCC`.
Expand Down
1 change: 1 addition & 0 deletions src/igvfd/schemas/changelogs/whole_organism.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

### Minor changes since schema version 24

* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
* Extend `collections` enum list to include `ACMG73`.
* Extend `collections` enum list to include `Morphic`.
* Extend `collections` enum list to include `StanfordFCC`.
Expand Down
3 changes: 2 additions & 1 deletion src/igvfd/schemas/sample.json
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@
"lipofectamine",
"electroporation",
"lentiviral transduction",
"transfection"
"transfection",
"nucleofection"
],
"submissionExample": {
"appscript": "transfection",
Expand Down
2 changes: 2 additions & 0 deletions src/igvfd/schemas/tabular_file.json
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
"differential chromatin contact quantifications",
"differential element quantifications",
"differential gene expression quantifications",
"differential peak quantifications",
"differential transcript expression quantifications",
"DNA footprint scores",
"editing templates",
Expand Down Expand Up @@ -173,6 +174,7 @@
"differential chromatin contact quantifications": "Details differences in chromatin interactions between experimental conditions.",
"differential element quantifications": "A file that details the change in effect sizes for a specific non-coding element between two experimental conditions.",
"differential gene expression quantifications": "The quantified changes in gene expression levels between different conditions or groups.",
"differential peak quantifications": "The quantified changes in peaks detected between experimental conditions.",
"differential transcript expression quantifications": "The quantified changes in expression levels of transcripts between conditions or groups.",
"DNA footprint scores": "A file containing DNA footprint scores, which represents the binding sites between DNA and proteins called from assays like ATAC-seq data.",
"editing templates": "The homology-directed DNA repair (HDR) templates containing SNVs in the library of saturation genome editing assays.",
Expand Down
18 changes: 18 additions & 0 deletions src/igvfd/tests/data/inserts/tabular_file.json
Original file line number Diff line number Diff line change
Expand Up @@ -483,5 +483,23 @@
"upload_status": "validated",
"file_set": "j-michael-cherry:barcodes_curated_set",
"controlled_access": false
},
{
"uuid": "858449fd-5f24-4a51-a4ab-e8aba826b94f",
"lab": "j-michael-cherry",
"award": "HG012012",
"aliases": [
"igvf:differential_peak_quantifications"
],
"status": "released",
"release_timestamp": "2025-02-01T21:29:45Z",
"md5sum": "08dc4cb2eee35c7a2c7cb200cb17282a",
"file_format": "tsv",
"content_type": "differential peak quantifications",
"submitted_file_name": "/Users/igvf/igvf_files/differential_peak_quantifications.tsv.gz",
"file_size": 145360919,
"upload_status": "validated",
"file_set": "igvf:analysis_set_with_input",
"controlled_access": false
}
]
6 changes: 5 additions & 1 deletion src/igvfd/tests/data/inserts/whole_organism.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@
"virtual": false,
"protocols": [
"https://www.protocols.io/test-protocols-url-12345"
]
],
"construct_library_sets": [
"igvf:basic_construct_library_set_1"
],
"nucleic_acid_delivery": "nucleofection"
},
{
"uuid": "d4c46526-0307-11ed-b939-0242ac120002",
Expand Down
42 changes: 40 additions & 2 deletions src/igvfd/tests/test_audit_construct_library_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,20 @@ def test_audit_construct_library_set_with_invalid_chroms(
def test_audit_construct_library_set_guide_library_guide_rna_sequences(
testapp,
construct_library_set_genome_wide,
tabular_file
tabular_file,
assay_term_crispr,
measurement_set,
tissue
):
testapp.patch_json(
measurement_set['@id'],
{'assay_term': assay_term_crispr['@id'],
'samples': [tissue['@id']]}
)
testapp.patch_json(
tissue['@id'],
{'construct_library_sets': [construct_library_set_genome_wide['@id']]}
)
res = testapp.get(construct_library_set_genome_wide['@id'] + '@@audit')
assert any(
error['category'] == 'missing guide RNA sequences'
Expand Down Expand Up @@ -213,8 +225,21 @@ def test_audit_construct_library_set_guide_library_guide_rna_sequences(
def test_audit_construct_library_set_mpra_sequence_designs(
testapp,
construct_library_set_reporter,
tabular_file
tabular_file,
assay_term_mpra,
measurement_set,
tissue,
assay_term_starr
):
testapp.patch_json(
measurement_set['@id'],
{'assay_term': assay_term_mpra['@id'],
'samples': [tissue['@id']]}
)
testapp.patch_json(
tissue['@id'],
{'construct_library_sets': [construct_library_set_reporter['@id']]}
)
res = testapp.get(construct_library_set_reporter['@id'] + '@@audit')
assert any(
error['category'] == 'missing MPRA sequence designs'
Expand All @@ -229,6 +254,19 @@ def test_audit_construct_library_set_mpra_sequence_designs(
error['category'] == 'missing MPRA sequence designs'
for error in res.json['audit'].get('NOT_COMPLIANT', [])
)
testapp.patch_json(
measurement_set['@id'],
{'assay_term': assay_term_starr['@id']}
)
res = testapp.get(construct_library_set_reporter['@id'] + '@@audit')
assert all(
error['category'] != 'missing MPRA sequence designs'
for error in res.json['audit'].get('NOT_COMPLIANT', [])
)
testapp.patch_json(
measurement_set['@id'],
{'assay_term': assay_term_mpra['@id']}
)
testapp.patch_json(
tabular_file['@id'],
{'content_type': 'MPRA sequence designs'}
Expand Down