Skip to content

Commit 1aea14b

Browse files
authored
IGVF-2378-gersbach-additions (#1336)
1 parent ae929f8 commit 1aea14b

17 files changed

+107
-11
lines changed

src/igvfd/audit/construct_library_set.py

+27-5
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,19 @@
99
)
1010

1111

12+
def get_assay_terms(value, system):
    '''
    Collect the assay terms of the measurement sets linked to the samples
    this construct library set was applied to.

    For every path in `value['applied_to_samples']` the sample is embedded
    with only its `file_sets` property; each file set whose path starts with
    `/measurement-sets/` is then embedded and its `assay_term` is recorded.

    :param value: the construct library set being audited (a dict).
    :param system: the audit system dict; `system['request']` must provide
        an `embed(path)` method returning a dict.
    :return: a sorted list of the distinct assay terms found; empty when no
        sample links to a measurement set.
    '''
    request = system.get('request')
    assay_terms = set()
    # Several samples commonly share one measurement set; remember the ones
    # already looked up so each is embedded only once.
    seen_measurement_sets = set()
    for sample in value.get('applied_to_samples', []):
        sample_object = request.embed(
            sample + '@@object_with_select_calculated_properties?field=file_sets')
        for file_set in sample_object.get('file_sets', []):
            if not file_set.startswith('/measurement-sets/'):
                continue
            if file_set in seen_measurement_sets:
                continue
            seen_measurement_sets.add(file_set)
            measurement_set_object = request.embed(file_set + '@@object?skip_calculated=true')
            assay_term = measurement_set_object.get('assay_term')
            # Skip a missing assay term rather than reporting None as a term.
            if assay_term:
                assay_terms.add(assay_term)
    # Sorted so the result does not depend on set iteration order.
    # NOTE(review): assumes assay terms are path strings — confirm.
    return sorted(assay_terms)
23+
24+
1225
@audit_checker('ConstructLibrarySet', frame='object')
1326
def audit_construct_library_set_associated_phenotypes(value, system):
1427
'''
@@ -97,26 +110,35 @@ def audit_integrated_content_files(value, system):
97110
'''
98111
[
99112
{
100-
"audit_description": "Guide libraries are expected to link to an integrated content file of guide RNA sequences.",
113+
"audit_description": "Guide libraries used in CRISPR assays are expected to link to an integrated content file of guide RNA sequences.",
101114
"audit_category": "missing guide RNA sequences",
102115
"audit_level": "NOT_COMPLIANT"
103116
},
104117
{
105-
"audit_description": "Reporter libraries are expected to link to an integrated content file of MPRA sequence designs.",
118+
"audit_description": "Reporter libraries used in MPRA assays are expected to link to an integrated content file of MPRA sequence designs.",
106119
"audit_category": "missing MPRA sequence designs",
107120
"audit_level": "NOT_COMPLIANT"
108121
}
109122
]
110123
'''
111124
audit_message_guide = get_audit_message(audit_integrated_content_files, index=0)
112125
audit_message_reporter = get_audit_message(audit_integrated_content_files, index=1)
126+
assay_terms = get_assay_terms(value, system)
127+
CRISPR_assays = [
128+
'/assay-terms/OBI_0003659/', # in vitro CRISPR screen assay
129+
'/assay-terms/OBI_0003660/', # in vitro CRISPR screen using single-cell RNA-seq
130+
'/assay-terms/OBI_0003661/' # in vitro CRISPR screen using flow cytometry
131+
]
132+
MPRA_assays = [
133+
'/assay-terms/OBI_0002675/' # massively parallel reporter assay
134+
]
113135
library_expectation = {
114-
'guide library': ('guide RNA sequences', audit_message_guide),
115-
'reporter library': ('MPRA sequence designs', audit_message_reporter),
136+
'guide library': ('guide RNA sequences', audit_message_guide, CRISPR_assays),
137+
'reporter library': ('MPRA sequence designs', audit_message_reporter, MPRA_assays),
116138
}
117139
integrated_content_files = value.get('integrated_content_files', '')
118140
library_type = value.get('file_set_type', '')
119-
if library_type in library_expectation:
141+
if library_type in library_expectation and any(assay_term in library_expectation[library_type][2] for assay_term in assay_terms):
120142
file_expectation = library_expectation[library_type][0]
121143
audit_message = library_expectation[library_type][1]
122144
if integrated_content_files:

src/igvfd/mappings/construct_library_set.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
2-
"hash": "57e1e31b2b9efad29507e894d9483c27",
3-
"index_name": "construct_library_set_57e1e31b",
2+
"hash": "6aedb30a9960e4ac9f08e080d9c028d6",
3+
"index_name": "construct_library_set_6aedb30a",
44
"item_type": "construct_library_set",
55
"mapping": {
66
"dynamic_templates": [

src/igvfd/schemas/changelogs/analysis_step.md

+2
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
### Minor changes since schema version 5
44

5+
* Extend `input_content_types` enum list to include `differential peak quantifications`.
6+
* Extend `output_content_types` enum list to include `differential peak quantifications`.
57
* Extend `input_content_types` enum list to include `protein language model`.
68
* Extend `output_content_types` enum list to include `protein language model`.
79
* Extend `input_content_types` enum list to include `genome index`.

src/igvfd/schemas/changelogs/biosample.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
## Changelog for *`biosample.json`*
22

3+
* Extend `nucleic_acid_delivery` enum list to include `nucleofection`. (02/27/2025)
34
* Extend `status` enum list to include `preview`. (11/22/2024)
45
* Add `publication_identifiers`. (07/31/2024)
56
* Restrict `publication_identifiers` to submission by admins only. (07/17/2024)

src/igvfd/schemas/changelogs/in_vitro_system.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
### Minor changes since schema version 25
44

5+
* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
56
* Extend `collections` enum list to include `ACMG73`.
67
* Extend `collections` enum list to include `Morphic`.
78
* Extend `collections` enum list to include `StanfordFCC`.

src/igvfd/schemas/changelogs/multiplexed_sample.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
### Minor changes since schema version 10
44

5+
* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
56
* Extend `collections` enum list to include `ACMG73`.
67
* Extend `collections` enum list to include `Morphic`.
78
* Extend `collections` enum list to include `StanfordFCC`.

src/igvfd/schemas/changelogs/primary_cell.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
### Minor changes since schema version 22
44

5+
* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
56
* Extend `collections` enum list to include `ACMG73`.
67
* Extend `collections` enum list to include `Morphic`.
78
* Extend `collections` enum list to include `StanfordFCC`.

src/igvfd/schemas/changelogs/sample.md

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
## Changelog for *`sample.json`*
22

3+
* Extend `nucleic_acid_delivery` enum list to include `nucleofection`. (02/27/2025)
34
* Extend `nucleic_acid_delivery` enum list to include `lipofectamine` and `electroporation`. (02/03/2025)
45
* Extend `status` enum list to include `preview`. (11/22/2024)
56
* Remove `publication_identifiers`. (07/31/2024)

src/igvfd/schemas/changelogs/tabular_file.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
### Minor changes since schema version 13
44

5+
* Extend `content_type` enum list to include `differential peak quantifications`.
56
* Extend `transcriptome_annotation` enum list to include `GENCODE 22`.
67
* Extend `collections` enum list to include `ACMG73`.
78
* Extend `collections` enum list to include `Morphic`.

src/igvfd/schemas/changelogs/technical_sample.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
### Minor changes since schema version 14
44

5+
* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
56
* Extend `collections` enum list to include `ACMG73`.
67
* Extend `collections` enum list to include `Morphic`.
78
* Extend `collections` enum list to include `StanfordFCC`.

src/igvfd/schemas/changelogs/tissue.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
### Minor changes since schema version 21
44

5+
* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
56
* Extend `collections` enum list to include `ACMG73`.
67
* Extend `collections` enum list to include `Morphic`.
78
* Extend `collections` enum list to include `StanfordFCC`.

src/igvfd/schemas/changelogs/whole_organism.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
### Minor changes since schema version 24
44

5+
* Extend `nucleic_acid_delivery` enum list to include `nucleofection`.
56
* Extend `collections` enum list to include `ACMG73`.
67
* Extend `collections` enum list to include `Morphic`.
78
* Extend `collections` enum list to include `StanfordFCC`.

src/igvfd/schemas/sample.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,8 @@
221221
"lipofectamine",
222222
"electroporation",
223223
"lentiviral transduction",
224-
"transfection"
224+
"transfection",
225+
"nucleofection"
225226
],
226227
"submissionExample": {
227228
"appscript": "transfection",

src/igvfd/schemas/tabular_file.json

+2
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@
124124
"differential chromatin contact quantifications",
125125
"differential element quantifications",
126126
"differential gene expression quantifications",
127+
"differential peak quantifications",
127128
"differential transcript expression quantifications",
128129
"DNA footprint scores",
129130
"editing templates",
@@ -173,6 +174,7 @@
173174
"differential chromatin contact quantifications": "Details differences in chromatin interactions between experimental conditions.",
174175
"differential element quantifications": "A file that details the change in effect sizes for a specific non-coding element between two experimental conditions.",
175176
"differential gene expression quantifications": "The quantified changes in gene expression levels between different conditions or groups.",
177+
"differential peak quantifications": "The quantified changes in peaks detected between experimental conditions.",
176178
"differential transcript expression quantifications": "The quantified changes in expression levels of transcripts between conditions or groups.",
177179
"DNA footprint scores": "A file containing DNA footprint scores, which represents the binding sites between DNA and proteins called from assays like ATAC-seq data.",
178180
"editing templates": "The homology-directed DNA repair (HDR) templates containing SNVs in the library of saturation genome editing assays.",

src/igvfd/tests/data/inserts/tabular_file.json

+18
Original file line numberDiff line numberDiff line change
@@ -483,5 +483,23 @@
483483
"upload_status": "validated",
484484
"file_set": "j-michael-cherry:barcodes_curated_set",
485485
"controlled_access": false
486+
},
487+
{
488+
"uuid": "858449fd-5f24-4a51-a4ab-e8aba826b94f",
489+
"lab": "j-michael-cherry",
490+
"award": "HG012012",
491+
"aliases": [
492+
"igvf:differential_peak_quantifications"
493+
],
494+
"status": "released",
495+
"release_timestamp": "2025-02-01T21:29:45Z",
496+
"md5sum": "08dc4cb2eee35c7a2c7cb200cb17282a",
497+
"file_format": "tsv",
498+
"content_type": "differential peak quantifications",
499+
"submitted_file_name": "/Users/igvf/igvf_files/differential_peak_quantifications.tsv.gz",
500+
"file_size": 145360919,
501+
"upload_status": "validated",
502+
"file_set": "igvf:analysis_set_with_input",
503+
"controlled_access": false
486504
}
487505
]

src/igvfd/tests/data/inserts/whole_organism.json

+5-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,11 @@
2525
"virtual": false,
2626
"protocols": [
2727
"https://www.protocols.io/test-protocols-url-12345"
28-
]
28+
],
29+
"construct_library_sets": [
30+
"igvf:basic_construct_library_set_1"
31+
],
32+
"nucleic_acid_delivery": "nucleofection"
2933
},
3034
{
3135
"uuid": "d4c46526-0307-11ed-b939-0242ac120002",

src/igvfd/tests/test_audit_construct_library_set.py

+40-2
Original file line numberDiff line numberDiff line change
@@ -184,8 +184,20 @@ def test_audit_construct_library_set_with_invalid_chroms(
184184
def test_audit_construct_library_set_guide_library_guide_rna_sequences(
185185
testapp,
186186
construct_library_set_genome_wide,
187-
tabular_file
187+
tabular_file,
188+
assay_term_crispr,
189+
measurement_set,
190+
tissue
188191
):
192+
testapp.patch_json(
193+
measurement_set['@id'],
194+
{'assay_term': assay_term_crispr['@id'],
195+
'samples': [tissue['@id']]}
196+
)
197+
testapp.patch_json(
198+
tissue['@id'],
199+
{'construct_library_sets': [construct_library_set_genome_wide['@id']]}
200+
)
189201
res = testapp.get(construct_library_set_genome_wide['@id'] + '@@audit')
190202
assert any(
191203
error['category'] == 'missing guide RNA sequences'
@@ -213,8 +225,21 @@ def test_audit_construct_library_set_guide_library_guide_rna_sequences(
213225
def test_audit_construct_library_set_mpra_sequence_designs(
214226
testapp,
215227
construct_library_set_reporter,
216-
tabular_file
228+
tabular_file,
229+
assay_term_mpra,
230+
measurement_set,
231+
tissue,
232+
assay_term_starr
217233
):
234+
testapp.patch_json(
235+
measurement_set['@id'],
236+
{'assay_term': assay_term_mpra['@id'],
237+
'samples': [tissue['@id']]}
238+
)
239+
testapp.patch_json(
240+
tissue['@id'],
241+
{'construct_library_sets': [construct_library_set_reporter['@id']]}
242+
)
218243
res = testapp.get(construct_library_set_reporter['@id'] + '@@audit')
219244
assert any(
220245
error['category'] == 'missing MPRA sequence designs'
@@ -229,6 +254,19 @@ def test_audit_construct_library_set_mpra_sequence_designs(
229254
error['category'] == 'missing MPRA sequence designs'
230255
for error in res.json['audit'].get('NOT_COMPLIANT', [])
231256
)
257+
testapp.patch_json(
258+
measurement_set['@id'],
259+
{'assay_term': assay_term_starr['@id']}
260+
)
261+
res = testapp.get(construct_library_set_reporter['@id'] + '@@audit')
262+
assert all(
263+
error['category'] != 'missing MPRA sequence designs'
264+
for error in res.json['audit'].get('NOT_COMPLIANT', [])
265+
)
266+
testapp.patch_json(
267+
measurement_set['@id'],
268+
{'assay_term': assay_term_mpra['@id']}
269+
)
232270
testapp.patch_json(
233271
tabular_file['@id'],
234272
{'content_type': 'MPRA sequence designs'}

0 commit comments

Comments
 (0)