Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IGVF-2113-index-file #1206

Merged
merged 10 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 0 additions & 44 deletions src/igvfd/audit/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,47 +115,3 @@ def audit_external_reference_files(value, system):
f'but does not have identifier(s) from an external resource listed in `dbxrefs`.'
)
yield AuditFailure(audit_message.get('audit_category', ''), f'{detail} {audit_message.get("audit_description", "")}', level=audit_message.get('audit_level', ''))


# Audit for alignment files in `bai` format.  A bai file is expected to list
# exactly one file in `derived_from`, that file must be a `bam`, and the two
# must agree on a fixed set of properties.
#
# value:  the AlignmentFile object being audited (dict-like).
# system: audit context; `system.get('request')` is used to embed the
#         `derived_from` file.
# Yields at most one AuditFailure per call.
#
# NOTE(review): the docstring below is JSON, not prose.  It appears to be what
# `get_audit_message` reads (the keys match the ones looked up on
# `audit_message`), so it must stay valid JSON -- confirm before editing it.
@audit_checker('AlignmentFile', frame='object')
def audit_bai_alignment_files(value, system):
    '''
    [
        {
            "audit_description": "Alignment files in bai format are expected to have their corresponding bam file in `derived_from`.",
            "audit_category": "incorrect bam file",
            "audit_level": "ERROR"
        }
    ]
    '''
    audit_message = get_audit_message(audit_bai_alignment_files)
    object_type = space_in_words(value['@type'][0]).capitalize()
    # Properties that a bai file must share with the bam it was derived from.
    check_properties = ('content_type', 'assembly', 'filtered', 'redacted', 'transcriptome_annotation')

    def failure(problem):
        # Build the AuditFailure lazily so the link to the audited object is
        # only rendered when something is actually reported.
        detail = f'{object_type} {audit_link(path_to_text(value["@id"]), value["@id"])} {problem}'
        return AuditFailure(
            audit_message.get('audit_category', ''),
            f'{detail} {audit_message.get("audit_description", "")}',
            level=audit_message.get('audit_level', '')
        )

    if value.get('file_format') != 'bai':
        return

    derived_from = value.get('derived_from')
    # A missing key, `None` and an empty list all mean "no bam file"; treating
    # them alike avoids indexing into an empty `derived_from` further down.
    if not derived_from:
        yield failure('has no bam file in `derived_from`.')
        return

    if len(derived_from) > 1:
        yield failure('has multiple files in `derived_from`.')
        return

    bam_file = system.get('request').embed(
        derived_from[0], '@@object?skip_calculated=true')
    if bam_file.get('file_format') != 'bam':
        yield failure('has incorrect file in `derived_from`.')
        return

    inconsistent_properties = [
        name for name in check_properties
        if value.get(name) != bam_file.get(name)
    ]
    if inconsistent_properties:
        inconsistent_properties_str = ', '.join(inconsistent_properties)
        yield failure(
            'has the following inconsistent properties with its bam file in `derived_from`: '
            f'{inconsistent_properties_str}.'
        )
39 changes: 39 additions & 0 deletions src/igvfd/audit/index_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from snovault.auditor import (
audit_checker,
AuditFailure,
)
from .formatter import (
audit_link,
path_to_text,
get_audit_message,
space_in_words
)


# Audit for index files in `tbi` format: the file listed first in
# `derived_from` is expected to be a `tsv` or `vcf`.
#
# value:  the IndexFile object being audited (dict-like).
# system: audit context; `system.get('request')` is used to embed the
#         indexed file.
# Yields at most one AuditFailure per call.
#
# NOTE(review): the docstring below is JSON, not prose.  It appears to be what
# `get_audit_message` reads (the keys match the ones looked up on
# `audit_message_tbi`), so it must stay valid JSON -- confirm before editing it.
@audit_checker('IndexFile', frame='object')
def audit_index_files_derived_from(value, system):
    '''
    [
        {
            "audit_description": "Index files in tbi format are expected to have a corresponding tsv or vcf file in `derived_from`.",
            "audit_category": "unexpected indexed file",
            "audit_level": "ERROR"
        }
    ]
    '''
    audit_message_tbi = get_audit_message(audit_index_files_derived_from)
    object_type = space_in_words(value['@type'][0]).capitalize()
    # For tbi files, check that the indexed file is of an expected file_format.
    # No need to check bai files, since Alignment Files can only be bams.
    if value.get('file_format') != 'tbi':
        # Return before the embed() call: it is only needed for tbi files.
        return

    derived_from_file = value.get('derived_from')
    if not derived_from_file:
        # Nothing to inspect.  Indexing an empty/missing `derived_from` would
        # raise and abort the audit run; presumably its presence is enforced
        # elsewhere (e.g. by the schema) -- TODO confirm.
        return

    derived_from_file_obj = system.get('request').embed(
        derived_from_file[0], '@@object?skip_calculated=true')
    if derived_from_file_obj.get('file_format') in ('tsv', 'vcf'):
        return

    detail = (
        f'{object_type} {audit_link(path_to_text(value["@id"]), value["@id"])} '
        f'has a file of unexpected file format in `derived_from`.')
    yield AuditFailure(
        audit_message_tbi.get('audit_category', ''),
        f'{detail} {audit_message_tbi.get("audit_description", "")}',
        level=audit_message_tbi.get('audit_level', '')
    )
2 changes: 2 additions & 0 deletions src/igvfd/commands/make_audit_docstring_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import igvfd.audit.formatter
import igvfd.audit.human_donor
import igvfd.audit.in_vitro_system
import igvfd.audit.index_file
import igvfd.audit.matrix_file
import igvfd.audit.measurement_set
import igvfd.audit.multiplexed_sample
Expand Down Expand Up @@ -37,6 +38,7 @@
igvfd.audit.formatter,
igvfd.audit.human_donor,
igvfd.audit.in_vitro_system,
igvfd.audit.index_file,
igvfd.audit.matrix_file,
igvfd.audit.measurement_set,
igvfd.audit.multiplexed_sample,
Expand Down
1 change: 1 addition & 0 deletions src/igvfd/loadxl.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
'matrix_file',
'model_file',
'tabular_file',
'index_file',
'genome_browser_annotation_file',
'image_file'
]
Expand Down
4 changes: 2 additions & 2 deletions src/igvfd/mappings/alignment_file.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"hash": "d2d11b08bd6e06c2ba47015b17b1f8aa",
"index_name": "alignment_file_d2d11b08",
"hash": "12428c46f270cbe2fdf3b7b7867a2c68",
"index_name": "alignment_file_12428c46",
"item_type": "alignment_file",
"mapping": {
"dynamic_templates": [
Expand Down
Loading