Skip to content

Commit

Permalink
Output warning when VCF is annotated with the PICK flag but no trans…
Browse files Browse the repository at this point in the history
…cript was PICK'ed
  • Loading branch information
susannasiebert committed Oct 13, 2023
1 parent a25f1f1 commit d8f2c05
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 3 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CHROM POS REF ALT SYMBOL
chr17 7675088 C T TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53,TP53
15 changes: 15 additions & 0 deletions tests/test_vep_annotation_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,3 +120,18 @@ def test_vcf_with_multiple_transcripts_and_no_pick(self):
vep_annotation_reporter.main(command)
self.assertTrue(cmp(os.path.join(self.test_data_dir, 'output.merge_multiple_transcripts.tsv'), os.path.join(temp_path.name, 'input.tsv')))
temp_path.cleanup()

# NOTE(review): the test is skipped with an empty reason string; record why before re-enabling it.
@unittest.skip("")
def test_vcf_with_multiple_transcripts_and_pick_set_for_none(self):
# Checks the case where the VCF carries a PICK annotation but no transcript is
# PICK'ed: the reporter should write values for all transcripts and log a warning.
# logging.disable(NOTSET) lifts any earlier logging.disable() so the warning is emitted.
logging.disable(logging.NOTSET)
with LogCapture() as l:
temp_path = tempfile.TemporaryDirectory()
# Symlink the fixture into the temp dir; the comparison below expects the
# reporter's output (input.tsv) to appear alongside the input there.
os.symlink(os.path.join(self.test_data_dir, 'input.no_pick_value.vcf.gz'), os.path.join(temp_path.name, 'input.vcf.gz'))
command = [
os.path.join(temp_path.name, 'input.vcf.gz'),
'SYMBOL',
]
vep_annotation_reporter.main(command)
# Output must match the stored expected TSV byte-for-byte.
self.assertTrue(cmp(os.path.join(self.test_data_dir, 'output.no_pick_value.tsv'), os.path.join(temp_path.name, 'input.tsv')))
temp_path.cleanup()
# The warning must have been logged on the root logger at WARNING level.
l.check_present(('root', 'WARNING', "VCF is annotated with the PICK flag but no PICK'ed transcript found for variant chr17 7675088 C T. Writing values for all transcripts."))
15 changes: 12 additions & 3 deletions vatools/vep_annotation_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import tempfile
import csv
import binascii
import logging

def define_parser():
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -99,13 +100,18 @@ def resolve_alleles(entry, csq_alleles):
return alleles

def transcript_for_alt(transcripts, alt):
    """Select the annotation values to report for one alternate allele.

    Args:
        transcripts: mapping from allele to a list of per-transcript
            annotation dicts (field name -> string value).
        alt: the allele key to look up in ``transcripts``.

    Returns:
        A ``(values, no_pick_value)`` tuple.

        * If some transcript has ``PICK == '1'``, ``values`` is that
          transcript's dict and ``no_pick_value`` is ``False``.
        * Otherwise ``values`` is a new dict in which every field holds the
          comma-joined values of all transcripts, and ``no_pick_value`` is
          ``True`` only when the first transcript carries a ``PICK`` key
          (i.e. PICK was annotated but never set), so the caller can warn.

    Raises:
        KeyError: if ``alt`` is not in ``transcripts``.
        IndexError: if the transcript list for ``alt`` is empty.
    """
    candidates = transcripts[alt]

    # Always return a 2-tuple: the caller unpacks ``values, no_pick_value``.
    for transcript in candidates:
        if transcript.get('PICK') == '1':
            return transcript, False

    # Nothing was PICK'ed. Distinguish "PICK annotated but unset" from
    # "PICK not annotated at all" by looking at the first transcript's fields.
    no_pick_value = 'PICK' in candidates[0]

    # The first transcript's keys define which fields get merged.
    merged_transcripts = {
        key: ",".join(transcript[key] for transcript in candidates)
        for key in candidates[0]
    }
    return merged_transcripts, no_pick_value

def decode_hex(match_string):
hex_string = match_string.group(0).replace('%', '')
Expand Down Expand Up @@ -142,7 +148,10 @@ def extract_vep_fields(args):
alt = alt.serialize()
if alt not in vep[chr][pos][ref]:
if alleles_dict[alt] in transcripts:
vep[chr][pos][ref][alt] = transcript_for_alt(transcripts, alleles_dict[alt])
values, no_pick_value = transcript_for_alt(transcripts, alleles_dict[alt])
if no_pick_value:
logging.warning("VCF is annotated with the PICK flag but no PICK'ed transcript found for variant {} {} {} {}. Writing values for all transcripts.".format(chr, pos, ref, alt))
vep[chr][pos][ref][alt] = values
else:
vep[chr][pos][ref][alt] = None
else:
Expand Down

0 comments on commit d8f2c05

Please sign in to comment.