Skip to content

Commit

Permalink
Merge pull request #71 from griffithlab/issue_57
Browse files Browse the repository at this point in the history
Downgrade duplicate variant error to a warning and skip duplicates.
  • Loading branch information
susannasiebert authored Oct 16, 2023
2 parents cff9982 + 6905d5f commit f61fb34
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 2 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
CHROM POS REF ALT SYMBOL
chr17 7675088 C T TP53
14 changes: 14 additions & 0 deletions tests/test_vep_annotation_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,20 @@ def test_vcf_with_multiple_transcripts_and_no_pick(self):
self.assertTrue(cmp(os.path.join(self.test_data_dir, 'output.merge_multiple_transcripts.tsv'), os.path.join(temp_path.name, 'input.tsv')))
temp_path.cleanup()

def test_vcf_with_duplicate_variant(self):
    """Check that a repeated variant yields a warning and the expected TSV.

    Runs the reporter on ``input.duplicate_variant.vcf.gz`` requesting the
    ``SYMBOL`` field, compares the generated ``input.tsv`` with
    ``output.duplicate_variant.tsv`` and asserts that the "already exists"
    warning was logged on the root logger.
    """
    # Re-enable logging so the warning can be captured.
    # NOTE(review): presumably logging is disabled elsewhere in the test
    # class (e.g. in setUp) - confirm.
    logging.disable(logging.NOTSET)
    with LogCapture() as log_capture:
        # The context manager removes the directory even when the reporter
        # raises or the comparison fails, which a trailing cleanup() call
        # would not do.
        with tempfile.TemporaryDirectory() as temp_dir:
            input_vcf = os.path.join(temp_dir, 'input.vcf.gz')
            os.symlink(
                os.path.join(self.test_data_dir, 'input.duplicate_variant.vcf.gz'),
                input_vcf,
            )
            command = [
                input_vcf,
                'SYMBOL',
            ]
            vep_annotation_reporter.main(command)
            expected_tsv = os.path.join(self.test_data_dir, 'output.duplicate_variant.tsv')
            actual_tsv = os.path.join(temp_dir, 'input.tsv')
            self.assertTrue(cmp(expected_tsv, actual_tsv))
        log_capture.check_present(('root', 'WARNING', "VEP entry at CHR chr17, POS 7675088, REF C , ALT T already exists. Skipping subsequent entries."))

def test_vcf_with_multiple_transcripts_and_pick_set_for_none(self):
logging.disable(logging.NOTSET)
with LogCapture() as l:
Expand Down
7 changes: 5 additions & 2 deletions vatools/vep_annotation_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ def extract_vep_fields(args):
else:
vep[chr][pos][ref][alt] = None
else:
sys.exit("VEP entry for at CHR %s, POS %s, REF %s , ALT % already exists" % (chr, pos, ref, alt) )
logging.warning("VEP entry at CHR %s, POS %s, REF %s , ALT %s already exists. Skipping subsequent entries." % (chr, pos, ref, alt) )
vcf_reader.close()
return vep

Expand Down Expand Up @@ -201,6 +201,7 @@ def main(args_input = sys.argv[1:]):
with open(output_file, 'w') as output_filehandle:
writer = csv.DictWriter(output_filehandle, fieldnames = ['CHROM', 'POS', 'REF', 'ALT'] + args.vep_fields, delimiter = "\t")
writer.writeheader()
rows = []
for variant in vcf_reader:
row = {
'CHROM': str(variant.CHROM),
Expand All @@ -209,7 +210,9 @@ def main(args_input = sys.argv[1:]):
'ALT' : ','.join(map(lambda a: a.serialize(), variant.ALT)),
}
row = add_vep_fields_to_row(args, row, vep)
writer.writerow(row)
if row not in rows:
rows.append(row)
writer.writerows(rows)

if __name__ == '__main__':
main()

0 comments on commit f61fb34

Please sign in to comment.