Skip to content

Commit

Permalink
Merge branch 'feature/simpleiter' into develop
Browse files Browse the repository at this point in the history
  • Loading branch information
ACEnglish committed Feb 5, 2024
2 parents d09b0a5 + a7d0c02 commit d9a11b8
Show file tree
Hide file tree
Showing 152 changed files with 3,018 additions and 3,173 deletions.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
3 changes: 2 additions & 1 deletion repo_utils/answer_key/help.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
usage: truvari [-h] CMD ...

Truvari v4.3.0.dev Structural Variant Benchmarking and Annotation
Truvari v4.2.1.dev Structural Variant Benchmarking and Annotation

Available commands:
bench Performance metrics from comparison of two VCFs
Expand All @@ -13,6 +13,7 @@ Available commands:
divide Divide a VCF into independent shards
phab Variant harmonization using MSA
refine Automated bench result refinement with phab
ga4gh Convert Truvari result to GA4GH
version Print the Truvari version and exit

positional arguments:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ chr20 19663366 19663526 2 2 0 0 False 2 2 0 0 TP
chr20 20295955 20296565 2 2 2 1 True 3 3 0 0 TP
chr20 20320285 20320579 1 1 2 1 True 1 1 0 0 TP
chr20 20337200 20337707 1 1 6 2 True 3 3 0 0 TP
chr20 20354570 20358323 7 7 9 1 True 14 14 0 0 TP
chr20 20354570 20358323 7 7 9 1 True 16 16 0 0 TP
chr20 20458640 20458937 2 2 0 0 False 2 2 0 0 TP
chr20 21120212 21120539 2 2 1 0 True 1 1 0 0 TP
chr20 21721174 21721735 2 2 1 0 True 3 3 0 0 TP
Expand Down Expand Up @@ -133,7 +133,7 @@ chr20 58694862 58695411 2 2 0 0 False 2 2 0 0 TP
chr20 59383804 59385235 3 3 2 0 True 4 4 0 0 TP
chr20 59442036 59442273 2 2 0 0 False 2 2 0 0 TP
chr20 60087391 60087965 2 2 0 0 False 2 2 0 0 TP
chr20 60314330 60315020 2 2 2 0 True 6 6 0 0 TP
chr20 60314330 60315020 2 2 2 0 True 5 5 0 0 TP
chr20 60600107 60600320 1 1 0 0 False 1 1 0 0 TP
chr20 60702942 60703115 2 2 2 1 True 3 3 0 0 TP
chr20 60764031 60764634 2 2 0 0 False 2 2 0 0 TP
Expand All @@ -155,7 +155,7 @@ chr20 61723833 61724244 1 1 0 0 False 1 1 0 0 TP
chr20 61744188 61744654 2 2 1 0 True 2 2 0 0 TP
chr20 61783403 61784839 3 3 1 0 True 4 4 0 0 TP
chr20 61919676 61921374 2 2 0 0 False 2 2 0 0 TP
chr20 62057573 62059139 1 1 3 1 True 8 8 0 0 TP
chr20 62057573 62059139 1 1 3 1 True 7 7 0 0 TP
chr20 62212844 62213874 4 4 0 0 False 4 4 0 0 TP
chr20 62270279 62271094 1 1 3 1 True 4 4 0 0 TP
chr20 62317837 62318459 2 2 0 0 False 2 2 0 0 TP
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
{
"TP": 17,
"TN": 6,
"FP": 23,
"TP": 19,
"TN": 10,
"FP": 17,
"FN": 58,
"base P": 76,
"base N": 16,
"comp P": 85,
"comp N": 7,
"PPV": 0.2,
"TPR": 0.2236842105263158,
"TNR": 0.375,
"NPV": 0.8571428571428571,
"ACC": 0.25,
"BA": 0.2993421052631579,
"F1": 0.2111801242236025,
"base P": 78,
"base N": 14,
"comp P": 81,
"comp N": 11,
"PPV": 0.2345679012345679,
"TPR": 0.24358974358974358,
"TNR": 0.7142857142857143,
"NPV": 0.9090909090909091,
"ACC": 0.31521739130434784,
"BA": 0.47893772893772896,
"F1": 0.2389937106918239,
"UND": 0
}
108 changes: 54 additions & 54 deletions repo_utils/answer_key/refine/refine_output_three/refine.regions.txt
Original file line number Diff line number Diff line change
@@ -1,93 +1,93 @@
chrom start end in_tpbase in_tp in_fn in_fp refined out_tpbase out_tp out_fn out_fp state
chr20 278929 279069 3 3 1 0 False 3 3 1 0 FN
chr20 641912 642420 3 3 2 0 False 3 3 2 0 FN
chr20 2240960 2241290 1 1 5 1 True 1 1 4 1 FN,FP
chr20 4032357 4033228 2 2 2 1 True 1 1 2 1 FN,FP
chr20 5040476 5040477 0 0 0 2 False 0 0 0 2 FP
chr20 5041941 5042268 1 1 1 1 True 2 2 0 0 TP
chr20 278929 279069 2 3 1 0 False 2 3 1 0 FN
chr20 641912 642420 2 3 2 0 False 2 3 2 0 FN
chr20 2240960 2241290 0 1 5 1 True 1 1 4 1 FN,FP
chr20 4032357 4033228 2 1 2 1 True 0 0 2 1 FN,FP
chr20 5040476 5040477 0 0 0 0 False 0 0 0 0 TN
chr20 5041941 5042268 1 1 1 0 False 1 1 1 0 FN
chr20 7720952 7720968 1 1 1 0 False 1 1 1 0 FN
chr20 8661944 8662119 1 1 3 1 True 2 2 0 0 TP
chr20 8661944 8662119 0 1 2 1 True 2 2 0 0 TP
chr20 10802727 10802844 1 1 1 0 False 1 1 1 0 FN
chr20 13848272 13848544 3 3 1 0 False 3 3 1 0 FN
chr20 14862054 14862644 5 5 5 1 True 3 3 1 0 FN
chr20 16257854 16259205 2 2 2 0 False 2 2 2 0 FN
chr20 13848272 13848544 2 2 1 0 False 2 2 1 0 FN
chr20 14862054 14862644 4 4 5 1 True 3 3 1 0 FN
chr20 16257854 16259205 2 2 1 0 False 2 2 1 0 FN
chr20 16395201 16395373 3 3 1 0 False 3 3 1 0 FN
chr20 17081293 17081365 2 2 1 0 False 2 2 1 0 FN
chr20 18209139 18210134 3 3 2 1 True 3 3 0 1 FP
chr20 20296014 20296330 2 2 2 1 True 5 5 0 0 TP
chr20 20320339 20320519 1 1 2 1 True 3 3 0 0 TP
chr20 20296014 20296330 1 1 2 1 True 4 4 0 0 TP
chr20 20320339 20320519 0 1 2 1 True 3 3 0 0 TP
chr20 20337285 20337624 1 1 6 2 True 1 1 0 0 TP
chr20 20354912 20355435 3 3 1 0 False 3 3 1 0 FN
chr20 20356530 20357810 4 4 8 1 True 8 8 2 0 FN
chr20 20354912 20355435 2 2 1 0 False 2 2 1 0 FN
chr20 20356530 20357810 2 3 8 1 True 8 8 2 0 FN
chr20 21120298 21120461 2 2 1 0 False 2 2 1 0 FN
chr20 21721451 21721646 2 2 1 0 False 2 2 1 0 FN
chr20 22082266 22083905 4 4 3 1 True 3 3 4 1 FN,FP
chr20 23155578 23155857 3 3 2 2 True 3 3 1 1 FN,FP
chr20 21721451 21721646 0 2 1 0 False 0 2 1 0 FN
chr20 22082266 22083905 3 3 3 1 True 3 3 4 1 FN,FP
chr20 23155578 23155857 3 3 1 2 True 3 3 1 1 FN,FP
chr20 23560939 23561098 1 1 2 2 True 2 2 0 0 TP
chr20 24408073 24408820 3 3 1 0 False 3 3 1 0 FN
chr20 24682066 24682125 2 2 1 0 False 2 2 1 0 FN
chr20 25781790 25781791 0 0 0 1 False 0 0 0 1 FP
chr20 32723044 32723045 0 0 1 0 False 0 0 1 0 FN
chr20 34235898 34235981 0 0 2 1 True 2 2 0 0 TP
chr20 25781790 25781791 0 0 0 0 False 0 0 0 0 TN
chr20 32723044 32723045 0 0 0 0 False 0 0 0 0 TN
chr20 34235898 34235981 0 0 1 1 True 1 1 0 0 TP
chr20 35539212 35539582 4 4 1 0 False 4 4 1 0 FN
chr20 35580686 35580756 1 1 2 1 True 1 1 2 1 FN,FP
chr20 37361785 37361886 2 2 1 0 False 2 2 1 0 FN
chr20 38123799 38124003 2 2 1 4 True 1 1 2 3 FN,FP
chr20 38123799 38124003 0 1 1 4 True 1 1 2 3 FN,FP
chr20 38463997 38464344 2 2 2 0 False 2 2 2 0 FN
chr20 41196370 41196495 3 3 1 0 False 3 3 1 0 FN
chr20 41196370 41196495 2 3 1 0 False 2 3 1 0 FN
chr20 41257714 41258003 0 0 0 1 False 0 0 0 1 FP
chr20 44764150 44764203 2 2 1 0 False 2 2 1 0 FN
chr20 45600655 45600695 2 2 2 0 False 2 2 2 0 FN
chr20 48449794 48450385 4 4 2 0 False 4 4 2 0 FN
chr20 44764150 44764203 0 1 1 0 False 0 1 1 0 FN
chr20 45600655 45600695 0 2 2 0 False 0 2 2 0 FN
chr20 48449794 48450385 4 4 1 0 False 4 4 1 0 FN
chr20 49834182 49834469 2 2 1 0 False 2 2 1 0 FN
chr20 50775646 50775832 1 1 2 1 True 0 0 0 0 TN
chr20 51953819 51953820 0 0 0 1 False 0 0 0 1 FP
chr20 53204099 53204252 2 2 2 1 True 0 0 0 0 TN
chr20 50775646 50775832 0 0 1 1 True 0 0 0 0 TN
chr20 51953819 51953820 0 0 0 0 False 0 0 0 0 TN
chr20 53204099 53204252 0 1 2 0 False 0 1 2 0 FN
chr20 55624808 55625652 6 6 6 0 False 6 6 6 0 FN
chr20 55627638 55628305 7 7 4 0 False 7 7 4 0 FN
chr20 55944272 55945175 2 2 2 1 True 1 1 2 0 FN
chr20 56280541 56281913 4 4 5 1 True 4 4 2 1 FN,FP
chr20 55944272 55945175 2 2 1 1 True 1 1 2 0 FN
chr20 56280541 56281913 4 4 4 1 True 5 5 2 1 FN,FP
chr20 57090868 57091166 1 1 2 0 False 1 1 2 0 FN
chr20 57110450 57110593 2 2 1 0 False 2 2 1 0 FN
chr20 57190256 57190428 0 0 3 1 True 0 0 0 0 TN
chr20 57350856 57350920 1 1 1 0 False 1 1 1 0 FN
chr20 57350856 57350920 1 1 0 0 False 1 1 0 0 TP
chr20 57949001 57949346 1 1 4 1 True 0 0 4 1 FN,FP
chr20 59384366 59384743 3 3 2 0 False 3 3 2 0 FN
chr20 60314443 60314711 2 2 2 0 False 2 2 2 0 FN
chr20 60703005 60703087 2 2 2 1 True 3 3 0 0 TP
chr20 61100921 61102405 1 1 4 1 True 4 4 0 0 TP
chr20 61201822 61202242 2 2 4 1 True 1 1 0 0 TP
chr20 61282925 61283479 4 4 2 0 False 4 4 2 0 FN
chr20 59384366 59384743 2 2 2 0 False 2 2 2 0 FN
chr20 60314443 60314711 2 1 1 0 False 2 1 1 0 FN
chr20 60703005 60703087 2 2 1 1 True 3 3 0 0 TP
chr20 61100921 61102405 1 1 3 1 True 4 4 0 0 TP
chr20 61201822 61202242 1 2 4 1 True 1 1 0 0 TP
chr20 61282925 61283479 3 3 2 0 False 3 3 2 0 FN
chr20 61289662 61290273 1 1 2 1 True 0 0 0 0 TN
chr20 61329345 61329441 0 0 0 2 False 0 0 0 2 FP
chr20 61562109 61562252 0 0 2 1 True 1 1 0 0 TP
chr20 61329345 61329441 0 0 0 1 False 0 0 0 1 FP
chr20 61562109 61562252 0 0 1 1 True 1 1 0 0 TP
chr20 61744401 61744592 2 2 1 0 False 2 2 1 0 FN
chr20 61783958 61784698 3 3 1 0 False 3 3 1 0 FN
chr20 62057602 62058768 1 1 3 1 True 1 1 0 0 TP
chr20 62057602 62058768 1 1 2 1 True 1 1 0 0 TP
chr20 62270413 62270827 1 1 3 1 True 3 3 0 0 TP
chr20 62321396 62321730 2 2 3 0 False 2 2 3 0 FN
chr20 62349641 62349826 1 1 5 1 True 0 0 2 1 FN,FP
chr20 62360410 62360602 0 0 8 2 True 2 2 2 1 FN,FP
chr20 62321396 62321730 2 2 2 0 False 2 2 2 0 FN
chr20 62349641 62349826 1 1 4 1 True 0 0 2 1 FN,FP
chr20 62360410 62360602 0 0 6 2 True 1 1 1 1 FN,FP
chr20 62830650 62830697 2 2 1 1 True 1 1 0 0 TP
chr20 62875241 62875404 2 2 3 0 False 2 2 3 0 FN
chr20 62875241 62875404 2 2 2 0 False 2 2 2 0 FN
chr20 63028066 63029030 4 4 1 1 True 2 2 0 0 TP
chr20 63049093 63049159 3 3 1 0 False 3 3 1 0 FN
chr20 63049093 63049159 1 1 1 0 False 1 1 1 0 FN
chr20 63154687 63154921 1 1 1 0 False 1 1 1 0 FN
chr20 63167473 63167564 2 2 1 0 False 2 2 1 0 FN
chr20 63221509 63221721 1 1 2 1 True 0 0 0 0 TN
chr20 63167473 63167564 2 2 0 0 False 2 2 0 0 TP
chr20 63221509 63221721 1 1 1 1 True 0 0 0 0 TN
chr20 63372214 63372400 2 2 1 0 False 2 2 1 0 FN
chr20 63491957 63492390 1 1 3 1 True 2 2 1 0 FN
chr20 63491957 63492390 1 1 2 1 True 2 2 1 0 FN
chr20 63535751 63536002 1 1 2 0 False 1 1 2 0 FN
chr20 63559415 63559719 1 1 4 1 True 3 3 0 0 TP
chr20 63641847 63642015 1 1 2 1 True 0 0 0 0 TN
chr20 63693449 63693732 1 1 6 1 True 10 10 0 0 TP
chr20 63641847 63642015 0 0 1 0 False 0 0 1 0 FN
chr20 63693449 63693732 1 1 5 1 True 10 10 0 0 TP
chr20 63770936 63771014 0 0 0 2 False 0 0 0 2 FP
chr20 63948594 63948653 2 2 1 1 True 0 0 0 1 FP
chr20 63948594 63948653 2 2 0 0 False 2 2 0 0 TP
chr20 63964805 63966113 1 1 1 0 False 1 1 1 0 FN
chr20 64065882 64065883 0 0 0 1 False 0 0 0 1 FP
chr20 64065882 64065883 0 0 0 0 False 0 0 0 0 TN
chr20 64090733 64091007 0 0 0 2 False 0 0 0 2 FP
chr20 64097039 64097040 0 0 0 2 False 0 0 0 2 FP
chr20 64097039 64097040 0 0 0 0 False 0 0 0 0 TN
chr20 64125360 64127875 3 3 3 0 False 3 3 3 0 FN
chr20 64131913 64133856 5 5 8 1 True 5 5 6 5 FN,FP
chr20 64131913 64133856 4 5 8 1 True 5 5 6 5 FN,FP
chr20 64134990 64136330 3 3 1 0 False 3 3 1 0 FN
chr20 64173438 64176330 4 4 7 3 True 10 10 5 2 FN,FP
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
{
"TP-base": 408,
"TP-comp": 408,
"FP": 35,
"FN": 108,
"precision": 0.9209932279909706,
"recall": 0.7906976744186046,
"f1": 0.8508863399374348,
"base cnt": 516,
"comp cnt": 443
"TP-base": 422,
"TP-comp": 416,
"FP": 38,
"FN": 129,
"precision": 0.9162995594713657,
"recall": 0.7658802177858439,
"f1": 0.8343646923508332,
"base cnt": 551,
"comp cnt": 454
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ chr20 18209097 18210727 3 3 2 1 True 5 5 0 0 TP
chr20 18675650 18675915 2 2 0 0 False 2 2 0 0 TP
chr20 19663366 19663526 2 2 0 0 False 2 2 0 0 TP
chr20 20337200 20337707 1 1 6 2 True 3 3 0 0 TP
chr20 20354570 20358323 7 7 9 1 True 14 14 0 0 TP
chr20 20354570 20358323 7 7 9 1 True 16 16 0 0 TP
chr20 20458640 20458937 2 2 0 0 False 2 2 0 0 TP
chr20 21120212 21120539 2 2 1 0 True 1 1 0 0 TP
chr20 21721174 21721735 2 2 1 0 True 3 3 0 0 TP
Expand Down Expand Up @@ -73,7 +73,7 @@ chr20 58519902 58520372 2 2 0 0 False 2 2 0 0 TP
chr20 58694862 58695411 2 2 0 0 False 2 2 0 0 TP
chr20 59442036 59442273 2 2 0 0 False 2 2 0 0 TP
chr20 60087391 60087965 2 2 0 0 False 2 2 0 0 TP
chr20 60314330 60315020 2 2 2 0 True 6 6 0 0 TP
chr20 60314330 60315020 2 2 2 0 True 5 5 0 0 TP
chr20 60764031 60764634 2 2 0 0 False 2 2 0 0 TP
chr20 61176725 61176974 2 2 0 0 False 2 2 0 0 TP
chr20 61201683 61202474 2 2 4 1 True 7 7 0 0 TP
Expand All @@ -85,7 +85,7 @@ chr20 61475406 61475726 2 2 0 0 False 2 2 0 0 TP
chr20 61744188 61744654 2 2 1 0 True 2 2 0 0 TP
chr20 61783403 61784839 3 3 1 0 True 4 4 0 0 TP
chr20 61919676 61921374 2 2 0 0 False 2 2 0 0 TP
chr20 62057573 62059139 1 1 3 1 True 8 8 0 0 TP
chr20 62057573 62059139 1 1 3 1 True 7 7 0 0 TP
chr20 62212844 62213874 4 4 0 0 False 4 4 0 0 TP
chr20 62270279 62271094 1 1 3 1 True 4 4 0 0 TP
chr20 62317837 62318459 2 2 0 0 False 2 2 0 0 TP
Expand Down
32 changes: 26 additions & 6 deletions repo_utils/run_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import sys
import pysam
from collections import defaultdict
from intervaltree import IntervalTree

# Use the current truvari, not any installed libraries
sys.path.insert(0, os.getcwd())
Expand All @@ -26,14 +27,33 @@
state = entry.info['include'] == 'in'
assert state == truvari.entry_within(entry, region_start, region_end), f"Bad Boundary {str(entry)}"

v = pysam.VariantFile(vcf_fn)
regions = truvari.RegionVCFIterator(v, includebed=bed_fn)

regions = truvari.RegionVCFIterator(vcf, includebed=bed_fn)
vcf.reset()
truv_ans = defaultdict(lambda: False)
for entry in regions.iterate(v):
for entry in regions.iterate(vcf):
truv_ans[truvari.entry_to_key(entry)] = True

v = pysam.VariantFile(vcf_fn)
for entry in v:
vcf.reset()
for entry in vcf:
state = entry.info['include'] == 'in'
assert state == truv_ans[truvari.entry_to_key(entry)], f"Bad Boundary {str(entry)}"

"""
New Region Filtering
"""
# Fixtures for the region_filter check. The assertions below rely on each
# VCF record carrying an INFO 'include' value of either 'in' or 'out'.
vcf_fn = "repo_utils/test_files/variants/boundary_cpx.vcf.gz"
bed_fn = "repo_utils/test_files/beds/boundary_cpx.bed"

# One IntervalTree per chromosome name, filled from the first three
# whitespace-separated columns of every line in the BED file.
tree = defaultdict(IntervalTree)
with open(bed_fn, 'r') as bed_handle:
    for bed_line in bed_handle:
        columns = bed_line.strip().split()
        chrom_tree = tree[columns[0]]
        begin = int(columns[1])
        # The interval stored in the tree ends one past the BED end column.
        finish = int(columns[2]) + 1
        chrom_tree.addi(begin, finish)

vcf = pysam.VariantFile(vcf_fn)

# With the third argument True, every entry that region_filter yields is
# expected to be labelled 'in'.
kept_entries = truvari.region_filter(vcf, tree, True)
for entry in kept_entries:
    assert entry.info['include'] == 'in', f"Bad in {str(entry)}"

# Reset the VCF before iterating it a second time; with the third argument
# False, every yielded entry is expected to be labelled 'out'.
vcf.reset()
dropped_entries = truvari.region_filter(vcf, tree, False)
for entry in dropped_entries:
    assert entry.info['include'] == 'out', f"Bad out {str(entry)}"
21 changes: 21 additions & 0 deletions repo_utils/sub_tests/ga4gh.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# ------------------------------------------------------------
# ga4gh
# ------------------------------------------------------------
# Functional tests for the `ga4gh` sub-command.
# `run`, `assert_exit_code`, `assert_equal`, `fn_md5` and the variables
# $truv, $ANSDIR and $OD are not defined in this file; presumably they come
# from the test harness that sources it -- confirm.

# Case 1: convert the refine_output_three answer-key directory without -w.
# The asserts expect outputs named <-o value>_truth.vcf.gz and
# <-o value>_query.vcf.gz, compared by md5 against the answer key.
run test_ga4gh $truv ga4gh -i $ANSDIR/refine/refine_output_three/ -o $OD/ga4gh_norefine
# NOTE(review): $test_ga4gh is presumably set by `run` when this test is
# selected -- confirm against the harness.
if [ $test_ga4gh ]; then
assert_exit_code 0
assert_equal $(fn_md5 $ANSDIR/ga4gh/ga4gh_norefine_truth.vcf.gz) $(fn_md5 $OD/ga4gh_norefine_truth.vcf.gz)
assert_equal $(fn_md5 $ANSDIR/ga4gh/ga4gh_norefine_query.vcf.gz) $(fn_md5 $OD/ga4gh_norefine_query.vcf.gz)
fi

# Case 2: same input directory, run with -w.
# NOTE(review): judging by the test and output names, -w selects a
# "with refine" mode -- confirm against `truvari ga4gh -h`.
run test_ga4gh_refine $truv ga4gh -w -i $ANSDIR/refine/refine_output_three/ -o $OD/ga4gh_withrefine
if [ $test_ga4gh_refine ]; then
assert_exit_code 0
assert_equal $(fn_md5 $ANSDIR/ga4gh/ga4gh_withrefine_truth.vcf.gz) $(fn_md5 $OD/ga4gh_withrefine_truth.vcf.gz)
assert_equal $(fn_md5 $ANSDIR/ga4gh/ga4gh_withrefine_query.vcf.gz) $(fn_md5 $OD/ga4gh_withrefine_query.vcf.gz)
fi

# Case 3: bad parameters; the command is expected to exit with status 1.
# The input path notreal.file1234 is presumably nonexistent.
# NOTE(review): -o points into $ANSDIR (the answer key), not $OD. The
# ga4gh_withrefine_* files checked above live under that prefix, so this may
# be a deliberate way to also hit an "output already exists" check -- confirm
# before changing, and make sure this case can never overwrite the answer key.
run test_ga4gh_badparam $truv ga4gh -w -i notreal.file1234 -o $ANSDIR/ga4gh/ga4gh_withrefine
if [ $test_ga4gh_badparam ]; then
assert_exit_code 1
fi
3 changes: 3 additions & 0 deletions repo_utils/test_files/beds/boundary_cpx.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
reference 10 20
reference 110 120
reference2 10 20
Loading

0 comments on commit d9a11b8

Please sign in to comment.