Skip to content

Commit

Permalink
More fixes in gVCF merging
Browse files Browse the repository at this point in the history
Genotypes at the ends of gVCF blocks were in some cases still unfilled, even after
the 60c65eb fix.

Fixes #1910, see also #1891 and #1164.
  • Loading branch information
pd3 committed Apr 18, 2023
1 parent 30a5cf4 commit 2f482de
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 4 deletions.
2 changes: 1 addition & 1 deletion test/gvcf.merge.1.out
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@
##INFO=<ID=BLOCKAVG_min30p3a,Number=0,Type=Flag,Description="Non-variant site block. All sites in a block are constrained to be non-variant, have the same filter value, and have all sample values in range [x,y], y <= max(x+3,(x*1.3)). All printed site block sample values are the minimum observed in the region spanned by the block">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT G06 D05 H09
chr1 10106 . C . 0 LowGQX BLOCKAVG_min30p3a;AN=2 GT:GQX:DP:DPF ./.:.:.:. 0/0:12:5:0 ./.:.:.:.
chr1 10107 . C . 0 LowGQX;HighDPFRatio BLOCKAVG_min30p3a;AN=2 GT:GQX:DP:DPF .:.:0:1 ./.:.:.:. 0/0:5:2:0
chr1 10107 . C . 0 LowGQX;HighDPFRatio BLOCKAVG_min30p3a;AN=4 GT:GQX:DP:DPF .:.:0:1 0/0:12:5:0 0/0:5:2:0
chr1 10108 . N . 0 LowGQX;HighDPFRatio END=10110;BLOCKAVG_min30p3a;AN=2 GT:GQX:DP:DPF .:.:0:1 ./.:.:.:. 0/0:5:2:0
chr1 10111 . N . 0 LowGQX END=10120;BLOCKAVG_min30p3a;AN=2 GT:GQX:DP:DPF ./.:.:.:. ./.:.:.:. 0/0:5:2:0
12 changes: 12 additions & 0 deletions test/merge.gvcf.11.1.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
##fileformat=VCFv4.2
##FILTER=<ID=PASS,Description="All filters passed">
##reference=file.fa
##contig=<ID=chr20,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT s1 s2 s3
chr20 64249835 . T <NON_REF> 0 . END=64249836 GT 0/0 0/0 0/0
chr20 64249837 . T <NON_REF> 0 . . GT 0/0 0/0 0/0
chr20 64249838 . T <NON_REF> 0 . END=64250066 GT 0/0 0/0 0/0
chr20 64250067 . T <NON_REF> 0 . . GT 0/0 0/0 ./.
chr20 64250068 . N <NON_REF> 0 . END=64251648 GT 0/0 ./. ./.
8 changes: 8 additions & 0 deletions test/merge.gvcf.11.a.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.2
##reference=file.fa
##contig=<ID=chr20,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT s1
chr20 64249835 . T <NON_REF> 0 . END=64249837 GT 0/0
chr20 64249838 . T <NON_REF> 0 . END=64251648 GT 0/0
8 changes: 8 additions & 0 deletions test/merge.gvcf.11.b.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.2
##reference=file.fa
##contig=<ID=chr20,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT s2
chr20 64249835 . T <NON_REF> 0 . END=64250066 GT 0/0
chr20 64250067 . T <NON_REF> 0 . END=64250067 GT 0/0
8 changes: 8 additions & 0 deletions test/merge.gvcf.11.c.vcf
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##fileformat=VCFv4.2
##reference=file.fa
##contig=<ID=chr20,length=59373566,assembly=B37,md5=1e86411d73e6f00a10590f976be01623,species="Homo sapiens">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the region described in this record">
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT s3
chr20 64249835 . T <NON_REF> 0 . END=64249836 GT 0/0
chr20 64249837 . T <NON_REF> 0 . END=64250066 GT 0/0
4 changes: 2 additions & 2 deletions test/merge.gvcf.2.out
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
6 630 . T <*> . . . PL 66,1,1 66,2,3 .
6 631 . N <*> . . END=666 PL 66,1,1 . .
7 701 . T <*> . . . PL 77,1,1 77,2,1 .
7 702 . T <*> . . . PL . 77,2,2 .
7 703 . T <*> . . . PL 77,1,2 . .
7 702 . T <*> . . . PL 77,1,1 77,2,2 .
7 703 . T <*> . . . PL 77,1,2 77,2,2 .
7 704 . N <*> . . END=777 PL 77,1,2 . .
8 1 . T <*> . . END=2 PL 88,1,1 . .
8 3 . T <*>,A . . . PL 88,1,1,1,1,1 88,88,88,2,88,1 88,88,88,3,88,1
Expand Down
1 change: 1 addition & 0 deletions test/test.pl
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
run_test(\&test_vcf_merge,$opts,in=>['merge.mrules.1.a','merge.mrules.1.b'],out=>'merge.mrules.1.2.out',args=>'--gvcf - -M AD:.,PL:.');
run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.5.a','merge.gvcf.5.b'],out=>'merge.gvcf.5.1.out',args=>'--gvcf -');
run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.5.a','merge.gvcf.5.b'],out=>'merge.gvcf.5.1.out',args=>'--gvcf - --merge none');
run_test(\&test_vcf_merge,$opts,in=>['merge.gvcf.11.a','merge.gvcf.11.b','merge.gvcf.11.c'],out=>'merge.gvcf.11.1.out',args=>'--gvcf -');
# run_test(\&test_vcf_merge_big,$opts,in=>'merge_big.1',out=>'merge_big.1.1',nsmpl=>79000,nfiles=>79,nalts=>486,args=>''); # commented out for speed
run_test(\&test_vcf_query,$opts,in=>'query.string',out=>'query.string.1.out',args=>q[-f '%CHROM\\t%POS\\t%CLNREVSTAT\\n' -i'CLNREVSTAT="criteria_provided,_conflicting_interpretations"']);
run_test(\&test_vcf_query,$opts,in=>'query.string',out=>'query.string.1.out',args=>q[-f '%CHROM\\t%POS\\t%CLNREVSTAT\\n' -i'CLNREVSTAT="criteria_provided" || CLNREVSTAT="_conflicting_interpretations"']);
Expand Down
2 changes: 1 addition & 1 deletion vcfmerge.c
Original file line number Diff line number Diff line change
Expand Up @@ -2780,7 +2780,7 @@ void clean_buffer(args_t *args)
{
if ( ma->gvcf[ir].active )
{
if ( ma->pos >= ma->gvcf[ir].end ) ma->gvcf[ir].active = 0;
if ( ma->pos > ma->gvcf[ir].end ) ma->gvcf[ir].active = 0;
else if ( ma->buf[ir].cur==-1 ) ma->buf[ir].cur = ma->buf[ir].beg; // re-activate interrupted gVCF block
}
if ( !ma->gvcf[ir].active ) ma->buf[ir].cur = -1;
Expand Down

0 comments on commit 2f482de

Please sign in to comment.