From 921d5e42a16f2097376bd427ca7907b32ae49326 Mon Sep 17 00:00:00 2001 From: James Date: Mon, 11 Mar 2024 16:05:21 -0400 Subject: [PATCH 1/2] fix to long deletions that overhang into the assembly window causing exceptions --- .../HaplotypeCallerEngine.java | 1 + .../HaplotypeCallerIntegrationTest.java | 3 +- ...ode_givenAlleles_ExtremeLengthDeletion.vcf | 29 ++++++++++++++++++ ...givenAlleles_ExtremeLengthDeletion.vcf.idx | Bin 0 -> 359 bytes 4 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf.idx diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index 9e09eeba8a9..cf9f9d18310 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -758,6 +758,7 @@ public List callRegion(final AssemblyRegion region, final Featur final List givenAlleles = features.getValues(hcArgs.alleles).stream() .filter(vc -> hcArgs.forceCallFiltered || vc.isNotFiltered()) .flatMap(vc -> GATKVariantContextUtils.splitVariantContextToEvents(vc, false, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, false).stream()) + .filter(event -> event.getStart() >= region.getSpan().getStart()) // filter out events that do not start within the region, as they cannot be emitted and genotyped byt this assembly region even if they may affect assembly/genotyping .collect(Collectors.toList()); if( givenAlleles.isEmpty() && region.size() == 0 ) { diff --git 
a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java index cddb87c5dac..1e059b444b5 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java @@ -671,7 +671,8 @@ public void testFloorGVCFBlocks(final String inputFileName, final String referen public Object[][] getForceCallingInputs() { return new Object[][] { {NA12878_20_21_WGS_bam, new File(TEST_FILES_DIR, "testGenotypeGivenAllelesMode_givenAlleles.vcf"), "20:10000000-10010000"}, - {NA12878_20_21_WGS_bam, new File(toolsTestDir, "mutect/gga_mode.vcf"), "20:9998500-10010000"} + {NA12878_20_21_WGS_bam, new File(toolsTestDir, "mutect/gga_mode.vcf"), "20:9998500-10010000"}, + {NA12878_20_21_WGS_bam, new File(TEST_FILES_DIR, "testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf"), "20:9998500-10010000"} // This is designed to test https://github.com/broadinstitute/gatk/issues/8675, which stemmed from an edge case in the force calling logic where a deletion allele that is longer than the assembly window padding spans into the assembly window. This tests that we do not see an exception in this case. 
}; } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf new file mode 100644 index 00000000000..e36b2052864 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf @@ -0,0 +1,29 @@ +##fileformat=VCFv4.2 +##FILTER= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##INFO= +##contig= +##contig= +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 +20 10000694 . GAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAAGAAAAAA A . . . GT 0|1 diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf.idx b/src/test/resources/org/broadinstitute/hellbender/tools/haplotypecaller/testGenotypeGivenAllelesMode_givenAlleles_ExtremeLengthDeletion.vcf.idx new file mode 100644 index 0000000000000000000000000000000000000000..f469a4b8452bb36aea9d4360945adfaccb3d7ab2 GIT binary patch literal 359 zcmZ8cO-sW-5M4iR;? 
Date: Mon, 11 Mar 2024 16:11:03 -0400 Subject: [PATCH 2/2] some comments changed && git push --- .../tools/walkers/haplotypecaller/HaplotypeCallerEngine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index cf9f9d18310..36918f1fb91 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -758,7 +758,7 @@ public List callRegion(final AssemblyRegion region, final Featur final List givenAlleles = features.getValues(hcArgs.alleles).stream() .filter(vc -> hcArgs.forceCallFiltered || vc.isNotFiltered()) .flatMap(vc -> GATKVariantContextUtils.splitVariantContextToEvents(vc, false, GenotypeAssignmentMethod.BEST_MATCH_TO_ORIGINAL, false).stream()) - .filter(event -> event.getStart() >= region.getSpan().getStart()) // filter out events that do not start within the region, as they cannot be emitted and genotyped byt this assembly region even if they may affect assembly/genotyping + .filter(event -> event.getStart() >= region.getSpan().getStart()) // filter out events that do not start within the region. This approach works because events that begin upstream of the calling window cannot be called by this region calling code in the first place. .collect(Collectors.toList()); if( givenAlleles.isEmpty() && region.size() == 0 ) {