diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java index 1cdc9bbea00..0425327a30e 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingEngine.java @@ -515,8 +515,8 @@ protected final VariantCallContext emptyCallContext(final FeatureContext feature VariantContext vc; if ( configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { - final VariantContext ggaVc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(features, - rawContext.getLocation(), false, configuration.genotypeFilteredAlleles, configuration.alleles); + final VariantContext ggaVc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromVariantList(features, + rawContext.getLocation(), configuration.genotypeFilteredAlleles, configuration.alleles); if (ggaVc == null) { return null; } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java index 706eab47f41..986478d833b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtils.java @@ -18,49 +18,51 @@ import java.util.Set; import java.util.stream.Collectors; +import static org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED; +import static org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils.FilteredRecordMergeType.KEEP_UNCONDITIONAL; + /** * Compendium of utils to work in GENOTYPE_GIVEN_ALLELES mode. */ public final class GenotypingGivenAllelesUtils { /** - * Composes the given allele variant-context providing information about the rods and reference location. + * Composes the given allele variant-context providing information about the given allele variants and reference location. * @param tracker the meta data tracker. * @param loc the query location. - * @param snpsOnly whether we only should consider SNP variation. * @param keepFiltered whether to include filtered variants * @param allelesBinding the target variation context binding containing the given alleles. * @return never {@code null} */ - public static VariantContext composeGivenAllelesVariantContextFromRod(final FeatureContext tracker, - final Locatable loc, - final boolean snpsOnly, - final boolean keepFiltered, - final FeatureInput allelesBinding) { + public static VariantContext composeGivenAllelesVariantContextFromVariantList(final FeatureContext tracker, + final Locatable loc, + final boolean keepFiltered, + final FeatureInput allelesBinding) { Utils.nonNull(tracker, "tracker may not be null"); Utils.nonNull(loc, "location may not be null"); Utils.nonNull(allelesBinding, "alleles binding may not be null"); final List variantContextsInFeatureContext = tracker.getValues(allelesBinding, new SimpleInterval(loc)); - return composeGivenAllelesVariantContextFromVariantList(variantContextsInFeatureContext, loc, snpsOnly, keepFiltered); + return composeGivenAllelesVariantContextFromVariantList(variantContextsInFeatureContext, loc, keepFiltered); } @VisibleForTesting - protected static VariantContext composeGivenAllelesVariantContextFromVariantList(final List variantContextsInFeatureContext, final Locatable loc, final boolean snpsOnly, final boolean keepFiltered) { - final List rodVcsAtLoc = variantContextsInFeatureContext + protected static VariantContext composeGivenAllelesVariantContextFromVariantList(final List variantContextsInFeatureContext, + final Locatable loc, + final boolean keepFiltered) { + final List vcsAtLoc = variantContextsInFeatureContext .stream() .filter(vc -> vc.getStart() == loc.getStart() && - (keepFiltered || vc.isNotFiltered()) && - (!snpsOnly || vc.isSNP())) + (keepFiltered || vc.isNotFiltered())) .collect(Collectors.toList()); - if (rodVcsAtLoc.isEmpty()) { + if (vcsAtLoc.isEmpty()) { return null; } - final List haplotypeSources = rodVcsAtLoc.stream().map(VariantContext::getSource).collect(Collectors.toList()); - final VariantContext mergedVc = GATKVariantContextUtils.simpleMerge(rodVcsAtLoc, haplotypeSources, - GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, + final List haplotypeSources = vcsAtLoc.stream().map(VariantContext::getSource).collect(Collectors.toList()); + final VariantContext mergedVc = GATKVariantContextUtils.simpleMerge(vcsAtLoc, haplotypeSources, + keepFiltered ? KEEP_UNCONDITIONAL : KEEP_IF_ANY_UNFILTERED, GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false); return mergedVc; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerGenotypingEngine.java index dc661e83602..5e0c9aa9fde 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerGenotypingEngine.java @@ -113,8 +113,9 @@ protected TreeSet decomposeHaplotypesIntoVariantContexts(final List getVCsAtThisLocation(final List haplotypes, - final int loc, - final List activeAllelesToGenotype) { - return getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype, false); + protected static List getVariantContextsFromGivenAlleles(final int loc, + final List activeAllelesToGenotype, + final boolean includeSpanningEvents) { + final Set uniqueLocationsAndAlleles = new HashSet<>(); + final List results = new ArrayList<>(); + + int givenAlleleSourceCount = 0; + for( final VariantContext givenAlleleVC : activeAllelesToGenotype ) { + if( givenAlleleVC.getStart() <= loc && givenAlleleVC.getEnd() >= loc) { + if (! (includeSpanningEvents || givenAlleleVC.getStart() == loc)) { + continue; + } + int alleleCount = 0; + for( final Allele givenAltAllele : givenAlleleVC.getAlternateAlleles() ) { + final List alleleSet = Arrays.asList(givenAlleleVC.getReference(), givenAltAllele); + + //TODO: this source name seems arbitrary and probably just has to be unique + //TODO: how about replace it by vcSourceName = String.parseInt(nameCounter++)? + final String vcSourceName = "Comp" + givenAlleleSourceCount + "Allele" + alleleCount; + // check if this event is already in the list of events due to a repeat in the input alleles track + final VariantContext candidateEventToAdd = new VariantContextBuilder(givenAlleleVC).alleles(alleleSet).source(vcSourceName).make(); + + final LocationAndAlleles locationAndAlleles = new LocationAndAlleles(candidateEventToAdd.getStart(), candidateEventToAdd.getAlleles()); + if (! uniqueLocationsAndAlleles.contains(locationAndAlleles)) { + uniqueLocationsAndAlleles.add(locationAndAlleles); + results.add(candidateEventToAdd); + } + + alleleCount++; + } + } + givenAlleleSourceCount++; + } + return results; } /** - * Returns the list of variant contexts active at this location, using either the list of active haplotypes or - * the list of active alleles to genotypes if we are in GGA mode as candidates. This method will include events - * that span the current location if includeSpanningEvents is set to true; otherwise it will only include events that - * have loc as their start position. - * @param haplotypes Haplotypes for the current active region. + * Returns the list of events discovered in assembled haplotypes that are active at this location. The results will + * include events that span the current location if includeSpanningEvents is set to true; otherwise it will only + * include events that have loc as their start position. * @param loc The start position we are genotyping - * @param activeAllelesToGenotype The list of given alleles for the current active region, empty unless we are in GGA mode + * @param haplotypes list of active haplotypes at the current location * @param includeSpanningEvents If true, will also return events that span loc */ - protected static List getVCsAtThisLocation(final List haplotypes, - final int loc, - final List activeAllelesToGenotype, - final boolean includeSpanningEvents) { - // the overlapping events to merge into a common reference view + protected static List getVariantContextsFromActiveHaplotypes(final int loc, + final List haplotypes, + final boolean includeSpanningEvents) { final List results = new ArrayList<>(); final Set uniqueLocationsAndAlleles = new HashSet<>(); - if( activeAllelesToGenotype.isEmpty() ) { - haplotypes.stream() - .flatMap(h -> Utils.stream(h.getEventMap().getSpanningEvents(loc))) - .filter(Objects::nonNull) - .filter(v -> (includeSpanningEvents || v.getStart() == loc)) - .forEach(v -> { - final LocationAndAlleles locationAndAlleles = new LocationAndAlleles(v.getStart(), v.getAlleles()); - if (! uniqueLocationsAndAlleles.contains(locationAndAlleles)) { - uniqueLocationsAndAlleles.add(locationAndAlleles); - results.add(v); - } - }); - } else { // we are in GGA mode! - int compCount = 0; - for( final VariantContext compVC : activeAllelesToGenotype ) { - if( compVC.getStart() <= loc && compVC.getEnd() >= loc) { - if (! (includeSpanningEvents || compVC.getStart() == loc)) { - continue; - } - int alleleCount = 0; - for( final Allele compAltAllele : compVC.getAlternateAlleles() ) { - final List alleleSet = Arrays.asList(compVC.getReference(), compAltAllele); - - //TODO: this source name seems arbitrary and probably just has to be unique - //TODO: how about replace it by vcSourceName = String.parseInt(nameCounter++)? - final String vcSourceName = "Comp" + compCount + "Allele" + alleleCount; - // check if this event is already in the list of events due to a repeat in the input alleles track - final VariantContext candidateEventToAdd = new VariantContextBuilder(compVC).alleles(alleleSet).source(vcSourceName).make(); - - final LocationAndAlleles locationAndAlleles = new LocationAndAlleles(candidateEventToAdd.getStart(), candidateEventToAdd.getAlleles()); - if (! uniqueLocationsAndAlleles.contains(locationAndAlleles)) { - uniqueLocationsAndAlleles.add(locationAndAlleles); - results.add(candidateEventToAdd); - } - - alleleCount++; + haplotypes.stream() + .flatMap(h -> Utils.stream(h.getEventMap().getOverlappingEvents(loc))) + .filter(Objects::nonNull) + .filter(v -> (includeSpanningEvents || v.getStart() == loc)) + .forEach(v -> { + final LocationAndAlleles locationAndAlleles = new LocationAndAlleles(v.getStart(), v.getAlleles()); + if (! uniqueLocationsAndAlleles.contains(locationAndAlleles)) { + uniqueLocationsAndAlleles.add(locationAndAlleles); + results.add(v); } - } - compCount++; - } - } - + }); return results; } @@ -274,12 +268,14 @@ protected static Map> createAlleleMapper(final VariantCo for (final Haplotype h : haplotypes) { - final Iterator spanningEvents = h.getEventMap().getSpanningEvents(loc); + final List spanningEvents = h.getEventMap().getOverlappingEvents(loc); - final boolean hasSpanningEvents = spanningEvents.hasNext(); + if (spanningEvents.isEmpty()) { //no events --> this haplotype supports the reference at this locus + result.get(ref).add(h); + continue; + } - while (spanningEvents.hasNext()) { - final VariantContext spanningEvent = spanningEvents.next(); + for (VariantContext spanningEvent : spanningEvents) { if (spanningEvent.getStart() == loc) { // the event starts at the current location @@ -287,6 +283,8 @@ protected static Map> createAlleleMapper(final VariantCo // reference allele lengths are equal; we can just use the spanning event's alt allele // in the case of GGA mode the spanning event might not match an allele in the mergedVC if (result.containsKey(spanningEvent.getAlternateAllele(0))) { + // variant contexts derived from the event map have only one alt allele each, so we can just + // grab the first one (we're not assuming that the sample is biallelic) result.get(spanningEvent.getAlternateAllele(0)).add(h); } } else if (spanningEvent.getReference().length() < mergedVC.getReference().length()) { @@ -310,8 +308,8 @@ protected static Map> createAlleleMapper(final VariantCo if (activeAllelesToGenotype != null && activeAllelesToGenotype.size() > 0) { // in HC GGA mode we need to check to make sure that spanning deletion // events actually match one of the alleles we were given to genotype - VariantContext matchingGivenVc = findMatchingGivenAllele(activeAllelesToGenotype, spanningEvent); - if (matchingGivenVc != null) { + final boolean eventMatchesGivenAllele = eventMatchesGivenAllele(activeAllelesToGenotype, spanningEvent); + if (eventMatchesGivenAllele) { if (!result.containsKey(Allele.SPAN_DEL)) { result.put(Allele.SPAN_DEL, new ArrayList<>()); } @@ -328,25 +326,22 @@ protected static Map> createAlleleMapper(final VariantCo } } - if (! hasSpanningEvents) { //no events --> this haplotype supports the reference at this locus - result.get(ref).add(h); - } } return result; } - private static VariantContext findMatchingGivenAllele(final List activeAllelesToGenotype, final VariantContext spanningEvent) { + private static boolean eventMatchesGivenAllele(final List activeAllelesToGenotype, final VariantContext spanningEvent) { for (VariantContext givenVC : activeAllelesToGenotype) { if (givenVC.getStart() == spanningEvent.getStart() && givenVC.getReference().equals(spanningEvent.getReference())) { for (Allele a : spanningEvent.getAlternateAlleles()) { if (givenVC.hasAlternateAllele(a)) { - return givenVC; + return true; } } } } - return null; + return false; } // Builds the read-likelihoods collection to use for annotation considering user arguments and the collection diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java index 357c8b26121..9f0ed7a8b43 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerEngine.java @@ -442,8 +442,9 @@ public void writeHeader( final VariantContextWriter vcfWriter, final SAMSequence public ActivityProfileState isActive( final AlignmentContext context, final ReferenceContext ref, final FeatureContext features ) { if ( hcArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { - final VariantContext vcFromAllelesRod = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(features, ref.getInterval(), false, hcArgs.genotypeFilteredAlleles, hcArgs.alleles); - if( vcFromAllelesRod != null ) { + final VariantContext vcFromGivenAlleles = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromVariantList(features, + ref.getInterval(), hcArgs.genotypeFilteredAlleles, hcArgs.alleles); + if( vcFromGivenAlleles != null ) { return new ActivityProfileState(ref.getInterval(), 1.0); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java index 2e4718c9098..e509e3ddd0b 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/HaplotypeCallerGenotypingEngine.java @@ -135,8 +135,15 @@ public CalledHaplotypes assignGenotypeLikelihoods(final List haplotyp continue; } + final List activeEventVariantContexts; + if( activeAllelesToGenotype.isEmpty() ) { + activeEventVariantContexts = getVariantContextsFromActiveHaplotypes(loc, haplotypes, true); + } else { // we are in GGA mode! + activeEventVariantContexts = getVariantContextsFromGivenAlleles(loc, activeAllelesToGenotype, true); + } + final List eventsAtThisLocWithSpanDelsReplaced = - replaceSpanDels(getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype, true), + replaceSpanDels(activeEventVariantContexts, Allele.create(ref[loc - refLoc.getStart()], true), loc); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java index 159f412627f..87676edf933 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2Engine.java @@ -227,8 +227,9 @@ public void shutdown() { @Override public ActivityProfileState isActive(final AlignmentContext context, final ReferenceContext ref, final FeatureContext featureContext) { if ( MTAC.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) { - final VariantContext vcFromAllelesRod = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(featureContext, ref.getInterval(), false, MTAC.genotypeFilteredAlleles, MTAC.alleles); - if( vcFromAllelesRod != null ) { + final VariantContext vcFromGivenAlleles = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromVariantList(featureContext, + ref.getInterval(), MTAC.genotypeFilteredAlleles, MTAC.alleles); + if( vcFromGivenAlleles != null ) { return new ActivityProfileState(ref.getInterval(), 1.0); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java index 5f16c8e02f4..7f70aa2dd0f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/SomaticGenotypingEngine.java @@ -104,9 +104,7 @@ public CalledHaplotypes callMutations( final List returnCalls = new ArrayList<>(); for( final int loc : startPosKeySet ) { - // Note: as above, passing an empty list of activeAllelesToGenotype is the correct behavior even when givenAlleles is - // non-empty, for the same reason. If you don't believe this, check {@code testGivenAllelesMode} in {@link Mutect2IntegrationTest}. - final List eventsAtThisLoc = getVCsAtThisLocation(haplotypes, loc, Collections.emptyList()); + final List eventsAtThisLoc = getVariantContextsFromActiveHaplotypes(loc, haplotypes, false); final VariantContext mergedVC = AssemblyBasedCallerUtils.makeMergedVariantContext(eventsAtThisLoc); if( mergedVC == null ) { continue; @@ -188,7 +186,7 @@ public CalledHaplotypes callMutations( } private Set getAllelesConsistentWithGivenAlleles(List givenAlleles, int loc, VariantContext mergedVC) { - final List> givenAltAndRefAllelesInOriginalContext = getVCsAtThisLocation(Collections.emptyList(), loc, givenAlleles).stream() + final List> givenAltAndRefAllelesInOriginalContext = getVariantContextsFromGivenAlleles(loc, givenAlleles, false).stream() .flatMap(vc -> vc.getAlternateAlleles().stream().map(allele -> ImmutablePair.of(allele, vc.getReference()))).collect(Collectors.toList()); return mergedVC.getAlternateAlleles().stream() diff --git a/src/main/java/org/broadinstitute/hellbender/utils/haplotype/EventMap.java b/src/main/java/org/broadinstitute/hellbender/utils/haplotype/EventMap.java index 466abbbb8f2..7ebdc280824 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/haplotype/EventMap.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/haplotype/EventMap.java @@ -11,17 +11,13 @@ import org.apache.logging.log4j.Logger; import org.broadinstitute.hellbender.exceptions.GATKException; import org.broadinstitute.hellbender.utils.BaseUtils; -import org.broadinstitute.hellbender.utils.IndexRange; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.param.ParamUtils; import org.broadinstitute.hellbender.utils.read.AlignmentUtils; import java.io.Serializable; import java.util.*; -import java.util.function.IntPredicate; import java.util.stream.Collectors; -import java.util.stream.IntStream; -import java.util.stream.Stream; /** * Extract simple VariantContext events from a single haplotype @@ -396,8 +392,11 @@ public static TreeSet buildEventMapsForHaplotypes( final List getSpanningEvents(final int loc) { - return headMap(loc, true).values().stream().filter(v -> v.getEnd() >= loc).iterator(); + /** + * Returns any events in the map that overlap loc, including spanning deletions and events that start at loc. + */ + public List getOverlappingEvents(final int loc) { + return headMap(loc, true).values().stream().filter(v -> v.getEnd() >= loc).collect(Collectors.toList()); } private static class VariantContextComparator implements Comparator, Serializable { diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtilsUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtilsUnitTest.java index 05470da2c1e..c04e9d5028f 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtilsUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/genotyper/GenotypingGivenAllelesUtilsUnitTest.java @@ -5,7 +5,6 @@ import htsjdk.variant.variantcontext.VariantContextBuilder; import org.broadinstitute.hellbender.utils.SimpleInterval; import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; -import org.testng.Assert; import org.testng.annotations.Test; import java.util.Arrays; @@ -15,26 +14,26 @@ public class GenotypingGivenAllelesUtilsUnitTest { @Test - public void testComposeGivenAllelesVariantContextFromRod() { + public void testComposeGivenAllelesVariantContextFromVariantList() { - final SimpleInterval loc = new SimpleInterval("20", 10093568, 10093568); + final SimpleInterval loc = new SimpleInterval("20", 68, 68); final List sameLocDelAlleles1 = Arrays.asList(Allele.create("GT", true), Allele.create("G")); - final VariantContext sameLocDelVc1 = new VariantContextBuilder("a", "20", 10093568, 10093569, sameLocDelAlleles1).make(); + final VariantContext sameLocDelVc1 = new VariantContextBuilder("a", "20", 68, 69, sameLocDelAlleles1).make(); final List sameLocSnpAlleles1 = Arrays.asList(Allele.create("G", true), Allele.create("C")); - final VariantContext sameLocSnpVc1 = new VariantContextBuilder("a", "20", 10093568, 10093568, sameLocSnpAlleles1).make(); + final VariantContext sameLocSnpVc1 = new VariantContextBuilder("a", "20", 68, 68, sameLocSnpAlleles1).make(); final List spanningDelAlleles1 = Arrays.asList(Allele.create("ATGTA", true), Allele.create("A")); - final VariantContext spanningDelVc1 = new VariantContextBuilder("a", "20", 10093566, 10093570, spanningDelAlleles1).make(); + final VariantContext spanningDelVc1 = new VariantContextBuilder("a", "20", 66, 70, spanningDelAlleles1).make(); final List expectedAlleles = Arrays.asList(Allele.create("GT", true), Allele.create("G"), Allele.create("CT")); - final VariantContext expectedVC = new VariantContextBuilder("a", "20", 10093568, 10093569, expectedAlleles).make(); + final VariantContext expectedVC = new VariantContextBuilder("a", "20", 68, 69, expectedAlleles).make(); final VariantContext variantContext = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromVariantList(Arrays.asList(spanningDelVc1, sameLocDelVc1, sameLocSnpVc1), loc, - false, true); + true); VariantContextTestUtils.assertVariantContextsAreEqual(variantContext, expectedVC, Collections.emptyList()); } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerGenotypingEngineUnitTest.java b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerGenotypingEngineUnitTest.java index df0cc6937ac..d572a8fde12 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerGenotypingEngineUnitTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/walkers/haplotypecaller/AssemblyBasedCallerGenotypingEngineUnitTest.java @@ -16,11 +16,11 @@ public class AssemblyBasedCallerGenotypingEngineUnitTest extends GATKBaseTest { - @DataProvider(name = "getVcsAtThisLocation") - public Object[][] getVcsAtThisLocationData() { + @DataProvider(name = "getVariantContextsFromActiveHaplotypes") + public Object[][] getVariantContextsFromActiveHaplotypesData() { final List tests = new ArrayList<>(); - tests.add(new Object[]{new ArrayList<>(), 1000, new ArrayList<>(), new ArrayList<>()}); + tests.add(new Object[]{new ArrayList<>(), 1000, new ArrayList<>()}); final Haplotype snpHaplotype = new Haplotype("ACTGGTCAACTGGTCAACTGGTCAACTGGTCA".getBytes()); final List snpAlleles = Arrays.asList(Allele.create("A", true), Allele.create("G")); @@ -59,14 +59,14 @@ public Object[][] getVcsAtThisLocationData() { final VariantContext deletionVcNoSpan = deletionVcNoSpanBuilder.make(); deletionHaplotypeNoSpan.setEventMap(new EventMap(Arrays.asList(deletionVcNoSpan))); - tests.add(new Object[]{Arrays.asList(snpHaplotype), 1000, new ArrayList<>(), Arrays.asList(snpVc)}); - tests.add(new Object[]{Arrays.asList(snpHaplotype, snpHaplotypeDuplicate), 1000, new ArrayList<>(), Arrays.asList(snpVc)}); - tests.add(new Object[]{Arrays.asList(deletionHaplotype), 995, new ArrayList<>(), Arrays.asList(deletionVc)}); - tests.add(new Object[]{Arrays.asList(deletionHaplotype), 1000, new ArrayList<>(), Arrays.asList(deletionVc)}); - tests.add(new Object[]{Arrays.asList(deletionHaplotype, deletionHaplotypeNoSpan), 1000, new ArrayList<>(), Arrays.asList(deletionVc)}); - tests.add(new Object[]{Arrays.asList(deletionHaplotype, deletionHaplotypeFalseDuplicate, deletionHaplotypeNoSpan), 1000, new ArrayList<>(), Arrays.asList(deletionVc, deletionVcFalseDuplicate)}); + tests.add(new Object[]{Arrays.asList(snpHaplotype), 1000, Arrays.asList(snpVc)}); + tests.add(new Object[]{Arrays.asList(snpHaplotype, snpHaplotypeDuplicate), 1000, Arrays.asList(snpVc)}); + tests.add(new Object[]{Arrays.asList(deletionHaplotype), 995, Arrays.asList(deletionVc)}); + tests.add(new Object[]{Arrays.asList(deletionHaplotype), 1000, Arrays.asList(deletionVc)}); + tests.add(new Object[]{Arrays.asList(deletionHaplotype, deletionHaplotypeNoSpan), 1000, Arrays.asList(deletionVc)}); + tests.add(new Object[]{Arrays.asList(deletionHaplotype, deletionHaplotypeFalseDuplicate, deletionHaplotypeNoSpan), 1000, Arrays.asList(deletionVc, deletionVcFalseDuplicate)}); - tests.add(new Object[]{Arrays.asList(deletionHaplotype, snpHaplotype), 1000, new ArrayList<>(), Arrays.asList(deletionVc, snpVc)}); + tests.add(new Object[]{Arrays.asList(deletionHaplotype, snpHaplotype), 1000, Arrays.asList(deletionVc, snpVc)}); final Haplotype sameLocDelHap1 = new Haplotype("AAAAAAAGAAA".getBytes()); final List sameLocDelAlleles1 = Arrays.asList(Allele.create("GTT", true), Allele.create("G")); @@ -83,27 +83,100 @@ public Object[][] getVcsAtThisLocationData() { final VariantContext sameLocInsVc1 = new VariantContextBuilder("a", "20", 10093568, 10093568, sameLocInsAlleles1).make(); sameLocInsHap1.setEventMap(new EventMap(Arrays.asList(sameLocInsVc1))); - tests.add(new Object[]{Arrays.asList(sameLocDelHap1, sameLocDelHap2, sameLocInsHap1), 10093568, new ArrayList<>(), Arrays.asList(sameLocDelVc1, sameLocDelVc2, sameLocInsVc1)}); + tests.add(new Object[]{Arrays.asList(sameLocDelHap1, sameLocDelHap2, sameLocInsHap1), 10093568, Arrays.asList(sameLocDelVc1, sameLocDelVc2, sameLocInsVc1)}); - tests.add(new Object[]{new ArrayList<>(), 1000, Arrays.asList(snpVc), Arrays.asList(snpVCBuilder.source("Comp0Allele0").make())}); - tests.add(new Object[]{new ArrayList<>(), 995, Arrays.asList(deletionVc), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make())}); - tests.add(new Object[]{new ArrayList<>(), 1000, Arrays.asList(deletionVc), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make())}); - tests.add(new Object[]{new ArrayList<>(), 1000, Arrays.asList(deletionVc, snpVc), + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "getVariantContextsFromActiveHaplotypes") + public void testGetVariantContextsFromActiveHaplotypes(final List haplotypes, + final int loc, + final List expectedVcsAtThisLocation) { + + final List vcsAtThisPosition = AssemblyBasedCallerGenotypingEngine.getVariantContextsFromActiveHaplotypes(loc, haplotypes, true); + Assert.assertEquals(vcsAtThisPosition.size(), expectedVcsAtThisLocation.size()); + for (int i = 0; i < expectedVcsAtThisLocation.size(); i++) { + VariantContextTestUtils.assertVariantContextsAreEqual(vcsAtThisPosition.get(i), expectedVcsAtThisLocation.get(i), new ArrayList<>()); + Assert.assertEquals(vcsAtThisPosition.get(i).getSource(), expectedVcsAtThisLocation.get(i).getSource()); + } + } + + @DataProvider(name = "getVariantContextsFromGivenAlleles") + public Object[][] getVcsAtThisLocationFromGivenAllelesData() { + final List tests = new ArrayList<>(); + + tests.add(new Object[]{1000, new ArrayList<>(), new ArrayList<>()}); + + final Haplotype snpHaplotype = new Haplotype("ACTGGTCAACTGGTCAACTGGTCAACTGGTCA".getBytes()); + final List snpAlleles = Arrays.asList(Allele.create("A", true), Allele.create("G")); + final VariantContextBuilder snpVCBuilder = new VariantContextBuilder("a", "20", 1000, 1000, snpAlleles); + final VariantContext snpVc = snpVCBuilder.make(); + snpHaplotype.setEventMap(new EventMap(Arrays.asList(snpVc))); + + // this one matches the snp haplotype above (to test duplicate removal) + final Haplotype snpHaplotypeDuplicate = new Haplotype("ACTGGTCAACTGGTCAACTGGTCAACTGGACA".getBytes()); + final List snpAlleles2 = Arrays.asList(Allele.create("A", true), Allele.create("G")); + final VariantContextBuilder svpVC2Builder = new VariantContextBuilder("a", "20", 1000, 1000, snpAlleles2); + final VariantContext snpVc2 = svpVC2Builder.make(); + final List snpAlleles3 = Arrays.asList(Allele.create("T", true), Allele.create("A")); + final VariantContextBuilder snpVC3Builder = new VariantContextBuilder("a", "20", 1020, 1020, snpAlleles3); + final VariantContext snpVc3 = snpVC3Builder.make(); + snpHaplotypeDuplicate.setEventMap(new EventMap(Arrays.asList(snpVc2, snpVc3))); + + + final Haplotype deletionHaplotype = new Haplotype("ACTGGTCAGGTCAACTGGTCA".getBytes()); + final List deletionAlleles = Arrays.asList(Allele.create("ACTGGTCAACT", true), Allele.create("A")); + final VariantContextBuilder deletionVCBuilder = new VariantContextBuilder("a", "20", 995, 1005, deletionAlleles); + final VariantContext deletionVc = deletionVCBuilder.make(); + deletionHaplotype.setEventMap(new EventMap(Arrays.asList(deletionVc))); + + // matches the deletion alleles above but at a different position (to catch an edge case in duplicate removal) + final Haplotype deletionHaplotypeFalseDuplicate = new Haplotype("ACTGGTCAGGTCAACTGGTCA".getBytes()); + final List deletionAllelesFalseDuplicate = Arrays.asList(Allele.create("ACTGGTCAACT", true), Allele.create("A")); + final VariantContextBuilder deletionFalseDuplicateBuilder = new VariantContextBuilder("a", "20", 998, 1008, deletionAllelesFalseDuplicate); + final VariantContext deletionVcFalseDuplicate = deletionFalseDuplicateBuilder.make(); + deletionHaplotypeFalseDuplicate.setEventMap(new EventMap(Arrays.asList(deletionVcFalseDuplicate))); + + // doesn't overlap 1000 + final Haplotype deletionHaplotypeNoSpan = new Haplotype("CAACTGGTCAACTGGTCAACTGGTCAACTGGTCAACTGGTCA".getBytes()); + final List deletionAllelesNoSpan = Arrays.asList(Allele.create("GTCAA", true), Allele.create("G")); + final VariantContextBuilder deletionVcNoSpanBuilder = new VariantContextBuilder("a", "20", 990, 994, deletionAllelesNoSpan); + final VariantContext deletionVcNoSpan = deletionVcNoSpanBuilder.make(); + deletionHaplotypeNoSpan.setEventMap(new EventMap(Arrays.asList(deletionVcNoSpan))); + + final Haplotype sameLocDelHap1 = new Haplotype("AAAAAAAGAAA".getBytes()); + final List sameLocDelAlleles1 = Arrays.asList(Allele.create("GTT", true), Allele.create("G")); + final VariantContext sameLocDelVc1 = new VariantContextBuilder("a", "20", 10093568, 10093570, sameLocDelAlleles1).make(); + sameLocDelHap1.setEventMap(new EventMap(Arrays.asList(sameLocDelVc1))); + + final Haplotype sameLocDelHap2 = new Haplotype("AAAAAAAGTAAA".getBytes()); + final List sameLocDelAlleles2 = Arrays.asList(Allele.create("GT", true), Allele.create("G")); + final VariantContext sameLocDelVc2 = new VariantContextBuilder("a", "20", 10093568, 10093569, sameLocDelAlleles2).make(); + sameLocDelHap2.setEventMap(new EventMap(Arrays.asList(sameLocDelVc2))); + + final Haplotype sameLocInsHap1 = new Haplotype("AAAAAAAGTTTAAA".getBytes()); + final List sameLocInsAlleles1 = Arrays.asList(Allele.create("G", true), Allele.create("GT")); + final VariantContext sameLocInsVc1 = new VariantContextBuilder("a", "20", 10093568, 10093568, sameLocInsAlleles1).make(); + sameLocInsHap1.setEventMap(new EventMap(Arrays.asList(sameLocInsVc1))); + + tests.add(new Object[]{1000, Arrays.asList(snpVc), Arrays.asList(snpVCBuilder.source("Comp0Allele0").make())}); + tests.add(new Object[]{995, Arrays.asList(deletionVc), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make())}); + tests.add(new Object[]{1000, Arrays.asList(deletionVc), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make())}); + tests.add(new Object[]{1000, Arrays.asList(deletionVc, snpVc), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make(), snpVCBuilder.source("Comp1Allele0").make())}); - tests.add(new Object[]{new ArrayList<>(), 1000, Arrays.asList(deletionVc, deletionVcNoSpan), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make())}); - tests.add(new Object[]{new ArrayList<>(), 1000, Arrays.asList(deletionVc, deletionVcFalseDuplicate, deletionVcNoSpan), + tests.add(new Object[]{1000, Arrays.asList(deletionVc, deletionVcNoSpan), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make())}); + tests.add(new Object[]{1000, Arrays.asList(deletionVc, deletionVcFalseDuplicate, deletionVcNoSpan), Arrays.asList(deletionVCBuilder.source("Comp0Allele0").make(), deletionFalseDuplicateBuilder.source("Comp1Allele0").make())}); return tests.toArray(new Object[][]{}); } - @Test(dataProvider = "getVcsAtThisLocation") - public void testGetVCsAtThisLocation(final List haplotypes, - final int loc, + @Test(dataProvider = "getVariantContextsFromGivenAlleles") + public void testGetVariantContextsFromGivenAlleles(final int loc, final List activeAllelesToGenotype, final List expectedVcsAtThisLocation) { - final List vcsAtThisPosition = AssemblyBasedCallerGenotypingEngine.getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype, true); + final List vcsAtThisPosition = AssemblyBasedCallerGenotypingEngine.getVariantContextsFromGivenAlleles(loc, activeAllelesToGenotype, true); Assert.assertEquals(vcsAtThisPosition.size(), expectedVcsAtThisLocation.size()); for (int i = 0; i < expectedVcsAtThisLocation.size(); i++) { VariantContextTestUtils.assertVariantContextsAreEqual(vcsAtThisPosition.get(i), expectedVcsAtThisLocation.get(i), new ArrayList<>());