Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Be explicit about when VariantContexts are biallelic #8332

Merged
merged 14 commits into from
Jun 12, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ protected List<Funcotation> createFuncotationsOnVariant(final VariantContext var
// funcotation factory variant.
// matchIndices length will always be the same as the number of alt alleles in the variant (first parameter)
// Note that this is not the same length as the featureVariant.
final int[] matchIndices = GATKVariantContextUtils.matchAllelesOnly(variant, featureVariant);
final int[] matchIndices = GATKVariantContextUtils.matchAllelesAndStart(variant, featureVariant);

for (int i = 0; i < matchIndices.length; i++) {
final int matchIndex = matchIndices[i];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.MathUtils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.haplotype.EventMap;
import org.broadinstitute.hellbender.utils.haplotype.Haplotype;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
Expand Down Expand Up @@ -64,9 +65,9 @@ public static Triple<int[], int[], double[]> annotate(final VariantContext vc,
// encode each haplotype as a string of variant starts and alt allele strings, excluding the locus of vc (to avoid reference/germline bias)
// note that VariantContexts in an EventMap are always biallelic, so var.getAlternateAllele(0) is valid
final Map<String, List<Haplotype>> haplotypeGroups = haplotypeLikelihoods.alleles().stream()
.collect(Collectors.groupingBy(hap -> hap.getEventMap().getVariantContexts().stream()
.filter(var -> var.getStart() != vc.getStart())
.map(var -> var.getStart() + var.getAlternateAllele(0).getBaseString())
.collect(Collectors.groupingBy(hap -> hap.getEventMap().getEvents().stream()
.filter(event -> event.getStart() != vc.getStart())
.map(event -> event.getStart() + event.altAllele().getBaseString())
.collect(Collectors.joining())));

// sum the read support counts for all haplotypes within each group
Expand Down Expand Up @@ -143,16 +144,16 @@ public List<String> getKeyNames() {
// k bases longer we simply check that the event map alt allele matches the variant context alt allele excluding the
// latter's last k bases
private static boolean containsAltAllele(final EventMap eventMap, final VariantContext vc, final int altAlleleIndex) {
final List<VariantContext> overlapping = eventMap.getOverlappingEvents(vc.getStart());
final List<Event> overlapping = eventMap.getOverlappingEvents(vc.getStart());
if (overlapping.isEmpty()) {
return false;
} else if (overlapping.get(0).getStart() != vc.getStart()) {
return false;
} else {
final VariantContext eventMapVC = overlapping.get(0);
final int excessBases = vc.getReference().length() - eventMapVC.getReference().length();
final Event overlappingEvent = overlapping.get(0);
final int excessBases = vc.getReference().length() - overlappingEvent.refAllele().length();

return equalBasesExcludingSuffix(eventMapVC.getAlternateAllele(0).getBases(),
return equalBasesExcludingSuffix(overlappingEvent.altAllele().getBases(),
vc.getAlternateAllele(altAlleleIndex).getBases(), excessBases);
}
}
Expand All @@ -177,12 +178,11 @@ private static boolean equalBasesExcludingSuffix(final byte[] eventMapBases, fin
}

// count variants in one haplotype but not the other
// note that we use the fact that EventMap VariantContexts are biallelic
private static int uniqueVariants(final Haplotype hap1, final Haplotype hap2, final int excludedPosition) {
final EventMap eventMap2 = hap2.getEventMap();
return (int) hap1.getEventMap().getVariantContexts().stream()
.filter(vc -> vc.getStart() != excludedPosition)
.filter(vc -> !containsAltAllele(eventMap2, vc, 0))
return (int) hap1.getEventMap().getEvents().stream()
.filter(event -> event.getStart() != excludedPosition)
.filter(event -> !event.equals(eventMap2.get(event.getStart())))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

are these comparisons equivalent?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's equivalent, and in any case this is an M3 annotation so I would only be harming myself.

.count();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.AlleleLikelihoods;
import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
Expand Down Expand Up @@ -48,6 +49,12 @@ public Map<String, Object> annotate(final ReferenceContext ref,
return Collections.unmodifiableMap(map);
}

public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final ReferenceContext ref, final Event event) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

gross....

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unavoidable i guess

Copy link
Contributor Author

@davidbenjamin davidbenjamin Jun 11, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I could refactor the tandem repeat code but it would be hard to justify that as a contribution to DRAGEN GATK.

final byte[] refBases = ref.getBases();
final int startIndex = event.getStart() + 1 - ref.getWindow().getStart(); // +1 to exclude leading match base common to VC's ref and alt alleles
return GATKVariantContextUtils.getNumTandemRepeatUnits(event.refAllele(), Collections.singletonList(event.altAllele()), Arrays.copyOfRange(refBases, startIndex, refBases.length));
}

public static Pair<List<Integer>, byte[]> getNumTandemRepeatUnits(final ReferenceContext ref, final VariantContext vc) {
final byte[] refBases = ref.getBases();
final int startIndex = vc.getStart() + 1 - ref.getWindow().getStart(); // +1 to exclude leading match base common to VC's ref and alt alleles
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeAssignmentMethod;
import org.broadinstitute.hellbender.utils.SimpleInterval;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils;

import java.util.ArrayList;
Expand Down Expand Up @@ -162,7 +163,7 @@ private static String getRsID(final List<VariantContext> rsIDSourceVCs, final Va
Utils.nonNull(vcToAnnotate, "vcToAnnotate cannot be null");
final List<String> rsids = new ArrayList<>();

final List<VariantContext> vcAnnotateList = GATKVariantContextUtils.splitVariantContextToBiallelics(vcToAnnotate, true,
final List<Event> vcAnnotateList = GATKVariantContextUtils.splitVariantContextToEvents(vcToAnnotate, true,
GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS, true);

for ( final VariantContext vcCompSource : rsIDSourceVCs ) {
Expand All @@ -174,12 +175,12 @@ private static String getRsID(final List<VariantContext> rsIDSourceVCs, final Va
throw new IllegalArgumentException("source rsID VariantContext " + vcCompSource + " is not on same chromosome as vcToAnnotate " + vcToAnnotate);
}

final List<VariantContext> vcCompList = GATKVariantContextUtils.splitVariantContextToBiallelics(vcCompSource, true,
final List<Event> vcCompList = GATKVariantContextUtils.splitVariantContextToEvents(vcCompSource, true,
GenotypeAssignmentMethod.SET_TO_NO_CALL_NO_ANNOTATIONS, true);
boolean addThisID = false;
for (final VariantContext vcComp : vcCompList) {
for (final VariantContext vcToAnnotateBi : vcAnnotateList) {
if (vcComp.getStart() == vcToAnnotateBi.getStart() && vcToAnnotateBi.getReference().equals(vcComp.getReference()) && vcComp.getAlternateAlleles().equals(vcToAnnotateBi.getAlternateAlleles())) {
for (final Event vcComp : vcCompList) {
for (final Event vcToAnnotateBi : vcAnnotateList) {
if (vcComp.equals(vcToAnnotateBi)) {
addThisID = true;
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AssemblyBasedCallerUtils;
import org.broadinstitute.hellbender.utils.*;
import org.broadinstitute.hellbender.utils.genotyper.SampleList;
import org.broadinstitute.hellbender.utils.haplotype.Event;
import org.broadinstitute.hellbender.utils.logging.OneShotLogger;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
Expand Down Expand Up @@ -124,7 +125,7 @@ public Config getConfiguration() {
* @param vc Input variant context to complete.
* @return VC with assigned genotypes
*/
public VariantContext calculateGenotypes(final VariantContext vc, final GenotypePriorCalculator gpc, final List<VariantContext> givenAlleles) {
jamesemery marked this conversation as resolved.
Show resolved Hide resolved
public VariantContext calculateGenotypes(final VariantContext vc, final GenotypePriorCalculator gpc, final List<Event> givenAlleles) {
// if input VC can't be genotyped, exit with either null VCC or, in case where we need to emit all sites, an empty call
if (cannotBeGenotyped(vc) || vc.getNSamples() == 0) {
return null;
Expand All @@ -150,7 +151,7 @@ public VariantContext calculateGenotypes(final VariantContext vc, final Genotype
}

final AFCalculationResult AFresult = alleleFrequencyCalculator.calculate(reducedVC, defaultPloidy);
final Set<Allele> forcedAlleles = AssemblyBasedCallerUtils.getAllelesConsistentWithGivenAlleles(givenAlleles, vc);
final Set<Allele> forcedAlleles = AssemblyBasedCallerUtils.allelesConsistentWithGivenAlleles(givenAlleles, vc);
final OutputAlleleSubset outputAlternativeAlleles = calculateOutputAlleleSubset(AFresult, vc, forcedAlleles);

// note the math.abs is necessary because -10 * 0.0 => -0.0 which isn't nice
Expand Down Expand Up @@ -289,7 +290,7 @@ public List<Integer> alternativeAlleleMLECounts() {
* Provided the exact mode computations it returns the appropriate subset of alleles that progress to genotyping.
* @param afCalculationResult the allele fraction calculation result.
* @param vc the variant context
* @param forcedAlleles alleles from the vc input that are consistent with forced alleles in the assembly region {@link AssemblyBasedCallerUtils#getAllelesConsistentWithGivenAlleles}
* @param forcedAlleles alleles from the vc input that are consistent with forced alleles in the assembly region {@link AssemblyBasedCallerUtils#allelesConsistentWithGivenAlleles}
* @return information about the alternative allele subsetting {@code null}.
*/
private OutputAlleleSubset calculateOutputAlleleSubset(final AFCalculationResult afCalculationResult, final VariantContext vc, final Set<Allele> forcedAlleles) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package org.broadinstitute.hellbender.tools.walkers.genotyper.afcalc;

import htsjdk.variant.variantcontext.*;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.GenotypeBuilder;
import htsjdk.variant.variantcontext.VariantContext;
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap;
import org.apache.commons.math3.special.Gamma;
Expand All @@ -10,11 +13,9 @@
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeCalculationArgumentCollection;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypeIndexCalculator;
import org.broadinstitute.hellbender.tools.walkers.genotyper.GenotypingLikelihoods;
import org.broadinstitute.hellbender.tools.walkers.haplotypecaller.AlleleAndContext;
import org.broadinstitute.hellbender.utils.*;
import org.broadinstitute.hellbender.utils.dragstr.DragstrParams;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -154,22 +155,13 @@ public AFCalculationResult fastCalculateDiploidBasedOnGLs(final GenotypingLikeli
final int numAlleles = gls.numberOfAlleles();
final List<Allele> alleles = gls.asListOfAlleles();

final List<Integer> alleleLengths = new ArrayList<>();
for (Allele al : gls.asListOfAlleles()) {
if (al instanceof AlleleAndContext) {
alleleLengths.add(((AlleleAndContext) al).maxAlleleLength());
} else {
alleleLengths.add(al.length());
}
}
final int alleleLength = alleleLengths.stream().max(Integer::compare).get();
final int alleleLength = gls.asListOfAlleles().stream().map(Allele::length).max(Integer::compare).get();

final List<String> samples = gls.asListOfSamples();
final List<Genotype> genotypes = IntStream.range(0, samples.size()).mapToObj(idx -> new GenotypeBuilder(samples.get(idx)).alleles(alleles).PL(gls.sampleLikelihoods(idx).getAsPLs()).make()).collect(Collectors.toList());
return calculate(numAlleles, alleles, genotypes, defaultPloidy, alleleLength);
}


/**
* Private function that actually calculates allele frequencies etc.
*
Expand Down

This file was deleted.

Loading