Skip to content

Commit

Permalink
Address PR comments
Browse files Browse the repository at this point in the history
  • Loading branch information
cwhelan committed Jul 3, 2018
1 parent f5cfa06 commit f5ff180
Show file tree
Hide file tree
Showing 10 changed files with 214 additions and 139 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -515,8 +515,8 @@ protected final VariantCallContext emptyCallContext(final FeatureContext feature
VariantContext vc;

if ( configuration.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext ggaVc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(features,
rawContext.getLocation(), false, configuration.genotypeFilteredAlleles, configuration.alleles);
final VariantContext ggaVc = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromVariantList(features,
rawContext.getLocation(), configuration.genotypeFilteredAlleles, configuration.alleles);
if (ggaVc == null) {
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,49 +18,51 @@
import java.util.Set;
import java.util.stream.Collectors;

import static org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
import static org.broadinstitute.hellbender.utils.variant.GATKVariantContextUtils.FilteredRecordMergeType.KEEP_UNCONDITIONAL;

/**
* Compendium of utils to work in GENOTYPE_GIVEN_ALLELES mode.
*/
public final class GenotypingGivenAllelesUtils {

/**
* Composes the given allele variant-context providing information about the rods and reference location.
* Composes the given allele variant-context providing information about the given allele variants and reference location.
* @param tracker the meta data tracker.
* @param loc the query location.
* @param snpsOnly whether we only should consider SNP variation.
* @param keepFiltered whether to include filtered variants
* @param allelesBinding the target variation context binding containing the given alleles.
* @return never {@code null}
*/
public static VariantContext composeGivenAllelesVariantContextFromRod(final FeatureContext tracker,
final Locatable loc,
final boolean snpsOnly,
final boolean keepFiltered,
final FeatureInput<VariantContext> allelesBinding) {
public static VariantContext composeGivenAllelesVariantContextFromVariantList(final FeatureContext tracker,
final Locatable loc,
final boolean keepFiltered,
final FeatureInput<VariantContext> allelesBinding) {
Utils.nonNull(tracker, "tracker may not be null");
Utils.nonNull(loc, "location may not be null");
Utils.nonNull(allelesBinding, "alleles binding may not be null");

final List<VariantContext> variantContextsInFeatureContext = tracker.getValues(allelesBinding, new SimpleInterval(loc));
return composeGivenAllelesVariantContextFromVariantList(variantContextsInFeatureContext, loc, snpsOnly, keepFiltered);
return composeGivenAllelesVariantContextFromVariantList(variantContextsInFeatureContext, loc, keepFiltered);
}

@VisibleForTesting
protected static VariantContext composeGivenAllelesVariantContextFromVariantList(final List<VariantContext> variantContextsInFeatureContext, final Locatable loc, final boolean snpsOnly, final boolean keepFiltered) {
final List<VariantContext> rodVcsAtLoc = variantContextsInFeatureContext
protected static VariantContext composeGivenAllelesVariantContextFromVariantList(final List<VariantContext> variantContextsInFeatureContext,
final Locatable loc,
final boolean keepFiltered) {
final List<VariantContext> vcsAtLoc = variantContextsInFeatureContext
.stream()
.filter(vc -> vc.getStart() == loc.getStart() &&
(keepFiltered || vc.isNotFiltered()) &&
(!snpsOnly || vc.isSNP()))
(keepFiltered || vc.isNotFiltered()))
.collect(Collectors.toList());


if (rodVcsAtLoc.isEmpty()) {
if (vcsAtLoc.isEmpty()) {
return null;
}
final List<String> haplotypeSources = rodVcsAtLoc.stream().map(VariantContext::getSource).collect(Collectors.toList());
final VariantContext mergedVc = GATKVariantContextUtils.simpleMerge(rodVcsAtLoc, haplotypeSources,
GATKVariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED,
final List<String> haplotypeSources = vcsAtLoc.stream().map(VariantContext::getSource).collect(Collectors.toList());
final VariantContext mergedVc = GATKVariantContextUtils.simpleMerge(vcsAtLoc, haplotypeSources,
keepFiltered ? KEEP_UNCONDITIONAL : KEEP_IF_ANY_UNFILTERED,
GATKVariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);

return mergedVc;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,9 @@ protected TreeSet<Integer> decomposeHaplotypesIntoVariantContexts(final List<Hap
}

/**
* Utility class to represent a location and set of Alleles, used to track redundant variant contexts by adding
* to a set
* This class exists to allow VariantContext objects to be compared based only on their location and set of alleles,
* providing a more liberal equals method so that VariantContext objects can be placed into a Set
* which retains only VCs that have non-redundant location and Allele lists.
*/
protected static class LocationAndAlleles {
private final int loc;
Expand Down Expand Up @@ -146,87 +147,80 @@ public boolean equals(final Object o) {

@Override
public int hashCode() {
int result = loc;
result = 31 * result + (alleles != null ? alleles.hashCode() : 0);
return result;
return 31 * loc + (alleles != null ? alleles.hashCode() : 0);
}

}

/**
* Returns the list of variant contexts active at this location, using either the list of active haplotypes or
* the list of active alleles to genotypes if we are in GGA mode as candidates. This version of the method will not include events
* that span the current location, only those that have loc as their start position.
* @param haplotypes Haplotypes for the current active region.
* Returns the list of given alleles active at this location. This method will include events that span the current
* location if includeSpanningEvents is set to true; otherwise it will only include events that have loc as their \
* start position.
* @param loc The start position we are genotyping
* @param activeAllelesToGenotype The list of given alleles for the current active region, empty unless we are in GGA mode
* @param includeSpanningEvents If true, will also return events that span loc
*/
protected static List<VariantContext> getVCsAtThisLocation(final List<Haplotype> haplotypes,
final int loc,
final List<VariantContext> activeAllelesToGenotype) {
return getVCsAtThisLocation(haplotypes, loc, activeAllelesToGenotype, false);
protected static List<VariantContext> getVariantContextsFromGivenAlleles(final int loc,
final List<VariantContext> activeAllelesToGenotype,
final boolean includeSpanningEvents) {
final Set<LocationAndAlleles> uniqueLocationsAndAlleles = new HashSet<>();
final List<VariantContext> results = new ArrayList<>();

int givenAlleleSourceCount = 0;
for( final VariantContext givenAlleleVC : activeAllelesToGenotype ) {
if( givenAlleleVC.getStart() <= loc && givenAlleleVC.getEnd() >= loc) {
if (! (includeSpanningEvents || givenAlleleVC.getStart() == loc)) {
continue;
}
int alleleCount = 0;
for( final Allele givenAltAllele : givenAlleleVC.getAlternateAlleles() ) {
final List<Allele> alleleSet = Arrays.asList(givenAlleleVC.getReference(), givenAltAllele);

//TODO: this source name seems arbitrary and probably just has to be unique
//TODO: how about replace it by vcSourceName = String.parseInt(nameCounter++)?
final String vcSourceName = "Comp" + givenAlleleSourceCount + "Allele" + alleleCount;
// check if this event is already in the list of events due to a repeat in the input alleles track
final VariantContext candidateEventToAdd = new VariantContextBuilder(givenAlleleVC).alleles(alleleSet).source(vcSourceName).make();

final LocationAndAlleles locationAndAlleles = new LocationAndAlleles(candidateEventToAdd.getStart(), candidateEventToAdd.getAlleles());
if (! uniqueLocationsAndAlleles.contains(locationAndAlleles)) {
uniqueLocationsAndAlleles.add(locationAndAlleles);
results.add(candidateEventToAdd);
}

alleleCount++;
}
}
givenAlleleSourceCount++;
}
return results;
}

/**
* Returns the list of variant contexts active at this location, using either the list of active haplotypes or
* the list of active alleles to genotypes if we are in GGA mode as candidates. This method will include events
* that span the current location if includeSpanningEvents is set to true; otherwise it will only include events that
* have loc as their start position.
* @param haplotypes Haplotypes for the current active region.
* Returns the list of events discovered in assembled haplotypes that are active at this location. The results will
* include events that span the current location if includeSpanningEvents is set to true; otherwise it will only
* include events that have loc as their start position.
* @param loc The start position we are genotyping
* @param activeAllelesToGenotype The list of given alleles for the current active region, empty unless we are in GGA mode
* @param haplotypes list of active haplotypes at the current location
* @param includeSpanningEvents If true, will also return events that span loc
*/
protected static List<VariantContext> getVCsAtThisLocation(final List<Haplotype> haplotypes,
final int loc,
final List<VariantContext> activeAllelesToGenotype,
final boolean includeSpanningEvents) {
// the overlapping events to merge into a common reference view
protected static List<VariantContext> getVariantContextsFromActiveHaplotypes(final int loc,
final List<Haplotype> haplotypes,
final boolean includeSpanningEvents) {
final List<VariantContext> results = new ArrayList<>();
final Set<LocationAndAlleles> uniqueLocationsAndAlleles = new HashSet<>();

if( activeAllelesToGenotype.isEmpty() ) {
haplotypes.stream()
.flatMap(h -> Utils.stream(h.getEventMap().getSpanningEvents(loc)))
.filter(Objects::nonNull)
.filter(v -> (includeSpanningEvents || v.getStart() == loc))
.forEach(v -> {
final LocationAndAlleles locationAndAlleles = new LocationAndAlleles(v.getStart(), v.getAlleles());
if (! uniqueLocationsAndAlleles.contains(locationAndAlleles)) {
uniqueLocationsAndAlleles.add(locationAndAlleles);
results.add(v);
}
});
} else { // we are in GGA mode!
int compCount = 0;
for( final VariantContext compVC : activeAllelesToGenotype ) {
if( compVC.getStart() <= loc && compVC.getEnd() >= loc) {
if (! (includeSpanningEvents || compVC.getStart() == loc)) {
continue;
}
int alleleCount = 0;
for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
final List<Allele> alleleSet = Arrays.asList(compVC.getReference(), compAltAllele);

//TODO: this source name seems arbitrary and probably just has to be unique
//TODO: how about replace it by vcSourceName = String.parseInt(nameCounter++)?
final String vcSourceName = "Comp" + compCount + "Allele" + alleleCount;
// check if this event is already in the list of events due to a repeat in the input alleles track
final VariantContext candidateEventToAdd = new VariantContextBuilder(compVC).alleles(alleleSet).source(vcSourceName).make();

final LocationAndAlleles locationAndAlleles = new LocationAndAlleles(candidateEventToAdd.getStart(), candidateEventToAdd.getAlleles());
if (! uniqueLocationsAndAlleles.contains(locationAndAlleles)) {
uniqueLocationsAndAlleles.add(locationAndAlleles);
results.add(candidateEventToAdd);
}

alleleCount++;
haplotypes.stream()
.flatMap(h -> Utils.stream(h.getEventMap().getOverlappingEvents(loc)))
.filter(Objects::nonNull)
.filter(v -> (includeSpanningEvents || v.getStart() == loc))
.forEach(v -> {
final LocationAndAlleles locationAndAlleles = new LocationAndAlleles(v.getStart(), v.getAlleles());
if (! uniqueLocationsAndAlleles.contains(locationAndAlleles)) {
uniqueLocationsAndAlleles.add(locationAndAlleles);
results.add(v);
}
}
compCount++;
}
}

});
return results;
}

Expand Down Expand Up @@ -274,19 +268,23 @@ protected static Map<Allele, List<Haplotype>> createAlleleMapper(final VariantCo

for (final Haplotype h : haplotypes) {

final Iterator<VariantContext> spanningEvents = h.getEventMap().getSpanningEvents(loc);
final List<VariantContext> spanningEvents = h.getEventMap().getOverlappingEvents(loc);

final boolean hasSpanningEvents = spanningEvents.hasNext();
if (spanningEvents.isEmpty()) { //no events --> this haplotype supports the reference at this locus
result.get(ref).add(h);
continue;
}

while (spanningEvents.hasNext()) {
final VariantContext spanningEvent = spanningEvents.next();
for (VariantContext spanningEvent : spanningEvents) {
if (spanningEvent.getStart() == loc) {
// the event starts at the current location

if (spanningEvent.getReference().length() == mergedVC.getReference().length()) {
// reference allele lengths are equal; we can just use the spanning event's alt allele
// in the case of GGA mode the spanning event might not match an allele in the mergedVC
if (result.containsKey(spanningEvent.getAlternateAllele(0))) {
// variant contexts derived from the event map have only one alt allele each, so we can just
// grab the first one (we're not assuming that the sample is biallelic)
result.get(spanningEvent.getAlternateAllele(0)).add(h);
}
} else if (spanningEvent.getReference().length() < mergedVC.getReference().length()) {
Expand All @@ -310,8 +308,8 @@ protected static Map<Allele, List<Haplotype>> createAlleleMapper(final VariantCo
if (activeAllelesToGenotype != null && activeAllelesToGenotype.size() > 0) {
// in HC GGA mode we need to check to make sure that spanning deletion
// events actually match one of the alleles we were given to genotype
VariantContext matchingGivenVc = findMatchingGivenAllele(activeAllelesToGenotype, spanningEvent);
if (matchingGivenVc != null) {
final boolean eventMatchesGivenAllele = eventMatchesGivenAllele(activeAllelesToGenotype, spanningEvent);
if (eventMatchesGivenAllele) {
if (!result.containsKey(Allele.SPAN_DEL)) {
result.put(Allele.SPAN_DEL, new ArrayList<>());
}
Expand All @@ -328,25 +326,22 @@ protected static Map<Allele, List<Haplotype>> createAlleleMapper(final VariantCo
}
}

if (! hasSpanningEvents) { //no events --> this haplotype supports the reference at this locus
result.get(ref).add(h);
}
}
return result;
}

private static VariantContext findMatchingGivenAllele(final List<VariantContext> activeAllelesToGenotype, final VariantContext spanningEvent) {
private static boolean eventMatchesGivenAllele(final List<VariantContext> activeAllelesToGenotype, final VariantContext spanningEvent) {
for (VariantContext givenVC : activeAllelesToGenotype) {
if (givenVC.getStart() == spanningEvent.getStart() && givenVC.getReference().equals(spanningEvent.getReference())) {
for (Allele a : spanningEvent.getAlternateAlleles()) {
if (givenVC.hasAlternateAllele(a)) {
return givenVC;
return true;
}
}
}

}
return null;
return false;
}

// Builds the read-likelihoods collection to use for annotation considering user arguments and the collection
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -442,8 +442,9 @@ public void writeHeader( final VariantContextWriter vcfWriter, final SAMSequence
public ActivityProfileState isActive( final AlignmentContext context, final ReferenceContext ref, final FeatureContext features ) {

if ( hcArgs.genotypingOutputMode == GenotypingOutputMode.GENOTYPE_GIVEN_ALLELES ) {
final VariantContext vcFromAllelesRod = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromRod(features, ref.getInterval(), false, hcArgs.genotypeFilteredAlleles, hcArgs.alleles);
if( vcFromAllelesRod != null ) {
final VariantContext vcFromGivenAlleles = GenotypingGivenAllelesUtils.composeGivenAllelesVariantContextFromVariantList(features,
ref.getInterval(), hcArgs.genotypeFilteredAlleles, hcArgs.alleles);
if( vcFromGivenAlleles != null ) {
return new ActivityProfileState(ref.getInterval(), 1.0);
}
}
Expand Down
Loading

0 comments on commit f5ff180

Please sign in to comment.