Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Port of GATK3 Variant Annotator Tool #3803

Merged
merged 5 commits into from
Feb 26, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public final class WellformedReadFilter extends ReadFilter {
private ReadFilter wellFormedFilter = null;

// Command line parser requires a no-arg constructor
public WellformedReadFilter() {
public WellformedReadFilter() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unecessary space here

}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,7 @@ public static JavaRDD<VariantContext> callVariantsWithHaplotypeCaller(
final Broadcast<ReferenceMultiSource> referenceBroadcast = ctx.broadcast(reference);
final Broadcast<HaplotypeCallerArgumentCollection> hcArgsBroadcast = ctx.broadcast(hcArgs);

final VariantAnnotatorEngine variantAnnotatorEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(hcArgs.defaultGATKVariantAnnotationArgumentCollection, hcArgs.dbsnp.dbsnp, hcArgs.comps);
final VariantAnnotatorEngine variantAnnotatorEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(hcArgs.defaultGATKVariantAnnotationArgumentCollection, hcArgs.dbsnp.dbsnp, hcArgs.comps, hcArgs.emitReferenceConfidence == ReferenceConfidenceMode.GVCF);
final Broadcast<VariantAnnotatorEngine> annotatorEngineBroadcast = ctx.broadcast(variantAnnotatorEngine);

final List<ShardBoundary> shardBoundaries = getShardBoundaries(header, intervals, shardingArgs.readShardSize, shardingArgs.readShardPadding);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ private void resizeReferenceIfNeeded(SimpleInterval intervalToClose) {
@Override
public void onTraversalStart() {
// create the annotation engine
annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(variantAnnotationArgumentCollection, dbsnp.dbsnp, Collections.emptyList());
annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(variantAnnotationArgumentCollection, dbsnp.dbsnp, Collections.emptyList(), false);

vcfWriter = getVCFWriter();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ public void onTraversalStart() {

final SampleList samples = new IndexedSampleList(inputVCFHeader.getGenotypeSamples()); //todo should this be getSampleNamesInOrder?

annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(defaultGATKVariantAnnotationArgumentCollection, dbsnp.dbsnp, Collections.emptyList());
annotationEngine = VariantAnnotatorEngine.ofSelectedMinusExcluded(defaultGATKVariantAnnotationArgumentCollection, dbsnp.dbsnp, Collections.emptyList(), false);

// We only want the engine to generate the AS_QUAL key if we are using AlleleSpecific annotations.
genotypingEngine = new MinimalGenotypingEngine(createUAC(), samples, new GeneralPloidyFailOverAFCalculatorProvider(genotypeArgs), annotationEngine.isRequestedReducibleRawKey(GATKVCFConstants.AS_QUAL_KEY));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
package org.broadinstitute.hellbender.tools.walkers.annotator;

import htsjdk.samtools.Cigar;
import htsjdk.samtools.CigarElement;
import htsjdk.samtools.CigarOperator;
import htsjdk.samtools.SAMRecord;
import org.apache.commons.lang.StringUtils;
import org.broadinstitute.hellbender.utils.pairhmm.PairHMM;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;

import java.util.ArrayList;
import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.read.ReadUtils;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
Expand Down Expand Up @@ -41,4 +42,10 @@ public static OptionalDouble getReadBaseQuality(final GATKRead read, final int r
Utils.nonNull(read);
return OptionalDouble.of(read.getBaseQuality(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, refLoc, ReadUtils.ClippingTail.RIGHT_TAIL)));
}

@Override
// When we have a pileupe element we only need its underlying read in order to com
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

double typo! plileupE in order to com... pute?

protected OptionalDouble getElementForPileupElement(final PileupElement p, final int refLoc) {
return OptionalDouble.of(p.getQual());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.AlignmentUtils;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
Expand Down Expand Up @@ -36,4 +37,11 @@ protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc
Utils.nonNull(read);
return OptionalDouble.of(AlignmentUtils.getNumHardClippedBases(read));
}

@Override
protected OptionalDouble getElementForPileupElement(final PileupElement p, final int refLoc) {
Utils.nonNull(p);
// default to returning the same value
return getElementForRead(p.getRead(),refLoc);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;

import java.util.*;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -56,6 +57,46 @@ public void annotate(final ReferenceContext ref,
// make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext
Utils.validateArg(likelihoods.alleles().containsAll(alleles), () -> "VC alleles " + alleles + " not a subset of ReadLikelihoods alleles " + likelihoods.alleles());

int[] counts;
if (likelihoods.hasFilledLikelihoods()) {
// Compute based on the alignment map
counts = annotateWithLikelihoods(vc, g, alleles, likelihoods);
} else if (likelihoods.readCount()==0) {
// No reads, thus cant compute the AD so do nothing
return;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this path is weird because all the other ones set AD to something, this just lets it continue being whatever it was before?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If there are no read likelihoods we don't add an AD to the genotype. I'm cool with that.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

commented

} else if (vc.isSNP()) {
// Compute based on pileup bases at the snp site (won't match haplotype caller output)
counts = annotateWithPileup(vc, likelihoods.getStratifiedPileups(vc).get(g.getSampleName()));
} else {
// Otherwise return an empty AD field for an indel.
counts = new int[vc.getNAlleles()];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the deal with the end case? If it's not a snp we just give up?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's what we've traditionally done and it's also why we weren't going to support VariantAnnotator in GATK4. We expect that local realignment would change the indel ADs substantially compared with the pileup so we didn't even try.

}

gb.AD(counts);
}

private int[] annotateWithPileup(final VariantContext vc, List<PileupElement> pileupElements) {

final HashMap<Byte, Integer> alleleCounts = new HashMap<>();
for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele.getBases()[0], 0);
}
for ( final PileupElement p : pileupElements) {
// only count bases that support alleles in the variant context
alleleCounts.computeIfPresent(p.getBase(), (base, count) -> count+ 1);
}

// we need to add counts in the correct order
final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(vc.getReference().getBases()[0]);
for (int i = 0; i < vc.getNAlleles() -1; i++) {
counts[i + 1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]);
}
return counts;
}

private int[] annotateWithLikelihoods(VariantContext vc, Genotype g, Set<Allele> alleles, ReadLikelihoods<Allele> likelihoods) {

final Map<Allele, Integer> alleleCounts = new LinkedHashMap<>();
for ( final Allele allele : vc.getAlleles() ) {
alleleCounts.put(allele, 0);
Expand All @@ -68,11 +109,11 @@ public void annotate(final ReferenceContext ref,

final int[] counts = new int[alleleCounts.size()];
counts[0] = alleleCounts.get(vc.getReference()); //first one in AD is always ref
for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
for (int i = 0; i < vc.getNAlleles() -1; i++) {
counts[i + 1] = alleleCounts.get(vc.getAlternateAllele(i));
}

gb.AD(counts);
return counts;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.GenotypesContext;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.FisherExactTest;
import org.broadinstitute.hellbender.utils.QualityUtils;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;

import java.util.Collections;
import java.util.List;
Expand Down Expand Up @@ -44,6 +43,7 @@ public final class FisherStrand extends StrandBiasTest implements StandardAnnota

static final double MIN_PVALUE = 1E-320;
private static final int MIN_COUNT = ARRAY_DIM;
private static final int MIN_QUAL_FOR_FILTERED_TEST = 17;

// how large do we want the normalized table to be? (ie, sum of all entries must be smaller that this)
private static final double TARGET_TABLE_SIZE = 200.0;
Expand All @@ -59,6 +59,14 @@ protected Map<String, Object> calculateAnnotationFromGTfield(final GenotypesCont
return ( tableFromPerSampleAnnotations != null )? annotationForOneTable(pValueForContingencyTable(tableFromPerSampleAnnotations)) : null;
}

@Override
protected Map<String, Object> calculateAnnotationFromStratifiedContexts(final Map<String, List<PileupElement>> stratifiedContexts,
final VariantContext vc){
final int[][] tableNoFiltering = getPileupContingencyTable(stratifiedContexts, vc.getReference(), vc.getAlternateAlleles(), -1, MIN_COUNT);
final int[][] tableFiltering = getPileupContingencyTable(stratifiedContexts, vc.getReference(), vc.getAlternateAlleles(), MIN_QUAL_FOR_FILTERED_TEST, MIN_COUNT);
return annotationForOneTable(Math.max(pValueForContingencyTable(tableFiltering), pValueForContingencyTable(tableNoFiltering)));
}

@Override
protected Map<String, Object> calculateAnnotationFromLikelihoods(final ReadLikelihoods<Allele> likelihoods,
final VariantContext vc){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
Expand Down Expand Up @@ -44,7 +45,13 @@ protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc

@Override
protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc) {
Utils.nonNull(read);
throw new IllegalStateException("This method should never have been called as getElementForRead(read,refloc,mostLikelyAllele) was overriden");
// todo its possible this should throw, as This method should never have been called as getElementForRead(read,refloc,mostLikelyAllele) was overriden
return OptionalDouble.empty();
}

@Override
protected OptionalDouble getElementForPileupElement(final PileupElement p, final int refLoc) {
// todo its possible this should throw, as This method should never have been called as getElementForRead(read,refloc,mostLikelyAllele) was overriden
return OptionalDouble.empty();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.help.HelpConstants;
import org.broadinstitute.hellbender.utils.pileup.PileupElement;
import org.broadinstitute.hellbender.utils.read.GATKRead;
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants;
import org.broadinstitute.hellbender.utils.variant.GATKVCFHeaderLines;
Expand Down Expand Up @@ -42,4 +43,9 @@ protected OptionalDouble getElementForRead(final GATKRead read, final int refLoc
Utils.nonNull(read);
return OptionalDouble.of(read.getMappingQuality());
}

protected OptionalDouble getElementForPileupElement(final PileupElement p, final int refLoc) {
// default to returning the same value
return OptionalDouble.of(p.getMappingQual());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,11 @@ public Map<String, Object> annotate(final ReferenceContext ref,
ADrestrictedDepth += totalADdepth;
}
depth += totalADdepth;
continue;
}
} else if (likelihoods != null) {
}
// if there is no AD value or it is a dummy value, we want to look to other means to get the depth
if (likelihoods != null) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was meant to become an if instead of an else if?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you have a dummy AD value, (like NaN|NaN|Nan) then you probably still want to determine the depth of the sample by the likelihoods.

depth += likelihoods.sampleReadCount(likelihoods.indexOfSample(genotype.getSampleName()));
} else if ( genotype.hasDP() ) {
depth += genotype.getDP();
Expand Down
Loading