-
Notifications
You must be signed in to change notification settings - Fork 596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Port of GATK3 Variant Annotator Tool #3803
Changes from 2 commits
3621134
f57120a
4e45c49
fe6119b
f3fe575
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
import org.broadinstitute.barclay.help.DocumentedFeature; | ||
import org.broadinstitute.hellbender.utils.Utils; | ||
import org.broadinstitute.hellbender.utils.help.HelpConstants; | ||
import org.broadinstitute.hellbender.utils.pileup.PileupElement; | ||
import org.broadinstitute.hellbender.utils.read.GATKRead; | ||
import org.broadinstitute.hellbender.utils.read.ReadUtils; | ||
import org.broadinstitute.hellbender.utils.variant.GATKVCFConstants; | ||
|
@@ -41,4 +42,10 @@ public static OptionalDouble getReadBaseQuality(final GATKRead read, final int r | |
Utils.nonNull(read); | ||
return OptionalDouble.of(read.getBaseQuality(ReadUtils.getReadCoordinateForReferenceCoordinateUpToEndOfRead(read, refLoc, ReadUtils.ClippingTail.RIGHT_TAIL))); | ||
} | ||
|
||
@Override | ||
// When we have a pileupe element we only need its underlying read in order to com | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. double typo! plileupE in order to com... pute? |
||
protected OptionalDouble getElementForPileupElement(final PileupElement p, final int refLoc) { | ||
return OptionalDouble.of(p.getQual()); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,6 +12,7 @@ | |
import org.broadinstitute.hellbender.utils.Utils; | ||
import org.broadinstitute.hellbender.utils.genotyper.ReadLikelihoods; | ||
import org.broadinstitute.hellbender.utils.help.HelpConstants; | ||
import org.broadinstitute.hellbender.utils.pileup.PileupElement; | ||
|
||
import java.util.*; | ||
import java.util.stream.Collectors; | ||
|
@@ -56,6 +57,46 @@ public void annotate(final ReferenceContext ref, | |
// make sure that there's a meaningful relationship between the alleles in the likelihoods and our VariantContext | ||
Utils.validateArg(likelihoods.alleles().containsAll(alleles), () -> "VC alleles " + alleles + " not a subset of ReadLikelihoods alleles " + likelihoods.alleles()); | ||
|
||
int[] counts; | ||
if (likelihoods.hasFilledLikelihoods()) { | ||
// Compute based on the alignment map | ||
counts = annotateWithLikelihoods(vc, g, alleles, likelihoods); | ||
} else if (likelihoods.readCount()==0) { | ||
// No reads, thus cant compute the AD so do nothing | ||
return; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this path is weird because all the other ones set AD to something, this just lets it continue being whatever it was before? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If there are no read likelihoods we don't add an AD to the genotype. I'm cool with that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. commented |
||
} else if (vc.isSNP()) { | ||
// Compute based on pileup bases at the snp site (won't match haplotype caller output) | ||
counts = annotateWithPileup(vc, likelihoods.getStratifiedPileups(vc).get(g.getSampleName())); | ||
} else { | ||
// Otherwise return an empty AD field for an indel. | ||
counts = new int[vc.getNAlleles()]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What's the deal with the end case? If it's not a snp we just give up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's what we've traditionally done and it's also why we weren't going to support VariantAnnotator in GATK4. We expect that local realignment would change the indel ADs substantially compared with the pileup so we didn't even try. |
||
} | ||
|
||
gb.AD(counts); | ||
} | ||
|
||
private int[] annotateWithPileup(final VariantContext vc, List<PileupElement> pileupElements) { | ||
|
||
final HashMap<Byte, Integer> alleleCounts = new HashMap<>(); | ||
for ( final Allele allele : vc.getAlleles() ) { | ||
alleleCounts.put(allele.getBases()[0], 0); | ||
} | ||
for ( final PileupElement p : pileupElements) { | ||
// only count bases that support alleles in the variant context | ||
alleleCounts.computeIfPresent(p.getBase(), (base, count) -> count+ 1); | ||
} | ||
|
||
// we need to add counts in the correct order | ||
final int[] counts = new int[alleleCounts.size()]; | ||
counts[0] = alleleCounts.get(vc.getReference().getBases()[0]); | ||
for (int i = 0; i < vc.getNAlleles() -1; i++) { | ||
counts[i + 1] = alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]); | ||
} | ||
return counts; | ||
} | ||
|
||
private int[] annotateWithLikelihoods(VariantContext vc, Genotype g, Set<Allele> alleles, ReadLikelihoods<Allele> likelihoods) { | ||
|
||
final Map<Allele, Integer> alleleCounts = new LinkedHashMap<>(); | ||
for ( final Allele allele : vc.getAlleles() ) { | ||
alleleCounts.put(allele, 0); | ||
|
@@ -68,11 +109,11 @@ public void annotate(final ReferenceContext ref, | |
|
||
final int[] counts = new int[alleleCounts.size()]; | ||
counts[0] = alleleCounts.get(vc.getReference()); //first one in AD is always ref | ||
for (int i = 0; i < vc.getAlternateAlleles().size(); i++) { | ||
for (int i = 0; i < vc.getNAlleles() -1; i++) { | ||
counts[i + 1] = alleleCounts.get(vc.getAlternateAllele(i)); | ||
} | ||
|
||
gb.AD(counts); | ||
return counts; | ||
} | ||
|
||
@Override | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -85,8 +85,11 @@ public Map<String, Object> annotate(final ReferenceContext ref, | |
ADrestrictedDepth += totalADdepth; | ||
} | ||
depth += totalADdepth; | ||
continue; | ||
} | ||
} else if (likelihoods != null) { | ||
} | ||
// if there is no AD value or it is a dummy value, we want to look to other means to get the depth | ||
if (likelihoods != null) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This was meant to become an if instead of an else if? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you have a dummy AD value, (like NaN|NaN|Nan) then you probably still want to determine the depth of the sample by the likelihoods. |
||
depth += likelihoods.sampleReadCount(likelihoods.indexOfSample(genotype.getSampleName())); | ||
} else if ( genotype.hasDP() ) { | ||
depth += genotype.getDP(); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
unecessary space here