diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/FilterMutectCalls.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/FilterMutectCalls.java index d61c547f3a8..04c6affbf29 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/FilterMutectCalls.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/FilterMutectCalls.java @@ -105,7 +105,7 @@ public void onTraversalStart() { final Optional normalSample = normalSampleHeaderLine == null ? Optional.empty() : Optional.of(normalSampleHeaderLine.getValue()); filteringEngine = new Mutect2FilteringEngine(MTFAC, tumorSample, normalSample); - filteringFirstPass = new FilteringFirstPass(); + filteringFirstPass = new FilteringFirstPass(tumorSample); } @Override diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/FilteringFirstPass.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/FilteringFirstPass.java index f435c57783a..0986580e343 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/FilteringFirstPass.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/FilteringFirstPass.java @@ -1,8 +1,10 @@ package org.broadinstitute.hellbender.tools.walkers.mutect; +import htsjdk.variant.variantcontext.Genotype; import htsjdk.variant.variantcontext.VariantContext; import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.exceptions.UserException; import org.broadinstitute.hellbender.utils.Utils; import org.broadinstitute.hellbender.utils.tsv.DataLine; @@ -18,13 +20,15 @@ public class FilteringFirstPass { final List filterResults; final Map> filteredPhasedCalls; final Map filterStats; + final String tumorSample; boolean readyForSecondPass; - public FilteringFirstPass() { + public FilteringFirstPass(final String tumorSample) { filterResults = new ArrayList<>(); filteredPhasedCalls = new HashMap<>(); filterStats = new HashMap<>(); readyForSecondPass = false; + this.tumorSample = tumorSample; } public boolean isReadyForSecondPass() { return readyForSecondPass; } @@ -34,14 +38,36 @@ public FilterStats getFilterStats(final String filterName){ return filterStats.get(filterName); } + public boolean isOnFilteredHaplotype(final VariantContext vc, final int maxDistance) { + + final Genotype tumorGenotype = vc.getGenotype(tumorSample); + + if (!hasPhaseInfo(tumorGenotype)) { + return false; + } + + final String pgt = (String) tumorGenotype.getExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY, ""); + final String pid = (String) tumorGenotype.getExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY, ""); + final int position = vc.getStart(); + + final Pair filteredCall = filteredPhasedCalls.get(pid); + if (filteredCall == null) { + return false; + } + + // Check that vc occurs on the filtered haplotype + return filteredCall.getLeft().equals(pgt) && Math.abs(filteredCall.getRight() - position) <= maxDistance; + } + public void add(final FilterResult filterResult, final VariantContext vc) { filterResults.add(filterResult); + final Genotype tumorGenotype = vc.getGenotype(tumorSample); - if (!filterResult.getFilters().isEmpty() && hasPhaseInfo(vc)) { - final String pgt = vc.getAttributeAsString(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY, ""); - final String pid = vc.getAttributeAsString(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY, ""); + if (!filterResult.getFilters().isEmpty() && hasPhaseInfo(tumorGenotype)) { + final String pgt = (String) tumorGenotype.getExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY, ""); + final String pid = (String) tumorGenotype.getExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY, ""); final int position = vc.getStart(); - filteredPhasedCalls.put(pgt, new ImmutablePair<>(pid, position)); + filteredPhasedCalls.put(pid, new ImmutablePair<>(pgt, position)); } } @@ -95,8 +121,8 @@ public static FilterStats calculateThresholdForReadOrientationFilter(final doubl cumulativeExpectedFPs, numPassingVariants, cumulativeExpectedFPs/numPassingVariants, requestedFPR); } - private static boolean hasPhaseInfo(VariantContext vc) { - return vc.hasAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY) && vc.hasAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY); + public static boolean hasPhaseInfo(final Genotype genotype) { + return genotype.hasExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY) && genotype.hasExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY); } public List getFilterResults() { diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2FiltersArgumentCollection.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2FiltersArgumentCollection.java index 59a764a1758..dfa83e36a19 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2FiltersArgumentCollection.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/M2FiltersArgumentCollection.java @@ -28,6 +28,7 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl public static final String UNIQUE_ALT_READ_COUNT_LONG_NAME = "unique-alt-read-count"; public static final String TUMOR_SEGMENTATION_LONG_NAME = "tumor-segmentation"; public static final String ORIENTATION_BIAS_FDR_LONG_NAME = "orientation-bias-fdr"; // FDR = false discovery rate + public static final String MAX_DISTANCE_TO_FILTERED_CALL_ON_SAME_HAPLOTYPE_LONG_NAME = "distance-on-haplotype"; public static final String FILTERING_STATS_LONG_NAME = "stats"; @@ -124,4 +125,8 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl public File mutect2FilteringStatsTable = new File("Mutect2FilteringStats.tsv"); + @Argument(fullName = MAX_DISTANCE_TO_FILTERED_CALL_ON_SAME_HAPLOTYPE_LONG_NAME, optional = true, doc = "On second filtering pass, variants with same PGT and PID tags as a filtered variant within this distance are filtered.") + public int maxDistanceToFilteredCallOnSameHaplotype = 100; + + } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2FilteringEngine.java b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2FilteringEngine.java index cd9fcea0f68..8cbc884d070 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2FilteringEngine.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/walkers/mutect/Mutect2FilteringEngine.java @@ -292,7 +292,7 @@ private void applyDuplicatedAltReadFilter(final M2FiltersArgumentCollection MTFA } } - private void applyReadOrientationFilter(final VariantContext vc, final FilterResult filterResult, final Optional filterSummary){ + private void applyReadOrientationFilter(final VariantContext vc, final FilterResult filterResult, final Optional firstPass){ if (! vc.isSNP()){ return; } @@ -305,12 +305,12 @@ private void applyReadOrientationFilter(final VariantContext vc, final FilterRes final double artifactPosterior = GATKProtectedVariantContextUtils.getAttributeAsDouble(tumorGenotype, GATKVCFConstants.ROF_POSTERIOR_KEY, -1.0); - if (! filterSummary.isPresent()) { + if (! firstPass.isPresent()) { // During first pass we simply collect the posterior artifact probabilities filterResult.setReadOrientationPosterior(artifactPosterior); return; } else { - final double threshold = filterSummary.get().getFilterStats(GATKVCFConstants.READ_ORIENTATION_ARTIFACT_FILTER_NAME).getThreshold(); + final double threshold = firstPass.get().getFilterStats(GATKVCFConstants.READ_ORIENTATION_ARTIFACT_FILTER_NAME).getThreshold(); if (artifactPosterior > threshold){ filterResult.addFilter(GATKVCFConstants.READ_ORIENTATION_ARTIFACT_FILTER_NAME); @@ -318,10 +318,17 @@ private void applyReadOrientationFilter(final VariantContext vc, final FilterRes } } + private void applyFilteredHaplotypeFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult, final Optional firstPass){ + if ( firstPass.isPresent() && firstPass.get().isOnFilteredHaplotype(vc, MTFAC.maxDistanceToFilteredCallOnSameHaplotype)){ + filterResult.addFilter(GATKVCFConstants.BAD_HAPLOTYPE_FILTER_NAME); + } + } + public FilterResult calculateFilters(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, - final Optional filterStats) { - filterStats.ifPresent(ffp -> Utils.validate(ffp.isReadyForSecondPass(), "First pass information has not been processed into a model for the second pass.")); + final Optional firstPass) { + firstPass.ifPresent(ffp -> Utils.validate(ffp.isReadyForSecondPass(), "First pass information has not been processed into a model for the second pass.")); final FilterResult filterResult = new FilterResult(); + applyFilteredHaplotypeFilter(MTFAC, vc, filterResult, firstPass); applyInsufficientEvidenceFilter(MTFAC, vc, filterResult); applyClusteredEventFilter(vc, filterResult); applyDuplicatedAltReadFilter(MTFAC, vc, filterResult); @@ -338,7 +345,7 @@ public FilterResult calculateFilters(final M2FiltersArgumentCollection MTFAC, fi applyReadPositionFilter(MTFAC, vc, filterResult); // The following filters use the information gathered during the first pass - applyReadOrientationFilter(vc, filterResult, filterStats); + applyReadOrientationFilter(vc, filterResult, firstPass); return filterResult; } diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java index ad7195c3d5c..ef27535522a 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFConstants.java @@ -158,6 +158,7 @@ their names (or descriptions) depend on some threshold. Those filters are not i public final static String READ_POSITION_FILTER_NAME = "read_position"; public final static String CONTAMINATION_FILTER_NAME = "contamination"; public final static String READ_ORIENTATION_ARTIFACT_FILTER_NAME = "read_orientation_artifact"; + public final static String BAD_HAPLOTYPE_FILTER_NAME = "bad_haplotype"; public static final List MUTECT_FILTER_NAMES = Arrays.asList(STR_CONTRACTION_FILTER_NAME, PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, TUMOR_LOD_FILTER_NAME, GERMLINE_RISK_FILTER_NAME, @@ -165,7 +166,7 @@ their names (or descriptions) depend on some threshold. Those filters are not i MEDIAN_BASE_QUALITY_FILTER_NAME, MEDIAN_MAPPING_QUALITY_FILTER_NAME, MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME, READ_POSITION_FILTER_NAME, CONTAMINATION_FILTER_NAME, DUPLICATED_EVIDENCE_FILTER_NAME, - READ_ORIENTATION_ARTIFACT_FILTER_NAME); + READ_ORIENTATION_ARTIFACT_FILTER_NAME, BAD_HAPLOTYPE_FILTER_NAME); // Symbolic alleles public final static String SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG = "ALT"; diff --git a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java index b6690d06101..504a61c44a0 100644 --- a/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java +++ b/src/main/java/org/broadinstitute/hellbender/utils/variant/GATKVCFHeaderLines.java @@ -60,6 +60,7 @@ private static void addFilterLine(final VCFFilterHeaderLine line) { addFilterLine(new VCFFilterHeaderLine(CONTAMINATION_FILTER_NAME, "contamination")); addFilterLine(new VCFFilterHeaderLine(DUPLICATED_EVIDENCE_FILTER_NAME, "evidence for alt allele is overrepresented by apparent duplicates")); addFilterLine(new VCFFilterHeaderLine(READ_ORIENTATION_ARTIFACT_FILTER_NAME, "orientation bias detected by the orientation bias mixture model")); + addFilterLine(new VCFFilterHeaderLine(BAD_HAPLOTYPE_FILTER_NAME, "Variant near filtered variant on same haplotype.")); addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype"));