Skip to content

Commit

Permalink
same haplotype filter
Browse files Browse the repository at this point in the history
  • Loading branch information
davidbenjamin committed Aug 7, 2018
1 parent 5a7b6be commit c961829
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public void onTraversalStart() {
final Optional<String> normalSample = normalSampleHeaderLine == null ? Optional.empty() : Optional.of(normalSampleHeaderLine.getValue());

filteringEngine = new Mutect2FilteringEngine(MTFAC, tumorSample, normalSample);
filteringFirstPass = new FilteringFirstPass();
filteringFirstPass = new FilteringFirstPass(tumorSample);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package org.broadinstitute.hellbender.tools.walkers.mutect;


import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.utils.Utils;
import org.broadinstitute.hellbender.utils.tsv.DataLine;
Expand All @@ -18,13 +20,15 @@ public class FilteringFirstPass {
final List<FilterResult> filterResults;
final Map<String, ImmutablePair<String, Integer>> filteredPhasedCalls;
final Map<String, FilterStats> filterStats;
final String tumorSample;
boolean readyForSecondPass;

public FilteringFirstPass() {
public FilteringFirstPass(final String tumorSample) {
filterResults = new ArrayList<>();
filteredPhasedCalls = new HashMap<>();
filterStats = new HashMap<>();
readyForSecondPass = false;
this.tumorSample = tumorSample;
}

public boolean isReadyForSecondPass() { return readyForSecondPass; }
Expand All @@ -34,14 +38,36 @@ public FilterStats getFilterStats(final String filterName){
return filterStats.get(filterName);
}

public boolean isOnFilteredHaplotype(final VariantContext vc, final int maxDistance) {

final Genotype tumorGenotype = vc.getGenotype(tumorSample);

if (!hasPhaseInfo(tumorGenotype)) {
return false;
}

final String pgt = (String) tumorGenotype.getExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY, "");
final String pid = (String) tumorGenotype.getExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY, "");
final int position = vc.getStart();

final Pair<String, Integer> filteredCall = filteredPhasedCalls.get(pid);
if (filteredCall == null) {
return false;
}

// Check that vc occurs on the filtered haplotype
return filteredCall.getLeft().equals(pgt) && Math.abs(filteredCall.getRight() - position) <= maxDistance;
}

public void add(final FilterResult filterResult, final VariantContext vc) {
filterResults.add(filterResult);
final Genotype tumorGenotype = vc.getGenotype(tumorSample);

if (!filterResult.getFilters().isEmpty() && hasPhaseInfo(vc)) {
final String pgt = vc.getAttributeAsString(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY, "");
final String pid = vc.getAttributeAsString(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY, "");
if (!filterResult.getFilters().isEmpty() && hasPhaseInfo(tumorGenotype)) {
final String pgt = (String) tumorGenotype.getExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY, "");
final String pid = (String) tumorGenotype.getExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY, "");
final int position = vc.getStart();
filteredPhasedCalls.put(pgt, new ImmutablePair<>(pid, position));
filteredPhasedCalls.put(pid, new ImmutablePair<>(pgt, position));
}
}

Expand Down Expand Up @@ -95,8 +121,8 @@ public static FilterStats calculateThresholdForReadOrientationFilter(final doubl
cumulativeExpectedFPs, numPassingVariants, cumulativeExpectedFPs/numPassingVariants, requestedFPR);
}

private static boolean hasPhaseInfo(VariantContext vc) {
return vc.hasAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY) && vc.hasAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY);
public static boolean hasPhaseInfo(final Genotype genotype) {
return genotype.hasExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_GT_KEY) && genotype.hasExtendedAttribute(GATKVCFConstants.HAPLOTYPE_CALLER_PHASING_ID_KEY);
}

public List<FilterResult> getFilterResults() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl
public static final String UNIQUE_ALT_READ_COUNT_LONG_NAME = "unique-alt-read-count";
public static final String TUMOR_SEGMENTATION_LONG_NAME = "tumor-segmentation";
public static final String ORIENTATION_BIAS_FDR_LONG_NAME = "orientation-bias-fdr"; // FDR = false discovery rate
public static final String MAX_DISTANCE_TO_FILTERED_CALL_ON_SAME_HAPLOTYPE_LONG_NAME = "distance-on-haplotype";

public static final String FILTERING_STATS_LONG_NAME = "stats";

Expand Down Expand Up @@ -124,4 +125,8 @@ public class M2FiltersArgumentCollection extends AssemblyBasedCallerArgumentColl
public File mutect2FilteringStatsTable = new File("Mutect2FilteringStats.tsv");


@Argument(fullName = MAX_DISTANCE_TO_FILTERED_CALL_ON_SAME_HAPLOTYPE_LONG_NAME, optional = true, doc = "On second filtering pass, variants with same PGT and PID tags as a filtered variant within this distance are filtered.")
public int maxDistanceToFilteredCallOnSameHaplotype = 100;


}
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ private void applyDuplicatedAltReadFilter(final M2FiltersArgumentCollection MTFA
}
}

private void applyReadOrientationFilter(final VariantContext vc, final FilterResult filterResult, final Optional<FilteringFirstPass> filterSummary){
private void applyReadOrientationFilter(final VariantContext vc, final FilterResult filterResult, final Optional<FilteringFirstPass> firstPass){
if (! vc.isSNP()){
return;
}
Expand All @@ -305,23 +305,30 @@ private void applyReadOrientationFilter(final VariantContext vc, final FilterRes

final double artifactPosterior = GATKProtectedVariantContextUtils.getAttributeAsDouble(tumorGenotype, GATKVCFConstants.ROF_POSTERIOR_KEY, -1.0);

if (! filterSummary.isPresent()) {
if (! firstPass.isPresent()) {
// During first pass we simply collect the posterior artifact probabilities
filterResult.setReadOrientationPosterior(artifactPosterior);
return;
} else {
final double threshold = filterSummary.get().getFilterStats(GATKVCFConstants.READ_ORIENTATION_ARTIFACT_FILTER_NAME).getThreshold();
final double threshold = firstPass.get().getFilterStats(GATKVCFConstants.READ_ORIENTATION_ARTIFACT_FILTER_NAME).getThreshold();

if (artifactPosterior > threshold){
filterResult.addFilter(GATKVCFConstants.READ_ORIENTATION_ARTIFACT_FILTER_NAME);
}
}
}

private void applyFilteredHaplotypeFilter(final M2FiltersArgumentCollection MTFAC, final VariantContext vc, final FilterResult filterResult, final Optional<FilteringFirstPass> firstPass){
if ( firstPass.isPresent() && firstPass.get().isOnFilteredHaplotype(vc, MTFAC.maxDistanceToFilteredCallOnSameHaplotype)){
filterResult.addFilter(GATKVCFConstants.BAD_HAPLOTYPE_FILTER_NAME);
}
}

public FilterResult calculateFilters(final M2FiltersArgumentCollection MTFAC, final VariantContext vc,
final Optional<FilteringFirstPass> filterStats) {
filterStats.ifPresent(ffp -> Utils.validate(ffp.isReadyForSecondPass(), "First pass information has not been processed into a model for the second pass."));
final Optional<FilteringFirstPass> firstPass) {
firstPass.ifPresent(ffp -> Utils.validate(ffp.isReadyForSecondPass(), "First pass information has not been processed into a model for the second pass."));
final FilterResult filterResult = new FilterResult();
applyFilteredHaplotypeFilter(MTFAC, vc, filterResult, firstPass);
applyInsufficientEvidenceFilter(MTFAC, vc, filterResult);
applyClusteredEventFilter(vc, filterResult);
applyDuplicatedAltReadFilter(MTFAC, vc, filterResult);
Expand All @@ -338,7 +345,7 @@ public FilterResult calculateFilters(final M2FiltersArgumentCollection MTFAC, fi
applyReadPositionFilter(MTFAC, vc, filterResult);

// The following filters use the information gathered during the first pass
applyReadOrientationFilter(vc, filterResult, filterStats);
applyReadOrientationFilter(vc, filterResult, firstPass);
return filterResult;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,14 +158,15 @@ their names (or descriptions) depend on some threshold. Those filters are not i
public final static String READ_POSITION_FILTER_NAME = "read_position";
public final static String CONTAMINATION_FILTER_NAME = "contamination";
public final static String READ_ORIENTATION_ARTIFACT_FILTER_NAME = "read_orientation_artifact";
public final static String BAD_HAPLOTYPE_FILTER_NAME = "bad_haplotype";

public static final List<String> MUTECT_FILTER_NAMES = Arrays.asList(STR_CONTRACTION_FILTER_NAME,
PON_FILTER_NAME, CLUSTERED_EVENTS_FILTER_NAME, TUMOR_LOD_FILTER_NAME, GERMLINE_RISK_FILTER_NAME,
MULTIALLELIC_FILTER_NAME, STRAND_ARTIFACT_FILTER_NAME, ARTIFACT_IN_NORMAL_FILTER_NAME,
MEDIAN_BASE_QUALITY_FILTER_NAME, MEDIAN_MAPPING_QUALITY_FILTER_NAME,
MEDIAN_FRAGMENT_LENGTH_DIFFERENCE_FILTER_NAME,
READ_POSITION_FILTER_NAME, CONTAMINATION_FILTER_NAME, DUPLICATED_EVIDENCE_FILTER_NAME,
READ_ORIENTATION_ARTIFACT_FILTER_NAME);
READ_ORIENTATION_ARTIFACT_FILTER_NAME, BAD_HAPLOTYPE_FILTER_NAME);

// Symbolic alleles
public final static String SYMBOLIC_ALLELE_DEFINITION_HEADER_TAG = "ALT";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ private static void addFilterLine(final VCFFilterHeaderLine line) {
addFilterLine(new VCFFilterHeaderLine(CONTAMINATION_FILTER_NAME, "contamination"));
addFilterLine(new VCFFilterHeaderLine(DUPLICATED_EVIDENCE_FILTER_NAME, "evidence for alt allele is overrepresented by apparent duplicates"));
addFilterLine(new VCFFilterHeaderLine(READ_ORIENTATION_ARTIFACT_FILTER_NAME, "orientation bias detected by the orientation bias mixture model"));
addFilterLine(new VCFFilterHeaderLine(BAD_HAPLOTYPE_FILTER_NAME, "Variant near filtered variant on same haplotype."));


addFormatLine(new VCFFormatHeaderLine(ALLELE_BALANCE_KEY, 1, VCFHeaderLineType.Float, "Allele balance for each het genotype"));
Expand Down

0 comments on commit c961829

Please sign in to comment.