Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add experimental FilterFuncotations tool #4991

Merged
merged 8 commits into from
Aug 17, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
package org.broadinstitute.hellbender.tools.funcotator;

import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.variantcontext.VariantContextBuilder;
import htsjdk.variant.variantcontext.writer.VariantContextWriter;
import htsjdk.variant.vcf.VCFFilterHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderLineType;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import org.broadinstitute.barclay.argparser.Argument;
import org.broadinstitute.barclay.argparser.CommandLineProgramProperties;
import org.broadinstitute.barclay.argparser.ExperimentalFeature;
import org.broadinstitute.barclay.help.DocumentedFeature;
import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions;
import org.broadinstitute.hellbender.engine.FeatureContext;
import org.broadinstitute.hellbender.engine.ReadsContext;
import org.broadinstitute.hellbender.engine.ReferenceContext;
import org.broadinstitute.hellbender.engine.VariantWalker;
import org.broadinstitute.hellbender.exceptions.UserException;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ClinVarFilter;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.FuncotationFilter;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LmmFilter;
import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LofFilter;
import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer;
import picard.cmdline.programgroups.VariantEvaluationProgramGroup;

import java.io.File;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Filter variants based on clinically-significant Funcotations.
*
* This proof-of-concept tool is an example for how to parse and use the VCF output of Funcotator.
* It's currently hard-coded to look for specific {@link Funcotation}s from:
* <ul>
* <li><a href="http://www.clinvar.com/">ClinVar</a></li>
* <li><a href="http://exac.broadinstitute.org/">Exome Aggregation Consortium (ExAC)</a></li>
* <li><a href="http://personalizedmedicine.partners.org/laboratory-for-molecular-medicine/">Laboratory for Molecular Medicine (LMM)</a></li>
* </ul>
*/
@CommandLineProgramProperties(
summary = FilterFuncotations.SUMMARY,
oneLineSummary = FilterFuncotations.ONE_LINE_SUMMARY,
programGroup = VariantEvaluationProgramGroup.class
)
@DocumentedFeature
@ExperimentalFeature
public class FilterFuncotations extends VariantWalker {

static final String ONE_LINE_SUMMARY = "Filter variants based on clinically-significant Funcotations.";
static final String SUMMARY = ONE_LINE_SUMMARY +
"\nThis proof-of-concept tool is an example for how to parse and use the VCF output of Funcotator." +
"\nCurrently hard-coded to look for specific Funcotations from:" +
"\n * ClinVar (http://www.clinvar.com/)" +
"\n * Exome Aggregation Consortium (ExAC) (http://exac.broadinstitute.org/)" +
"\n * Laboratory for Molecular Medicine (LMM) (http://personalizedmedicine.partners.org/laboratory-for-molecular-medicine/)";

/**
* The version of the Human Genome reference which was used when Funcotating the input VCF.
*
* Used to derive names of Gencode Funcotations.
*/
public enum Reference {
b37(19), hg19(19), hg38(27);

private final int gencodeVersion;

Reference(int gencodeVersion) {
this.gencodeVersion = gencodeVersion;
}

public int getGencodeVersion() {
return gencodeVersion;
}
}

@Argument(
shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME,
fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME,
doc = "Output VCF file to which filtered variants should be written.")
protected File outputFile;

@Argument(
fullName = FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME,
doc = "The version of the Human Genome reference which was used to Funcotate the input VCF."
)
protected Reference reference;

private VariantContextWriter outputVcfWriter;
private String[] funcotationKeys;
private final List<FuncotationFilter> funcotationFilters = new ArrayList<>();

@Override
public void onTraversalStart() {
registerFilters();
final VCFHeader vcfHeader = getHeaderForVariants();

final VCFInfoHeaderLine funcotationHeaderLine = vcfHeader.getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME);
if (funcotationHeaderLine != null) {
funcotationKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(funcotationHeaderLine.getDescription());
outputVcfWriter = createVCFWriter(outputFile);
vcfHeader.addMetaDataLine(new VCFFilterHeaderLine(FilterFuncotationsConstants.NOT_CLINSIG_FILTER,
FilterFuncotationsConstants.NOT_CLINSIG_FILTER_DESCRIPTION));
vcfHeader.addMetaDataLine(new VCFInfoHeaderLine(FilterFuncotationsConstants.CLINSIG_INFO_KEY, 1,
VCFHeaderLineType.String, FilterFuncotationsConstants.CLINSIG_INFO_KEY_DESCRIPTION));
outputVcfWriter.writeHeader(vcfHeader);
} else {
throw new UserException.BadInput("Could not extract Funcotation keys from " +
VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME + " field in input VCF header.");
}
}

private void registerFilters() {
funcotationFilters.add(new ClinVarFilter());
funcotationFilters.add(new LofFilter(reference));
funcotationFilters.add(new LmmFilter());
}

@Override
public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) {
outputVcfWriter.add(applyFilters(variant, getMatchingFilters(variant)));
}

/**
* Collect the names of the {@link FuncotationFilter}s matching the Funcotations of the given variant.
*
* The filter will be treated as a match if it matches Funcotations for any of the transcripts in the
* variant's Funcotation map.
*/
private Set<String> getMatchingFilters(final VariantContext variant) {
final Set<String> matchingFilters = new HashSet<>();


final Map<Allele, FuncotationMap> funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute(
funcotationKeys, variant, "Gencode_" + reference.gencodeVersion + "_annotationTranscript", "FAKE_SOURCE");

funcs.values().forEach(funcotationMap -> {
final Stream<Map<String, String>> transcriptFuncotations = funcotationMap.getTranscriptList().stream()
.map(funcotationMap::get)
.map(funcotations -> funcotations.stream()
.flatMap(this::extractFuncotationFields)
.filter(entry -> entry.getValue() != null && !entry.getValue().isEmpty())
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)));

transcriptFuncotations.forEach(funcotations -> {
final Set<String> matches = funcotationFilters.stream()
.filter(f -> f.checkFilter(funcotations))
.map(FuncotationFilter::getFilterName)
.collect(Collectors.toSet());
matchingFilters.addAll(matches);
});
});

return matchingFilters;
}

/**
* Parse the entries in a Funcotation into a stream of map entries.
*/
private Stream<Map.Entry<String, String>> extractFuncotationFields(final Funcotation funcotation) {
return funcotation.getFieldNames().stream()
.map(name -> new AbstractMap.SimpleEntry<>(name, funcotation.getField(name)));
}

/**
* Mark a variant as matching a set of Funcotation filters, or as matching no filters.
*/
private VariantContext applyFilters(final VariantContext variant, final Set<String> matchingFilters) {
final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(variant);
final boolean isSignificant = !matchingFilters.isEmpty();

// Mark the individual filters that make the variant significant, if any.
final String clinicalSignificance = isSignificant ?
String.join(FilterFuncotationsConstants.FILTER_DELIMITER, matchingFilters) :
FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT;
variantContextBuilder.attribute(FilterFuncotationsConstants.CLINSIG_INFO_KEY, clinicalSignificance);

// Also set the filter field for insignificant variants, to make it easier for
// downstream tools to extract out the interesting data.
if (isSignificant) {
variantContextBuilder.passFilters();
} else {
variantContextBuilder.filter(FilterFuncotationsConstants.NOT_CLINSIG_FILTER);
}

return variantContextBuilder.make();
}

@Override
public void closeTool() {
if (outputVcfWriter != null) {
outputVcfWriter.close();
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package org.broadinstitute.hellbender.tools.funcotator;

public class FilterFuncotationsConstants {
/**
* Key for the INFO field added to all variants by {@link FilterFuncotations},
* indicating the clinical significance (if any) of the Funcotations on that variant.
*/
public static final String CLINSIG_INFO_KEY = "CLINSIG";

/**
* Description for {@value CLINSIG_INFO_KEY} to include in VCF headers.
*/
public static final String CLINSIG_INFO_KEY_DESCRIPTION =
"Rule(s) which caused this annotation to be flagged as clinically significant.";

/**
* Value to assign to {@value CLINSIG_INFO_KEY} for variants that have no
* clinically-significant Funcotations.
*/
public static final String CLINSIG_INFO_NOT_SIGNIFICANT = "NONE";

/**
* FILTER value applied by {@link FilterFuncotations} to all variants which have
* no clinically-significant Funcotations.
*/
public static final String NOT_CLINSIG_FILTER = "NOT_" + CLINSIG_INFO_KEY;

/**
* Description for {@value NOT_CLINSIG_FILTER} to include in VCF headers.
*/
public static final String NOT_CLINSIG_FILTER_DESCRIPTION = "Filter for clinically insignificant variants.";

/**
* Delimiting string to place between values in the {@value CLINSIG_INFO_KEY} INFO field
* when Funcotations for a variant match multiple filters.
*/
public static final String FILTER_DELIMITER = ",";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package org.broadinstitute.hellbender.tools.funcotator.filtrationRules;

import java.util.Arrays;
import java.util.List;

/**
* {@link FuncotationFilter} matching variants which:
* <ul>
* <li>Occur on a gene in the American College of Medical Genomics (ACMG)'s list of clinically-significant variants</li>
* <li>Have been labeled by ClinVar as pathogenic or likely pathogenic</li>
* <li>Have a max MAF of 5% across sub-populations of ExAC</li>
* </ul>
*/
public class ClinVarFilter extends FuncotationFilter {

/**
* Value to include in the {@value org.broadinstitute.hellbender.tools.funcotator.FilterFuncotationsConstants#CLINSIG_INFO_KEY}
* INFO annotation of variants matching this rule.
*/
public static final String CLINSIG_INFO_VALUE = "CLINVAR";

/**
* Funcotation which will be non-empty for variants which occur on a gene in the ACMG's list.
*
* @see <a href="https://www.ncbi.nlm.nih.gov/clinvar/docs/acmg/">The gene list</a>
*/
private static final String ACMG_DISEASE_FUNCOTATION = "ACMG_recommendation_Disease_Name";

/**
* Funcotation which contains ClinVar's assessment of a variant's clinical significance.
*
* @see <a href="https://www.ncbi.nlm.nih.gov/clinvar/docs/clinsig">Valid values for significance</a>
*/
private static final String CLINVAR_SIGNIFICANCE_FUNCOTATION = "ClinVar_VCF_CLNSIG";

/**
* Clinically-significant values to check for within the {@value CLINVAR_SIGNIFICANCE_FUNCOTATION} Funcotation.
*/
private static final List<String> CLINVAR_SIGNIFICANCE_MATCHING_VALUES = Arrays.asList("Pathogenic", "Likely_pathogenic");

/**
* Maximum MAF a variant can have in ExAC to pass this rule.
*/
private static final double CLINVAR_MAX_MAF = 0.05;

public ClinVarFilter() {
super(CLINSIG_INFO_VALUE);
}

@Override
List<FuncotationFiltrationRule> getRules() {
return Arrays.asList(
funcotations -> funcotations.containsKey(ACMG_DISEASE_FUNCOTATION),
funcotations -> {
final String significance = funcotations.getOrDefault(CLINVAR_SIGNIFICANCE_FUNCOTATION, "");
return CLINVAR_SIGNIFICANCE_MATCHING_VALUES.stream().anyMatch(significance::contains);
},
FilterFuncotationsExacUtils.buildExacMaxMafRule(CLINVAR_MAX_MAF));
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package org.broadinstitute.hellbender.tools.funcotator.filtrationRules;

import org.broadinstitute.hellbender.utils.param.ParamUtils;

import java.util.Arrays;
import java.util.Map;

public class FilterFuncotationsExacUtils {
/**
* Sub-population suffixes used within ExAC. Used for calculating max MAF.
*/
private enum ExacSubPopulation {
AFR, AMR, EAS, FIN, NFE, OTH, SAS
}

/**
* Prefix for allele-count Funcotations for each ExAC sub-population.
*/
private static String EXAC_ALLELE_COUNT_PREFIX = "ExAC_AC_";

/**
* Prefix for allele-number Funcotations for each ExAC sub-population.
*/
private static String EXAC_ALLELE_NUMBER_PREFIX = "ExAC_AN_";

/**
* Build a {@link FuncotationFiltrationRule} matching Funcotations from variants with a
* maximum MAF less than some threshold.
*
* @param maxMaf the MAF threshold to check in the rule. Must be in the range [0, 1]
* @return a {@link FuncotationFiltrationRule} matching Funcotations with a MAF (AC/AN)
* less than {@code maxMaf} across all sub-populations of ExAC
*/
public static FuncotationFiltrationRule buildExacMaxMafRule(final double maxMaf) {
ParamUtils.inRange(maxMaf, 0, 1, "MAF must be between 0 and 1");
return funcotations -> getMaxMinorAlleleFreq(funcotations) <= maxMaf;
}

/**
* Calculate the max MAF across all ExAC sub-populations from the given Funcotations.
*
* If a sub-population has an allele number of zero, it will be assigned a MAF of zero.
*/
private static double getMaxMinorAlleleFreq(final Map<String, String> funcotations) {
return Arrays.stream(ExacSubPopulation.values())
.filter(subpop -> funcotations.containsKey(EXAC_ALLELE_COUNT_PREFIX + subpop.name()))
.map(subpop -> {
final Double ac = Double.valueOf(funcotations.get(EXAC_ALLELE_COUNT_PREFIX + subpop.name()));
final Integer an = Integer.valueOf(funcotations.get(EXAC_ALLELE_NUMBER_PREFIX + subpop.name()));

if (an == 0) {
// If a variant has never been seen in ExAC, report it as 0% MAF.
return 0d;
} else {
return ac / an;
}
})
.max(Double::compareTo)
.orElse(0d);
}
}
Loading