diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java new file mode 100644 index 00000000000..11c59ad8c3a --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java @@ -0,0 +1,203 @@ +package org.broadinstitute.hellbender.tools.funcotator; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import htsjdk.variant.vcf.VCFFilterHeaderLine; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLineType; +import htsjdk.variant.vcf.VCFInfoHeaderLine; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.ReadsContext; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.VariantWalker; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ClinVarFilter; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.FuncotationFilter; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LmmFilter; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LofFilter; +import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; +import picard.cmdline.programgroups.VariantEvaluationProgramGroup; + +import java.io.File; +import 
java.util.AbstractMap; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * Filter variants based on clinically-significant Funcotations. + * + * This proof-of-concept tool is an example for how to parse and use the VCF output of Funcotator. + * It's currently hard-coded to look for specific {@link Funcotation}s from: + * + */ +@CommandLineProgramProperties( + summary = FilterFuncotations.SUMMARY, + oneLineSummary = FilterFuncotations.ONE_LINE_SUMMARY, + programGroup = VariantEvaluationProgramGroup.class +) +@DocumentedFeature +@ExperimentalFeature +public class FilterFuncotations extends VariantWalker { + + static final String ONE_LINE_SUMMARY = "Filter variants based on clinically-significant Funcotations."; + static final String SUMMARY = ONE_LINE_SUMMARY + + "\nThis proof-of-concept tool is an example for how to parse and use the VCF output of Funcotator." + + "\nCurrently hard-coded to look for specific Funcotations from:" + + "\n * ClinVar (http://www.clinvar.com/)" + + "\n * Exome Aggregation Consortium (ExAC) (http://exac.broadinstitute.org/)" + + "\n * Laboratory for Molecular Medicine (LMM) (http://personalizedmedicine.partners.org/laboratory-for-molecular-medicine/)"; + + /** + * The version of the Human Genome reference which was used when Funcotating the input VCF. + * + * Used to derive names of Gencode Funcotations. 
+ */ + public enum Reference { + b37(19), hg19(19), hg38(27); + + private final int gencodeVersion; + + Reference(int gencodeVersion) { + this.gencodeVersion = gencodeVersion; + } + + public int getGencodeVersion() { + return gencodeVersion; + } + } + + @Argument( + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + doc = "Output VCF file to which filtered variants should be written.") + protected File outputFile; + + @Argument( + fullName = FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME, + doc = "The version of the Human Genome reference which was used to Funcotate the input VCF." + ) + protected Reference reference; + + private VariantContextWriter outputVcfWriter; + private String[] funcotationKeys; + private final List funcotationFilters = new ArrayList<>(); + + @Override + public void onTraversalStart() { + registerFilters(); + final VCFHeader vcfHeader = getHeaderForVariants(); + + final VCFInfoHeaderLine funcotationHeaderLine = vcfHeader.getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME); + if (funcotationHeaderLine != null) { + funcotationKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(funcotationHeaderLine.getDescription()); + outputVcfWriter = createVCFWriter(outputFile); + vcfHeader.addMetaDataLine(new VCFFilterHeaderLine(FilterFuncotationsConstants.NOT_CLINSIG_FILTER, + FilterFuncotationsConstants.NOT_CLINSIG_FILTER_DESCRIPTION)); + vcfHeader.addMetaDataLine(new VCFInfoHeaderLine(FilterFuncotationsConstants.CLINSIG_INFO_KEY, 1, + VCFHeaderLineType.String, FilterFuncotationsConstants.CLINSIG_INFO_KEY_DESCRIPTION)); + outputVcfWriter.writeHeader(vcfHeader); + } else { + throw new UserException.BadInput("Could not extract Funcotation keys from " + + VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME + " field in input VCF header."); + } + } + + private void registerFilters() { + funcotationFilters.add(new ClinVarFilter()); + funcotationFilters.add(new 
LofFilter(reference)); + funcotationFilters.add(new LmmFilter()); + } + + @Override + public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) { + outputVcfWriter.add(applyFilters(variant, getMatchingFilters(variant))); + } + + /** + * Collect the names of the {@link FuncotationFilter}s matching the Funcotations of the given variant. + * + * The filter will be treated as a match if it matches Funcotations for any of the transcripts in the + * variant's Funcotation map. + */ + private Set getMatchingFilters(final VariantContext variant) { + final Set matchingFilters = new HashSet<>(); + + + final Map funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute( + funcotationKeys, variant, "Gencode_" + reference.gencodeVersion + "_annotationTranscript", "FAKE_SOURCE"); + + funcs.values().forEach(funcotationMap -> { + final Stream> transcriptFuncotations = funcotationMap.getTranscriptList().stream() + .map(funcotationMap::get) + .map(funcotations -> funcotations.stream() + .flatMap(this::extractFuncotationFields) + .filter(entry -> entry.getValue() != null && !entry.getValue().isEmpty()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + + transcriptFuncotations.forEach(funcotations -> { + final Set matches = funcotationFilters.stream() + .filter(f -> f.checkFilter(funcotations)) + .map(FuncotationFilter::getFilterName) + .collect(Collectors.toSet()); + matchingFilters.addAll(matches); + }); + }); + + return matchingFilters; + } + + /** + * Parse the entries in a Funcotation into a stream of map entries. + */ + private Stream> extractFuncotationFields(final Funcotation funcotation) { + return funcotation.getFieldNames().stream() + .map(name -> new AbstractMap.SimpleEntry<>(name, funcotation.getField(name))); + } + + /** + * Mark a variant as matching a set of Funcotation filters, or as matching no filters. 
+ */ + private VariantContext applyFilters(final VariantContext variant, final Set matchingFilters) { + final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(variant); + final boolean isSignificant = !matchingFilters.isEmpty(); + + // Mark the individual filters that make the variant significant, if any. + final String clinicalSignificance = isSignificant ? + String.join(FilterFuncotationsConstants.FILTER_DELIMITER, matchingFilters) : + FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT; + variantContextBuilder.attribute(FilterFuncotationsConstants.CLINSIG_INFO_KEY, clinicalSignificance); + + // Also set the filter field for insignificant variants, to make it easier for + // downstream tools to extract out the interesting data. + if (isSignificant) { + variantContextBuilder.passFilters(); + } else { + variantContextBuilder.filter(FilterFuncotationsConstants.NOT_CLINSIG_FILTER); + } + + return variantContextBuilder.make(); + } + + @Override + public void closeTool() { + if (outputVcfWriter != null) { + outputVcfWriter.close(); + } + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsConstants.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsConstants.java new file mode 100644 index 00000000000..9f973a069cf --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsConstants.java @@ -0,0 +1,38 @@ +package org.broadinstitute.hellbender.tools.funcotator; + +public class FilterFuncotationsConstants { + /** + * Key for the INFO field added to all variants by {@link FilterFuncotations}, + * indicating the clinical significance (if any) of the Funcotations on that variant. + */ + public static final String CLINSIG_INFO_KEY = "CLINSIG"; + + /** + * Description for {@value CLINSIG_INFO_KEY} to include in VCF headers. 
+ */ + public static final String CLINSIG_INFO_KEY_DESCRIPTION = + "Rule(s) which caused this annotation to be flagged as clinically significant."; + + /** + * Value to assign to {@value CLINSIG_INFO_KEY} for variants that have no + * clinically-significant Funcotations. + */ + public static final String CLINSIG_INFO_NOT_SIGNIFICANT = "NONE"; + + /** + * FILTER value applied by {@link FilterFuncotations} to all variants which have + * no clinically-significant Funcotations. + */ + public static final String NOT_CLINSIG_FILTER = "NOT_" + CLINSIG_INFO_KEY; + + /** + * Description for {@value NOT_CLINSIG_FILTER} to include in VCF headers. + */ + public static final String NOT_CLINSIG_FILTER_DESCRIPTION = "Filter for clinically insignificant variants."; + + /** + * Delimiting string to place between values in the {@value CLINSIG_INFO_KEY} INFO field + * when Funcotations for a variant match multiple filters. + */ + public static final String FILTER_DELIMITER = ","; +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java new file mode 100644 index 00000000000..13ac98aaa89 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java @@ -0,0 +1,60 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import java.util.Arrays; +import java.util.List; + +/** + * {@link FuncotationFilter} matching variants which: + *
    + *
  • Occur on a gene in the American College of Medical Genetics and Genomics (ACMG)'s list of clinically-significant variants
  • + *
  • Have been labeled by ClinVar as pathogenic or likely pathogenic
  • + *
  • Have a minor allele frequency (MAF) of at most 5% in every sub-population of ExAC
  • + *
+ */ +public class ClinVarFilter extends FuncotationFilter { + + /** + * Value to include in the {@value org.broadinstitute.hellbender.tools.funcotator.FilterFuncotationsConstants#CLINSIG_INFO_KEY} + * INFO annotation of variants matching this rule. + */ + public static final String CLINSIG_INFO_VALUE = "CLINVAR"; + + /** + * Funcotation which will be non-empty for variants which occur on a gene in the ACMG's list. + * + * @see The gene list + */ + private static final String ACMG_DISEASE_FUNCOTATION = "ACMG_recommendation_Disease_Name"; + + /** + * Funcotation which contains ClinVar's assessment of a variant's clinical significance. + * + * @see Valid values for significance + */ + private static final String CLINVAR_SIGNIFICANCE_FUNCOTATION = "ClinVar_VCF_CLNSIG"; + + /** + * Clinically-significant values to check for within the {@value CLINVAR_SIGNIFICANCE_FUNCOTATION} Funcotation. + */ + private static final List CLINVAR_SIGNIFICANCE_MATCHING_VALUES = Arrays.asList("Pathogenic", "Likely_pathogenic"); + + /** + * Maximum MAF a variant can have in ExAC to pass this rule. 
+ */ + private static final double CLINVAR_MAX_MAF = 0.05; + + public ClinVarFilter() { + super(CLINSIG_INFO_VALUE); + } + + @Override + List getRules() { + return Arrays.asList( + funcotations -> funcotations.containsKey(ACMG_DISEASE_FUNCOTATION), + funcotations -> { + final String significance = funcotations.getOrDefault(CLINVAR_SIGNIFICANCE_FUNCOTATION, ""); + return CLINVAR_SIGNIFICANCE_MATCHING_VALUES.stream().anyMatch(significance::contains); + }, + FilterFuncotationsExacUtils.buildExacMaxMafRule(CLINVAR_MAX_MAF)); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java new file mode 100644 index 00000000000..8cd5bbde9f4 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java @@ -0,0 +1,61 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import org.broadinstitute.hellbender.utils.param.ParamUtils; + +import java.util.Arrays; +import java.util.Map; + +public class FilterFuncotationsExacUtils { + /** + * Sub-population suffixes used within ExAC. Used for calculating max MAF. + */ + private enum ExacSubPopulation { + AFR, AMR, EAS, FIN, NFE, OTH, SAS + } + + /** + * Prefix for allele-count Funcotations for each ExAC sub-population. + */ + private static String EXAC_ALLELE_COUNT_PREFIX = "ExAC_AC_"; + + /** + * Prefix for allele-number Funcotations for each ExAC sub-population. + */ + private static String EXAC_ALLELE_NUMBER_PREFIX = "ExAC_AN_"; + + /** + * Build a {@link FuncotationFiltrationRule} matching Funcotations from variants with a + * maximum MAF less than some threshold. + * + * @param maxMaf the MAF threshold to check in the rule. 
Must be in the range [0, 1] + * @return a {@link FuncotationFiltrationRule} matching Funcotations with a MAF (AC/AN) + * less than {@code maxMaf} across all sub-populations of ExAC + */ + public static FuncotationFiltrationRule buildExacMaxMafRule(final double maxMaf) { + ParamUtils.inRange(maxMaf, 0, 1, "MAF must be between 0 and 1"); + return funcotations -> getMaxMinorAlleleFreq(funcotations) <= maxMaf; + } + + /** + * Calculate the max MAF across all ExAC sub-populations from the given Funcotations. + * + * If a sub-population has an allele number of zero, it will be assigned a MAF of zero. + */ + private static double getMaxMinorAlleleFreq(final Map funcotations) { + return Arrays.stream(ExacSubPopulation.values()) + .filter(subpop -> funcotations.containsKey(EXAC_ALLELE_COUNT_PREFIX + subpop.name())) + .map(subpop -> { + final Double ac = Double.valueOf(funcotations.get(EXAC_ALLELE_COUNT_PREFIX + subpop.name())); + final Integer an = Integer.valueOf(funcotations.get(EXAC_ALLELE_NUMBER_PREFIX + subpop.name())); + + if (an == 0) { + // If a variant has never been seen in ExAC, report it as 0% MAF. + return 0d; + } else { + return ac / an; + } + }) + .max(Double::compareTo) + .orElse(0d); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java new file mode 100644 index 00000000000..61f5f1cfa50 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java @@ -0,0 +1,49 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import org.broadinstitute.hellbender.utils.Utils; + +import java.util.List; +import java.util.Map; + +/** + * A filter to apply to Funcotations in {@link org.broadinstitute.hellbender.tools.funcotator.FilterFuncotations}. 
+ * + * Filters can define an arbitrary number of rules which must match on the Funcotations of a variant in order + * for that variant to "pass". Passing variants will be annotated with the filter's name in the output VCF. + */ +public abstract class FuncotationFilter { + + /** + * The INFO annotation value which should be added to all variants which pass this filter. + */ + private final String filterName; + + FuncotationFilter(final String filterName) { + this.filterName = filterName; + } + + public String getFilterName() { + return filterName; + } + + /** + * Check all of this filter's rules against a set of Funcotations. + * + * @param prunedTranscriptFuncotations Funcotation values of a single transcript. Assumed to have + * been "pruned" to remove null / empty values. Never {@code null} + * @return true if the Funcotations match all of this filter's rules, and false otherwise + */ + public Boolean checkFilter(final Map prunedTranscriptFuncotations) { + Utils.nonNull(prunedTranscriptFuncotations); + + return getRules().stream() + .map(rule -> rule.checkRule(prunedTranscriptFuncotations)) + .reduce(Boolean::logicalAnd) + .orElse(false); + } + + /** + * Build the collection of rules which must match to pass this filter. + */ + abstract List getRules(); +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java new file mode 100644 index 00000000000..9d0fd661e5d --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java @@ -0,0 +1,14 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import java.util.Map; + +/** + * A rule to match against the Funcotations from a variant within a {@link FuncotationFilter}. 
+ */ +interface FuncotationFiltrationRule { + + /** + * Check if a set of Funcotations matches this rule. + */ + boolean checkRule(final Map funcotations); +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java new file mode 100644 index 00000000000..49eb91fa9ba --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java @@ -0,0 +1,33 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import java.util.Collections; +import java.util.List; + +/** + * {@link FuncotationFilter} matching variants which: + *
    + *
  • Have been flagged by LMM as important for loss of function.
  • + *
+ */ +public class LmmFilter extends FuncotationFilter { + + /** + * Value to include in the {@value org.broadinstitute.hellbender.tools.funcotator.FilterFuncotationsConstants#CLINSIG_INFO_KEY} + * INFO annotation of variants matching this rule. + */ + public static final String CLINSIG_INFO_VALUE = "LMM"; + + /** + * Funcotation which will contain "true" for variants which LMM has marked as important. + */ + private static final String LMM_FLAGGED = "LMMKnown_LMM_FLAGGED"; + + public LmmFilter() { + super(CLINSIG_INFO_VALUE); + } + + @Override + List getRules() { + return Collections.singletonList(funcotations -> Boolean.valueOf(funcotations.getOrDefault(LMM_FLAGGED, "false"))); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java new file mode 100644 index 00000000000..6a5ccfe72f0 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java @@ -0,0 +1,68 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import org.broadinstitute.hellbender.tools.funcotator.FilterFuncotations; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; + +import java.util.Arrays; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * {@link FuncotationFilter} matching variants which: + *
    + *
  • Are classified as FRAME_SHIFT_*, NONSENSE, START_CODON_DEL, or SPLICE_SITE
  • + *
  • Occur on a gene where loss of function is a disease mechanism
  • + *
  • Have a minor allele frequency (MAF) of at most 1% in every sub-population of ExAC
  • + *
+ */ +public class LofFilter extends FuncotationFilter { + + /** + * Value to include in the {@value org.broadinstitute.hellbender.tools.funcotator.FilterFuncotationsConstants#CLINSIG_INFO_KEY} + * INFO annotation of variants matching this rule. + */ + public static final String CLINSIG_INFO_VALUE = "LOF"; + + /** + * Funcotation which will contain "YES" for variants which are important for loss of function. + */ + private static final String LOF_GENE_FUNCOTATION = "ACMGLMMLof_LOF_Mechanism"; + + /** + * Variant classifications which should be matched by this filter. + */ + private static final Set CONSTANT_LOF_CLASSIFICATIONS = Stream.of( + GencodeFuncotation.VariantClassification.FRAME_SHIFT_DEL, + GencodeFuncotation.VariantClassification.FRAME_SHIFT_INS, + GencodeFuncotation.VariantClassification.NONSENSE, + GencodeFuncotation.VariantClassification.START_CODON_DEL, + GencodeFuncotation.VariantClassification.SPLICE_SITE + ).map(GencodeFuncotation.VariantClassification::toString).collect(Collectors.toSet()); + + /** + * Maximum MAF a variant can have in ExAC to pass this rule. + */ + private static final double LOF_MAX_MAF = 0.01; + + /** + * Funcotation which will contain the variant classification determined by Funcotator. + * + * Varies based on gencode version. 
+ */ + private final String classificationFuncotation; + + public LofFilter(final FilterFuncotations.Reference ref) { + super(CLINSIG_INFO_VALUE); + this.classificationFuncotation = "Gencode_" + ref.getGencodeVersion() + "_variantClassification"; + } + + @Override + List getRules() { + return Arrays.asList( + funcotations -> CONSTANT_LOF_CLASSIFICATIONS.contains(funcotations.getOrDefault(classificationFuncotation, "")), + funcotations -> funcotations.getOrDefault(LOF_GENE_FUNCOTATION, "").equals("YES"), + FilterFuncotationsExacUtils.buildExacMaxMafRule(LOF_MAX_MAF)); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java new file mode 100644 index 00000000000..fe031d7dab3 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java @@ -0,0 +1,68 @@ +package org.broadinstitute.hellbender.tools.funcotator; + +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFHeader; +import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ClinVarFilter; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LmmFilter; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LofFilter; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.File; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class FilterFuncotationsIntegrationTest extends CommandLineProgramTest { + + private static final Path TEST_DATA_DIR = 
getTestDataDir().toPath().resolve("FilterFuncotations"); + + private static final Set ALL_FILTERS = new HashSet<>(Arrays.asList( + ClinVarFilter.CLINSIG_INFO_VALUE, LofFilter.CLINSIG_INFO_VALUE, LmmFilter.CLINSIG_INFO_VALUE)); + + @DataProvider(name = "uniformVcfProvider") + public Object[][] uniformVcfProvider() { + return new Object[][]{ + {"clinvar.vcf", FilterFuncotations.Reference.hg19, Collections.emptySet(), Collections.singleton(ClinVarFilter.CLINSIG_INFO_VALUE)}, + {"lmm.vcf", FilterFuncotations.Reference.hg38, Collections.emptySet(), Collections.singleton(LmmFilter.CLINSIG_INFO_VALUE)}, + {"lof.vcf", FilterFuncotations.Reference.b37, Collections.emptySet(), Collections.singleton(LofFilter.CLINSIG_INFO_VALUE)}, + {"all.vcf", FilterFuncotations.Reference.hg38, Collections.emptySet(), ALL_FILTERS}, + {"multi-transcript.vcf", FilterFuncotations.Reference.hg38, Collections.emptySet(), ALL_FILTERS}, + {"multi-allelic.vcf", FilterFuncotations.Reference.hg38, Collections.emptySet(), ALL_FILTERS}, + {"none.vcf", FilterFuncotations.Reference.hg38, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER), + Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)} + }; + } + + @Test(dataProvider = "uniformVcfProvider") + public void testFilterUniform(final String vcfName, + final FilterFuncotations.Reference ref, + final Set expectedFilters, + final Set expectedAnnotations) { + + final File tmpOut = createTempFile(vcfName + ".filtered", ".vcf"); + + final List args = Arrays.asList( + "-V", TEST_DATA_DIR.resolve(vcfName).toString(), + "-O", tmpOut.toString(), + "--ref-version", ref.name() + ); + runCommandLine(args); + + final Pair> vcf = VariantContextTestUtils.readEntireVCFIntoMemory(tmpOut.toString()); + vcf.getRight().forEach(variant -> { + Assert.assertEquals(variant.getFilters(), expectedFilters); + + final List clinsigAnnotations = variant.getCommonInfo() + 
.getAttributeAsStringList(FilterFuncotationsConstants.CLINSIG_INFO_KEY, ""); + Assert.assertEquals(new HashSet<>(clinsigAnnotations), expectedAnnotations); + }); + } +} diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/all.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/all.vcf new file mode 100644 index 00000000000..34bc660d374 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/all.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.2 +##Funcotator Version=0.0.3 | Gencode 27 ALL | LMMKnown 20180618 +##FORMAT= +##INFO= +##buildName=GRCh38 +##contig= +##reference=file:///cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 55039931 . G A . . FUNCOTATION=[ENST00000302118.5|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Likely_pathogenic|YES|NONSENSE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|true] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/clinvar.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/clinvar.vcf new file mode 100644 index 00000000000..47ce4aeacf2 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/clinvar.vcf @@ -0,0 +1,8 @@ +##fileformat=VCFv4.2 +##INFO= +##contig= +##reference=file:///cromwell_root/broad-references/hg19/v0/Homo_sapiens_assembly19.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +1 17349143 . C T . . FUNCOTATION=[ENST00000375499.3|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Pathogenic|4|100|3|100|2|100|1|100|2|100|3|100|4|100] +1 17349144 . C T . . FUNCOTATION=[ENST00000375499.3|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Likely_pathogenic|1|100|2|100|3|100|4|100|3|100|2|100|1|100] +1 17349145 . C T . . 
FUNCOTATION=[ENST00000375499.3|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Pathogenic/Likely_pathogenic|4|100|1|100|3|100|2|100|3|100|1|100|4|100] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lmm.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lmm.vcf new file mode 100644 index 00000000000..4529f4fd916 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lmm.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.2 +##Funcotator Version=0.0.3 | Gencode 27 ALL | LMMKnown 20180618 +##FORMAT= +##INFO= +##buildName=GRCh38 +##contig= +##reference=file:///cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 55039931 . G A . . FUNCOTATION=[ENST00000302118.5|true] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lof.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lof.vcf new file mode 100644 index 00000000000..ed625a67c39 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lof.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##INFO= +##contig= +##reference=file:///cromwell_root/broad-references/hg19/v0/Homo_sapiens_assembly19.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +1 17349143 . C T . . FUNCOTATION=[ENST00000375499.3|YES|NONSENSE|4|1000|1|1000|1|1000|1|1000|1|1000|1|1000|1|1000] +1 17349144 . C T . . FUNCOTATION=[ENST00000375499.3|YES|START_CODON_DEL|7|1000|9|1000|1|1000|5|1000|8|1000|2|1000|0|1000] +1 17349145 . C T . . FUNCOTATION=[ENST00000375499.3|YES|SPLICE_SITE|3|1000|3|1000|3|1000|3|1000|3|1000|3|1000|3|1000] +1 17349146 . C T . . FUNCOTATION=[ENST00000375499.3|YES|FRAME_SHIFT_INS|4|1000|3|1000|2|1000|1|1000|2|1000|3|1000|4|1000] +1 173491467 . C T . . 
FUNCOTATION=[ENST00000375499.3|YES|FRAME_SHIFT_DEL|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-allelic.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-allelic.vcf new file mode 100644 index 00000000000..cc2535227c6 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-allelic.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.2 +##Funcotator Version=0.0.3 | Gencode 27 ALL | LMMKnown 20180618 +##FORMAT= +##INFO= +##buildName=GRCh38 +##contig= +##reference=file:///cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 55039931 . G A,T . . FUNCOTATION=[ENST00000302118.5|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Likely_pathogenic|YES|NONSENSE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|],[ENST00000302118.5|||||10|100|10|100|10|100|10|100|10|100|10|100|10|100|true] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-transcript.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-transcript.vcf new file mode 100644 index 00000000000..a593c3ebd95 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-transcript.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.2 +##Funcotator Version=0.0.3 | Gencode 27 ALL | LMMKnown 20180618 +##FORMAT= +##INFO= +##buildName=GRCh38 +##contig= +##reference=file:///cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 55039931 . G A . . 
FUNCOTATION=[ENST00000302118.5|||||||||||||||||||true]#[ENST00000302118.6|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Likely_pathogenic|||5|100|4|100|3|100|2|100|1|100|2|100|3|100|]#[ENST00000302118.7|||YES|NONSENSE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/none.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/none.vcf new file mode 100644 index 00000000000..7be93c02805 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/none.vcf @@ -0,0 +1,13 @@ +##fileformat=VCFv4.2 +##Funcotator Version=0.0.3 | Gencode 27 ALL | LMMKnown 20180618 +##FORMAT= +##INFO= +##buildName=GRCh38 +##contig= +##reference=file:///cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 55039931 . G A . . FUNCOTATION=[ENST00000302118.5||Likely_pathogenic|NO|NONSENSE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|false] +chr1 55039932 . G A . . FUNCOTATION=[ENST00000302118.5|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Benign|NO|NONSENSE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|false] +chr1 55039933 . G A . . FUNCOTATION=[ENST00000302118.5||Likely_pathogenic|YES|COULD_NOT_DETERMINE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|false] +chr1 55039934 . G A . . FUNCOTATION=[ENST00000302118.5|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Benign|YES|COULD_NOT_DETERMINE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|false] +chr1 55039935 . G A . . FUNCOTATION=[ENST00000302118.5|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Likely_pathogenic|YES|NONSENSE|5|1000|6|1000|7|100|8|1000|0|1000|0|1000|9|1000|false]