From fa9b6c2b401737c7eef764d8885e85cb0bac414e Mon Sep 17 00:00:00 2001 From: Dan Moran Date: Sat, 7 Jul 2018 15:54:52 -0400 Subject: [PATCH 1/8] Add experimental FilterFuncotations tool. Co-authored-by: Jay Carey --- .../tools/funcotator/FilterFuncotations.java | 334 ++++++++++++++++++ .../FilterFuncotationsIntegrationTest.java | 62 ++++ .../tools/FilterFuncotations/all.vcf | 9 + .../tools/FilterFuncotations/clinvar.vcf | 8 + .../tools/FilterFuncotations/lmm.vcf | 9 + .../tools/FilterFuncotations/lof.vcf | 10 + .../tools/FilterFuncotations/none.vcf | 13 + 7 files changed, 445 insertions(+) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java create mode 100644 src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/all.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/clinvar.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lmm.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lof.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/none.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java new file mode 100644 index 00000000000..b2fd9af2a60 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java @@ -0,0 +1,334 @@ +package org.broadinstitute.hellbender.tools.funcotator; + +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; +import 
htsjdk.variant.vcf.VCFFilterHeaderLine; +import htsjdk.variant.vcf.VCFHeader; +import htsjdk.variant.vcf.VCFHeaderLineType; +import htsjdk.variant.vcf.VCFInfoHeaderLine; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.broadinstitute.barclay.argparser.Argument; +import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; +import org.broadinstitute.barclay.argparser.ExperimentalFeature; +import org.broadinstitute.barclay.help.DocumentedFeature; +import org.broadinstitute.hellbender.cmdline.StandardArgumentDefinitions; +import org.broadinstitute.hellbender.engine.FeatureContext; +import org.broadinstitute.hellbender.engine.ReadsContext; +import org.broadinstitute.hellbender.engine.ReferenceContext; +import org.broadinstitute.hellbender.engine.VariantWalker; +import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; +import picard.cmdline.programgroups.VariantEvaluationProgramGroup; + +import java.io.File; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +@CommandLineProgramProperties( + summary = FilterFuncotations.SUMMARY, + oneLineSummary = FilterFuncotations.ONE_LINE_SUMMARY, + programGroup = VariantEvaluationProgramGroup.class +) +@DocumentedFeature +@ExperimentalFeature +public class FilterFuncotations extends VariantWalker { + + static final String ONE_LINE_SUMMARY = "Filter variants based on clinically-significant Funcotations."; + static final String SUMMARY = ONE_LINE_SUMMARY + + " Proof-of-concept hard-coded to look for specific Funcotations from ClinVar, ExAC, and LMM."; + + static final String CLINSIG_RULE_KEY = "CLINSIG"; + static final String NOT_CLINSIG_FILTER = "NOT_" + 
CLINSIG_RULE_KEY; + + enum ReferenceVersion { + hg19(19), hg38(27); + + final int gencodeVersion; + + ReferenceVersion(int gencodeVersion) { + this.gencodeVersion = gencodeVersion; + } + } + + @Argument( + shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, + fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, + doc = "Output VCF file to which annotated variants should be written.") + protected File outputFile; + + @Argument( + fullName = FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME, + doc = "The version of the Human Genome reference which was used to Funcotate the input VCF." + ) + protected ReferenceVersion referenceVersion; + + private VariantContextWriter outputVcfWriter; + private String[] funcotationKeys; + private List funcotationFilters = new ArrayList<>(); + + @Override + public void onTraversalStart() { + registerFilters(); + final VCFHeader vcfHeader = getHeaderForVariants(); + + final VCFInfoHeaderLine funcotationHeaderLine = vcfHeader.getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME); + if (funcotationHeaderLine != null) { + funcotationKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(funcotationHeaderLine.getDescription()); + outputVcfWriter = createVCFWriter(outputFile); + vcfHeader.addMetaDataLine(new VCFFilterHeaderLine(NOT_CLINSIG_FILTER, "Filter for clinically insignificant variants.")); + vcfHeader.addMetaDataLine(new VCFInfoHeaderLine(CLINSIG_RULE_KEY, 1, VCFHeaderLineType.String, + "Rule(s) which caused this annotation to be flagged as clinically significant.")); + outputVcfWriter.writeHeader(vcfHeader); + } else { + throw new UserException.BadInput("Input VCF does not have Funcotator annotations."); + } + } + + private void registerFilters() { + funcotationFilters.add(new ClinVarFilter()); + funcotationFilters.add(new LofFilter(referenceVersion)); + funcotationFilters.add(new LmmFilter()); + } + + @Override + public void apply(VariantContext variant, ReadsContext readsContext, 
ReferenceContext referenceContext, FeatureContext featureContext) { + outputVcfWriter.add(applyFilters(variant)); + } + + private VariantContext applyFilters(VariantContext variant) { + + final Set matchingFilters = new HashSet<>(); + final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(variant); + + final Map funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute( + funcotationKeys, variant, "Gencode_" + referenceVersion.gencodeVersion + "_annotationTranscript", "FILTER"); + + funcs.values().forEach(funcotationMap -> { + final Stream> transcriptFuncotations = funcotationMap.getTranscriptList().stream() + .map(funcotationMap::get) + .map(funcotations -> funcotations.stream() + .flatMap(this::extractFuncotationFields) + .filter(entry -> entry.getValue() != null && !entry.getValue().isEmpty()) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue))); + + transcriptFuncotations.forEach(funcotations -> { + final Set matches = funcotationFilters.stream() + .filter(f -> f.checkFilter(variant, funcotations)) + .map(FuncotationFilter::getFilterName) + .collect(Collectors.toSet()); + matchingFilters.addAll(matches); + }); + }); + + String clinicalSignificance = matchingFilters.isEmpty() ? 
"NONE" : String.join(",", matchingFilters); + variantContextBuilder.attribute(CLINSIG_RULE_KEY, clinicalSignificance); + + if (matchingFilters.isEmpty()) { + variantContextBuilder.filter(NOT_CLINSIG_FILTER); + } else { + variantContextBuilder.passFilters(); + } + return variantContextBuilder.make(); + } + + private Stream> extractFuncotationFields(final Funcotation funcotation) { + return funcotation.getFieldNames().stream() + .map(name -> new AbstractMap.SimpleEntry<>(name, funcotation.getField(name))); + } + + @Override + public void closeTool() { + if (outputVcfWriter != null) { + outputVcfWriter.close(); + } + } +} + +abstract class FuncotationFiltrationRule { + + enum ExacSubPopulation { + AFR, AMR, EAS, FIN, NFE, OTH, SAS + } + + private static Logger logger = LogManager.getLogger(ClinVarFilter.class); + private final String ruleName; + + FuncotationFiltrationRule(final String ruleName) { + this.ruleName = ruleName; + } + + boolean checkRule(final VariantContext variant,final Map prunedTranscriptFuncotations) { + return !prunedTranscriptFuncotations.isEmpty() && + optionallyLog(applyRule(variant, prunedTranscriptFuncotations), variant); + } + + abstract boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations); + + private boolean optionallyLog(final boolean result, final VariantContext variant) { + if (result) logger.debug(String.format("Matched Rule: %s For Variant %s", ruleName, variant)); + return result; + } + + double getMaxMinorAlleleFreq(final Map funcotations) { + return Arrays.stream(ExacSubPopulation.values()) + .filter(subpop -> funcotations.containsKey("ExAC_AC_" + subpop.name())) + .map(subpop -> { + final Double ac = Double.valueOf(funcotations.get("ExAC_AC_" + subpop.name())); + final Integer an = Integer.valueOf(funcotations.get("ExAC_AN_" + subpop.name())); + + if (an == 0) { + // If a variant has never been seen in ExAC, report it as 0% MAF. 
+ return 0d; + } else { + return ac / an; + } + }) + .max(Double::compareTo) + .orElse(0d); + } +} + +abstract class FuncotationFilter { + + private final String filterName; + + FuncotationFilter(final String filterName) { + this.filterName = filterName; + } + + Boolean checkFilter(final VariantContext variant, final Map prunedTranscriptFuncotations) { + return getRules().stream() + .map(rule -> rule.checkRule(variant, prunedTranscriptFuncotations)) + .reduce(Boolean::logicalAnd) + .orElse(false); + } + + abstract List getRules(); + + public String getFilterName() { + return filterName; + } +} + + +class ClinVarFilter extends FuncotationFilter { + + private static final String ACMG_DISEASE_FUNCOTATION = "ACMG_recommendation_Disease_Name"; + private static final String CLIN_VAR_VCF_CLNSIG = "ClinVar_VCF_CLNSIG"; + + ClinVarFilter() { + super("CLINVAR"); + } + + @Override + List getRules() { + final List clinVarFiltrationRules = new ArrayList<>(); + + // 1) The gene name must be on the ACMG59 list (American College of Medical Genomics). + clinVarFiltrationRules.add(new FuncotationFiltrationRule("ClinVar-ACMG59") { + @Override + boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { + return prunedTranscriptFuncotations.containsKey(ACMG_DISEASE_FUNCOTATION); + } + }); + + // 2) ClinVar annotations specifies Pathogenicity or Likely pathogenic. 
+ clinVarFiltrationRules.add(new FuncotationFiltrationRule("ClinVar-pathogenic") { + @Override + boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { + final String significance = prunedTranscriptFuncotations.getOrDefault(CLIN_VAR_VCF_CLNSIG, ""); + return significance.contains("Pathogenic") || significance.contains("Likely_pathogenic"); + } + }); + + // 3) Frequency: Max Minor Allele Freq is ≤5% in GnoMAD (ExAC for Proof of Concept) + clinVarFiltrationRules.add(new FuncotationFiltrationRule("ClinVar-MAF") { + @Override + boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { + return getMaxMinorAlleleFreq(prunedTranscriptFuncotations) <= 0.05; + } + }); + return clinVarFiltrationRules; + } +} + +class LofFilter extends FuncotationFilter { + + private static final String LOF_GENE_FUNCOTATION = "ACMGLMMLof_LOF_Mechanism"; + private static final String FRAME_SHIFT_PREFIX = "FRAME_SHIFT_"; + private static final List CONSTANT_LOF_CLASSIFICATIONS = Arrays.asList("NONSENSE", "START_CODON_DEL", "SPLICE_SITE"); + + private final String classificationFuncotation; + + LofFilter(final FilterFuncotations.ReferenceVersion ref) { + super("LOF"); + this.classificationFuncotation = "Gencode_" + ref.gencodeVersion + "_variantClassification"; + } + + @Override + List getRules() { + final List lofFiltrationRules = new ArrayList<>(); + // 1) 1) Variant classification is FRAME_SHIFT_*, NONSENSE, START_CODON_DEL, and SPLICE_SITE + // (within 2 bases on either side of exon or intron) on any transcript. 
+ // TODO + lofFiltrationRules.add(new FuncotationFiltrationRule("LOF-class") { + + @Override + boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { + final String classification = prunedTranscriptFuncotations.getOrDefault(classificationFuncotation, ""); + return classification.startsWith(FRAME_SHIFT_PREFIX) || CONSTANT_LOF_CLASSIFICATIONS.contains(classification); + } + }); + + // 2) LoF is disease mechanism (that is do not flag genes where LoF is not part of disease mechanism e.g. RyR1) + // - create static list + lofFiltrationRules.add(new FuncotationFiltrationRule("LOF-mechanism") { + @Override + boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { + return prunedTranscriptFuncotations.getOrDefault(LOF_GENE_FUNCOTATION, "NO").equals("YES"); + } + }); + + // 3) Frequency: Max Minor Allele Freq is ≤1% in GnoMAD (ExAC for Proof of Concept) + lofFiltrationRules.add(new FuncotationFiltrationRule("LOF-MAF") { + @Override + boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { + return getMaxMinorAlleleFreq(prunedTranscriptFuncotations) <= 0.01; + } + }); + return lofFiltrationRules; + } +} + +class LmmFilter extends FuncotationFilter { + + private static final String LMM_FLAGGED = "LMMKnown_LMM_FLAGGED"; + + LmmFilter() { + super("LMM"); + } + + @Override + List getRules() { + // 1) LMM gives us a list of all path/LP variants they have seen. We flag any variant that appears on this + // list regardless of GnoMAD freq. 
(optional for Proof of Concept) + final FuncotationFiltrationRule rule = new FuncotationFiltrationRule("LMM-path-LP") { + @Override + boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { + return Boolean.valueOf(prunedTranscriptFuncotations.getOrDefault(LMM_FLAGGED, "false")); + } + }; + return Collections.singletonList(rule); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java new file mode 100644 index 00000000000..5e8a3d06e35 --- /dev/null +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java @@ -0,0 +1,62 @@ +package org.broadinstitute.hellbender.tools.funcotator; + +import htsjdk.samtools.util.IOUtil; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFHeader; +import org.apache.commons.lang3.tuple.Pair; +import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; +import org.testng.Assert; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class FilterFuncotationsIntegrationTest extends CommandLineProgramTest { + + private static final Path TEST_DATA_DIR = getTestDataDir().toPath().resolve("FilterFuncotations"); + + @DataProvider(name = "uniformVcfProvider") + public Object[][] uniformVcfProvider() { + return new Object[][]{ + {"clinvar.vcf", 19, Collections.emptySet(), Collections.singleton("CLINVAR")}, + {"lmm.vcf", 38, Collections.emptySet(), Collections.singleton("LMM")}, + {"lof.vcf", 19, Collections.emptySet(), 
Collections.singleton("LOF")}, + {"all.vcf", 38, Collections.emptySet(), new HashSet<>(Arrays.asList("CLINVAR", "LMM", "LOF"))}, + {"none.vcf", 38, Collections.singleton(FilterFuncotations.NOT_CLINSIG_FILTER), Collections.singleton("NONE")} + }; + } + + @Test(dataProvider = "uniformVcfProvider") + public void testFilterUniform(final String vcfName, + final int build, + final Set expectedFilters, + final Set expectedAnnotations) throws IOException { + + final Path tmpOut = Files.createTempFile(vcfName + ".filtered", ".vcf"); + IOUtil.deleteOnExit(tmpOut); + + final List args = Arrays.asList( + "-V", TEST_DATA_DIR.resolve(vcfName).toString(), + "-O", tmpOut.toString(), + "--ref-version", "hg" + build + ); + runCommandLine(args); + + final Pair> vcf = VariantContextTestUtils.readEntireVCFIntoMemory(tmpOut.toString()); + vcf.getRight().forEach(variant -> { + Assert.assertEquals(variant.getFilters(), expectedFilters); + + final List clinsigAnnotations = variant.getCommonInfo() + .getAttributeAsStringList(FilterFuncotations.CLINSIG_RULE_KEY, ""); + Assert.assertEquals(new HashSet<>(clinsigAnnotations), expectedAnnotations); + }); + } +} diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/all.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/all.vcf new file mode 100644 index 00000000000..34bc660d374 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/all.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.2 +##Funcotator Version=0.0.3 | Gencode 27 ALL | LMMKnown 20180618 +##FORMAT= +##INFO= +##buildName=GRCh38 +##contig= +##reference=file:///cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 55039931 . G A . . 
FUNCOTATION=[ENST00000302118.5|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Likely_pathogenic|YES|NONSENSE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|true] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/clinvar.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/clinvar.vcf new file mode 100644 index 00000000000..47ce4aeacf2 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/clinvar.vcf @@ -0,0 +1,8 @@ +##fileformat=VCFv4.2 +##INFO= +##contig= +##reference=file:///cromwell_root/broad-references/hg19/v0/Homo_sapiens_assembly19.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +1 17349143 . C T . . FUNCOTATION=[ENST00000375499.3|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Pathogenic|4|100|3|100|2|100|1|100|2|100|3|100|4|100] +1 17349144 . C T . . FUNCOTATION=[ENST00000375499.3|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Likely_pathogenic|1|100|2|100|3|100|4|100|3|100|2|100|1|100] +1 17349145 . C T . . FUNCOTATION=[ENST00000375499.3|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Pathogenic/Likely_pathogenic|4|100|1|100|3|100|2|100|3|100|1|100|4|100] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lmm.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lmm.vcf new file mode 100644 index 00000000000..4529f4fd916 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lmm.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.2 +##Funcotator Version=0.0.3 | Gencode 27 ALL | LMMKnown 20180618 +##FORMAT= +##INFO= +##buildName=GRCh38 +##contig= +##reference=file:///cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 55039931 . G A . . 
FUNCOTATION=[ENST00000302118.5|true] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lof.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lof.vcf new file mode 100644 index 00000000000..ed625a67c39 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/lof.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.2 +##INFO= +##contig= +##reference=file:///cromwell_root/broad-references/hg19/v0/Homo_sapiens_assembly19.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +1 17349143 . C T . . FUNCOTATION=[ENST00000375499.3|YES|NONSENSE|4|1000|1|1000|1|1000|1|1000|1|1000|1|1000|1|1000] +1 17349144 . C T . . FUNCOTATION=[ENST00000375499.3|YES|START_CODON_DEL|7|1000|9|1000|1|1000|5|1000|8|1000|2|1000|0|1000] +1 17349145 . C T . . FUNCOTATION=[ENST00000375499.3|YES|SPLICE_SITE|3|1000|3|1000|3|1000|3|1000|3|1000|3|1000|3|1000] +1 17349146 . C T . . FUNCOTATION=[ENST00000375499.3|YES|FRAME_SHIFT_INS|4|1000|3|1000|2|1000|1|1000|2|1000|3|1000|4|1000] +1 173491467 . C T . . FUNCOTATION=[ENST00000375499.3|YES|FRAME_SHIFT_DEL|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/none.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/none.vcf new file mode 100644 index 00000000000..7be93c02805 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/none.vcf @@ -0,0 +1,13 @@ +##fileformat=VCFv4.2 +##Funcotator Version=0.0.3 | Gencode 27 ALL | LMMKnown 20180618 +##FORMAT= +##INFO= +##buildName=GRCh38 +##contig= +##reference=file:///cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 55039931 . G A . . FUNCOTATION=[ENST00000302118.5||Likely_pathogenic|NO|NONSENSE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|false] +chr1 55039932 . G A . . 
FUNCOTATION=[ENST00000302118.5|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Benign|NO|NONSENSE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|false] +chr1 55039933 . G A . . FUNCOTATION=[ENST00000302118.5||Likely_pathogenic|YES|COULD_NOT_DETERMINE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|false] +chr1 55039934 . G A . . FUNCOTATION=[ENST00000302118.5|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Benign|YES|COULD_NOT_DETERMINE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|false] +chr1 55039935 . G A . . FUNCOTATION=[ENST00000302118.5|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Likely_pathogenic|YES|NONSENSE|5|1000|6|1000|7|100|8|1000|0|1000|0|1000|9|1000|false] From 8fb4a98b3c662653d7e58c9b67e00c9b03043397 Mon Sep 17 00:00:00 2001 From: Dan Moran Date: Sun, 8 Jul 2018 13:13:47 -0400 Subject: [PATCH 2/8] Add multi-transcript and multi-allele tests. --- .../funcotator/FilterFuncotationsIntegrationTest.java | 8 ++++++-- .../tools/FilterFuncotations/multi-allelic.vcf | 9 +++++++++ .../tools/FilterFuncotations/multi-transcript.vcf | 9 +++++++++ 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-allelic.vcf create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-transcript.vcf diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java index 5e8a3d06e35..651dd173956 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java @@ -22,14 +22,18 @@ public class FilterFuncotationsIntegrationTest extends CommandLineProgramTest { private static final Path TEST_DATA_DIR = getTestDataDir().toPath().resolve("FilterFuncotations"); - 
+ + private static final Set ALL_FILTERS = new HashSet<>(Arrays.asList("CLINVAR", "LMM", "LOF")); + @DataProvider(name = "uniformVcfProvider") public Object[][] uniformVcfProvider() { return new Object[][]{ {"clinvar.vcf", 19, Collections.emptySet(), Collections.singleton("CLINVAR")}, {"lmm.vcf", 38, Collections.emptySet(), Collections.singleton("LMM")}, {"lof.vcf", 19, Collections.emptySet(), Collections.singleton("LOF")}, - {"all.vcf", 38, Collections.emptySet(), new HashSet<>(Arrays.asList("CLINVAR", "LMM", "LOF"))}, + {"all.vcf", 38, Collections.emptySet(), ALL_FILTERS}, + {"multi-transcript.vcf", 38, Collections.emptySet(), ALL_FILTERS}, + {"multi-allelic.vcf", 38, Collections.emptySet(), ALL_FILTERS}, {"none.vcf", 38, Collections.singleton(FilterFuncotations.NOT_CLINSIG_FILTER), Collections.singleton("NONE")} }; } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-allelic.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-allelic.vcf new file mode 100644 index 00000000000..cc2535227c6 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-allelic.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.2 +##Funcotator Version=0.0.3 | Gencode 27 ALL | LMMKnown 20180618 +##FORMAT= +##INFO= +##buildName=GRCh38 +##contig= +##reference=file:///cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 55039931 . G A,T . . 
FUNCOTATION=[ENST00000302118.5|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Likely_pathogenic|YES|NONSENSE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|],[ENST00000302118.5|||||10|100|10|100|10|100|10|100|10|100|10|100|10|100|true] diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-transcript.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-transcript.vcf new file mode 100644 index 00000000000..a593c3ebd95 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/FilterFuncotations/multi-transcript.vcf @@ -0,0 +1,9 @@ +##fileformat=VCFv4.2 +##Funcotator Version=0.0.3 | Gencode 27 ALL | LMMKnown 20180618 +##FORMAT= +##INFO= +##buildName=GRCh38 +##contig= +##reference=file:///cromwell_root/broad-references/hg38/v0/Homo_sapiens_assembly38.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +chr1 55039931 . G A . . FUNCOTATION=[ENST00000302118.5|||||||||||||||||||true]#[ENST00000302118.6|Paragangliomas_%20_4_%20_(MIM_%20_115310)|Likely_pathogenic|||5|100|4|100|3|100|2|100|1|100|2|100|3|100|]#[ENST00000302118.7|||YES|NONSENSE|5|1000|6|1000|7|1000|8|1000|0|1000|0|1000|9|1000|] From 7380c79fe20daab14c4d704a1c2e5b86255e8715 Mon Sep 17 00:00:00 2001 From: Dan Moran Date: Wed, 18 Jul 2018 17:34:10 -0400 Subject: [PATCH 3/8] Move code and add comments from review. 
--- .../tools/funcotator/FilterFuncotations.java | 274 +++++------------- .../FilterFuncotationsConstants.java | 32 ++ .../filtrationRules/ClinVarFilter.java | 51 ++++ .../FilterFuncotationsExacUtils.java | 54 ++++ .../filtrationRules/FuncotationFilter.java | 45 +++ .../FuncotationFiltrationRule.java | 16 + .../funcotator/filtrationRules/LmmFilter.java | 34 +++ .../funcotator/filtrationRules/LofFilter.java | 63 ++++ .../FilterFuncotationsIntegrationTest.java | 17 +- 9 files changed, 374 insertions(+), 212 deletions(-) create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsConstants.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java create mode 100644 src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java index b2fd9af2a60..ebd0824d83f 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java @@ -8,8 +8,6 @@ import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLineType; import htsjdk.variant.vcf.VCFInfoHeaderLine; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; import 
org.broadinstitute.barclay.argparser.Argument; import org.broadinstitute.barclay.argparser.CommandLineProgramProperties; import org.broadinstitute.barclay.argparser.ExperimentalFeature; @@ -20,14 +18,16 @@ import org.broadinstitute.hellbender.engine.ReferenceContext; import org.broadinstitute.hellbender.engine.VariantWalker; import org.broadinstitute.hellbender.exceptions.UserException; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ClinVarFilter; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.FuncotationFilter; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LmmFilter; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LofFilter; import org.broadinstitute.hellbender.tools.funcotator.vcfOutput.VcfOutputRenderer; import picard.cmdline.programgroups.VariantEvaluationProgramGroup; import java.io.File; import java.util.AbstractMap; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -35,6 +35,17 @@ import java.util.stream.Collectors; import java.util.stream.Stream; +/** + * Filter variants based on clinically-significant Funcotations. + * + * This proof-of-concept tool is an example for how to parse and use the VCF output of Funcotator. + * It's currently hard-coded to look for specific {@link Funcotation}s from: + * + */ @CommandLineProgramProperties( summary = FilterFuncotations.SUMMARY, oneLineSummary = FilterFuncotations.ONE_LINE_SUMMARY, @@ -46,25 +57,30 @@ public class FilterFuncotations extends VariantWalker { static final String ONE_LINE_SUMMARY = "Filter variants based on clinically-significant Funcotations."; static final String SUMMARY = ONE_LINE_SUMMARY + - " Proof-of-concept hard-coded to look for specific Funcotations from ClinVar, ExAC, and LMM."; + "\nThis proof-of-concept tool is an example for how to parse and use the VCF output of Funcotator." 
+ + "\nCurrently hard-coded to look for specific Funcotations from:" + + "\n * ClinVar (http://www.clinvar.com/)" + + "\n * Exome Aggregation Consortium (ExAC) (http://exac.broadinstitute.org/)" + + "\n * Laboratory for Molecular Medicine (LMM) (http://personalizedmedicine.partners.org/laboratory-for-molecular-medicine/)"; - static final String CLINSIG_RULE_KEY = "CLINSIG"; - static final String NOT_CLINSIG_FILTER = "NOT_" + CLINSIG_RULE_KEY; - - enum ReferenceVersion { + public enum ReferenceVersion { hg19(19), hg38(27); - final int gencodeVersion; + private final int gencodeVersion; ReferenceVersion(int gencodeVersion) { this.gencodeVersion = gencodeVersion; } + + public int getGencodeVersion() { + return gencodeVersion; + } } @Argument( shortName = StandardArgumentDefinitions.OUTPUT_SHORT_NAME, fullName = StandardArgumentDefinitions.OUTPUT_LONG_NAME, - doc = "Output VCF file to which annotated variants should be written.") + doc = "Output VCF file to which filtered variants should be written.") protected File outputFile; @Argument( @@ -86,9 +102,10 @@ public void onTraversalStart() { if (funcotationHeaderLine != null) { funcotationKeys = FuncotatorUtils.extractFuncotatorKeysFromHeaderDescription(funcotationHeaderLine.getDescription()); outputVcfWriter = createVCFWriter(outputFile); - vcfHeader.addMetaDataLine(new VCFFilterHeaderLine(NOT_CLINSIG_FILTER, "Filter for clinically insignificant variants.")); - vcfHeader.addMetaDataLine(new VCFInfoHeaderLine(CLINSIG_RULE_KEY, 1, VCFHeaderLineType.String, - "Rule(s) which caused this annotation to be flagged as clinically significant.")); + vcfHeader.addMetaDataLine(new VCFFilterHeaderLine(FilterFuncotationsConstants.NOT_CLINSIG_FILTER, + FilterFuncotationsConstants.NOT_CLINSIG_FILTER_DESCRIPTION)); + vcfHeader.addMetaDataLine(new VCFInfoHeaderLine(FilterFuncotationsConstants.CLINSIG_INFO_KEY, 1, + VCFHeaderLineType.String, FilterFuncotationsConstants.CLINSIG_INFO_KEY_DESCRIPTION)); 
outputVcfWriter.writeHeader(vcfHeader); } else { throw new UserException.BadInput("Input VCF does not have Funcotator annotations."); @@ -102,17 +119,22 @@ private void registerFilters() { } @Override - public void apply(VariantContext variant, ReadsContext readsContext, ReferenceContext referenceContext, FeatureContext featureContext) { - outputVcfWriter.add(applyFilters(variant)); + public void apply(final VariantContext variant, final ReadsContext readsContext, final ReferenceContext referenceContext, final FeatureContext featureContext) { + outputVcfWriter.add(applyFilters(variant, getMatchingFilters(variant))); } - private VariantContext applyFilters(VariantContext variant) { - + /** + * Collect the names of the {@link FuncotationFilter}s matching the Funcotations of the given variant. + * + * The filter will be treated as a match if it matches Funcotations for any of the transcripts in the + * variant's Funcotation map. + */ + private Set getMatchingFilters(final VariantContext variant) { final Set matchingFilters = new HashSet<>(); - final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(variant); + final Map funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute( - funcotationKeys, variant, "Gencode_" + referenceVersion.gencodeVersion + "_annotationTranscript", "FILTER"); + funcotationKeys, variant, "Gencode_" + referenceVersion.gencodeVersion + "_annotationTranscript", "FAKE_SOURCE"); funcs.values().forEach(funcotationMap -> { final Stream> transcriptFuncotations = funcotationMap.getTranscriptList().stream() @@ -131,204 +153,44 @@ private VariantContext applyFilters(VariantContext variant) { }); }); - String clinicalSignificance = matchingFilters.isEmpty() ? 
"NONE" : String.join(",", matchingFilters); - variantContextBuilder.attribute(CLINSIG_RULE_KEY, clinicalSignificance); - - if (matchingFilters.isEmpty()) { - variantContextBuilder.filter(NOT_CLINSIG_FILTER); - } else { - variantContextBuilder.passFilters(); - } - return variantContextBuilder.make(); + return matchingFilters; } + /** + * Parse the entries in a Funcotation into a stream of map entries. + */ private Stream> extractFuncotationFields(final Funcotation funcotation) { return funcotation.getFieldNames().stream() .map(name -> new AbstractMap.SimpleEntry<>(name, funcotation.getField(name))); } - @Override - public void closeTool() { - if (outputVcfWriter != null) { - outputVcfWriter.close(); - } - } -} - -abstract class FuncotationFiltrationRule { - - enum ExacSubPopulation { - AFR, AMR, EAS, FIN, NFE, OTH, SAS - } - - private static Logger logger = LogManager.getLogger(ClinVarFilter.class); - private final String ruleName; - - FuncotationFiltrationRule(final String ruleName) { - this.ruleName = ruleName; - } - - boolean checkRule(final VariantContext variant,final Map prunedTranscriptFuncotations) { - return !prunedTranscriptFuncotations.isEmpty() && - optionallyLog(applyRule(variant, prunedTranscriptFuncotations), variant); - } - - abstract boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations); - - private boolean optionallyLog(final boolean result, final VariantContext variant) { - if (result) logger.debug(String.format("Matched Rule: %s For Variant %s", ruleName, variant)); - return result; - } - - double getMaxMinorAlleleFreq(final Map funcotations) { - return Arrays.stream(ExacSubPopulation.values()) - .filter(subpop -> funcotations.containsKey("ExAC_AC_" + subpop.name())) - .map(subpop -> { - final Double ac = Double.valueOf(funcotations.get("ExAC_AC_" + subpop.name())); - final Integer an = Integer.valueOf(funcotations.get("ExAC_AN_" + subpop.name())); - - if (an == 0) { - // If a variant has never been seen in ExAC, 
report it as 0% MAF. - return 0d; - } else { - return ac / an; - } - }) - .max(Double::compareTo) - .orElse(0d); - } -} - -abstract class FuncotationFilter { - - private final String filterName; - - FuncotationFilter(final String filterName) { - this.filterName = filterName; - } - - Boolean checkFilter(final VariantContext variant, final Map prunedTranscriptFuncotations) { - return getRules().stream() - .map(rule -> rule.checkRule(variant, prunedTranscriptFuncotations)) - .reduce(Boolean::logicalAnd) - .orElse(false); - } - - abstract List getRules(); - - public String getFilterName() { - return filterName; - } -} - - -class ClinVarFilter extends FuncotationFilter { - - private static final String ACMG_DISEASE_FUNCOTATION = "ACMG_recommendation_Disease_Name"; - private static final String CLIN_VAR_VCF_CLNSIG = "ClinVar_VCF_CLNSIG"; - - ClinVarFilter() { - super("CLINVAR"); - } - - @Override - List getRules() { - final List clinVarFiltrationRules = new ArrayList<>(); - - // 1) The gene name must be on the ACMG59 list (American College of Medical Genomics). - clinVarFiltrationRules.add(new FuncotationFiltrationRule("ClinVar-ACMG59") { - @Override - boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { - return prunedTranscriptFuncotations.containsKey(ACMG_DISEASE_FUNCOTATION); - } - }); - - // 2) ClinVar annotations specifies Pathogenicity or Likely pathogenic. 
- clinVarFiltrationRules.add(new FuncotationFiltrationRule("ClinVar-pathogenic") { - @Override - boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { - final String significance = prunedTranscriptFuncotations.getOrDefault(CLIN_VAR_VCF_CLNSIG, ""); - return significance.contains("Pathogenic") || significance.contains("Likely_pathogenic"); - } - }); - - // 3) Frequency: Max Minor Allele Freq is ≤5% in GnoMAD (ExAC for Proof of Concept) - clinVarFiltrationRules.add(new FuncotationFiltrationRule("ClinVar-MAF") { - @Override - boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { - return getMaxMinorAlleleFreq(prunedTranscriptFuncotations) <= 0.05; - } - }); - return clinVarFiltrationRules; - } -} - -class LofFilter extends FuncotationFilter { - - private static final String LOF_GENE_FUNCOTATION = "ACMGLMMLof_LOF_Mechanism"; - private static final String FRAME_SHIFT_PREFIX = "FRAME_SHIFT_"; - private static final List CONSTANT_LOF_CLASSIFICATIONS = Arrays.asList("NONSENSE", "START_CODON_DEL", "SPLICE_SITE"); - - private final String classificationFuncotation; - - LofFilter(final FilterFuncotations.ReferenceVersion ref) { - super("LOF"); - this.classificationFuncotation = "Gencode_" + ref.gencodeVersion + "_variantClassification"; - } - - @Override - List getRules() { - final List lofFiltrationRules = new ArrayList<>(); - // 1) 1) Variant classification is FRAME_SHIFT_*, NONSENSE, START_CODON_DEL, and SPLICE_SITE - // (within 2 bases on either side of exon or intron) on any transcript. 
- // TODO - lofFiltrationRules.add(new FuncotationFiltrationRule("LOF-class") { - - @Override - boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { - final String classification = prunedTranscriptFuncotations.getOrDefault(classificationFuncotation, ""); - return classification.startsWith(FRAME_SHIFT_PREFIX) || CONSTANT_LOF_CLASSIFICATIONS.contains(classification); - } - }); - - // 2) LoF is disease mechanism (that is do not flag genes where LoF is not part of disease mechanism e.g. RyR1) - // - create static list - lofFiltrationRules.add(new FuncotationFiltrationRule("LOF-mechanism") { - @Override - boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { - return prunedTranscriptFuncotations.getOrDefault(LOF_GENE_FUNCOTATION, "NO").equals("YES"); - } - }); - - // 3) Frequency: Max Minor Allele Freq is ≤1% in GnoMAD (ExAC for Proof of Concept) - lofFiltrationRules.add(new FuncotationFiltrationRule("LOF-MAF") { - @Override - boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { - return getMaxMinorAlleleFreq(prunedTranscriptFuncotations) <= 0.01; - } - }); - return lofFiltrationRules; - } -} + /** + * Mark a variant as matching a set of Funcotation filters, or as matching no filters. + */ + private VariantContext applyFilters(final VariantContext variant, final Set matchingFilters) { + final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(variant); -class LmmFilter extends FuncotationFilter { + // Mark the individual filters that make the variant significant. + final String clinicalSignificance = matchingFilters.isEmpty() ? 
+ FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT : + String.join(",", matchingFilters); + variantContextBuilder.attribute(FilterFuncotationsConstants.CLINSIG_INFO_KEY, clinicalSignificance); - private static final String LMM_FLAGGED = "LMMKnown_LMM_FLAGGED"; + // Also set the filter field for insignificant variants, to make it easier for + // downstream tools to extract out the interesting data. + if (matchingFilters.isEmpty()) { + variantContextBuilder.filter(FilterFuncotationsConstants.NOT_CLINSIG_FILTER); + } else { + variantContextBuilder.passFilters(); + } - LmmFilter() { - super("LMM"); + return variantContextBuilder.make(); } @Override - List getRules() { - // 1) LMM gives us a list of all path/LP variants they have seen. We flag any variant that appears on this - // list regardless of GnoMAD freq. (optional for Proof of Concept) - final FuncotationFiltrationRule rule = new FuncotationFiltrationRule("LMM-path-LP") { - @Override - boolean applyRule(final VariantContext variant, final Map prunedTranscriptFuncotations) { - return Boolean.valueOf(prunedTranscriptFuncotations.getOrDefault(LMM_FLAGGED, "false")); - } - }; - return Collections.singletonList(rule); + public void closeTool() { + if (outputVcfWriter != null) { + outputVcfWriter.close(); + } } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsConstants.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsConstants.java new file mode 100644 index 00000000000..dca73495f34 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsConstants.java @@ -0,0 +1,32 @@ +package org.broadinstitute.hellbender.tools.funcotator; + +public class FilterFuncotationsConstants { + /** + * Key for the INFO field added to all variants by {@link FilterFuncotations}, + * indicating the clinical significance (if any) of the Funcotations on that variant. 
+ */ + public static final String CLINSIG_INFO_KEY = "CLINSIG"; + + /** + * Description for {@value CLINSIG_INFO_KEY} to include in VCF headers. + */ + public static final String CLINSIG_INFO_KEY_DESCRIPTION = + "Rule(s) which caused this annotation to be flagged as clinically significant."; + + /** + * Value to assign to {@value CLINSIG_INFO_KEY} for variants that have no + * clinically-significant Funcotations. + */ + public static final String CLINSIG_INFO_NOT_SIGNIFICANT = "NONE"; + + /** + * FILTER value applied by {@link FilterFuncotations} to all variants which have + * no clinically-significant Funcotations. + */ + public static final String NOT_CLINSIG_FILTER = "NOT_" + CLINSIG_INFO_KEY; + + /** + * Description for {@value NOT_CLINSIG_FILTER} to include in VCF headers. + */ + public static final String NOT_CLINSIG_FILTER_DESCRIPTION = "Filter for clinically insignificant variants."; +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java new file mode 100644 index 00000000000..97eda7ef0b9 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java @@ -0,0 +1,51 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import java.util.Arrays; +import java.util.List; + +/** + * {@link FuncotationFilter} matching variants which: + *
    + *
  • Occur on a gene in the American College of Medical Genetics and Genomics (ACMG)'s list of clinically-significant genes
  • + *
  • Have been labeled by ClinVar as pathogenic or likely pathogenic
  • + *
  • Have a maximum minor allele frequency (MAF) of at most 5% across sub-populations of ExAC
  • + *
+ */ +public class ClinVarFilter extends FuncotationFilter { + + /** + * Value to include in the {@value org.broadinstitute.hellbender.tools.funcotator.FilterFuncotationsConstants#CLINSIG_INFO_KEY} + * INFO annotation of variants matching this rule. + */ + public static final String CLINSIG_INFO_VALUE = "CLINVAR"; + + /** + * Funcotation which will be non-empty for variants which occur on a gene in the ACMG's list. + * + * @see The gene list + */ + private static final String ACMG_DISEASE_FUNCOTATION = "ACMG_recommendation_Disease_Name"; + + /** + * Funcotation which contains ClinVar's assessment of a variant's clinical significance. + * + * @see Valid values for significance + */ + private static final String CLINVAR_SIGNIFICANCE_FUNCOTATION = "ClinVar_VCF_CLNSIG"; + + public ClinVarFilter() { + super(CLINSIG_INFO_VALUE); + } + + @Override + List getRules() { + return Arrays.asList( + (variant, prunedTranscriptFuncotations) -> + prunedTranscriptFuncotations.containsKey(ACMG_DISEASE_FUNCOTATION), + (variant, prunedTranscriptFuncotations) -> { + final String significance = prunedTranscriptFuncotations.getOrDefault(CLINVAR_SIGNIFICANCE_FUNCOTATION, ""); + return significance.contains("Pathogenic") || significance.contains("Likely_pathogenic"); + }, + FilterFuncotationsExacUtils.buildExacMaxMafRule(0.05)); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java new file mode 100644 index 00000000000..723d06a69b3 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java @@ -0,0 +1,54 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import java.util.Arrays; +import java.util.Map; + +public class FilterFuncotationsExacUtils { + /** + * Sub-population suffixes used within ExAC. 
Used for calculating max MAF. + */ + private enum ExacSubPopulation { + AFR, AMR, EAS, FIN, NFE, OTH, SAS + } + + /** + * Prefix for allele-count Funcotations for each ExAC sub-population. + */ + private static String EXAC_ALLELE_COUNT_PREFIX = "ExAC_AC_"; + + /** + * Prefix for allele-number Funcotations for each ExAC sub-population. + */ + private static String EXAC_ALLELE_NUMBER_PREFIX = "ExAC_AN_"; + + /** + * Build a {@link FuncotationFiltrationRule} matching variants with a MAF less than + * the given value across all sub-populations of ExAC. + */ + public static FuncotationFiltrationRule buildExacMaxMafRule(final double maxMaf) { + return ((variant, prunedTranscriptFuncotations) -> getMaxMinorAlleleFreq(prunedTranscriptFuncotations) <= maxMaf); + } + + /** + * Calculate the max MAF across all ExAC sub-populations from the given Funcotations. + * + * If a sub-population has an allele number of zero, it will be assigned a MAF of zero. + */ + private static double getMaxMinorAlleleFreq(final Map funcotations) { + return Arrays.stream(ExacSubPopulation.values()) + .filter(subpop -> funcotations.containsKey(EXAC_ALLELE_COUNT_PREFIX + subpop.name())) + .map(subpop -> { + final Double ac = Double.valueOf(funcotations.get(EXAC_ALLELE_COUNT_PREFIX + subpop.name())); + final Integer an = Integer.valueOf(funcotations.get(EXAC_ALLELE_NUMBER_PREFIX + subpop.name())); + + if (an == 0) { + // If a variant has never been seen in ExAC, report it as 0% MAF. 
+ return 0d; + } else { + return ac / an; + } + }) + .max(Double::compareTo) + .orElse(0d); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java new file mode 100644 index 00000000000..164e8dd4451 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java @@ -0,0 +1,45 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import htsjdk.variant.variantcontext.VariantContext; + +import java.util.List; +import java.util.Map; + +/** + * A filter to apply to Funcotated variants in {@link org.broadinstitute.hellbender.tools.funcotator.FilterFuncotations}. + * + * Filters can define an arbitrary number of rules which must match on the Funcotations of a variant in order + * for that variant to "pass". Passing variants will be annotated with the filter's name in the output VCF. + */ +public abstract class FuncotationFilter { + + /** + * The INFO annotation value which should be added to all variants which pass this filter. + */ + private final String filterName; + + FuncotationFilter(final String filterName) { + this.filterName = filterName; + } + + public String getFilterName() { + return filterName; + } + + /** + * Check all of this filter's rules against a variant and the Funcotations for one of its transcripts. + * + * @return true if the variant and funcotations match all of this filter's rules, and false otherwise + */ + public Boolean checkFilter(final VariantContext variant, final Map prunedTranscriptFuncotations) { + return getRules().stream() + .map(rule -> rule.checkRule(variant, prunedTranscriptFuncotations)) + .reduce(Boolean::logicalAnd) + .orElse(false); + } + + /** + * Build the collection of rules which a variant must match to pass this filter. 
+ */ + abstract List getRules(); +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java new file mode 100644 index 00000000000..02915024450 --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java @@ -0,0 +1,16 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import htsjdk.variant.variantcontext.VariantContext; + +import java.util.Map; + +/** + * A rule to match against a variant within a {@link FuncotationFilter}. + */ +interface FuncotationFiltrationRule { + + /** + * Check if a variant and the Funcotations for one of its transcripts match this rule. + */ + boolean checkRule(final VariantContext variant, final Map prunedTranscriptFuncotations); +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java new file mode 100644 index 00000000000..531ac75a45d --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java @@ -0,0 +1,34 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import java.util.Collections; +import java.util.List; + +/** + * {@link FuncotationFilter} matching variants which: + *
    + *
  • Have been flagged by LMM as pathogenic or likely pathogenic.
  • + *
+ */ +public class LmmFilter extends FuncotationFilter { + + /** + * Value to include in the {@value org.broadinstitute.hellbender.tools.funcotator.FilterFuncotationsConstants#CLINSIG_INFO_KEY} + * INFO annotation of variants matching this rule. + */ + public static final String CLINSIG_INFO_VALUE = "LMM"; + + /** + * Funcotation which will contain "true" for variants which LMM has marked as important. + */ + private static final String LMM_FLAGGED = "LMMKnown_LMM_FLAGGED"; + + public LmmFilter() { + super(CLINSIG_INFO_VALUE); + } + + @Override + List getRules() { + return Collections.singletonList((variant, prunedTranscriptFuncotations) -> + Boolean.valueOf(prunedTranscriptFuncotations.getOrDefault(LMM_FLAGGED, "false"))); + } +} diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java new file mode 100644 index 00000000000..319dfa4c21d --- /dev/null +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java @@ -0,0 +1,63 @@ +package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; + +import org.broadinstitute.hellbender.tools.funcotator.FilterFuncotations; + +import java.util.Arrays; +import java.util.List; + +/** + * {@link FuncotationFilter} matching variants which: + *
    + *
  • Are classified as FRAME_SHIFT_*, NONSENSE, START_CODON_DEL, or SPLICE_SITE
  • + *
  • Occur on a gene where loss of function is a disease mechanism
  • + *
  • Have a maximum minor allele frequency (MAF) of at most 1% across sub-populations of ExAC
  • + *
+ */ +public class LofFilter extends FuncotationFilter { + + /** + * Value to include in the {@value org.broadinstitute.hellbender.tools.funcotator.FilterFuncotationsConstants#CLINSIG_INFO_KEY} + * INFO annotation of variants matching this rule. + */ + public static final String CLINSIG_INFO_VALUE = "LOF"; + + /** + * Funcotation which will contain "YES" for variants which are important for loss of function. + */ + private static final String LOF_GENE_FUNCOTATION = "ACMGLMMLof_LOF_Mechanism"; + + /** + * Prefix for frame-shift variant classifications which should be matched by this filter. + */ + private static final String FRAME_SHIFT_PREFIX = "FRAME_SHIFT_"; + + /** + * Variant classifications which should be matched by this filter. + */ + private static final List CONSTANT_LOF_CLASSIFICATIONS = Arrays.asList( + "NONSENSE", "START_CODON_DEL", "SPLICE_SITE"); + + /** + * Funcotation which will contain the variant classification determined by Funcotator. + * + * Varies based on gencode version. 
+ */ + private final String classificationFuncotation; + + public LofFilter(final FilterFuncotations.ReferenceVersion ref) { + super(CLINSIG_INFO_VALUE); + this.classificationFuncotation = "Gencode_" + ref.getGencodeVersion() + "_variantClassification"; + } + + @Override + List getRules() { + return Arrays.asList( + (variant, prunedTranscriptFuncotations) -> { + final String classification = prunedTranscriptFuncotations.getOrDefault(classificationFuncotation, ""); + return classification.startsWith(FRAME_SHIFT_PREFIX) || CONSTANT_LOF_CLASSIFICATIONS.contains(classification); + }, + (variant, prunedTranscriptFuncotations) -> + prunedTranscriptFuncotations.getOrDefault(LOF_GENE_FUNCOTATION, "NO").equals("YES"), + FilterFuncotationsExacUtils.buildExacMaxMafRule(0.01)); + } +} diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java index 651dd173956..af5408f09e1 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java @@ -5,6 +5,9 @@ import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ClinVarFilter; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LmmFilter; +import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LofFilter; import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; @@ -23,18 +26,20 @@ public class FilterFuncotationsIntegrationTest extends CommandLineProgramTest { private static final Path TEST_DATA_DIR = getTestDataDir().toPath().resolve("FilterFuncotations"); 
- private static final Set ALL_FILTERS = new HashSet<>(Arrays.asList("CLINVAR", "LMM", "LOF")); + private static final Set ALL_FILTERS = new HashSet<>(Arrays.asList( + ClinVarFilter.CLINSIG_INFO_VALUE, LofFilter.CLINSIG_INFO_VALUE, LmmFilter.CLINSIG_INFO_VALUE)); @DataProvider(name = "uniformVcfProvider") public Object[][] uniformVcfProvider() { return new Object[][]{ - {"clinvar.vcf", 19, Collections.emptySet(), Collections.singleton("CLINVAR")}, - {"lmm.vcf", 38, Collections.emptySet(), Collections.singleton("LMM")}, - {"lof.vcf", 19, Collections.emptySet(), Collections.singleton("LOF")}, + {"clinvar.vcf", 19, Collections.emptySet(), Collections.singleton(ClinVarFilter.CLINSIG_INFO_VALUE)}, + {"lmm.vcf", 38, Collections.emptySet(), Collections.singleton(LmmFilter.CLINSIG_INFO_VALUE)}, + {"lof.vcf", 19, Collections.emptySet(), Collections.singleton(LofFilter.CLINSIG_INFO_VALUE)}, {"all.vcf", 38, Collections.emptySet(), ALL_FILTERS}, {"multi-transcript.vcf", 38, Collections.emptySet(), ALL_FILTERS}, {"multi-allelic.vcf", 38, Collections.emptySet(), ALL_FILTERS}, - {"none.vcf", 38, Collections.singleton(FilterFuncotations.NOT_CLINSIG_FILTER), Collections.singleton("NONE")} + {"none.vcf", 38, Collections.singleton(FilterFuncotationsConstants.NOT_CLINSIG_FILTER), + Collections.singleton(FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT)} }; } @@ -59,7 +64,7 @@ public void testFilterUniform(final String vcfName, Assert.assertEquals(variant.getFilters(), expectedFilters); final List clinsigAnnotations = variant.getCommonInfo() - .getAttributeAsStringList(FilterFuncotations.CLINSIG_RULE_KEY, ""); + .getAttributeAsStringList(FilterFuncotationsConstants.CLINSIG_INFO_KEY, ""); Assert.assertEquals(new HashSet<>(clinsigAnnotations), expectedAnnotations); }); } From 91f5f495216ca0e81a255554aa285e789ed62f4e Mon Sep 17 00:00:00 2001 From: Dan Moran Date: Wed, 18 Jul 2018 21:33:36 -0400 Subject: [PATCH 4/8] Even simpler. 
--- .../tools/funcotator/FilterFuncotations.java | 2 +- .../funcotator/filtrationRules/ClinVarFilter.java | 7 +++---- .../FilterFuncotationsExacUtils.java | 4 ++-- .../filtrationRules/FuncotationFilter.java | 14 ++++++-------- .../filtrationRules/FuncotationFiltrationRule.java | 8 +++----- .../funcotator/filtrationRules/LmmFilter.java | 3 +-- .../funcotator/filtrationRules/LofFilter.java | 7 +++---- 7 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java index ebd0824d83f..138a7b1510d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java @@ -146,7 +146,7 @@ private Set getMatchingFilters(final VariantContext variant) { transcriptFuncotations.forEach(funcotations -> { final Set matches = funcotationFilters.stream() - .filter(f -> f.checkFilter(variant, funcotations)) + .filter(f -> f.checkFilter(funcotations)) .map(FuncotationFilter::getFilterName) .collect(Collectors.toSet()); matchingFilters.addAll(matches); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java index 97eda7ef0b9..d44b7fc381d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java @@ -40,10 +40,9 @@ public ClinVarFilter() { @Override List getRules() { return Arrays.asList( - (variant, prunedTranscriptFuncotations) -> - prunedTranscriptFuncotations.containsKey(ACMG_DISEASE_FUNCOTATION), - (variant, prunedTranscriptFuncotations) -> { - final String significance = 
prunedTranscriptFuncotations.getOrDefault(CLINVAR_SIGNIFICANCE_FUNCOTATION, ""); + funcotations -> funcotations.containsKey(ACMG_DISEASE_FUNCOTATION), + funcotations -> { + final String significance = funcotations.getOrDefault(CLINVAR_SIGNIFICANCE_FUNCOTATION, ""); return significance.contains("Pathogenic") || significance.contains("Likely_pathogenic"); }, FilterFuncotationsExacUtils.buildExacMaxMafRule(0.05)); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java index 723d06a69b3..71e77865ae0 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java @@ -22,11 +22,11 @@ private enum ExacSubPopulation { private static String EXAC_ALLELE_NUMBER_PREFIX = "ExAC_AN_"; /** - * Build a {@link FuncotationFiltrationRule} matching variants with a MAF less than + * Build a {@link FuncotationFiltrationRule} matching Funcotations with a MAF less than * the given value across all sub-populations of ExAC. 
*/ public static FuncotationFiltrationRule buildExacMaxMafRule(final double maxMaf) { - return ((variant, prunedTranscriptFuncotations) -> getMaxMinorAlleleFreq(prunedTranscriptFuncotations) <= maxMaf); + return funcotations -> getMaxMinorAlleleFreq(funcotations) <= maxMaf; } /** diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java index 164e8dd4451..b8dc73eb974 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java @@ -1,12 +1,10 @@ package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; -import htsjdk.variant.variantcontext.VariantContext; - import java.util.List; import java.util.Map; /** - * A filter to apply to Funcotated variants in {@link org.broadinstitute.hellbender.tools.funcotator.FilterFuncotations}. + * A filter to apply to Funcotations in {@link org.broadinstitute.hellbender.tools.funcotator.FilterFuncotations}. * * Filters can define an arbitrary number of rules which must match on the Funcotations of a variant in order * for that variant to "pass". Passing variants will be annotated with the filter's name in the output VCF. @@ -27,19 +25,19 @@ public String getFilterName() { } /** - * Check all of this filter's rules against a variant and the Funcotations for one of its transcripts. + * Check all of this filter's rules against a set of Funcotations. 
* - * @return true if the variant and funcotations match all of this filter's rules, and false otherwise + * @return true if the Funcotations match all of this filter's rules, and false otherwise */ - public Boolean checkFilter(final VariantContext variant, final Map prunedTranscriptFuncotations) { + public Boolean checkFilter(final Map prunedTranscriptFuncotations) { return getRules().stream() - .map(rule -> rule.checkRule(variant, prunedTranscriptFuncotations)) + .map(rule -> rule.checkRule(prunedTranscriptFuncotations)) .reduce(Boolean::logicalAnd) .orElse(false); } /** - * Build the collection of rules which a variant must match to pass this filter. + * Build the collection of rules which must match to pass this filter. */ abstract List getRules(); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java index 02915024450..9d0fd661e5d 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFiltrationRule.java @@ -1,16 +1,14 @@ package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; -import htsjdk.variant.variantcontext.VariantContext; - import java.util.Map; /** - * A rule to match against a variant within a {@link FuncotationFilter}. + * A rule to match against the Funcotations from a variant within a {@link FuncotationFilter}. */ interface FuncotationFiltrationRule { /** - * Check if a variant and the Funcotations for one of its transcripts match this rule. + * Check if a set of Funcotations matches this rule. 
*/ - boolean checkRule(final VariantContext variant, final Map prunedTranscriptFuncotations); + boolean checkRule(final Map funcotations); } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java index 531ac75a45d..49eb91fa9ba 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LmmFilter.java @@ -28,7 +28,6 @@ public LmmFilter() { @Override List getRules() { - return Collections.singletonList((variant, prunedTranscriptFuncotations) -> - Boolean.valueOf(prunedTranscriptFuncotations.getOrDefault(LMM_FLAGGED, "false"))); + return Collections.singletonList(funcotations -> Boolean.valueOf(funcotations.getOrDefault(LMM_FLAGGED, "false"))); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java index 319dfa4c21d..6652a8876ae 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java @@ -52,12 +52,11 @@ public LofFilter(final FilterFuncotations.ReferenceVersion ref) { @Override List getRules() { return Arrays.asList( - (variant, prunedTranscriptFuncotations) -> { - final String classification = prunedTranscriptFuncotations.getOrDefault(classificationFuncotation, ""); + funcotations -> { + final String classification = funcotations.getOrDefault(classificationFuncotation, ""); return classification.startsWith(FRAME_SHIFT_PREFIX) || CONSTANT_LOF_CLASSIFICATIONS.contains(classification); }, - (variant, prunedTranscriptFuncotations) -> - prunedTranscriptFuncotations.getOrDefault(LOF_GENE_FUNCOTATION, 
"NO").equals("YES"), + funcotations -> funcotations.getOrDefault(LOF_GENE_FUNCOTATION, "").equals("YES"), FilterFuncotationsExacUtils.buildExacMaxMafRule(0.01)); } } From e448ef4d4e559414793f72ea37ffeb7a3b88d799 Mon Sep 17 00:00:00 2001 From: Dan Moran Date: Sun, 22 Jul 2018 13:55:15 -0400 Subject: [PATCH 5/8] Round two of review comments. --- .../tools/funcotator/FilterFuncotations.java | 21 +++++++++------ .../filtrationRules/ClinVarFilter.java | 14 ++++++++-- .../FilterFuncotationsExacUtils.java | 8 ++++-- .../filtrationRules/FuncotationFilter.java | 6 +++++ .../funcotator/filtrationRules/LofFilter.java | 26 ++++++++++++------- .../FilterFuncotationsIntegrationTest.java | 9 +++---- 6 files changed, 56 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java index 138a7b1510d..0295d63c8dd 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java @@ -63,6 +63,11 @@ public class FilterFuncotations extends VariantWalker { "\n * Exome Aggregation Consortium (ExAC) (http://exac.broadinstitute.org/)" + "\n * Laboratory for Molecular Medicine (LMM) (http://personalizedmedicine.partners.org/laboratory-for-molecular-medicine/)"; + /** + * The version of the Human Genome reference which was used when Funcotating the input VCF. + * + * Used to derive names of Gencode Funcotations. 
+ */ public enum ReferenceVersion { hg19(19), hg38(27); @@ -91,7 +96,7 @@ public int getGencodeVersion() { private VariantContextWriter outputVcfWriter; private String[] funcotationKeys; - private List funcotationFilters = new ArrayList<>(); + private final List funcotationFilters = new ArrayList<>(); @Override public void onTraversalStart() { @@ -169,19 +174,19 @@ private Stream> extractFuncotationFields(final Funcota */ private VariantContext applyFilters(final VariantContext variant, final Set matchingFilters) { final VariantContextBuilder variantContextBuilder = new VariantContextBuilder(variant); + final boolean isSignificant = !matchingFilters.isEmpty(); - // Mark the individual filters that make the variant significant. - final String clinicalSignificance = matchingFilters.isEmpty() ? - FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT : - String.join(",", matchingFilters); + // Mark the individual filters that make the variant significant, if any. + final String clinicalSignificance = + isSignificant ? String.join(",", matchingFilters) : FilterFuncotationsConstants.CLINSIG_INFO_NOT_SIGNIFICANT; variantContextBuilder.attribute(FilterFuncotationsConstants.CLINSIG_INFO_KEY, clinicalSignificance); // Also set the filter field for insignificant variants, to make it easier for // downstream tools to extract out the interesting data. 
- if (matchingFilters.isEmpty()) { - variantContextBuilder.filter(FilterFuncotationsConstants.NOT_CLINSIG_FILTER); - } else { + if (isSignificant) { variantContextBuilder.passFilters(); + } else { + variantContextBuilder.filter(FilterFuncotationsConstants.NOT_CLINSIG_FILTER); } return variantContextBuilder.make(); diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java index d44b7fc381d..13ac98aaa89 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/ClinVarFilter.java @@ -33,6 +33,16 @@ public class ClinVarFilter extends FuncotationFilter { */ private static final String CLINVAR_SIGNIFICANCE_FUNCOTATION = "ClinVar_VCF_CLNSIG"; + /** + * Clinically-significant values to check for within the {@value CLINVAR_SIGNIFICANCE_FUNCOTATION} Funcotation. + */ + private static final List CLINVAR_SIGNIFICANCE_MATCHING_VALUES = Arrays.asList("Pathogenic", "Likely_pathogenic"); + + /** + * Maximum MAF a variant can have in ExAC to pass this rule. 
+ */ + private static final double CLINVAR_MAX_MAF = 0.05; + public ClinVarFilter() { super(CLINSIG_INFO_VALUE); } @@ -43,8 +53,8 @@ List getRules() { funcotations -> funcotations.containsKey(ACMG_DISEASE_FUNCOTATION), funcotations -> { final String significance = funcotations.getOrDefault(CLINVAR_SIGNIFICANCE_FUNCOTATION, ""); - return significance.contains("Pathogenic") || significance.contains("Likely_pathogenic"); + return CLINVAR_SIGNIFICANCE_MATCHING_VALUES.stream().anyMatch(significance::contains); }, - FilterFuncotationsExacUtils.buildExacMaxMafRule(0.05)); + FilterFuncotationsExacUtils.buildExacMaxMafRule(CLINVAR_MAX_MAF)); } } diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java index 71e77865ae0..8085cb390e2 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java @@ -22,8 +22,12 @@ private enum ExacSubPopulation { private static String EXAC_ALLELE_NUMBER_PREFIX = "ExAC_AN_"; /** - * Build a {@link FuncotationFiltrationRule} matching Funcotations with a MAF less than - * the given value across all sub-populations of ExAC. + * Build a {@link FuncotationFiltrationRule} matching Funcotations from variants with a + * maximum MAF less than some threshold. 
+ * + * @param maxMaf the MAF threshold to check in the rule + * @return a {@link FuncotationFiltrationRule} matching Funcotations with a MAF (AC/AN) + * less than {@code maxMaf} across all sub-populations of ExAC */ public static FuncotationFiltrationRule buildExacMaxMafRule(final double maxMaf) { return funcotations -> getMaxMinorAlleleFreq(funcotations) <= maxMaf; diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java index b8dc73eb974..61f5f1cfa50 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FuncotationFilter.java @@ -1,5 +1,7 @@ package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; +import org.broadinstitute.hellbender.utils.Utils; + import java.util.List; import java.util.Map; @@ -27,9 +29,13 @@ public String getFilterName() { /** * Check all of this filter's rules against a set of Funcotations. * + * @param prunedTranscriptFuncotations Funcotation values of a single transcript. Assumed to have + * been "pruned" to remove null / empty values. 
Never {@code null} * @return true if the Funcotations match all of this filter's rules, and false otherwise */ public Boolean checkFilter(final Map prunedTranscriptFuncotations) { + Utils.nonNull(prunedTranscriptFuncotations); + return getRules().stream() .map(rule -> rule.checkRule(prunedTranscriptFuncotations)) .reduce(Boolean::logicalAnd) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java index 6652a8876ae..8a36222db38 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/LofFilter.java @@ -1,9 +1,13 @@ package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; import org.broadinstitute.hellbender.tools.funcotator.FilterFuncotations; +import org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation; import java.util.Arrays; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; /** * {@link FuncotationFilter} matching variants which: @@ -27,15 +31,20 @@ public class LofFilter extends FuncotationFilter { private static final String LOF_GENE_FUNCOTATION = "ACMGLMMLof_LOF_Mechanism"; /** - * Prefix for frame-shift variant classifications which should be matched by this filter. + * Variant classifications which should be matched by this filter. 
*/ - private static final String FRAME_SHIFT_PREFIX = "FRAME_SHIFT_"; + private static final Set CONSTANT_LOF_CLASSIFICATIONS = Stream.of( + GencodeFuncotation.VariantClassification.FRAME_SHIFT_DEL, + GencodeFuncotation.VariantClassification.FRAME_SHIFT_INS, + GencodeFuncotation.VariantClassification.NONSENSE, + GencodeFuncotation.VariantClassification.START_CODON_DEL, + GencodeFuncotation.VariantClassification.SPLICE_SITE + ).map(GencodeFuncotation.VariantClassification::toString).collect(Collectors.toSet()); /** - * Variant classifications which should be matched by this filter. + * Maximum MAF a variant can have in ExAC to pass this rule. */ - private static final List CONSTANT_LOF_CLASSIFICATIONS = Arrays.asList( - "NONSENSE", "START_CODON_DEL", "SPLICE_SITE"); + private static final double LOF_MAX_MAF = 0.01; /** * Funcotation which will contain the variant classification determined by Funcotator. @@ -52,11 +61,8 @@ public LofFilter(final FilterFuncotations.ReferenceVersion ref) { @Override List getRules() { return Arrays.asList( - funcotations -> { - final String classification = funcotations.getOrDefault(classificationFuncotation, ""); - return classification.startsWith(FRAME_SHIFT_PREFIX) || CONSTANT_LOF_CLASSIFICATIONS.contains(classification); - }, + funcotations -> CONSTANT_LOF_CLASSIFICATIONS.contains(funcotations.getOrDefault(classificationFuncotation, "")), funcotations -> funcotations.getOrDefault(LOF_GENE_FUNCOTATION, "").equals("YES"), - FilterFuncotationsExacUtils.buildExacMaxMafRule(0.01)); + FilterFuncotationsExacUtils.buildExacMaxMafRule(LOF_MAX_MAF)); } } diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java index af5408f09e1..90f2cad321a 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java +++ 
b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java @@ -1,6 +1,5 @@ package org.broadinstitute.hellbender.tools.funcotator; -import htsjdk.samtools.util.IOUtil; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.tuple.Pair; @@ -13,8 +12,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import java.io.IOException; -import java.nio.file.Files; +import java.io.File; import java.nio.file.Path; import java.util.Arrays; import java.util.Collections; @@ -47,10 +45,9 @@ public Object[][] uniformVcfProvider() { public void testFilterUniform(final String vcfName, final int build, final Set expectedFilters, - final Set expectedAnnotations) throws IOException { + final Set expectedAnnotations) { - final Path tmpOut = Files.createTempFile(vcfName + ".filtered", ".vcf"); - IOUtil.deleteOnExit(tmpOut); + final File tmpOut = createTempFile(vcfName + ".filtered", ".vcf"); final List args = Arrays.asList( "-V", TEST_DATA_DIR.resolve(vcfName).toString(), From 4c9274d4d30e8a578dbee5a3a931637991b1c231 Mon Sep 17 00:00:00 2001 From: Dan Moran Date: Fri, 27 Jul 2018 17:24:25 -0400 Subject: [PATCH 6/8] Enforce bounds on max MAF. 
--- .../filtrationRules/FilterFuncotationsExacUtils.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java index 8085cb390e2..8cd5bbde9f4 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/filtrationRules/FilterFuncotationsExacUtils.java @@ -1,5 +1,7 @@ package org.broadinstitute.hellbender.tools.funcotator.filtrationRules; +import org.broadinstitute.hellbender.utils.param.ParamUtils; + import java.util.Arrays; import java.util.Map; @@ -25,11 +27,12 @@ private enum ExacSubPopulation { * Build a {@link FuncotationFiltrationRule} matching Funcotations from variants with a * maximum MAF less than some threshold. * - * @param maxMaf the MAF threshold to check in the rule + * @param maxMaf the MAF threshold to check in the rule. Must be in the range [0, 1] * @return a {@link FuncotationFiltrationRule} matching Funcotations with a MAF (AC/AN) * less than {@code maxMaf} across all sub-populations of ExAC */ public static FuncotationFiltrationRule buildExacMaxMafRule(final double maxMaf) { + ParamUtils.inRange(maxMaf, 0, 1, "MAF must be between 0 and 1"); return funcotations -> getMaxMinorAlleleFreq(funcotations) <= maxMaf; } From e805042a9a17fd014a89169f6fc15a43390eb77b Mon Sep 17 00:00:00 2001 From: Dan Moran Date: Fri, 17 Aug 2018 11:50:04 -0400 Subject: [PATCH 7/8] Final round of review comments. 
--- .../tools/funcotator/FilterFuncotations.java | 20 ++++++++++--------- .../FilterFuncotationsConstants.java | 6 ++++++ .../funcotator/filtrationRules/LofFilter.java | 2 +- .../FilterFuncotationsIntegrationTest.java | 18 ++++++++--------- 4 files changed, 27 insertions(+), 19 deletions(-) diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java index 0295d63c8dd..11c59ad8c3a 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotations.java @@ -68,12 +68,12 @@ public class FilterFuncotations extends VariantWalker { * * Used to derive names of Gencode Funcotations. */ - public enum ReferenceVersion { - hg19(19), hg38(27); + public enum Reference { + b37(19), hg19(19), hg38(27); private final int gencodeVersion; - ReferenceVersion(int gencodeVersion) { + Reference(int gencodeVersion) { this.gencodeVersion = gencodeVersion; } @@ -92,7 +92,7 @@ public int getGencodeVersion() { fullName = FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME, doc = "The version of the Human Genome reference which was used to Funcotate the input VCF." 
) - protected ReferenceVersion referenceVersion; + protected Reference reference; private VariantContextWriter outputVcfWriter; private String[] funcotationKeys; @@ -113,13 +113,14 @@ public void onTraversalStart() { VCFHeaderLineType.String, FilterFuncotationsConstants.CLINSIG_INFO_KEY_DESCRIPTION)); outputVcfWriter.writeHeader(vcfHeader); } else { - throw new UserException.BadInput("Input VCF does not have Funcotator annotations."); + throw new UserException.BadInput("Could not extract Funcotation keys from " + + VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME + " field in input VCF header."); } } private void registerFilters() { funcotationFilters.add(new ClinVarFilter()); - funcotationFilters.add(new LofFilter(referenceVersion)); + funcotationFilters.add(new LofFilter(reference)); funcotationFilters.add(new LmmFilter()); } @@ -139,7 +140,7 @@ private Set getMatchingFilters(final VariantContext variant) { final Map funcs = FuncotatorUtils.createAlleleToFuncotationMapFromFuncotationVcfAttribute( - funcotationKeys, variant, "Gencode_" + referenceVersion.gencodeVersion + "_annotationTranscript", "FAKE_SOURCE"); + funcotationKeys, variant, "Gencode_" + reference.gencodeVersion + "_annotationTranscript", "FAKE_SOURCE"); funcs.values().forEach(funcotationMap -> { final Stream> transcriptFuncotations = funcotationMap.getTranscriptList().stream() @@ -177,8 +178,9 @@ private VariantContext applyFilters(final VariantContext variant, final Set expectedFilters, final Set expectedAnnotations) { @@ -52,7 +52,7 @@ public void testFilterUniform(final String vcfName, final List args = Arrays.asList( "-V", TEST_DATA_DIR.resolve(vcfName).toString(), "-O", tmpOut.toString(), - "--ref-version", "hg" + build + "--ref-version", ref.name() ); runCommandLine(args); From ab78b93537c68bad253c3a1a123449b917bd59e7 Mon Sep 17 00:00:00 2001 From: Dan Moran Date: Fri, 17 Aug 2018 12:24:25 -0400 Subject: [PATCH 8/8] Fix import after rebase. 
--- .../tools/funcotator/FilterFuncotationsIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java index 8eea6d5a1ea..fe031d7dab3 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FilterFuncotationsIntegrationTest.java @@ -4,10 +4,10 @@ import htsjdk.variant.vcf.VCFHeader; import org.apache.commons.lang3.tuple.Pair; import org.broadinstitute.hellbender.CommandLineProgramTest; +import org.broadinstitute.hellbender.testutils.VariantContextTestUtils; import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.ClinVarFilter; import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LmmFilter; import org.broadinstitute.hellbender.tools.funcotator.filtrationRules.LofFilter; -import org.broadinstitute.hellbender.utils.test.VariantContextTestUtils; import org.testng.Assert; import org.testng.annotations.DataProvider; import org.testng.annotations.Test;