From 5d0bbf2c32e76fed332b0fbbf1cff9da3351f413 Mon Sep 17 00:00:00 2001 From: lichtens Date: Tue, 22 May 2018 10:37:14 -0400 Subject: [PATCH] - VCF Datasources now have to match the alt and ref alleles to be considered a hit. --- .../vcf/VcfFuncotationFactory.java | 3 +- .../funcotator/FuncotatorIntegrationTest.java | 42 +++++++++++++++++++ .../PIK3CA_3_miss_clinvar_alt_only.vcf | 10 +++++ 3 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 src/test/resources/org/broadinstitute/hellbender/tools/funcotator/PIK3CA_3_miss_clinvar_alt_only.vcf diff --git a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java index 06b44c77307..e4a3020ae35 100644 --- a/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java +++ b/src/main/java/org/broadinstitute/hellbender/tools/funcotator/dataSources/vcf/VcfFuncotationFactory.java @@ -149,10 +149,9 @@ protected List createFuncotationsOnVariant(final VariantContext var // By this point we know the feature type is correct, so we cast it: final VariantContext variantFeature = (VariantContext) feature; - //TODO: Add a test that tests alt allele position hit, but misses on the alt allele. 
// Now we create one funcotation for each Alternate allele: for ( final Allele altAllele : alternateAlleles ) { - if (!variantFeature.hasAlternateAllele(altAllele)) { + if (!(variantFeature.hasAlternateAllele(altAllele) && variantFeature.getReference().equals(variant.getReference()))) { continue; } // Add all Info keys/values to a copy of our default map: diff --git a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java index ebd40b046e0..02df370f488 100644 --- a/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java +++ b/src/test/java/org/broadinstitute/hellbender/tools/funcotator/FuncotatorIntegrationTest.java @@ -52,6 +52,7 @@ public class FuncotatorIntegrationTest extends CommandLineProgramTest { private static final String PIK3CA_VCF_HG38 = toolsTestDir + "funcotator/hg38_trio.pik3ca.vcf"; private static final String PIK3CA_VCF_HG19_SNPS = toolsTestDir + "funcotator/PIK3CA_SNPS_3.vcf"; private static final String PIK3CA_VCF_HG19_INDELS = toolsTestDir + "funcotator/PIK3CA_INDELS_3.vcf"; + private static final String PIK3CA_VCF_HG19_ALTS = toolsTestDir + "funcotator/PIK3CA_3_miss_clinvar_alt_only.vcf"; private static final String DS_PIK3CA_DIR = largeFileTestDir + "funcotator/small_ds/"; static { @@ -503,4 +504,45 @@ public void testVcfMafConcordanceForProteinChange(final String inputVcf, final S Assert.assertEquals(mafProteinChanges, vcfProteinChanges, "Failed matching " + annotationToCheck); } } + + @Test + public void testVcfDatasourceAccountsForAltAlleles() { + final FuncotatorArgumentDefinitions.OutputFormatType vcfOutputFormatType = FuncotatorArgumentDefinitions.OutputFormatType.VCF; + final File vcfOutputFile = getOutputFile(vcfOutputFormatType); + + final ArgumentsBuilder argumentsVcf = new ArgumentsBuilder(); + + argumentsVcf.addVCF(new File(PIK3CA_VCF_HG19_ALTS)); + 
argumentsVcf.addOutput(vcfOutputFile); + argumentsVcf.addReference(new File(FuncotatorTestConstants.HG19_3_REFERENCE_FILE_NAME)); + argumentsVcf.addArgument(FuncotatorArgumentDefinitions.DATA_SOURCES_PATH_LONG_NAME, DS_PIK3CA_DIR); + argumentsVcf.addArgument(FuncotatorArgumentDefinitions.REFERENCE_VERSION_LONG_NAME, FuncotatorTestConstants.REFERENCE_VERSION_HG19); + argumentsVcf.addArgument(FuncotatorArgumentDefinitions.OUTPUT_FORMAT_LONG_NAME, vcfOutputFormatType.toString()); + argumentsVcf.addBooleanArgument(FuncotatorArgumentDefinitions.IGNORE_FILTERED_VARIANTS_LONG_NAME, false); + + // We need this argument since we are testing on a subset of b37 + argumentsVcf.addBooleanArgument(FuncotatorArgumentDefinitions.ALLOW_HG19_GENCODE_B37_CONTIG_MATCHING_OVERRIDE_LONG_NAME, true); + runCommandLine(argumentsVcf); + + final Pair> vcfInfo = VariantContextTestUtils.readEntireVCFIntoMemory(vcfOutputFile.getAbsolutePath()); + final List variantContexts = vcfInfo.getRight(); + final VCFHeader vcfHeader = vcfInfo.getLeft(); + final VCFInfoHeaderLine funcotationHeaderLine = vcfHeader.getInfoHeaderLine(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME); + final String[] funcotationKeys = extractFuncotatorKeysFromHeaderDescription(funcotationHeaderLine.getDescription()); + + // The first variant context should have clinvar annotations, since it hit on the alt allele. None of the rest. + final String funcotationInfoFieldWithClinVarHit = variantContexts.get(0).getAttributeAsString(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME, null); + Assert.assertEquals(FuncotatorUtils.getFuncotationMapFromVcfFuncotationField(funcotationKeys, funcotationInfoFieldWithClinVarHit).get("dummy_ClinVar_VCF_CLNDISDB"), + FuncotatorUtils.sanitizeFuncotationForVcf("MedGen:C0027672,SNOMED_CT:699346009")); + + // The rest should not have any clinvar hits. 
+ final List clinvarAnnotations = new ArrayList(); + final List clinvarAnnotationsTruth = new ArrayList(); + for (int i = 1; i < variantContexts.size(); i++) { + final String infoField = variantContexts.get(i).getAttributeAsString(VcfOutputRenderer.FUNCOTATOR_VCF_FIELD_NAME, null); + clinvarAnnotations.add(FuncotatorUtils.getFuncotationMapFromVcfFuncotationField(funcotationKeys, infoField).get("dummy_ClinVar_VCF_CLNDISDB")); + clinvarAnnotationsTruth.add(FuncotatorUtils.sanitizeFuncotationForVcf("")); + } + Assert.assertEquals(clinvarAnnotations, clinvarAnnotationsTruth); + } } diff --git a/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/PIK3CA_3_miss_clinvar_alt_only.vcf b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/PIK3CA_3_miss_clinvar_alt_only.vcf new file mode 100644 index 00000000000..34f51298214 --- /dev/null +++ b/src/test/resources/org/broadinstitute/hellbender/tools/funcotator/PIK3CA_3_miss_clinvar_alt_only.vcf @@ -0,0 +1,10 @@ +##fileformat=VCFv4.1 +##fileDate=201708028 +##source=FuncotatorTestsV0.1 +##reference=file:///Users/jonn/Development/references/Homo_sapiens_assembly19.fasta +#CHROM POS ID REF ALT QUAL FILTER INFO +3 178866587 . G A 40 . . +3 178866587 . G T 40 . . +3 178866586 . A ACGA 40 . . +3 178866586 . AG A 40 . . +3 178866587 . G GAG 40 . .