Skip to content

Commit

Permalink
Fixed Funcotator VCF output renderer to correctly preserve B37 contig…
Browse files Browse the repository at this point in the history
… names on output for B37 aligned files (#8539)
  • Loading branch information
jamesemery authored Oct 11, 2023
1 parent d40a485 commit 423d106
Show file tree
Hide file tree
Showing 8 changed files with 1,911 additions and 1,880 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -874,7 +874,6 @@ public void apply(final VariantContext variant, final ReadsContext readsContext,
// Get the correct reference for B37/HG19 compliance:
// This is necessary because of the variant transformation that gets applied in VariantWalkerBase::apply.
final ReferenceContext correctReferenceContext = funcotatorEngine.getCorrectReferenceContext(variant, referenceContext);

// Place the variant on our queue to be funcotated:
enqueueAndHandleVariant(variant, correctReferenceContext, featureContext);
}
Expand Down Expand Up @@ -924,7 +923,11 @@ protected void enqueueAndHandleVariant(final VariantContext variant, final Refer

final FuncotationMap funcotationMap = funcotatorEngine.createFuncotationMapForVariant(variant, referenceContext, featureContext);

// This is necessary because we want to revert the variant contig name change if it was applied in the FuncotatorEngine::getCorrectVariantContextForReference method before outputting the vcf.
// NOTE: this will only revert the variantContext if it was originally changed (only for B37 VCFs)
final VariantContext variantContextForOutput = funcotatorEngine.getCorrectVariantContextForOutput(variant);

// At this point there is only one transcript ID in the funcotation map if canonical or best effect are selected
outputRenderer.write(variant, funcotationMap);
outputRenderer.write(variantContextForOutput, funcotationMap);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ public final class FuncotatorEngine implements AutoCloseable {
*/
private final boolean mustConvertInputContigsToHg19;

/**
* Whether the output variant contigs must be converted back to B37 from hg19 before being returned.
* (NOTE: this means that the output contigs will continue to use B37 contig names even if internally we converted them to hg19)
*/
private boolean mustRevertVariantContigsFromHg19ToB37 = false;

/**
* Whether this {@link FuncotatorEngine} has only produced annotations on variants that have been labeled by the
* {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotationFactory} as {@link org.broadinstitute.hellbender.tools.funcotator.dataSources.gencode.GencodeFuncotation.VariantClassification#IGR}.
Expand Down Expand Up @@ -327,6 +333,22 @@ private VariantContext getCorrectVariantContextForReference(final VariantContext
}
}

/**
 * Produce the {@link VariantContext} that should be handed to the output renderer.
 *
 * When {@code mustRevertVariantContigsFromHg19ToB37} is set, a copy of the given variant is built whose contig
 * name has been passed through {@link FuncotatorUtils#convertHG19ContigToB37Contig}. When the flag is not set,
 * the given variant is returned as-is (the same instance, no copy).
 *
 * @param variant A {@link VariantContext} object containing the variant to prepare for output.
 * @return A new {@link VariantContext} with its contig converted from HG19 to B37 naming if reverting is enabled. Otherwise, the given {@code variant} itself.
 */
VariantContext getCorrectVariantContextForOutput(final VariantContext variant) {
    // Nothing was renamed on the way in (or no revert is wanted): pass the variant straight through.
    if ( !mustRevertVariantContigsFromHg19ToB37 ) {
        return variant;
    }

    // Copy the variant and swap only its contig name back to the B37 form.
    final VariantContextBuilder revertedBuilder = new VariantContextBuilder(variant);
    final String b37Contig = FuncotatorUtils.convertHG19ContigToB37Contig(variant.getContig());
    return revertedBuilder.chr(b37Contig).make();
}

/**
* @return The default {@link VariantTransformer} which will automatically convert from the B37 reference standard to the HG19 reference standard for contig names.
*/
Expand Down Expand Up @@ -483,7 +505,7 @@ private boolean determineReferenceAndDatasourceCompatibility() {
}
else if ( funcotatorArgs.referenceVersion.equals(BaseFuncotatorArgumentCollection.FuncotatorReferenceVersionHg19) &&
FuncotatorUtils.isSequenceDictionaryUsingB37Reference(sequenceDictionaryForDrivingVariants) ) {
logger.info("VCF sequence dictionary detected as B37 in HG19 annotation mode. Performing conversion.");
logger.info("VCF sequence dictionary detected as B37 in HG19 annotation mode. Performing conversion. (NOTE: the output VCF will still be B37)");
mustConvertInputContigsToHg19 = true;
}
else {
Expand All @@ -505,6 +527,11 @@ else if ( funcotatorArgs.referenceVersion.equals(BaseFuncotatorArgumentCollectio
"There MAY be some errors (e.g. in the Y chromosome, but possibly in other places as well) due to changes between the two references.");
}

// Record whether we need to revert the contigs back to B37 after annotation:
if (FuncotatorUtils.isSequenceDictionaryUsingB37Reference(sequenceDictionaryForDrivingVariants) && mustConvertInputContigsToHg19) {
this.mustRevertVariantContigsFromHg19ToB37 = true;
}

return mustConvertInputContigsToHg19;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ public void write(final VariantContext variant, final FuncotationMap txToFuncota
variantContextOutputBuilder.genotypes( variant.getGenotypes() );

// Render and add our VCF line:
vcfWriter.add( variantContextOutputBuilder.make() );
VariantContext out = variantContextOutputBuilder.make();
vcfWriter.add( out );
}

private Funcotation createManualAnnotationFuncotation(final Allele altAllele) {
Expand Down
Git LFS file not shown
Original file line number Diff line number Diff line change
Expand Up @@ -91,4 +91,4 @@
##reference=/cromwell_root/broad-references/hg19/v0/Homo_sapiens_assembly19.fasta
##source=Funcotator
#CHROM POS ID REF ALT QUAL FILTER INFO
chr2 70120909 rs3214822 GA G 722.12 PASS FUNCOTATION=[SNRNP27|hg19|chr2|70120910|70120910|FIVE_PRIME_UTR||DEL|A|A|-|g.chr2:70120910delA|ENST00000244227.3|+|1|||||0.43640897755610975|GGGAAAAATGAAAGCTGTGTT|SNRNP27_ENST00000409116.1_FIVE_PRIME_FLANK/SNRNP27_ENST00000488986.1_FIVE_PRIME_FLANK|||||||||||||||||||||||||91|biliary_tract(2)_%7C_breast(12)_%7C_central_nervous_system(44)_%7C_large_intestine(11)_%7C_pancreas(22)|||||||X76302|NM_006857.2|NP_006848.1|HGNC:30240|small_%20_nuclear_%20_ribonucleoprotein_%20_U4/U6.U5_%20_subunit_%20_27|Approved|gene_%20_with_%20_protein_%20_product|protein-coding_%20_gene||"small_%20_nuclear_%20_ribonucleoprotein_%20_27kDa_%20_(U4/U6.U5)"_%2C__%20_"small_%20_nuclear_%20_ribonucleoprotein_%2C__%20_U4/U6.U5_%20_27kDa_%20_subunit"|RY1_%2C__%20_U4/U6.U5-27K|"nucleic_%20_acid_%20_binding_%20_protein_%20_RY_%20_1"_%2C__%20_"U4/U6.U5_%20_small_%20_nuclear_%20_ribonucleoprotein_%20_27_%20_kDa_%20_protein"|2p13.3|2016-10-05||2016-03-11|X76302||11017|ENSG00000124380|7931148_%2C__%20_9085842|NM_006857|||CCDS33219|OTTHUMG00000152689|11017||NM_006857|Q8WVK2|ENSG00000124380|uc002sfw.4|SNR27_HUMAN||Q15410|Q8WVK2|mRNA_%20_processing_%20_(GO:0006397)_%7C_RNA_%20_splicing_%20_(GO:0008380)|nucleus_%20_(GO:0005634)|nucleic_%20_acid_%20_binding_%20_(GO:0003676)|_%7C_|_%7C_|_%7C_|true_%7C_true|false_%7C_false|0.6222_%2C_0.3778_%7C_0.6222_%2C_0.3778|false_%7C_false|false_%7C_false|1_%7C_1|_%7C_|false_%7C_false|true_%7C_false|true_%7C_false|SNRNP27:11017_%7C_SNRNP27:11017|true_%7C_false|false_%7C_false|false_%7C_false|true_%7C_false|true_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|true_%7C_true|false_%7C_false|3214822_%7C_397747233|70120910_%7C_70120912|false_%7C_false|false_%7C_false|0_%7C_0|true_%7C_true|0_%7C_0|false_%7C_false|0.661217_%2C_0.338783_%7C_|false_%7C_false|false_%7C_fals
e|false_%7C_false|DIV_%7C_DIV|true_%7C_false|0x05010002000517013e000200_%7C_0x050100020005000002000200|1_%7C_1|false_%7C_false|134_%7C_138|rs3214822_%7C_rs397747233|_%7C_]
2 70120909 rs3214822 GA G 722.12 PASS FUNCOTATION=[SNRNP27|hg19|chr2|70120910|70120910|FIVE_PRIME_UTR||DEL|A|A|-|g.chr2:70120910delA|ENST00000244227.3|+|1|||||0.43640897755610975|GGGAAAAATGAAAGCTGTGTT|SNRNP27_ENST00000409116.1_FIVE_PRIME_FLANK/SNRNP27_ENST00000488986.1_FIVE_PRIME_FLANK|||||||||||||||||||||||||91|biliary_tract(2)_%7C_breast(12)_%7C_central_nervous_system(44)_%7C_large_intestine(11)_%7C_pancreas(22)|||||||X76302|NM_006857.2|NP_006848.1|HGNC:30240|small_%20_nuclear_%20_ribonucleoprotein_%20_U4/U6.U5_%20_subunit_%20_27|Approved|gene_%20_with_%20_protein_%20_product|protein-coding_%20_gene||"small_%20_nuclear_%20_ribonucleoprotein_%20_27kDa_%20_(U4/U6.U5)"_%2C__%20_"small_%20_nuclear_%20_ribonucleoprotein_%2C__%20_U4/U6.U5_%20_27kDa_%20_subunit"|RY1_%2C__%20_U4/U6.U5-27K|"nucleic_%20_acid_%20_binding_%20_protein_%20_RY_%20_1"_%2C__%20_"U4/U6.U5_%20_small_%20_nuclear_%20_ribonucleoprotein_%20_27_%20_kDa_%20_protein"|2p13.3|2016-10-05||2016-03-11|X76302||11017|ENSG00000124380|7931148_%2C__%20_9085842|NM_006857|||CCDS33219|OTTHUMG00000152689|11017||NM_006857|Q8WVK2|ENSG00000124380|uc002sfw.4|SNR27_HUMAN||Q15410|Q8WVK2|mRNA_%20_processing_%20_(GO:0006397)_%7C_RNA_%20_splicing_%20_(GO:0008380)|nucleus_%20_(GO:0005634)|nucleic_%20_acid_%20_binding_%20_(GO:0003676)|_%7C_|_%7C_|_%7C_|true_%7C_true|false_%7C_false|0.6222_%2C_0.3778_%7C_0.6222_%2C_0.3778|false_%7C_false|false_%7C_false|1_%7C_1|_%7C_|false_%7C_false|true_%7C_false|true_%7C_false|SNRNP27:11017_%7C_SNRNP27:11017|true_%7C_false|false_%7C_false|false_%7C_false|true_%7C_false|true_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|false_%7C_false|true_%7C_true|false_%7C_false|3214822_%7C_397747233|70120910_%7C_70120912|false_%7C_false|false_%7C_false|0_%7C_0|true_%7C_true|0_%7C_0|false_%7C_false|0.661217_%2C_0.338783_%7C_|false_%7C_false|false_%7C_false|f
alse_%7C_false|DIV_%7C_DIV|true_%7C_false|0x05010002000517013e000200_%7C_0x050100020005000002000200|1_%7C_1|false_%7C_false|134_%7C_138|rs3214822_%7C_rs397747233|_%7C_]
Loading

0 comments on commit 423d106

Please sign in to comment.