-
Notifications
You must be signed in to change notification settings - Fork 308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Transcript Effects ignored if more than 1 #1347
Comments
Thanks for reporting this, @majkiw! I'll take a look this morning. |
These quick tests pass:
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala
index c63029a0..5c7927cf 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala
@@ -35,6 +35,7 @@ class TranscriptEffectConverterSuite extends ADAMFunSuite {
final val INVALID_NUMBER = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|1/2|c.-485C>T|||4|1/42|not a number|"
final val INVALID_FRACTION = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|not a number/2|c.-485C>T|||4|1/42|453|"
final val VALID = "T|upstream_gene_variant||TAS1R3|ENSG00000169962|transcript|ENST00000339381.5|protein_coding|1/2|c.-485C>T|||4|1/42|453|"
+ final val MULTIPLE = VALID + "," + VALID + "," + VALID
var variant: Variant = null
var variantContext: VariantContext = null
@@ -134,6 +135,33 @@ class TranscriptEffectConverterSuite extends ADAMFunSuite {
assert(te.getMessages.isEmpty)
}
+ test("parse VCF ANN attribute with multiple transcript effects") {
+ val ann = TranscriptEffectConverter.parseAnn(MULTIPLE, ValidationStringency.STRICT)
+ assert(ann.length == 3)
+
+ ann.foreach(te => {
+ assert(te.getAlternateAllele == "T")
+ assert(te.getEffects.contains("upstream_gene_variant"))
+ assert(te.getGeneName == "TAS1R3")
+ assert(te.getGeneId == "ENSG00000169962")
+ assert(te.getFeatureType == "transcript")
+ assert(te.getFeatureId == "ENST00000339381.5")
+ assert(te.getBiotype == "protein_coding")
+ assert(te.getRank == 1)
+ assert(te.getTotal == 2)
+ assert(te.getTranscriptHgvs == "c.-485C>T")
+ assert(te.getProteinHgvs == null)
+ assert(te.getCdnaPosition == null)
+ assert(te.getCdnaLength == null)
+ assert(te.getCdsPosition == 4)
+ assert(te.getCdsLength == null)
+ assert(te.getProteinPosition == 1)
+ assert(te.getProteinLength == 42)
+ assert(te.getDistance == 453)
+ assert(te.getMessages.isEmpty)
+ })
+ }
+
test("convert to transcript effect from null VCF ANN attribute in variant context") {
when(variantContext.getAttributeAsString("ANN", null)).thenReturn(null)
@@ -253,6 +281,42 @@ class TranscriptEffectConverterSuite extends ADAMFunSuite {
})
}
+ test("convert to transcript effect from VCF ANN attribute in variant context multiple effects same alt allele") {
+ variant = Variant.newBuilder()
+ .setAlternateAllele("T")
+ .build()
+
+ when(variantContext.getAttributeAsString("ANN", null)).thenReturn(MULTIPLE)
+
+ val transcriptEffectsOpt = TranscriptEffectConverter.convertToTranscriptEffects(variant, variantContext)
+ assert(transcriptEffectsOpt.isDefined)
+
+ transcriptEffectsOpt.foreach(transcriptEffects => {
+ assert(transcriptEffects.size === 3)
+ transcriptEffects.foreach(te => {
+ assert(te.getAlternateAllele == "T")
+ assert(te.getEffects.contains("upstream_gene_variant"))
+ assert(te.getGeneName == "TAS1R3")
+ assert(te.getGeneId == "ENSG00000169962")
+ assert(te.getFeatureType == "transcript")
+ assert(te.getFeatureId == "ENST00000339381.5")
+ assert(te.getBiotype == "protein_coding")
+ assert(te.getRank == 1)
+ assert(te.getTotal == 2)
+ assert(te.getTranscriptHgvs == "c.-485C>T")
+ assert(te.getProteinHgvs == null)
+ assert(te.getCdnaPosition == null)
+ assert(te.getCdnaLength == null)
+ assert(te.getCdsPosition == 4)
+ assert(te.getCdsLength == null)
+ assert(te.getProteinPosition == 1)
+ assert(te.getProteinLength == 42)
+ assert(te.getDistance == 453)
+ assert(te.getMessages.isEmpty)
+ })
+ })
+ }
+
test("convert transcript effect to VCF ANN attribute value") {
val te = TranscriptEffect.newBuilder()
.setAlternateAllele("T")
The problem may be that in the tests I'm mocking htsjdk with reasonable behavior, which of course may not be a reasonable thing to do. |
Hey,
Notice the extra brackets at the start/end of this string that break further parsing. So for lists we need |
If the ANN VCF header line is |
Thank you for picking this up @heuermh. |
Hi,
I was trying to run current master against our VCF files and I have noticed that whenever there is more than one value in transcript effects, the list is returned empty.
BTW, we really appreciate that you added support for transcript effects - this was the last missing thing we required to be able to use ADAM and you added it just 2 weeks after we started testing. Thank you :)
I am attaching an example for completeness, but I don't think it's needed, because it happens on every row with multiple transcript effects.
This would return 1 transcript effect:
This would return no transcript effects:
Note: If no one is working on this I will probably look into this myself if I have time today or tomorrow.
The text was updated successfully, but these errors were encountered: