Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ADAM-1381] Fix Variant end position. #1389

Merged
merged 1 commit into from
Feb 21, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,16 @@ private[adam] object VariantContextConverter {
}
}

// Shared, pre-built optional <NON_REF> allele (created with the second
// argument `false`, i.e. not flagged as the reference allele).
private val OPT_NON_REF = Some(Allele.create("<NON_REF>", false))

/**
 * Decides whether the <NON_REF> allele should accompany a variant.
 *
 * @param v Variant whose alternate allele field is inspected.
 * @return None when the variant has an alternate allele set; otherwise the
 *   shared <NON_REF> allele wrapped in an Option.
 */
private def optNonRef(v: Variant): Option[Allele] = {
  // Option(...) maps a null alternate allele to None
  Option(v.getAlternateAllele) match {
    case Some(_) => None
    case None    => OPT_NON_REF
  }
}

/**
* Converts the alleles in a variant into a Java collection of htsjdk alleles.
*
Expand All @@ -115,7 +125,8 @@ private[adam] object VariantContextConverter {
*/
private def convertAlleles(v: Variant): java.util.Collection[Allele] = {
  // Order matters: reference allele first, then the alternate allele, then
  // the <NON_REF> allele that optNonRef supplies only when the variant has
  // no alternate allele. Empty options drop out in the flatten.
  val asSeq = Seq(convertAlleleOpt(v.getReferenceAllele, true),
    convertAlleleOpt(v.getAlternateAllele),
    optNonRef(v)).flatten

  // NOTE(review): returning a Seq where a java.util.Collection is declared
  // presumably relies on an implicit collection conversion imported elsewhere
  // in this file — confirm.
  asSeq
}
Expand Down Expand Up @@ -1848,7 +1859,7 @@ private[adam] class VariantContextConverter(
val builder = new VariantContextBuilder()
.chr(v.getContigName)
.start(v.getStart + 1)
.stop(v.getStart + v.getReferenceAllele.length)
.stop(v.getEnd)
.alleles(VariantContextConverter.convertAlleles(v))

// bind the conversion functions and fold
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ import org.bdgenomics.adam.models.{
SequenceDictionary,
VariantContext => ADAMVariantContext
}
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.util.{ ADAMFunSuite, PhredUtils }
import org.bdgenomics.formats.avro._
import scala.collection.JavaConversions._
Expand Down Expand Up @@ -85,6 +86,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
/**
 * Creates a variant builder pre-populated with a fixed test SNV.
 *
 * @param contig Contig name to set on the builder. Defaults to "1".
 * @return A Variant.Builder with start 0, end 1, reference allele "A" and
 *   alternate allele "T" on the requested contig.
 */
def adamSNVBuilder(contig: String = "1"): Variant.Builder = {
  // coordinates first...
  val positioned = Variant.newBuilder()
    .setContigName(contig)
    .setStart(0L)
    .setEnd(1L)

  // ...then the A -> T substitution
  positioned
    .setReferenceAllele("A")
    .setAlternateAllele("T")
}

Expand Down Expand Up @@ -1863,6 +1865,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
val v = Variant.newBuilder
.setContigName("1")
.setStart(0L)
.setEnd(1L)
.setReferenceAllele("A")
.setAlternateAllele("T")
.build
Expand Down Expand Up @@ -2534,4 +2537,17 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamGt.getVariantCallingAnnotations.getAttributes.containsKey("STRING_G"))
assert(adamGt.getVariantCallingAnnotations.getAttributes.get("STRING_G") === "foo,bar,baz")
}

// Loads the multi-allelic gVCF fixture, converts the record starting at
// 16157520, and checks that the converted record's end position is 16157602.
sparkTest("respect end position for symbolic alts") {
  val gvcfPath = testFile("gvcf_dir/gvcf_multiallelic.g.vcf")
  val loaded = sc.loadVcf(gvcfPath).rdd.collect()

  // first loaded record whose variant starts at 16157520
  val atSite = loaded.filter(vc => vc.variant.variant.getStart == 16157520L)
  val converted = converter.convert(atSite.head)

  assert(converted.isDefined)
  assert(converted.get.getEnd === 16157602)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -399,8 +399,6 @@ class ADAMContextSuite extends ADAMFunSuite {
val path = new File(testFile("gvcf_dir/gvcf_multiallelic.g.vcf")).getParent()

val variants = sc.loadVcf(path).toVariantRDD
// Not sure that the count should be 7 below; however, the current failure to read the multi-allelic site happens
// before this assertion is even reached
assert(variants.rdd.count === 6)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class VariantContextRDDSuite extends ADAMFunSuite {
val v0 = Variant.newBuilder
.setContigName("chr11")
.setStart(17409572L)
.setEnd(17409573L)
.setReferenceAllele("T")
.setAlternateAllele("C")
.setNames(ImmutableList.of("rs3131972", "rs201888535"))
Expand Down