Skip to content

Commit

Permalink
Remove StructuralVariant and StructuralVariantType, add names field t…
Browse files Browse the repository at this point in the history
…o Variant
  • Loading branch information
heuermh authored and fnothaft committed Nov 3, 2016
1 parent 5c30085 commit 12f245c
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 48 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@
*/
package org.bdgenomics.adam.converters

import com.google.common.collect.ImmutableList
import htsjdk.variant.variantcontext.{
Allele,
GenotypesContext,
GenotypeLikelihoods,
VariantContext => HtsjdkVariantContext,
VariantContextBuilder
}
import htsjdk.variant.vcf.VCFConstants
import java.util.Collections
import org.bdgenomics.utils.misc.Logging
import org.bdgenomics.adam.models.{
Expand Down Expand Up @@ -312,6 +314,36 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N
contigToRefSeq.getOrElse(vc.getChr, vc.getChr)
}

/**
* Split the htsjdk variant context ID field into an array of names.
*
* @param vc htsjdk variant context
* @return Returns an Option wrapping an array of names split from the htsjdk
* variant context ID field
*/
private def splitIds(vc: HtsjdkVariantContext): Option[java.util.List[String]] = {
if (vc.hasID()) {
Some(ImmutableList.copyOf(vc.getID().split(VCFConstants.ID_FIELD_SEPARATOR)))
} else {
None
}
}

/**
* Join the array of variant names into a string for the htsjdk variant context ID field.
*
* @param variant variant
* @return Returns an Option wrapping a string for the htsjdk variant context ID field joined
* from the array of variant names
*/
private def joinNames(variant: Variant): Option[String] = {
if (variant.getNames != null && variant.getNames.length > 0) {
Some(variant.getNames.mkString(VCFConstants.ID_FIELD_SEPARATOR))
} else {
None
}
}

/**
* Builds an avro Variant for a site with a defined alt allele.
*
Expand All @@ -321,15 +353,14 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N
* @return Returns an Avro description of the genotyped site.
*/
private def createADAMVariant(vc: HtsjdkVariantContext, alt: Option[String]): Variant = {
// VCF CHROM, POS, REF and ALT
// VCF CHROM, POS, ID, REF and ALT
val builder = Variant.newBuilder
.setContigName(createContig(vc))
.setStart(vc.getStart - 1 /* ADAM is 0-indexed */ )
.setEnd(vc.getEnd /* ADAM is 0-indexed, so the 1-indexed inclusive end becomes exclusive */ )
.setReferenceAllele(vc.getReference.getBaseString)
alt.foreach(builder.setAlternateAllele(_))
if (vc.hasID())
builder.setNames(vc.getID().split(VCFConstants.ID_FIELD_SEPARATOR))
splitIds(vc).foreach(builder.setNames(_))
builder.build
}

Expand Down Expand Up @@ -537,7 +568,10 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N
.stop(variant.getStart + variant.getReferenceAllele.length)
.alleles(VariantContextConverter.convertAlleles(variant))

vc.databases.flatMap(d => Option(d.getDbSnpId)).foreach(d => vcb.id("rs" + d))
joinNames(variant) match {
case None => vcb.noID()
case Some(s) => vcb.id(s)
}

// TODO: Extract provenance INFO fields
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ import org.bdgenomics.formats.avro.Variant
*/
object VariantField extends FieldEnumeration(Variant.SCHEMA$) {

val contigName, start, end, referenceAllele, alternateAllele, svAllele, somatic, variantErrorProbability = SchemaValue
val contigName, start, end, names, referenceAllele, alternateAllele, somatic = SchemaValue
}
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,6 @@ class ADAMKryoRegistrator extends KryoRegistrator {
kryo.register(classOf[org.bdgenomics.formats.avro.Sample],
new AvroSerializer[org.bdgenomics.formats.avro.Sample])
kryo.register(classOf[org.bdgenomics.formats.avro.Strand])
kryo.register(classOf[org.bdgenomics.formats.avro.StructuralVariant],
new AvroSerializer[org.bdgenomics.formats.avro.StructuralVariant])
kryo.register(classOf[org.bdgenomics.formats.avro.TranscriptEffect],
new AvroSerializer[org.bdgenomics.formats.avro.TranscriptEffect])
kryo.register(classOf[org.bdgenomics.formats.avro.Variant],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/
package org.bdgenomics.adam.converters

import com.google.common.collect.ImmutableList
import htsjdk.samtools.SAMFileReader
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor
import htsjdk.variant.variantcontext.{
Expand All @@ -41,25 +42,25 @@ class VariantContextConverterSuite extends ADAMFunSuite {
SequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(new File(path)))
}

def gatkSNVBuilder: VariantContextBuilder = new VariantContextBuilder()
def htsjdkSNVBuilder: VariantContextBuilder = new VariantContextBuilder()
.alleles(List(Allele.create("A", true), Allele.create("T")))
.start(1L)
.stop(1L)
.chr("1")

def gatkMultiAllelicSNVBuilder: VariantContextBuilder = new VariantContextBuilder()
def htsjdkMultiAllelicSNVBuilder: VariantContextBuilder = new VariantContextBuilder()
.alleles(List(Allele.create("A", true), Allele.create("T"), Allele.create("G")))
.start(1L)
.stop(1L)
.chr("1")

def gatkRefSNV: VariantContextBuilder = new VariantContextBuilder()
def htsjdkRefSNV: VariantContextBuilder = new VariantContextBuilder()
.alleles(List(Allele.create("A", true), Allele.create("<NON_REF>", false)))
.start(1L)
.stop(1L)
.chr("1")

def gatkCNVBuilder: VariantContextBuilder = new VariantContextBuilder()
def htsjdkCNVBuilder: VariantContextBuilder = new VariantContextBuilder()
.alleles(List(Allele.create("A", true), Allele.create("<CN0>", false)))
.start(10L)
.stop(20L)
Expand All @@ -71,10 +72,10 @@ class VariantContextConverterSuite extends ADAMFunSuite {
.setReferenceAllele("A")
.setAlternateAllele("T")

test("Convert GATK site-only SNV to ADAM") {
test("Convert htsjdk site-only SNV to ADAM") {
val converter = new VariantContextConverter

val adamVCs = converter.convert(gatkSNVBuilder.make)
val adamVCs = converter.convert(htsjdkSNVBuilder.make)
assert(adamVCs.length === 1)
val adamVC = adamVCs.head

Expand All @@ -87,21 +88,21 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(variant.getStart === 0L)
}

test("Convert GATK site-only SNV to ADAM with contig conversion") {
test("Convert htsjdk site-only SNV to ADAM with contig conversion") {
val converter = new VariantContextConverter(Some(dictionary))

val adamVCs = converter.convert(gatkSNVBuilder.make)
val adamVCs = converter.convert(htsjdkSNVBuilder.make)
assert(adamVCs.length === 1)

val adamVC = adamVCs.head
val variant = adamVC.variant
assert(variant.getContigName === "NC_000001.10")
}

test("Convert GATK site-only CNV to ADAM") {
test("Convert htsjdk site-only CNV to ADAM") {
val converter = new VariantContextConverter

val adamVCs = converter.convert(gatkCNVBuilder.make)
val adamVCs = converter.convert(htsjdkCNVBuilder.make)
assert(adamVCs.length === 1)
val adamVC = adamVCs.head

Expand All @@ -116,8 +117,8 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(variant.getEnd === 20L)
}

test("Convert GATK SNV w/ genotypes w/ phase information to ADAM") {
val vcb = gatkSNVBuilder
test("Convert htsjdk SNV w/ genotypes w/ phase information to ADAM") {
val vcb = htsjdkSNVBuilder

val genotypeAttributes = Map[String, Object]("PQ" -> new Integer(50), "PS" -> new Integer(1))
val vc = vcb.genotypes(GenotypeBuilder.create("NA12878", vcb.getAlleles(), genotypeAttributes)).make()
Expand All @@ -135,8 +136,8 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(adamGT.getPhaseQuality === 50)
}

test("Convert GATK SNV with different filters to ADAM") {
val vcb = gatkSNVBuilder
test("Convert htsjdk SNV with different filters to ADAM") {
val vcb = htsjdkSNVBuilder
vcb.genotypes(GenotypeBuilder.create("NA12878", vcb.getAlleles))

val converter = new VariantContextConverter
Expand All @@ -161,32 +162,32 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
}

test("Convert ADAM site-only SNV to GATK") {
test("Convert ADAM site-only SNV to htsjdk") {
val vc = ADAMVariantContext(adamSNVBuilder().build)

val converter = new VariantContextConverter

val gatkVC = converter.convert(vc)
assert(gatkVC.getContig === "1")
assert(gatkVC.getStart === 1)
assert(gatkVC.getEnd === 1)
assert(gatkVC.getReference === Allele.create("A", true))
assert(gatkVC.getAlternateAlleles.sameElements(List(Allele.create("T"))))
assert(!gatkVC.hasLog10PError)
assert(!gatkVC.hasID)
assert(!gatkVC.filtersWereApplied)
val htsjdkVC = converter.convert(vc)
assert(htsjdkVC.getContig === "1")
assert(htsjdkVC.getStart === 1)
assert(htsjdkVC.getEnd === 1)
assert(htsjdkVC.getReference === Allele.create("A", true))
assert(htsjdkVC.getAlternateAlleles.sameElements(List(Allele.create("T"))))
assert(!htsjdkVC.hasLog10PError)
assert(!htsjdkVC.hasID)
assert(!htsjdkVC.filtersWereApplied)
}

test("Convert ADAM site-only SNV to GATK with contig conversion") {
test("Convert ADAM site-only SNV to htsjdk with contig conversion") {
val vc = ADAMVariantContext(adamSNVBuilder("NC_000001.10").build)

val converter = new VariantContextConverter(dict = Some(dictionary))

val gatkVC = converter.convert(vc)
assert(gatkVC.getContig === "1")
val htsjdkVC = converter.convert(vc)
assert(htsjdkVC.getContig === "1")
}

test("Convert ADAM SNV w/ genotypes to GATK") {
test("Convert ADAM SNV w/ genotypes to htsjdk") {
val variant = adamSNVBuilder().build
val genotype = Genotype.newBuilder
.setVariant(variant)
Expand All @@ -201,18 +202,18 @@ class VariantContextConverterSuite extends ADAMFunSuite {

val converter = new VariantContextConverter

val gatkVC = converter.convert(ADAMVariantContext(variant, Seq(genotype)))
assert(gatkVC.getNSamples === 1)
assert(gatkVC.hasGenotype("NA12878"))
val gatkGT = gatkVC.getGenotype("NA12878")
assert(gatkGT.getType === GenotypeType.HET)
assert(gatkVC.hasAttribute("FS"))
assert(gatkVC.hasAttribute("MQ"))
assert(gatkVC.hasAttribute("MQ0"))
val htsjdkVC = converter.convert(ADAMVariantContext(variant, Seq(genotype)))
assert(htsjdkVC.getNSamples === 1)
assert(htsjdkVC.hasGenotype("NA12878"))
val htsjdkGT = htsjdkVC.getGenotype("NA12878")
assert(htsjdkGT.getType === GenotypeType.HET)
assert(htsjdkVC.hasAttribute("FS"))
assert(htsjdkVC.hasAttribute("MQ"))
assert(htsjdkVC.hasAttribute("MQ0"))
}

test("Convert GATK multi-allelic sites-only SNVs to ADAM") {
val vc = gatkMultiAllelicSNVBuilder.make
test("Convert htsjdk multi-allelic sites-only SNVs to ADAM") {
val vc = htsjdkMultiAllelicSNVBuilder.make
val converter = new VariantContextConverter

val adamVCs = converter.convert(vc)
Expand All @@ -225,11 +226,11 @@ class VariantContextConverterSuite extends ADAMFunSuite {
}
}

test("Convert GATK multi-allelic SNVs to ADAM") {
test("Convert htsjdk multi-allelic SNVs to ADAM") {
val gb = new GenotypeBuilder("NA12878", List(Allele.create("T"), Allele.create("G")))
gb.AD(Array(4, 2, 3)).PL(Array(59, 0, 181, 1, 66, 102))

val vcb = gatkMultiAllelicSNVBuilder
val vcb = htsjdkMultiAllelicSNVBuilder
vcb.genotypes(gb.make)

val converter = new VariantContextConverter
Expand Down Expand Up @@ -266,7 +267,7 @@ class VariantContextConverterSuite extends ADAMFunSuite {
val gb = new GenotypeBuilder("NA12878", List(Allele.create("A", true), Allele.create("A", true)))
gb.PL(Array(0, 1, 2)).DP(44).attribute("MIN_DP", 38)

val vcb = gatkRefSNV
val vcb = htsjdkRefSNV
vcb.genotypes(gb.make)

val converter = new VariantContextConverter
Expand All @@ -286,4 +287,82 @@ class VariantContextConverterSuite extends ADAMFunSuite {
.map(PhredUtils.logProbabilityToPhred)
.sameElements(List(0, 1, 2)))
}

test("Convert htsjdk variant context with no IDs to ADAM") {
val vcb = htsjdkSNVBuilder
vcb.noID()

val converter = new VariantContextConverter

val adamVCs = converter.convert(vcb.make)
assert(adamVCs.length == 1)

val variant = adamVCs.head.variant
assert(variant.getNames.isEmpty)
}

test("Convert htsjdk variant context with one ID to ADAM") {
val vcb = htsjdkSNVBuilder
vcb.id("rs3131972")

val converter = new VariantContextConverter

val adamVCs = converter.convert(vcb.make)
assert(adamVCs.length == 1)

val variant = adamVCs.head.variant
assert(variant.getNames.length === 1)
assert(variant.getNames.get(0) === "rs3131972")
}

test("Convert htsjdk variant context with multiple IDs to ADAM") {
val vcb = htsjdkSNVBuilder
vcb.id("rs3131972;rs201888535")

val converter = new VariantContextConverter

val adamVCs = converter.convert(vcb.make)
assert(adamVCs.length == 1)

val variant = adamVCs.head.variant
assert(variant.getNames.length === 2)
assert(variant.getNames.get(0) === "rs3131972")
assert(variant.getNames.get(1) === "rs201888535")
}

test("Convert ADAM variant context with no names to htsjdk") {
val variant = adamSNVBuilder()
.build

assert(variant.getNames.isEmpty)

val converter = new VariantContextConverter

val htsjdkVC = converter.convert(ADAMVariantContext(variant))
assert(!htsjdkVC.hasID)
}

test("Convert ADAM variant context with one name to htsjdk") {
val variant = adamSNVBuilder()
.setNames(ImmutableList.of("rs3131972"))
.build

val converter = new VariantContextConverter

val htsjdkVC = converter.convert(ADAMVariantContext(variant))
assert(htsjdkVC.hasID)
assert(htsjdkVC.getID === "rs3131972")
}

test("Convert ADAM variant context with multiple names to htsjdk") {
val variant = adamSNVBuilder()
.setNames(ImmutableList.of("rs3131972", "rs201888535"))
.build

val converter = new VariantContextConverter

val htsjdkVC = converter.convert(ADAMVariantContext(variant))
assert(htsjdkVC.hasID)
assert(htsjdkVC.getID === "rs3131972;rs201888535")
}
}
Loading

0 comments on commit 12f245c

Please sign in to comment.