Skip to content

Commit

Permalink
Increasing unit test coverage for VariantContextConverter (#1276)
Browse files Browse the repository at this point in the history
* Increasing unit test coverage for VariantContextConverter
* Use VCF header lines in VCFInFormatter
  • Loading branch information
heuermh authored and fnothaft committed Nov 18, 2016
1 parent c3afbcb commit e0979a9
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,9 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N
if (vc.isFiltered) {
builder.setFiltersFailed(new java.util.ArrayList(vc.getFilters));
}
if (vc.getAttributeAsBoolean("SOMATIC", false)) {
builder.setSomatic(true)
}
builder.build
}

Expand Down Expand Up @@ -607,6 +610,21 @@ private[adam] class VariantContextConverter(dict: Option[SequenceDictionary] = N
case Some(s) => vcb.id(s)
}

val filtersApplied = Option(variant.getFiltersApplied).getOrElse(false)
val filtersPassed = Option(variant.getFiltersPassed).getOrElse(false)

(filtersApplied, filtersPassed) match {
case (false, false) => vcb.unfiltered
case (false, true) => vcb.passFilters // log warning?
case (true, false) => vcb.filters(new java.util.HashSet(variant.getFiltersFailed()))
case (true, true) => vcb.passFilters
}

val somatic: java.lang.Boolean = Option(variant.getSomatic).getOrElse(false)
if (somatic) {
vcb.attribute("SOMATIC", true)
}

// TODO: Extract provenance INFO fields
try {
vcb.genotypes(vc.genotypes.map(g => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@ import htsjdk.variant.variantcontext.writer.{
}
import htsjdk.variant.vcf.{ VCFHeader, VCFHeaderLine }
import java.io.OutputStream
import org.bdgenomics.adam.converters.{
SupportedHeaderLines,
VariantContextConverter
}
import org.bdgenomics.adam.converters.VariantContextConverter
import org.bdgenomics.adam.models.{
SequenceDictionary,
VariantContext
Expand All @@ -47,13 +44,14 @@ object VCFInFormatter extends InFormatterCompanion[VariantContext, VariantContex
* VCF header.
*/
def apply(gRdd: VariantContextRDD): VCFInFormatter = {
VCFInFormatter(gRdd.sequences, gRdd.samples.map(_.getSampleId))
VCFInFormatter(gRdd.sequences, gRdd.samples.map(_.getSampleId), gRdd.headerLines)
}
}

private[variant] case class VCFInFormatter private (
sequences: SequenceDictionary,
samples: Seq[String]) extends InFormatter[VariantContext, VariantContextRDD, VCFInFormatter] {
samples: Seq[String],
headerLines: Seq[VCFHeaderLine]) extends InFormatter[VariantContext, VariantContextRDD, VCFInFormatter] {

protected val companion = VCFInFormatter

Expand All @@ -75,9 +73,7 @@ private[variant] case class VCFInFormatter private (
.unsetOption(Options.INDEX_ON_THE_FLY)
.build()

val headerLines: Set[VCFHeaderLine] = (SupportedHeaderLines.infoHeaderLines ++
SupportedHeaderLines.formatHeaderLines).toSet
val header = new VCFHeader(headerLines, samples)
val header = new VCFHeader(headerLines.toSet, samples)
header.setSequenceDictionary(sequences.toSAMSequenceDictionary)
writer.writeHeader(header)

Expand Down
12 changes: 6 additions & 6 deletions adam-core/src/test/resources/sorted.lex.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@
##contig=<ID=2,length=249250621>
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C . . . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . . . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . . . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
13 752721 rs3131972 A G . . . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . . . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
2 19190 . GC G . . . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
1 14397 . CTGT C . IndelQD . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . PASS . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
13 752721 rs3131972 A G . PASS . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . PASS . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
2 19190 . GC G . PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
12 changes: 6 additions & 6 deletions adam-core/src/test/resources/sorted.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@
##contig=<ID=2,length=249250621>
##contig=<ID=13,length=249250621>
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
1 14397 . CTGT C . . . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . . . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . . . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
2 19190 . GC G . . . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
13 752721 rs3131972 A G . . . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . . . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
1 14397 . CTGT C . IndelQD . GT:AD:DP:FT:GQ:PL 0/1:16,4:20:rd:99:120,0,256 0/1:8,2:10:dp;rd:60:60,0,256 0/0:39,0:39:PASS:99:0,116,256
1 14522 . G A . VQSRTrancheSNP99.95to100.00 . GT:AD:DP:FT:GQ:PL 0/1:10,5:15:dp:99:99,0,256 0/1:2,5:7:dp;rd:34:128,0,34 0/0:26,0:26:PASS:78:0,78,256
1 63735 rs201888535 CCTA C . PASS . GT:AD:DP:FT:GQ:PL 0/0:27,0:27:PASS:79:0,79,256 0/0:40,0:40:PASS:99:0,117,256 0/1:23,74:97:rd:99:256,0,256
2 19190 . GC G . PASS . GT:AD:DP:FT:GQ:PL 0/1:8,14:22:PASS:99:256,0,256 0/1:18,13:31:PASS:99:256,0,256 0/1:5,15:20:rd:99:256,0,107
13 752721 rs3131972 A G . PASS . GT:AD:DP:FT:GQ:PL 1/1:0,27:27:PASS:81:256,81,0 1/1:0,19:19:dp:57:256,57,0 1/1:0,22:22:PASS:66:256,66,0
13 752791 . A G . PASS . GT:AD:DP:FT:GQ:PL:SB 1/1:0,27:27:PASS:81:256,81,0:0,1,2,3 1/1:0,19:19:dp:57:256,57,0:4,5,6,7 1/1:0,22:22:PASS:66:256,66,0:2,3,4,5
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,23 @@ class VariantContextConverterSuite extends ADAMFunSuite {

assert(variant.getReferenceAllele === "A")
assert(variant.getStart === 0L)
assert(variant.getSomatic === false)
}

test("Convert somatic htsjdk site-only SNV to ADAM") {
  val vcConverter = new VariantContextConverter

  // An A->T SNV spanning position 1 on contig "1", with no genotypes,
  // tagged with the SOMATIC attribute.
  val somaticSite = new VariantContextBuilder()
    .alleles(List(Allele.create("A", true), Allele.create("T")))
    .start(1L)
    .stop(1L)
    .chr("1")
    .attribute("SOMATIC", true)
    .make

  // Inspect the first converted ADAM variant context.
  val convertedVariant = vcConverter.convert(somaticSite).head.variant.variant

  // The SOMATIC attribute must be reflected in the ADAM variant's somatic field.
  assert(convertedVariant.getSomatic === true)
}

test("Convert htsjdk site-only SNV to ADAM with contig conversion") {
Expand Down Expand Up @@ -413,4 +430,94 @@ class VariantContextConverterSuite extends ADAMFunSuite {
assert(htsjdkVC.hasID)
assert(htsjdkVC.getID === "rs3131972;rs201888535")
}

test("Convert ADAM variant context with null filters applied to htsjdk") {
  // Explicitly null out the filtersApplied flag on the ADAM variant.
  val unflaggedVariant = adamSNVBuilder().setFiltersApplied(null).build
  val vcConverter = new VariantContextConverter
  val converted = vcConverter.convert(ADAMVariantContext(unflaggedVariant))

  // The htsjdk record should report that no filters were applied at all.
  assert(!converted.filtersWereApplied)
  assert(!converted.isFiltered)
  assert(converted.getFilters.isEmpty)
}

test("Convert ADAM variant context with no filters applied to htsjdk") {
  // filtersApplied is explicitly false on the ADAM variant.
  val unfilteredVariant = adamSNVBuilder().setFiltersApplied(false).build
  val vcConverter = new VariantContextConverter
  val converted = vcConverter.convert(ADAMVariantContext(unfilteredVariant))

  // The htsjdk record should be unfiltered with an empty filter set.
  assert(!converted.filtersWereApplied)
  assert(!converted.isFiltered)
  assert(converted.getFilters.isEmpty)
}

test("Convert ADAM variant context with passing filters to htsjdk") {
  // Filters were applied and the variant passed all of them.
  val passingVariant = adamSNVBuilder()
    .setFiltersApplied(true)
    .setFiltersPassed(true)
    .build
  val vcConverter = new VariantContextConverter
  val converted = vcConverter.convert(ADAMVariantContext(passingVariant))

  // The htsjdk record should be marked as filtered-and-passing: filters were
  // applied, the record is not filtered out, and no failed filters are listed.
  assert(converted.filtersWereApplied)
  assert(!converted.isFiltered)
  assert(converted.getFilters.isEmpty)
}

test("Convert ADAM variant context with failing filters to htsjdk") {
  // Filters were applied and the variant failed two named filters.
  val failingVariant = adamSNVBuilder()
    .setFiltersApplied(true)
    .setFiltersPassed(false)
    .setFiltersFailed(ImmutableList.of("FILTER1", "FILTER2"))
    .build
  val vcConverter = new VariantContextConverter
  val converted = vcConverter.convert(ADAMVariantContext(failingVariant))

  // The htsjdk record should be filtered and list both failed filters.
  assert(converted.filtersWereApplied)
  assert(converted.isFiltered)
  val failedFilters = converted.getFilters
  assert(failedFilters.contains("FILTER1"))
  assert(failedFilters.contains("FILTER2"))
}

test("Convert ADAM variant context with null somatic flag to htsjdk") {
  // Explicitly null out the somatic flag on the ADAM variant.
  val unflaggedVariant = adamSNVBuilder().setSomatic(null).build
  val vcConverter = new VariantContextConverter
  val converted = vcConverter.convert(ADAMVariantContext(unflaggedVariant))

  // No SOMATIC attribute should be written to the htsjdk record.
  assert(!converted.hasAttribute("SOMATIC"))
}

test("Convert ADAM variant context with non-somatic variant to htsjdk") {
  // The somatic flag is explicitly false on the ADAM variant.
  val germlineVariant = adamSNVBuilder().setSomatic(false).build
  val vcConverter = new VariantContextConverter
  val converted = vcConverter.convert(ADAMVariantContext(germlineVariant))

  // No SOMATIC attribute should be written to the htsjdk record.
  assert(!converted.hasAttribute("SOMATIC"))
}

test("Convert ADAM variant context with somatic variant to htsjdk") {
  // The somatic flag is explicitly true on the ADAM variant.
  val somaticVariant = adamSNVBuilder().setSomatic(true).build
  val vcConverter = new VariantContextConverter
  val converted = vcConverter.convert(ADAMVariantContext(somaticVariant))

  // The htsjdk record should carry a SOMATIC attribute that reads back as true.
  assert(converted.hasAttribute("SOMATIC"))
  assert(converted.getAttributeAsBoolean("SOMATIC", false) === true)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,18 @@ class ADAMContextSuite extends ADAMFunSuite {
assert(vcs.size === 6)

val vc = vcs.head
val variant = vc.variant.variant
assert(variant.getContigName === "1")
assert(variant.getStart === 14396L)
assert(variant.getEnd === 14400L)
assert(variant.getReferenceAllele === "CTGT")
assert(variant.getAlternateAllele === "C")
assert(variant.getNames.isEmpty)
assert(variant.getFiltersApplied === true)
assert(variant.getFiltersPassed === false)
assert(variant.getFiltersFailed.contains("IndelQD"))
assert(variant.getSomatic === false)

assert(vc.genotypes.size === 3)

val gt = vc.genotypes.head
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ class VariantContextRDDSuite extends ADAMFunSuite {
.setReferenceAllele("T")
.setAlternateAllele("C")
.setNames(ImmutableList.of("rs3131972", "rs201888535"))
.setFiltersApplied(true)
.setFiltersPassed(true)
.build

val g0 = Genotype.newBuilder().setVariant(v0)
Expand All @@ -67,14 +69,18 @@ class VariantContextRDDSuite extends ADAMFunSuite {
val vcRdd = sc.loadVcf("%s/test.vcf/part-r-00000".format(tempDir))
assert(vcRdd.rdd.count === 1)

val variant = vcRdd.rdd.first.variant
assert(variant.variant.getContigName === "chr11")
assert(variant.variant.getStart === 17409572)
assert(variant.variant.getReferenceAllele === "T")
assert(variant.variant.getAlternateAllele === "C")
assert(variant.variant.getNames.length === 2)
assert(variant.variant.getNames.get(0) === "rs3131972")
assert(variant.variant.getNames.get(1) === "rs201888535")
val variant = vcRdd.rdd.first.variant.variant
assert(variant.getContigName === "chr11")
assert(variant.getStart === 17409572)
assert(variant.getReferenceAllele === "T")
assert(variant.getAlternateAllele === "C")
assert(variant.getNames.length === 2)
assert(variant.getNames.get(0) === "rs3131972")
assert(variant.getNames.get(1) === "rs201888535")
assert(variant.getFiltersApplied === true)
assert(variant.getFiltersPassed === true)
assert(variant.getFiltersFailed.isEmpty)
assert(variant.getSomatic === false)

assert(vcRdd.sequences.records.size === 1)
assert(vcRdd.sequences.records(0).name === "chr11")
Expand Down Expand Up @@ -116,9 +122,7 @@ class VariantContextRDDSuite extends ADAMFunSuite {
}

sparkTest("don't lose any variants when piping as VCF") {
val smallVcf = Thread.currentThread()
.getContextClassLoader
.getResource("small.vcf").getFile
val smallVcf = testFile("small.vcf")
val rdd: VariantContextRDD = sc.loadVcf(smallVcf)
val records = rdd.rdd.count

Expand Down

0 comments on commit e0979a9

Please sign in to comment.