Skip to content

Commit

Permalink
[ADAM-1579] Add unit test coverage for BED12 format.
Browse files Browse the repository at this point in the history
Resolves #1579. Found small bugs that would lead to the last 6 fields not being
written if the thickStart field wasn't present. Also identified a bug where the
strand would be set to unknown if an independent strand was provided.
  • Loading branch information
fnothaft committed Jul 4, 2017
1 parent 0306717 commit ee331a2
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ private[rdd] class BEDParser extends FeatureParser {

if (hasColumn(3)) f.setName(fields(3))
if (hasColumn(4)) f.setScore(fields(4).toDouble)
if (hasColumn(5)) Features.toStrand(fields(5)).foreach(f.setStrand(_))
if (fields.length > 5) Features.toStrand(fields(5)).foreach(f.setStrand(_))

val attributes = new ArrayBuffer[(String, String)]()
if (hasColumn(6)) attributes += ("thickStart" -> fields(6))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,9 @@ object FeatureRDD {
val score = Option(feature.getScore).getOrElse(".")
val strand = Features.asString(feature.getStrand)

if (!feature.getAttributes.containsKey("thickStart")) {
if (!feature.getAttributes.containsKey("thickStart") &&
!feature.getAttributes.containsKey("itemRgb") &&
!feature.getAttributes.containsKey("blockCount")) {
// write BED6 format
List(chrom, start, end, name, score, strand).mkString("\t")
} else {
Expand Down
4 changes: 4 additions & 0 deletions adam-core/src/test/resources/small.1_12.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
1 143 26423 line1 0.0 . 150 26400 0,0,0 . . .
1 14397230 26472788 line2 100.0 + 14397230 26472700 255,0,0 1 12075558 14397230
1 169801934 169801939 line3 200.0 - . . 0,255,0 2 100,200 169801934,169801739
1 240997788 240997796 line4 with a space 1000.0 ? . . 0,0,255 . . .
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,13 @@ class ADAMContextSuite extends ADAMFunSuite {
assert(features.count === 10)
}

sparkTest("Can read a BED 12 file") {
  // Loads the 12-column BED fixture through the generic feature loader and
  // checks that every record in the file comes back as a feature.
  // note: this .bed doesn't actually conform to the UCSC BED spec...sigh...
  val bedPath = testFile("small.1_12.bed")
  val loaded: RDD[Feature] = sc.loadFeatures(bedPath).rdd
  assert(loaded.count === 4)
}

sparkTest("Can read a .bed file without cache") {
val path = testFile("gencode.v7.annotation.trunc10.bed")
val features: RDD[Feature] = sc.loadFeatures(path, optStorageLevel = Some(StorageLevel.NONE)).rdd
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ class FeatureRDDSuite extends ADAMFunSuite with TypeCheckedTripleEquals {
assert(features.rdd.count === reloadedFeatures.rdd.count)
}

sparkTest("round trip BED format") {
sparkTest("round trip BED6 format") {
val inputPath = testFile("dvl1.200.bed")
val expected = sc.loadBed(inputPath)
val outputPath = tempLocation(".bed")
Expand All @@ -263,6 +263,37 @@ class FeatureRDDSuite extends ADAMFunSuite with TypeCheckedTripleEquals {
})
}

sparkTest("round trip BED12 format") {
  // Load the BED12 fixture and write it back out as a single BED file.
  val inputPath = testFile("small.1_12.bed")
  val expected = sc.loadBed(inputPath)
  val outputPath = tempLocation(".bed")
  expected.saveAsBed(outputPath, asSingleFile = true)

  // The first feature must serialize to all twelve BED columns, in order.
  val expectedCols = Seq(
    "1", "143", "26423", "line1", "0.0", ".",
    "150", "26400", "0,0,0", ".", ".", ".")
  val bedCols = FeatureRDD.toBed(expected.rdd.first).split('\t')
  assert(bedCols.size === 12)
  expectedCols.zipWithIndex.foreach {
    case (col, idx) => assert(bedCols(idx) === col)
  }

  // Reload what was written and compare it feature-by-feature with the input.
  val actual = sc.loadBed(outputPath)
  expected.rdd.collect.zip(actual.rdd.collect).foreach {
    case (before, after) => assert(before === after)
  }

  // Finally compare the written file against the original fixture.
  // NOTE(review): presumably checkFiles compares file contents; confirm in ADAMFunSuite.
  checkFiles(inputPath, outputPath)
}

sparkTest("save IntervalList as GTF format") {
val inputPath = testFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
val features = sc.loadIntervalList(inputPath)
Expand Down

0 comments on commit ee331a2

Please sign in to comment.