diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureParser.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureParser.scala
index 14ed9f4cac..7c6f970552 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureParser.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureParser.scala
@@ -323,7 +323,7 @@ private[rdd] class BEDParser extends FeatureParser {
 
     if (hasColumn(3)) f.setName(fields(3))
     if (hasColumn(4)) f.setScore(fields(4).toDouble)
-    if (hasColumn(5)) Features.toStrand(fields(5)).foreach(f.setStrand(_))
+    if (fields.length > 5) Features.toStrand(fields(5)).foreach(f.setStrand(_))
 
     val attributes = new ArrayBuffer[(String, String)]()
     if (hasColumn(6)) attributes += ("thickStart" -> fields(6))
diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureRDD.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureRDD.scala
index 374f28ad40..aa1ecc8cbc 100644
--- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureRDD.scala
+++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureRDD.scala
@@ -210,7 +210,9 @@ object FeatureRDD {
     val score = Option(feature.getScore).getOrElse(".")
     val strand = Features.asString(feature.getStrand)
 
-    if (!feature.getAttributes.containsKey("thickStart")) {
+    if (!feature.getAttributes.containsKey("thickStart") &&
+      !feature.getAttributes.containsKey("itemRgb") &&
+      !feature.getAttributes.containsKey("blockCount")) {
       // write BED6 format
       List(chrom, start, end, name, score, strand).mkString("\t")
     } else {
diff --git a/adam-core/src/test/resources/small.1_12.bed b/adam-core/src/test/resources/small.1_12.bed
new file mode 100644
index 0000000000..29d92f8659
--- /dev/null
+++ b/adam-core/src/test/resources/small.1_12.bed
@@ -0,0 +1,4 @@
+1	143	26423	line1	0.0	.	150	26400	0,0,0	.	.	.
+1	14397230	26472788	line2	100.0	+	14397230	26472700	255,0,0	1	12075558	14397230
+1	169801934	169801939	line3	200.0	-	.	.	0,255,0	2	100,200	169801934,169801739
+1	240997788	240997796	line4 with a space	1000.0	?	.	.	0,0,255	.	.	.
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala
index 7fba18bdc2..c389073d86 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala
@@ -136,6 +136,13 @@ class ADAMContextSuite extends ADAMFunSuite {
     assert(features.count === 10)
   }
 
+  sparkTest("Can read a BED 12 file") {
+    // note: this .bed doesn't actually conform to the UCSC BED spec...sigh...
+    val path = testFile("small.1_12.bed")
+    val features: RDD[Feature] = sc.loadFeatures(path).rdd
+    assert(features.count === 4)
+  }
+
   sparkTest("Can read a .bed file without cache") {
     val path = testFile("gencode.v7.annotation.trunc10.bed")
     val features: RDD[Feature] = sc.loadFeatures(path, optStorageLevel = Some(StorageLevel.NONE)).rdd
diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/feature/FeatureRDDSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/feature/FeatureRDDSuite.scala
index 48e28bcfcf..8fa06e83b3 100644
--- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/feature/FeatureRDDSuite.scala
+++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/feature/FeatureRDDSuite.scala
@@ -240,7 +240,7 @@ class FeatureRDDSuite extends ADAMFunSuite with TypeCheckedTripleEquals {
     assert(features.rdd.count === reloadedFeatures.rdd.count)
   }
 
-  sparkTest("round trip BED format") {
+  sparkTest("round trip BED6 format") {
     val inputPath = testFile("dvl1.200.bed")
     val expected = sc.loadBed(inputPath)
     val outputPath = tempLocation(".bed")
@@ -263,6 +263,37 @@
     })
   }
 
+  sparkTest("round trip BED12 format") {
+    val inputPath = testFile("small.1_12.bed")
+    val expected = sc.loadBed(inputPath)
+    val outputPath = tempLocation(".bed")
+    expected.saveAsBed(outputPath, asSingleFile = true)
+
+    val feature = expected.rdd.first
+    val bedCols = FeatureRDD.toBed(feature).split('\t')
+    assert(bedCols.size === 12)
+    assert(bedCols(0) === "1")
+    assert(bedCols(1) === "143")
+    assert(bedCols(2) === "26423")
+    assert(bedCols(3) === "line1")
+    assert(bedCols(4) === "0.0")
+    assert(bedCols(5) === ".")
+    assert(bedCols(6) === "150")
+    assert(bedCols(7) === "26400")
+    assert(bedCols(8) === "0,0,0")
+    assert(bedCols(9) === ".")
+    assert(bedCols(10) === ".")
+    assert(bedCols(11) === ".")
+
+    val actual = sc.loadBed(outputPath)
+    val pairs = expected.rdd.collect.zip(actual.rdd.collect)
+    pairs.foreach(p => {
+      assert(p._1 === p._2)
+    })
+
+    checkFiles(inputPath, outputPath)
+  }
+
   sparkTest("save IntervalList as GTF format") {
     val inputPath = testFile("SeqCap_EZ_Exome_v3.hg19.interval_list")
     val features = sc.loadIntervalList(inputPath)