From 86c492e20ce93170d8530045ea20517a875beaae Mon Sep 17 00:00:00 2001 From: Frank Austin Nothaft Date: Sun, 1 Mar 2015 21:42:04 -0800 Subject: [PATCH] [ADAM-605] Remove visualization code. --- .../org/bdgenomics/adam/cli/ADAMMain.scala | 1 - .../org/bdgenomics/adam/cli/VizReads.scala | 230 ----------- .../src/main/webapp/WEB-INF/layouts/freq.ssp | 359 ----------------- .../src/main/webapp/WEB-INF/layouts/reads.ssp | 360 ----------------- .../main/webapp/WEB-INF/layouts/variants.ssp | 366 ------------------ adam-cli/src/main/webapp/WEB-INF/web.xml | 11 - .../adam/models/TrackedLayout.scala | 96 ----- .../adam/models/TrackedLayoutSuite.scala | 88 ----- 8 files changed, 1511 deletions(-) delete mode 100644 adam-cli/src/main/scala/org/bdgenomics/adam/cli/VizReads.scala delete mode 100644 adam-cli/src/main/webapp/WEB-INF/layouts/freq.ssp delete mode 100644 adam-cli/src/main/webapp/WEB-INF/layouts/reads.ssp delete mode 100644 adam-cli/src/main/webapp/WEB-INF/layouts/variants.ssp delete mode 100644 adam-cli/src/main/webapp/WEB-INF/web.xml delete mode 100644 adam-core/src/main/scala/org/bdgenomics/adam/models/TrackedLayout.scala delete mode 100644 adam-core/src/test/scala/org/bdgenomics/adam/models/TrackedLayoutSuite.scala diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala index affbb7b237..dfcb18da9b 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAMMain.scala @@ -61,7 +61,6 @@ object ADAMMain extends Logging { PrintADAM, PrintGenes, FlagStat, - VizReads, PrintTags, ListDict, AlleleCount, diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VizReads.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VizReads.scala deleted file mode 100644 index edf4bffbf4..0000000000 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/VizReads.scala +++ /dev/null @@ -1,230 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.cli - -import org.apache.hadoop.mapreduce.Job -import org.apache.spark.SparkContext -import org.bdgenomics.adam.models.VariantContext -import org.apache.spark.rdd.RDD -import org.bdgenomics.adam.models.{ OrderedTrackedLayout, ReferenceRegion } -import org.bdgenomics.adam.projections.AlignmentRecordField._ -import org.bdgenomics.adam.projections.Projection -import org.bdgenomics.adam.rdd.ADAMContext._ -import org.bdgenomics.adam.rich.ReferenceMappingContext._ -import org.bdgenomics.formats.avro.{ AlignmentRecord, Genotype, GenotypeAllele } -import org.fusesource.scalate.TemplateEngine -import org.json4s._ -import org.kohsuke.args4j.{ Argument, Option => Args4jOption } -import org.scalatra.json._ -import org.scalatra.ScalatraServlet - -object VizReads extends ADAMCommandCompanion { - val commandName: String = "viz" - val commandDescription: String = "Generates images from sections of the genome" - - var refName = "" - var inputPath = "" - var reads: RDD[AlignmentRecord] = null - var variants: RDD[Genotype] = null - - val trackHeight = 10 - val width = 1200 - val height = 400 - val base = 50 - - def apply(cmdLine: Array[String]): ADAMCommand = { - new VizReads(Args4j[VizReadsArgs](cmdLine)) - } - - def printTrackJson(layout: OrderedTrackedLayout[AlignmentRecord]): List[TrackJson] = { - var tracks = new scala.collection.mutable.ListBuffer[TrackJson] - - // draws a box for each read, in the appropriate track - for ((rec, track) <- layout.trackAssignments) { - val aRec = rec.asInstanceOf[AlignmentRecord] - tracks += new TrackJson(aRec.getReadName, aRec.getStart, aRec.getEnd, track) - } - tracks.toList - } - - def printVariationJson(layout: OrderedTrackedLayout[Genotype]): List[VariationJson] = { - var tracks = new scala.collection.mutable.ListBuffer[VariationJson] - - for ((rec, track) <- layout.trackAssignments) { - val aRec = rec.asInstanceOf[Genotype] - val referenceAllele = aRec.getVariant.getReferenceAllele - val alternateAllele = aRec.getVariant.getAlternateAllele - tracks += new VariationJson(aRec.getVariant.getContig.getContigName, aRec.getAlleles.mkString(" / "), aRec.getVariant.getStart, aRec.getVariant.getEnd, track) - - } - tracks.toList - } - - def printJsonFreq(array: Array[AlignmentRecord], region: ReferenceRegion): List[FreqJson] = { - val freqMap = new java.util.TreeMap[Long, Long] - var freqBuffer = new scala.collection.mutable.ListBuffer[FreqJson] - - // initiates map with 0 values - var i0 = region.start - while (i0 <= region.end) { - freqMap.put(i0, 0) - i0 += 1 - } - - // creates a point for each base showing its frequency - for (rec <- array) { - val aRec = rec.asInstanceOf[AlignmentRecord] - var i = aRec.getStart - while (i <= aRec.getEnd) { - if (i >= region.start && i <= region.end) - freqMap.put(i, freqMap(i) + 1) - i += 1 - } - } - - // convert to list of FreqJsons - val iter = freqMap.keySet.iterator - var key = 0L - while (iter.hasNext) { - key = iter.next() - freqBuffer += FreqJson(key, freqMap(key)) - } - - freqBuffer.toList - } -} - -case class TrackJson(readName: String, start: Long, end: Long, track: Long) -case class VariationJson(contigName: String, alleles: String, start: Long, end: Long, track: Long) -case class FreqJson(base: Long, freq: Long) - -class VizReadsArgs extends Args4jBase with ParquetArgs { - @Argument(required = true, metaVar = "INPUT", usage = "The ADAM Records file to view", index = 0) - var inputPath: String = null - - @Argument(required = true, metaVar = "REFNAME", usage = "The reference to view", index = 1) - var refName: String = null - - @Args4jOption(required = false, name = "-port", usage = "The port to bind to for visualization. The default is 8080.") - var port: Int = 8080 -} - -class VizServlet extends ScalatraServlet with JacksonJsonSupport { - protected implicit val jsonFormats: Formats = DefaultFormats - var regInfo = ReferenceRegion(VizReads.refName, 0, 200) - var filteredLayout: OrderedTrackedLayout[AlignmentRecord] = null - var filteredArray: Array[AlignmentRecord] = null - - get("/?") { - redirect(url("reads")) - } - - get("/reads/?") { - contentType = "text/html" - - filteredLayout = new OrderedTrackedLayout(VizReads.reads.filterByOverlappingRegion(regInfo).collect()) - val templateEngine = new TemplateEngine - templateEngine.layout("adam-cli/src/main/webapp/WEB-INF/layouts/reads.ssp", - Map("regInfo" -> (regInfo.referenceName, regInfo.start.toString, regInfo.end.toString), - "width" -> VizReads.width.toString, - "base" -> VizReads.base.toString, - "numTracks" -> filteredLayout.numTracks.toString, - "trackHeight" -> VizReads.trackHeight.toString)) - } - - get("/reads/:ref") { - contentType = formats("json") - - regInfo = ReferenceRegion(params("ref"), params("start").toLong, params("end").toLong) - filteredLayout = new OrderedTrackedLayout(VizReads.reads.filterByOverlappingRegion(regInfo).collect()) - VizReads.printTrackJson(filteredLayout) - } - - get("/freq") { - contentType = "text/html" - - filteredArray = VizReads.reads.filterByOverlappingRegion(regInfo).collect() - val templateEngine = new TemplateEngine - templateEngine.layout("adam-cli/src/main/webapp/WEB-INF/layouts/freq.ssp", - Map("regInfo" -> (regInfo.referenceName, regInfo.start.toString, regInfo.end.toString), - "width" -> VizReads.width.toString, - "height" -> VizReads.height.toString, - "base" -> VizReads.base.toString)) - } - - get("/freq/:ref") { - contentType = formats("json") - - regInfo = ReferenceRegion(params("ref"), params("start").toLong, params("end").toLong) - filteredArray = VizReads.reads.filterByOverlappingRegion(regInfo).collect() - VizReads.printJsonFreq(filteredArray, regInfo) - } - - get("/variants/?") { - contentType = "text/html" - - val input = VizReads.variants.filterByOverlappingRegion(regInfo).collect() - val filteredGenotypeTrack = new OrderedTrackedLayout(input) - val templateEngine = new TemplateEngine - val displayMap = Map("regInfo" -> (regInfo.referenceName, regInfo.start.toString, regInfo.end.toString), - "width" -> VizReads.width.toString, - "base" -> VizReads.base.toString, - "numTracks" -> filteredGenotypeTrack.numTracks.toString, - "trackHeight" -> VizReads.trackHeight.toString) - templateEngine.layout("adam-cli/src/main/webapp/WEB-INF/layouts/variants.ssp", - displayMap) - } - - get("/variants/:ref") { - contentType = formats("json") - - regInfo = ReferenceRegion(params("ref"), params("start").toLong, params("end").toLong) - val input = VizReads.variants.filterByOverlappingRegion(regInfo).collect() - val filteredGenotypeTrack = new OrderedTrackedLayout(input) - VizReads.printVariationJson(filteredGenotypeTrack) - } -} - -class VizReads(protected val args: VizReadsArgs) extends ADAMSparkCommand[VizReadsArgs] { - val companion: ADAMCommandCompanion = VizReads - - def run(sc: SparkContext, job: Job): Unit = { - VizReads.refName = args.refName - - val proj = Projection(contig, readMapped, readName, start, end) - - if (args.inputPath.endsWith(".bam") || args.inputPath.endsWith(".sam") || args.inputPath.endsWith(".align.adam")) { - VizReads.reads = sc.loadAlignments(args.inputPath, projection = Some(proj)) - } - - if (args.inputPath.endsWith(".vcf") || args.inputPath.endsWith(".gt.adam")) { - VizReads.variants = sc.loadGenotypes(args.inputPath, projection = Some(proj)) - } - - val server = new org.eclipse.jetty.server.Server(args.port) - val handlers = new org.eclipse.jetty.server.handler.ContextHandlerCollection() - server.setHandler(handlers) - handlers.addHandler(new org.eclipse.jetty.webapp.WebAppContext("adam-cli/src/main/webapp", "/")) - server.start() - println("View the visualization at: " + args.port) - println("Frequency visualization at: /freq") - println("Overlapping reads visualization at: /reads") - println("Variant visualization at: /variants") - server.join() - } -} diff --git a/adam-cli/src/main/webapp/WEB-INF/layouts/freq.ssp b/adam-cli/src/main/webapp/WEB-INF/layouts/freq.ssp deleted file mode 100644 index ca7a3cf731..0000000000 --- a/adam-cli/src/main/webapp/WEB-INF/layouts/freq.ssp +++ /dev/null @@ -1,359 +0,0 @@ -<%@ val regInfo: (String, String, String) %> -<%@ val width: String %> -<%@ val height: String %> -<%@ val base: String %> - - - - - - - - - Genome visualization for ADAM - - -

Genome visualization for ADAM

-

current region: ${regInfo._1}:

-
- -
- - - \ No newline at end of file diff --git a/adam-cli/src/main/webapp/WEB-INF/layouts/reads.ssp b/adam-cli/src/main/webapp/WEB-INF/layouts/reads.ssp deleted file mode 100644 index a02d1f47eb..0000000000 --- a/adam-cli/src/main/webapp/WEB-INF/layouts/reads.ssp +++ /dev/null @@ -1,360 +0,0 @@ -<%@ val regInfo: (String, String, String) %> -<%@ val width: String %> -<%@ val base: String %> -<%@ val trackHeight: String %> -<%@ val numTracks: String %> - - - - - - - - - Genome visualization for ADAM - - -

Genome visualization for ADAM

-

current region: ${regInfo._1}:

-
- -
- - - \ No newline at end of file diff --git a/adam-cli/src/main/webapp/WEB-INF/layouts/variants.ssp b/adam-cli/src/main/webapp/WEB-INF/layouts/variants.ssp deleted file mode 100644 index 1c77157291..0000000000 --- a/adam-cli/src/main/webapp/WEB-INF/layouts/variants.ssp +++ /dev/null @@ -1,366 +0,0 @@ -<%@ val regInfo: (String, String, String) %> -<%@ val width: String %> -<%@ val base: String %> -<%@ val trackHeight: String %> -<%@ val numTracks: String %> - - - - - - - - - Genome visualization for ADAM - - -

Genome visualization for ADAM

-

current region: ${regInfo._1}:

-
- -
- - - diff --git a/adam-cli/src/main/webapp/WEB-INF/web.xml b/adam-cli/src/main/webapp/WEB-INF/web.xml deleted file mode 100644 index 0f1124996b..0000000000 --- a/adam-cli/src/main/webapp/WEB-INF/web.xml +++ /dev/null @@ -1,11 +0,0 @@ - - - - adam - org.bdgenomics.adam.cli.VizServlet - - - adam - /* - - diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/TrackedLayout.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/TrackedLayout.scala deleted file mode 100644 index 654ddb01c3..0000000000 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/TrackedLayout.scala +++ /dev/null @@ -1,96 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.models - -import scala.collection.mutable - -/** - * A TrackedLayout is an assignment of values of some type T (which presumably are mappable to - * a reference genome or other linear coordinate space) to 'tracks' -- that is, to integers, - * with the guarantee that no two values assigned to the same track will overlap. - * - * This is the kind of data structure which is required for non-overlapping genome visualization. - * - * @tparam T the type of value which is to be tracked. - */ -trait TrackedLayout[T] { - def numTracks: Int - def trackAssignments: Map[T, Int] -} - -object TrackedLayout { - - def overlaps[T](rec1: T, rec2: T)(implicit rm: ReferenceMapping[T]): Boolean = { - val ref1 = rm.getReferenceRegion(rec1) - val ref2 = rm.getReferenceRegion(rec2) - ref1.overlaps(ref2) - } -} - -/** - * An implementation of TrackedLayout which takes a sequence of ReferenceMappable values, - * and lays them out in order (i.e. from first-to-last) in the naive way: for each - * value, it looks for the track with the lowest index that doesn't already have an overlapping - * value, and it doesn't use any special data structures (it just does a linear search for each - * track.) - * - * @param reads The set of values (i.e. "reads", but anything that is mappable to the reference - * genome ultimately) to lay out in tracks - * @param mapping The (implicit) reference mapping which converts values to ReferenceRegions - * @tparam T the type of value which is to be tracked. - */ -class OrderedTrackedLayout[T](reads: Traversable[T])(implicit val mapping: ReferenceMapping[T]) extends TrackedLayout[T] { - - private var trackBuilder = new mutable.ListBuffer[Track]() - reads.toSeq.foreach(findAndAddToTrack) - trackBuilder = trackBuilder.filter(_.records.nonEmpty) - - val numTracks = trackBuilder.size - val trackAssignments: Map[T, Int] = - Map(trackBuilder.toList.zip(0 to numTracks).flatMap { - case (track: Track, idx: Int) => track.records.map(_ -> idx) - }: _*) - - private def findAndAddToTrack(rec: T) { - val reg = mapping.getReferenceRegion(rec) - if (reg != null) { - val track: Option[Track] = trackBuilder.find(track => !track.conflicts(rec)) - track.map(_ += rec).getOrElse(addTrack(new Track(rec))) - } - } - - private def addTrack(t: Track): Track = { - trackBuilder += t - t - } - - private class Track(val initial: T) { - - val records = new mutable.ListBuffer[T]() - records += initial - - def +=(rec: T): Track = { - records += rec - this - } - - def conflicts(rec: T): Boolean = - records.exists(r => TrackedLayout.overlaps(r, rec)) - } - -} diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/TrackedLayoutSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/TrackedLayoutSuite.scala deleted file mode 100644 index 5c63d9e645..0000000000 --- a/adam-core/src/test/scala/org/bdgenomics/adam/models/TrackedLayoutSuite.scala +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.bdgenomics.adam.models - -import org.bdgenomics.adam.rich.ReferenceMappingContext._ -import org.bdgenomics.formats.avro.{ AlignmentRecord, Contig } -import org.scalatest.FunSuite - -class TrackedLayoutSuite extends FunSuite { - - def rec(contig: String = null, start: Int = 0, cigar: String = null, end: Int = 0, readMapped: Boolean = true, firstOfPair: Boolean = true): AlignmentRecord = { - val c = Contig.newBuilder().setContigName(contig).build() - - AlignmentRecord.newBuilder() - .setContig(c) - .setReadMapped(readMapped) - .setFirstOfPair(firstOfPair) - .setStart(start) - .setCigar(cigar) - .setEnd(end) - .build() - } - - test("OrderedTrackedLayout lays out no records into zero tracks") { - val layout = new OrderedTrackedLayout[AlignmentRecord](Seq()) - assert(layout.numTracks === 0) - } - - test("OrderedTrackedLayout lays out AlignmentRecords left-to-right in the order they're passed") { - val (r1, r2, r3, r4) = ( - rec("chr1", 100, "100M", 200), - rec("chr1", 150, "100M", 250), - rec("chr1", 200, "100M", 300), - rec("chr2", 200, "100M", 300)) - - val recs = Seq(r1, r2, r3, r4) - val layout: OrderedTrackedLayout[AlignmentRecord] = new OrderedTrackedLayout(recs) - - // Just making sure... r4 shouldn't overlap r1-3 - val rm: ReferenceMapping[AlignmentRecord] = AlignmentRecordReferenceMapping - val refs123 = Seq(r1, r2, r3).map(rm.getReferenceRegion) - assert(!refs123.exists(rm.getReferenceRegion(r4).overlaps)) - - // Now, test the correct track assignments of each record - assert(layout.trackAssignments.get(r1) === Some(0)) - assert(layout.trackAssignments.get(r2) === Some(1)) - assert(layout.trackAssignments.get(r3) === Some(0)) - assert(layout.trackAssignments.get(r4) === Some(0)) - assert(layout.numTracks === 2) - } - - test("OrderedTrackedLayout can handle unaligned reads") { - val (r1, r2, r3, r4) = ( - rec("chr1", 100, "100M", 200), - rec("chr1", 150, "100M", 250), - rec("chr1", 200, "100M", 300), - rec(readMapped = false)) - - val recs = Seq(r1, r2, r3, r4) - val layout: OrderedTrackedLayout[AlignmentRecord] = new OrderedTrackedLayout(recs) - - // Just making sure... r4 shouldn't overlap r1-3 - val rm: ReferenceMapping[AlignmentRecord] = AlignmentRecordReferenceMapping - val refs123 = Seq(r1, r2, r3).map(rm.getReferenceRegion) - - // Now, test the correct track assignments of each record - assert(layout.trackAssignments.get(r1) === Some(0)) - assert(layout.trackAssignments.get(r2) === Some(1)) - assert(layout.trackAssignments.get(r3) === Some(0)) - assert(layout.trackAssignments.get(r4) === None) - assert(layout.numTracks === 2) - } -}