Add pipe API in and out formatters for Features
heuermh committed Mar 14, 2017
1 parent 1cae769 commit 707567c
Showing 11 changed files with 568 additions and 2 deletions.
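For context, these formatters are the glue that lets FeatureRDD participate in the pipe API: an InFormatter streams each Feature to an external command's stdin as a line of text, and an OutFormatter parses the command's stdout back into Features. Below is a minimal usage sketch, assuming the same implicit-driven pipe call on GenomicRDD that ADAM already uses for alignment records; the file path and the sort command are purely illustrative.

import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.feature.{ BEDInFormatter, BEDOutFormatter, FeatureRDD }

val features: FeatureRDD = sc.loadFeatures("features.bed") // illustrative path

// pipe picks both formatters up implicitly: BED lines out to the command,
// BED lines parsed back from the command's stdout
implicit val tFormatter = BEDInFormatter
implicit val uFormatter = new BEDOutFormatter

// any tool that reads and writes BED on stdin/stdout works; sort is a stand-in
val piped: FeatureRDD = features.pipe("sort -k1,1 -k2,2n")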
org/bdgenomics/adam/rdd/feature/BEDInFormatter.scala
@@ -0,0 +1,65 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.adam.rdd.feature

import java.io.{
  BufferedWriter,
  OutputStream,
  OutputStreamWriter
}
import org.bdgenomics.adam.rdd.{ InFormatter, InFormatterCompanion }
import org.bdgenomics.formats.avro.Feature
import org.bdgenomics.utils.misc.Logging

/**
 * InFormatter companion that builds a BEDInFormatter to write features in BED format to a pipe.
 */
object BEDInFormatter extends InFormatterCompanion[Feature, FeatureRDD, BEDInFormatter] {

  /**
   * Apply method for building the BEDInFormatter from a FeatureRDD.
   *
   * @param fRdd FeatureRDD to build from.
   */
  def apply(fRdd: FeatureRDD): BEDInFormatter = {
    BEDInFormatter()
  }
}

case class BEDInFormatter private () extends InFormatter[Feature, FeatureRDD, BEDInFormatter] {
  protected val companion = BEDInFormatter

  /**
   * Writes features to an output stream in BED format.
   *
   * @param os An OutputStream connected to a process we are piping to.
   * @param iter An iterator of features to write.
   */
  def write(os: OutputStream, iter: Iterator[Feature]) {
    val writer = new BufferedWriter(new OutputStreamWriter(os))

    // write the features
    iter.foreach(f => {
      writer.write(FeatureRDD.toBed(f))
      writer.newLine()
    })

    // close the writer, else stream may be defective
    writer.close() // os is flushed and closed in InFormatterRunner
  }
}
org/bdgenomics/adam/rdd/feature/BEDOutFormatter.scala
@@ -0,0 +1,63 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.adam.rdd.feature

import htsjdk.samtools.ValidationStringency
import htsjdk.tribble.readers.{
  AsciiLineReader,
  AsciiLineReaderIterator
}
import java.io.InputStream
import org.bdgenomics.adam.rdd.OutFormatter
import org.bdgenomics.formats.avro.Feature
import scala.annotation.tailrec
import scala.collection.mutable.ListBuffer

/**
 * OutFormatter that reads streaming BED format.
 */
case class BEDOutFormatter() extends OutFormatter[Feature] {
  val bedParser = new BEDParser
  val stringency = ValidationStringency.STRICT

  /**
   * Reads features from an input stream in BED format.
   *
   * @param is An InputStream connected to the process we are piping from.
   * @return Returns an iterator of Features read from the stream.
   */
  def read(is: InputStream): Iterator[Feature] = {

    // make line reader iterator
    val lri = new AsciiLineReaderIterator(new AsciiLineReader(is))

    @tailrec def convertIterator(iter: AsciiLineReaderIterator,
                                 features: ListBuffer[Feature] = ListBuffer.empty): Iterator[Feature] = {
      if (!iter.hasNext) {
        iter.close()
        features.toIterator
      } else {
        val nextFeatures = bedParser.parse(iter.next, stringency).fold(features)(features += _)
        convertIterator(iter, nextFeatures)
      }
    }

    // convert the iterator
    convertIterator(lri)
  }
}
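Outside of the pipe machinery, an OutFormatter can also be exercised directly against any InputStream, which is the easiest way to test it. A small sketch, assuming BEDParser accepts plain BED6 lines like the ones below (the exact columns are illustrative):

import java.io.ByteArrayInputStream
import org.bdgenomics.adam.rdd.feature.BEDOutFormatter
import org.bdgenomics.formats.avro.Feature

// two BED6 records separated by newlines
val bed = "1\t1000\t2000\tgene1\t100\t+\n1\t3000\t4000\tgene2\t200\t-\n"

// read parses each line with BEDParser under STRICT validation
val features: List[Feature] = BEDOutFormatter()
  .read(new ByteArrayInputStream(bed.getBytes))
  .toList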
org/bdgenomics/adam/rdd/feature/GFF3InFormatter.scala
@@ -0,0 +1,65 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.adam.rdd.feature

import java.io.{
  BufferedWriter,
  OutputStream,
  OutputStreamWriter
}
import org.bdgenomics.adam.rdd.{ InFormatter, InFormatterCompanion }
import org.bdgenomics.formats.avro.Feature
import org.bdgenomics.utils.misc.Logging

/**
 * InFormatter companion that builds a GFF3InFormatter to write features in GFF3 format to a pipe.
 */
object GFF3InFormatter extends InFormatterCompanion[Feature, FeatureRDD, GFF3InFormatter] {

  /**
   * Apply method for building the GFF3InFormatter from a FeatureRDD.
   *
   * @param fRdd FeatureRDD to build from.
   */
  def apply(fRdd: FeatureRDD): GFF3InFormatter = {
    GFF3InFormatter()
  }
}

case class GFF3InFormatter private () extends InFormatter[Feature, FeatureRDD, GFF3InFormatter] {
  protected val companion = GFF3InFormatter

  /**
   * Writes features to an output stream in GFF3 format.
   *
   * @param os An OutputStream connected to a process we are piping to.
   * @param iter An iterator of features to write.
   */
  def write(os: OutputStream, iter: Iterator[Feature]) {
    val writer = new BufferedWriter(new OutputStreamWriter(os))

    // write the features
    iter.foreach(f => {
      writer.write(FeatureRDD.toGff3(f))
      writer.newLine()
    })

    // close the writer, else stream may be defective
    writer.close() // os is flushed and closed in InFormatterRunner
  }
}
org/bdgenomics/adam/rdd/feature/GFF3OutFormatter.scala
@@ -0,0 +1,63 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.adam.rdd.feature

import htsjdk.samtools.ValidationStringency
import htsjdk.tribble.readers.{
  AsciiLineReader,
  AsciiLineReaderIterator
}
import java.io.InputStream
import org.bdgenomics.adam.rdd.OutFormatter
import org.bdgenomics.formats.avro.Feature
import scala.annotation.tailrec
import scala.collection.mutable.ListBuffer

/**
 * OutFormatter that reads streaming GFF3 format.
 */
case class GFF3OutFormatter() extends OutFormatter[Feature] {
  val gff3Parser = new GFF3Parser
  val stringency = ValidationStringency.STRICT

  /**
   * Reads features from an input stream in GFF3 format.
   *
   * @param is An InputStream connected to the process we are piping from.
   * @return Returns an iterator of Features read from the stream.
   */
  def read(is: InputStream): Iterator[Feature] = {

    // make line reader iterator
    val lri = new AsciiLineReaderIterator(new AsciiLineReader(is))

    @tailrec def convertIterator(iter: AsciiLineReaderIterator,
                                 features: ListBuffer[Feature] = ListBuffer.empty): Iterator[Feature] = {
      if (!iter.hasNext) {
        iter.close()
        features.toIterator
      } else {
        val nextFeatures = gff3Parser.parse(iter.next, stringency).fold(features)(features += _)
        convertIterator(iter, nextFeatures)
      }
    }

    // convert the iterator
    convertIterator(lri)
  }
}
org/bdgenomics/adam/rdd/feature/GTFInFormatter.scala
@@ -0,0 +1,65 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.adam.rdd.feature

import java.io.{
  BufferedWriter,
  OutputStream,
  OutputStreamWriter
}
import org.bdgenomics.adam.rdd.{ InFormatter, InFormatterCompanion }
import org.bdgenomics.formats.avro.Feature
import org.bdgenomics.utils.misc.Logging

/**
 * InFormatter companion that builds a GTFInFormatter to write features in GTF format to a pipe.
 */
object GTFInFormatter extends InFormatterCompanion[Feature, FeatureRDD, GTFInFormatter] {

  /**
   * Apply method for building the GTFInFormatter from a FeatureRDD.
   *
   * @param fRdd FeatureRDD to build from.
   */
  def apply(fRdd: FeatureRDD): GTFInFormatter = {
    GTFInFormatter()
  }
}

case class GTFInFormatter private () extends InFormatter[Feature, FeatureRDD, GTFInFormatter] {
  protected val companion = GTFInFormatter

  /**
   * Writes features to an output stream in GTF format.
   *
   * @param os An OutputStream connected to a process we are piping to.
   * @param iter An iterator of features to write.
   */
  def write(os: OutputStream, iter: Iterator[Feature]) {
    val writer = new BufferedWriter(new OutputStreamWriter(os))

    // write the features
    iter.foreach(f => {
      writer.write(FeatureRDD.toGtf(f))
      writer.newLine()
    })

    // close the writer, else stream may be defective
    writer.close() // os is flushed and closed in InFormatterRunner
  }
}
org/bdgenomics/adam/rdd/feature/GTFOutFormatter.scala
@@ -0,0 +1,63 @@
/**
* Licensed to Big Data Genomics (BDG) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The BDG licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.bdgenomics.adam.rdd.feature

import htsjdk.samtools.ValidationStringency
import htsjdk.tribble.readers.{
  AsciiLineReader,
  AsciiLineReaderIterator
}
import java.io.InputStream
import org.bdgenomics.adam.rdd.OutFormatter
import org.bdgenomics.formats.avro.Feature
import scala.annotation.tailrec
import scala.collection.mutable.ListBuffer

/**
 * OutFormatter that reads streaming GTF format.
 */
case class GTFOutFormatter() extends OutFormatter[Feature] {
  val gtfParser = new GTFParser
  val stringency = ValidationStringency.STRICT

  /**
   * Reads features from an input stream in GTF format.
   *
   * @param is An InputStream connected to the process we are piping from.
   * @return Returns an iterator of Features read from the stream.
   */
  def read(is: InputStream): Iterator[Feature] = {

    // make line reader iterator
    val lri = new AsciiLineReaderIterator(new AsciiLineReader(is))

    @tailrec def convertIterator(iter: AsciiLineReaderIterator,
                                 features: ListBuffer[Feature] = ListBuffer.empty): Iterator[Feature] = {
      if (!iter.hasNext) {
        iter.close()
        features.toIterator
      } else {
        val nextFeatures = gtfParser.parse(iter.next, stringency).fold(features)(features += _)
        convertIterator(iter, nextFeatures)
      }
    }

    // convert the iterator
    convertIterator(lri)
  }
}
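Because the in and out formatters are chosen independently (one implicit each), the format written to the pipe does not have to match the format read back. A sketch of that shape, assuming a hypothetical bed_to_gff3 tool on the PATH that converts BED on stdin to GFF3 on stdout:

implicit val tFormatter = BEDInFormatter       // features go out as BED
implicit val uFormatter = new GFF3OutFormatter // results come back as GFF3

val converted: FeatureRDD = features.pipe("bed_to_gff3")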