Skip to content

Commit

Permalink
Add asSingleFile, disableFastConcat to adam2fasta/q.
Browse files Browse the repository at this point in the history
  • Loading branch information
heuermh committed Feb 13, 2018
1 parent 638fd2e commit 1d7179b
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 16 deletions.
19 changes: 14 additions & 5 deletions adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Fasta.scala
Original file line number Diff line number Diff line change
Expand Up @@ -21,18 +21,22 @@ import org.apache.spark.SparkContext
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.utils.cli._
import org.bdgenomics.utils.misc.Logging
import org.kohsuke.args4j.{ Argument, Option => Args4JOption }
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

/**
 * Command line arguments for the ADAM2Fasta command.
 *
 * Each field is bound to a positional argument or a named option through
 * the args4j annotation placed directly above it, so every field carries
 * exactly one annotation.
 */
class ADAM2FastaArgs extends Args4jBase {
  // Positional arguments: input location first, output location second.
  @Argument(required = true, metaVar = "ADAM", usage = "The Parquet file to convert", index = 0)
  var inputPath: String = null
  @Argument(required = true, metaVar = "FASTA", usage = "Location to write the FASTA to", index = 1)
  var outputPath: String = null

  @Args4jOption(required = false, name = "-single", usage = "Saves FASTA as single file")
  var asSingleFile: Boolean = false

  // This value is forwarded as saveAsFasta's disableFastConcat parameter, so
  // the flag is named and described the same way as the matching option on
  // ADAM2FastqArgs. "-defer_merging" is kept as an alias so that existing
  // invocations using the earlier flag name keep working.
  @Args4jOption(required = false, name = "-disable_fast_concat", aliases = Array("-defer_merging"), usage = "Disables the parallel file concatenation engine.")
  var disableFastConcat: Boolean = false

  @Args4jOption(required = false, name = "-coalesce", usage = "Choose the number of partitions to coalesce down to.")
  var coalesce: Int = -1
  @Args4jOption(required = false, name = "-force_shuffle_coalesce", usage = "Force shuffle while partitioning, default false.")
  var forceShuffle: Boolean = false
  @Args4jOption(required = false, name = "-line_width", usage = "Hard wrap FASTA formatted sequence at line width, default 60")
  var lineWidth: Int = 60
}

Expand Down Expand Up @@ -64,6 +68,11 @@ class ADAM2Fasta(val args: ADAM2FastaArgs) extends BDGSparkCommand[ADAM2FastaArg
} else {
contigs
}
cc.saveAsFasta(args.outputPath, args.lineWidth)
cc.saveAsFasta(
args.outputPath,
args.lineWidth,
asSingleFile = args.asSingleFile,
disableFastConcat = args.disableFastConcat
)
}
}
23 changes: 12 additions & 11 deletions adam-cli/src/main/scala/org/bdgenomics/adam/cli/ADAM2Fastq.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.apache.spark.storage.StorageLevel
import org.bdgenomics.adam.projections.{ AlignmentRecordField, Projection }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4JOption }
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }

class ADAM2FastqArgs extends Args4jBase {
@Argument(required = true, metaVar = "INPUT", usage = "The read file to convert", index = 0)
Expand All @@ -32,20 +32,19 @@ class ADAM2FastqArgs extends Args4jBase {
var outputPath: String = null
@Argument(required = false, metaVar = "SECOND_OUTPUT", usage = "When writing FASTQ data, all second-in-pair reads will go here, if this argument is provided", index = 2)
var outputPath2: String = null

@Args4JOption(required = false, name = "-validation", usage = "SAM tools validation level; when STRICT, checks that all reads are paired.")
@Args4jOption(required = false, name = "-single", usage = "Saves OUTPUT and SECOND_OUTPUT as single files")
var asSingleFile: Boolean = false
@Args4jOption(required = false, name = "-disable_fast_concat", usage = "Disables the parallel file concatenation engine.")
var disableFastConcat: Boolean = false
@Args4jOption(required = false, name = "-validation", usage = "SAM tools validation level; when STRICT, checks that all reads are paired.")
var validationStringency = ValidationStringency.LENIENT

@Args4JOption(required = false, name = "-repartition", usage = "Set the number of partitions to map data to")
@Args4jOption(required = false, name = "-repartition", usage = "Set the number of partitions to map data to")
var repartition: Int = -1

@Args4JOption(required = false, name = "-persist_level", usage = "Persist() intermediate RDDs")
@Args4jOption(required = false, name = "-persist_level", usage = "Persist() intermediate RDDs")
var persistLevel: String = null

@Args4JOption(required = false, name = "-no_projection", usage = "Disable projection on records. No great reason to do this, but useful for testing / comparison.")
@Args4jOption(required = false, name = "-no_projection", usage = "Disable projection on records. No great reason to do this, but useful for testing / comparison.")
var disableProjection: Boolean = false

@Args4JOption(required = false, name = "-output_oq", usage = "Output the original sequencing quality scores")
@Args4jOption(required = false, name = "-output_oq", usage = "Output the original sequencing quality scores")
var outputOriginalBaseQualities = false
}

Expand Down Expand Up @@ -86,6 +85,8 @@ class ADAM2Fastq(val args: ADAM2FastqArgs) extends BDGSparkCommand[ADAM2FastqArg
reads.saveAsFastq(
args.outputPath,
Option(args.outputPath2),
asSingleFile = args.asSingleFile,
disableFastConcat = args.disableFastConcat,
outputOriginalBaseQualities = args.outputOriginalBaseQualities,
validationStringency = args.validationStringency,
persistLevel = Option(args.persistLevel).map(StorageLevel.fromString(_))
Expand Down

0 comments on commit 1d7179b

Please sign in to comment.