diff --git a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/BiallelicGenotyper.scala b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/BiallelicGenotyper.scala
index bdd85d72..1fd68f90 100644
--- a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/BiallelicGenotyper.scala
+++ b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/BiallelicGenotyper.scala
@@ -216,11 +216,11 @@ class BiallelicGenotyper(
// load reads
val projection = Some(Filter(AlignmentRecordField.attributes,
- AlignmentRecordField.origQual,
- AlignmentRecordField.recordGroupName))
+ AlignmentRecordField.originalQuality,
+ AlignmentRecordField.readGroupId))
val reads = sc.loadAlignments(args.inputPath,
optProjection = projection)
- val samples = reads.recordGroups.recordGroups.map(_.sample).toSet
+ val samples = reads.readGroups.readGroups.map(_.sampleId).toSet
require(samples.nonEmpty,
"Didn't see any samples attached to input. Did you forget to add read groups?")
require(samples.size <= 1,
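For reference, a minimal sketch of the read-loading path after this rename, assuming the renamed ADAM projection fields and read-group accessors shown in the hunk above; the SparkContext (sc) and input path are hypothetical:

    import org.bdgenomics.adam.projections.{ AlignmentRecordField, Filter }
    import org.bdgenomics.adam.rdd.ADAMContext._

    // build the projection using the renamed field enums (originalQuality, readGroupId)
    val projection = Some(Filter(AlignmentRecordField.attributes,
      AlignmentRecordField.originalQuality,
      AlignmentRecordField.readGroupId))
    val reads = sc.loadAlignments("sample.reads.bam", optProjection = projection)

    // sample IDs now come from readGroups/sampleId (formerly recordGroups/sample)
    val samples = reads.readGroups.readGroups.map(_.sampleId).toSet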
diff --git a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/MergeDiscovered.scala b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/MergeDiscovered.scala
index 67bab2dc..0b4b3e5a 100644
--- a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/MergeDiscovered.scala
+++ b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/MergeDiscovered.scala
@@ -56,7 +56,7 @@ class MergeDiscovered(
sc.loadVariants(args.inputPath)
.transformDataset(_.dropDuplicates("start",
"end",
- "contigName",
+ "referenceName",
"referenceAllele",
"alternateAllele"))
.saveAsParquet(args.outputPath)
diff --git a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/Reassemble.scala b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/Reassemble.scala
index 7ddf923f..d0f1b8d0 100644
--- a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/Reassemble.scala
+++ b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/Reassemble.scala
@@ -20,7 +20,7 @@ package org.bdgenomics.avocado.cli
import org.apache.spark.SparkContext
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
-import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, MDTagging }
+import org.bdgenomics.adam.rdd.read.{ AlignmentRecordDataset, MDTagging }
import org.bdgenomics.avocado.realigner.Realigner
import org.bdgenomics.utils.cli._
import org.kohsuke.args4j.{ Argument, Option => Args4jOption }
diff --git a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/TrioGenotyper.scala b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/TrioGenotyper.scala
index 8fdb78aa..bbc3bc1c 100644
--- a/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/TrioGenotyper.scala
+++ b/avocado-cli/src/main/scala/org/bdgenomics/avocado/cli/TrioGenotyper.scala
@@ -21,8 +21,8 @@ import org.apache.spark.SparkContext
import org.bdgenomics.adam.projections.{ AlignmentRecordField, Filter }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
-import org.bdgenomics.adam.rdd.variant.GenotypeRDD
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
+import org.bdgenomics.adam.rdd.variant.GenotypeDataset
import org.bdgenomics.avocado.genotyping.{
BiallelicGenotyper => Biallelic,
DiscoverVariants => Discover,
@@ -198,8 +198,8 @@ class TrioGenotyper(
// load reads
val projection = Some(Filter(AlignmentRecordField.attributes,
- AlignmentRecordField.origQual,
- AlignmentRecordField.recordGroupName))
+ AlignmentRecordField.originalQuality,
+ AlignmentRecordField.readGroupId))
val firstParentReads = sc.loadAlignments(args.firstParentPath,
optProjection = projection)
val secondParentReads = sc.loadAlignments(args.secondParentPath,
@@ -250,7 +250,7 @@ class TrioGenotyper(
copyNumber,
false)
- val genotypes = GenotypeRDD(sc.union(firstParentGenotypes.rdd,
+ val genotypes = GenotypeDataset(sc.union(firstParentGenotypes.rdd,
secondParentGenotypes.rdd,
childGenotypes.rdd),
variants.sequences,
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala
index 65cd171f..b00e8ebe 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyper.scala
@@ -25,10 +25,10 @@ import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.IntegerType
import org.bdgenomics.adam.models.ReferenceRegion
import org.bdgenomics.adam.rdd.GenomeBins
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
import org.bdgenomics.adam.rdd.variant.{
- GenotypeRDD,
- VariantRDD
+ GenotypeDataset,
+ VariantDataset
}
import org.bdgenomics.adam.util.PhredUtils
import org.bdgenomics.avocado.Timers._
@@ -85,21 +85,21 @@ private[avocado] object BiallelicGenotyper extends Serializable with Logging {
* @param maxMapQ The highest mapping quality to allow.
* @return Returns genotype calls.
*/
- def call(reads: AlignmentRecordRDD,
- variants: VariantRDD,
+ def call(reads: AlignmentRecordDataset,
+ variants: VariantDataset,
copyNumber: CopyNumberMap,
scoreAllSites: Boolean,
optDesiredPartitionCount: Option[Int] = None,
optDesiredPartitionSize: Option[Int] = None,
optDesiredMaxCoverage: Option[Int] = None,
maxQuality: Int = 93,
- maxMapQ: Int = 93): GenotypeRDD = CallGenotypes.time {
+ maxMapQ: Int = 93): GenotypeDataset = CallGenotypes.time {
// validate metadata
require(variants.sequences.isCompatibleWith(reads.sequences),
"Variant sequence dictionary (%s) is not compatible with read dictionary (%s).".format(
variants.sequences, reads.sequences))
- val samples = reads.recordGroups.recordGroups.map(_.sample).toSet
+ val samples = reads.readGroups.readGroups.map(_.sampleId).toSet
require(samples.size == 1,
"Currently, we only support a single sample. Saw: %s.".format(
samples.mkString(", ")))
@@ -124,11 +124,11 @@ private[avocado] object BiallelicGenotyper extends Serializable with Logging {
val genotypeRdd = observationsToGenotypes(observationRdd,
samples.head)
- GenotypeRDD(genotypeRdd,
+ GenotypeDataset(genotypeRdd,
variants.sequences,
samples.map(s => {
Sample.newBuilder()
- .setSampleId(s)
+ .setId(s)
.setName(s)
.build()
}).toSeq, org.bdgenomics.adam.converters.DefaultHeaderLines.allHeaderLines)
@@ -153,7 +153,7 @@ private[avocado] object BiallelicGenotyper extends Serializable with Logging {
* @param maxMapQ The highest mapping quality to allow.
* @return Returns genotype calls.
*/
- def discoverAndCall(reads: AlignmentRecordRDD,
+ def discoverAndCall(reads: AlignmentRecordDataset,
copyNumber: CopyNumberMap,
scoreAllSites: Boolean,
optDesiredPartitionCount: Option[Int] = None,
@@ -162,7 +162,7 @@ private[avocado] object BiallelicGenotyper extends Serializable with Logging {
optDesiredPartitionSize: Option[Int] = None,
optDesiredMaxCoverage: Option[Int] = None,
maxQuality: Int = 93,
- maxMapQ: Int = 93): GenotypeRDD = {
+ maxMapQ: Int = 93): GenotypeDataset = {
// get rdd storage level and warn if not persisted
val readSl = reads.rdd.getStorageLevel
@@ -442,7 +442,7 @@ private[avocado] object BiallelicGenotyper extends Serializable with Logging {
// flatten schema
val flatFields = Seq(
- observationsDf("_1.contigName").as("contigName"),
+ observationsDf("_1.referenceName").as("referenceName"),
observationsDf("_1.start").as("start"),
observationsDf("_1.referenceAllele").as("referenceAllele"),
observationsDf("_1.alternateAllele").as("alternateAllele"),
@@ -493,14 +493,14 @@ private[avocado] object BiallelicGenotyper extends Serializable with Logging {
sum("totalCoverage").as("totalCoverage"),
first("isRef").as("isRef"),
first("copyNumber").as("copyNumber"))
- val aggregatedObservationsDf = joinedObservationsDf.groupBy("contigName",
+ val aggregatedObservationsDf = joinedObservationsDf.groupBy("referenceName",
"start",
"referenceAllele",
"alternateAllele")
.agg(aggCols.head, aggCols.tail: _*)
// re-nest the output
- val firstField = struct(aggregatedObservationsDf("contigName"),
+ val firstField = struct(aggregatedObservationsDf("referenceName"),
aggregatedObservationsDf("start"),
aggregatedObservationsDf("referenceAllele"),
aggregatedObservationsDf("alternateAllele"))
@@ -733,7 +733,7 @@ private[avocado] object BiallelicGenotyper extends Serializable with Logging {
.setVariantCallingAnnotations(vcAnnotations)
.setStart(v.getStart)
.setEnd(v.getEnd)
- .setContigName(v.getContigName)
+ .setReferenceName(v.getReferenceName)
.setSampleId(sample)
.setStrandBiasComponents(sbComponents
.map(i => i: java.lang.Integer))
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoverVariants.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoverVariants.scala
index 9be29fd8..66c02c30 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoverVariants.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoverVariants.scala
@@ -19,8 +19,8 @@ package org.bdgenomics.avocado.genotyping
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SQLContext
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
-import org.bdgenomics.adam.rdd.variant.VariantRDD
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
+import org.bdgenomics.adam.rdd.variant.VariantDataset
import org.bdgenomics.avocado.Timers._
import org.bdgenomics.avocado.models.{
Clipped,
@@ -41,19 +41,19 @@ import scala.annotation.tailrec
object DiscoverVariants extends Serializable with Logging {
/**
- * Discovers all variants in an RDD of reads.
+ * Discovers all variants in a dataset of reads.
*
- * @param aRdd RDD of reads.
+ * @param aRdd Dataset of reads.
* @param optPhredThreshold An optional threshold that discards all variants
* not supported by bases of at least a given phred score.
- * @return Returns an RDD of variants.
+ * @return Returns a dataset of variants.
*/
private[avocado] def apply(
- aRdd: AlignmentRecordRDD,
+ aRdd: AlignmentRecordDataset,
optPhredThreshold: Option[Int] = None,
- optMinObservations: Option[Int] = None): VariantRDD = DiscoveringVariants.time {
+ optMinObservations: Option[Int] = None): VariantDataset = DiscoveringVariants.time {
- VariantRDD(variantsInRdd(aRdd.rdd,
+ VariantDataset(variantsInRdd(aRdd.rdd,
optPhredThreshold = optPhredThreshold,
optMinObservations = optMinObservations),
aRdd.sequences,
@@ -87,7 +87,7 @@ object DiscoverVariants extends Serializable with Logging {
val uniqueVariants = optMinObservations.fold({
variantDs.distinct
})(mo => {
- variantDs.groupBy(variantDs("contigName"),
+ variantDs.groupBy(variantDs("referenceName"),
variantDs("start"),
variantDs("referenceAllele"),
variantDs("alternateAllele"))
@@ -132,8 +132,8 @@ object DiscoverVariants extends Serializable with Logging {
// get the read sequence, contig, etc
val sequence = read.getSequence
- val qual = read.getQual
- val contigName = read.getContigName
+ val qual = read.getQuality
+ val referenceName = read.getReferenceName
// advance to the first alignment match
@tailrec def fastForward(
@@ -198,7 +198,7 @@ object DiscoverVariants extends Serializable with Logging {
val newVars = (0 until length).flatMap(i => {
if (qual(i).toInt - 33 >= phredThreshold) {
Some(DiscoveredVariant(
- contigName,
+ referenceName,
pos + i,
ref(i).toString,
sequence(idx + i).toString))
@@ -216,7 +216,7 @@ object DiscoverVariants extends Serializable with Logging {
val insQuals = qual.substring(idx - 1, idx + length).map(_.toInt - 33).sum / length
val newVar = if (insQuals >= phredThreshold) {
DiscoveredVariant(
- contigName,
+ referenceName,
pos - 1,
lastRef,
sequence.substring(idx - 1, idx + length)) :: variants
@@ -230,7 +230,7 @@ object DiscoverVariants extends Serializable with Logging {
val delLength = ref.size
val newVar = if (qual(idx - 1).toInt - 33 >= phredThreshold) {
DiscoveredVariant(
- contigName,
+ referenceName,
pos - 1,
lastRef + ref,
sequence.substring(idx - 1, idx)) :: variants
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoveredVariant.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoveredVariant.scala
index 1a7277c9..8be94d32 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoveredVariant.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/DiscoveredVariant.scala
@@ -30,25 +30,25 @@ private[genotyping] object DiscoveredVariant {
* @return Returns a case class-based representation of the variant.
*/
def apply(variant: Variant): DiscoveredVariant = {
- new DiscoveredVariant(variant.getContigName,
+ new DiscoveredVariant(variant.getReferenceName,
variant.getStart.toInt,
variant.getReferenceAllele,
Some(variant.getAlternateAllele))
}
/**
- * @param contigName The contig this variant is on.
+ * @param referenceName The reference this variant is on.
* @param start The position this variant starts at.
* @param referenceAllele The reference allele this variant varies from.
* @param alternateAllele The substituted allele.
* @return Returns a discovered variant with a defined alternate allele.
*/
def apply(
- contigName: String,
+ referenceName: String,
start: Int,
referenceAllele: String,
alternateAllele: String): DiscoveredVariant = {
- new DiscoveredVariant(contigName, start, referenceAllele, Some(alternateAllele))
+ new DiscoveredVariant(referenceName, start, referenceAllele, Some(alternateAllele))
}
/**
@@ -64,13 +64,13 @@ private[genotyping] object DiscoveredVariant {
/**
* A variant site and alleles.
*
- * @param contigName The contig this variant is on.
+ * @param referenceName The reference this variant is on.
* @param start The position this variant starts at.
* @param referenceAllele The reference allele this variant varies from.
* @param alternateAllele The substituted allele.
*/
case class DiscoveredVariant(
- contigName: String,
+ referenceName: String,
start: Int,
referenceAllele: String,
alternateAllele: Option[String]) {
@@ -87,7 +87,7 @@ case class DiscoveredVariant(
*/
def toVariant: Variant = {
val builder = Variant.newBuilder
- .setContigName(contigName)
+ .setReferenceName(referenceName)
.setStart(start.toLong)
.setEnd(end.toLong)
.setReferenceAllele(referenceAllele)
@@ -100,10 +100,10 @@ case class DiscoveredVariant(
}
def overlaps(v: DiscoveredVariant): Boolean = {
- contigName == v.contigName && start < v.end && end > v.start
+ referenceName == v.referenceName && start < v.end && end > v.start
}
def overlaps(rr: ReferenceRegion): Boolean = {
- contigName == rr.referenceName && start < rr.end && end > rr.start
+ referenceName == rr.referenceName && start < rr.end && end > rr.start
}
}
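A minimal sketch of the renamed DiscoveredVariant API; the class and its companion are package-private to org.bdgenomics.avocado.genotyping, so this would only compile inside that package, and the coordinates are made up:

    import org.bdgenomics.adam.models.ReferenceRegion

    // referenceName replaces contigName in the constructor and in overlap checks
    val dv = DiscoveredVariant("chr1", 100, "A", "G")
    val variant = dv.toVariant                                  // builder now calls setReferenceName
    val hits = dv.overlaps(ReferenceRegion("chr1", 100L, 101L))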
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/JointAnnotatorCaller.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/JointAnnotatorCaller.scala
index dc9ceaf3..6c66d0be 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/JointAnnotatorCaller.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/JointAnnotatorCaller.scala
@@ -21,8 +21,8 @@ import breeze.stats.distributions.Binomial
import org.apache.spark.rdd.RDD
import org.bdgenomics.adam.models.VariantContext
import org.bdgenomics.adam.rdd.variant.{
- GenotypeRDD,
- VariantContextRDD
+ GenotypeDataset,
+ VariantContextDataset
}
import org.bdgenomics.adam.util.PhredUtils
import org.bdgenomics.avocado.util.LogUtils
@@ -49,7 +49,7 @@ object JointAnnotatorCaller extends Serializable {
* @param genotypes The genotypes to jointly process.
* @return Returns a squared off and annotated set of variant contexts.
*/
- def apply(genotypes: GenotypeRDD): VariantContextRDD = {
+ def apply(genotypes: GenotypeDataset): VariantContextDataset = {
apply(genotypes.toVariantContexts)
}
@@ -59,7 +59,7 @@ object JointAnnotatorCaller extends Serializable {
* @param variantContexts The squared off sites to process.
* @return Returns a squared off and annotated set of variant contexts.
*/
- def apply(variantContexts: VariantContextRDD): VariantContextRDD = {
+ def apply(variantContexts: VariantContextDataset): VariantContextDataset = {
variantContexts.transform(_.flatMap(annotateSite))
}
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/Observer.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/Observer.scala
index 79a7d806..6336ea01 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/Observer.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/Observer.scala
@@ -52,11 +52,11 @@ private[genotyping] object Observer extends Serializable {
// for convenience, get the sample name, mapping quality, sequence,
// qualities, and the contig name
- val sampleId = read.getRecordGroupSample
- val contigName = read.getContigName
- val mapQ = read.getMapq
+ val sampleId = read.getReadGroupSampleId
+ val referenceName = read.getReferenceName
+ val mapQ = read.getMappingQuality
val readSequence = read.getSequence
- val readQualities = read.getQual
+ val readQualities = read.getQuality
val forwardStrand = !read.getReadNegativeStrand
// map over the alignment operators and generate allelic observations
@@ -73,7 +73,7 @@ private[genotyping] object Observer extends Serializable {
(0 until length).map(idx => {
// the key is the (site, allele, sampleId)
- val key = (ReferenceRegion(contigName, pos, pos + 1),
+ val key = (ReferenceRegion(referenceName, pos, pos + 1),
readSequence(readIdx).toString,
sampleId)
@@ -104,7 +104,7 @@ private[genotyping] object Observer extends Serializable {
// the key is the (site, allele, sampleId)
// insertions associate to the site to their left, hence the -1
- val key = (ReferenceRegion(contigName, pos - 1, pos),
+ val key = (ReferenceRegion(referenceName, pos - 1, pos),
bases,
sampleId)
@@ -124,7 +124,7 @@ private[genotyping] object Observer extends Serializable {
// the key is the (site, allele, sampleId)
// deletions have an empty string for the allele
- val key = (ReferenceRegion(contigName, oldPos, pos),
+ val key = (ReferenceRegion(referenceName, oldPos, pos),
"",
sampleId)
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/SquareOffReferenceModel.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/SquareOffReferenceModel.scala
index a8f26d6b..1e334052 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/SquareOffReferenceModel.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/SquareOffReferenceModel.scala
@@ -23,9 +23,9 @@ import org.apache.spark.sql.functions._
import org.bdgenomics.adam.models.{ ReferenceRegion, VariantContext }
import org.bdgenomics.adam.rdd.ADAMContext._
import org.bdgenomics.adam.rdd.variant.{
- GenotypeRDD,
- VariantRDD,
- VariantContextRDD
+ GenotypeDataset,
+ VariantDataset,
+ VariantContextDataset
}
import org.bdgenomics.adam.sql.{
Genotype => GenotypeProduct,
@@ -65,7 +65,7 @@ object SquareOffReferenceModel {
* allele was called across all samples, with genotype likelihood models for
* all samples that had data at the site.
*/
- def apply(genotypes: GenotypeRDD): VariantContextRDD = {
+ def apply(genotypes: GenotypeDataset): VariantContextDataset = {
val variants = extractVariants(genotypes)
@@ -80,14 +80,14 @@ object SquareOffReferenceModel {
* allele was called across all samples, with genotype likelihood models for
* all samples that had data at the site.
*/
- def apply(genotypes: GenotypeRDD,
- variants: VariantRDD): VariantContextRDD = {
+ def apply(genotypes: GenotypeDataset,
+ variants: VariantDataset): VariantContextDataset = {
// join variants back against genotypes
val sites = variants.shuffleRegionJoinAndGroupByLeft(genotypes)
variants.rdd.unpersist()
- val calls = sites.transmute[VariantContext, VariantContextProduct, VariantContextRDD](
+ val calls = sites.transmute[VariantContext, VariantContextProduct, VariantContextDataset](
(rdd: RDD[(Variant, Iterable[Genotype])]) => rdd.map(s => squareOffSite(s._1, s._2)))
calls.replaceSamples(genotypes.samples)
@@ -132,7 +132,7 @@ object SquareOffReferenceModel {
* @param genotypes Genotypes containing both called sites and reference models.
* @return Returns sites where a variant was seen in at least one sample.
*/
- def extractVariants(genotypes: GenotypeRDD): VariantRDD = {
+ def extractVariants(genotypes: GenotypeDataset): VariantDataset = {
val altString = GenotypeAllele.ALT.toString()
@@ -144,7 +144,7 @@ object SquareOffReferenceModel {
val trimUdf = udf((a: String, b: String) => trimRight(a, b))
val trimmerUdf = udf((a: String, b: Int) => a.dropRight(b))
- genotypes.transmuteDataset[Variant, VariantProduct, VariantRDD]((ds: Dataset[GenotypeProduct]) => {
+ genotypes.transmuteDataset[Variant, VariantProduct, VariantDataset]((ds: Dataset[GenotypeProduct]) => {
import ds.sparkSession.implicits._
@@ -164,7 +164,7 @@ object SquareOffReferenceModel {
trimmedVariants.dropDuplicates("start",
"end",
- "contigName",
+ "referenceName",
"referenceAllele",
"alternateAllele")
})
@@ -210,7 +210,7 @@ object SquareOffReferenceModel {
genotypes.find(gt => {
gt.getStart == variant.getStart &&
gt.getEnd == variant.getEnd &&
- gt.getContigName == variant.getContigName &&
+ gt.getReferenceName == variant.getReferenceName &&
gt.getVariant.getReferenceAllele == variant.getReferenceAllele &&
gt.getVariant.getAlternateAllele == variant.getAlternateAllele
})
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/TrioCaller.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/TrioCaller.scala
index 7a5d7c34..859096ee 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/TrioCaller.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/genotyping/TrioCaller.scala
@@ -18,8 +18,8 @@
package org.bdgenomics.avocado.genotyping
import org.bdgenomics.adam.models.VariantContext
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
-import org.bdgenomics.adam.rdd.variant.{ GenotypeRDD, VariantContextRDD }
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
+import org.bdgenomics.adam.rdd.variant.{ GenotypeDataset, VariantContextDataset }
import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele }
import scala.collection.JavaConversions._
@@ -42,10 +42,10 @@ object TrioCaller extends Serializable {
* @param rdd The reads to extract the sample ID from.
* @return The sample ID.
*/
- def extractSampleId(rdd: AlignmentRecordRDD): String = {
- require(!rdd.recordGroups.isEmpty, "Record groups are empty.")
- val samples = rdd.recordGroups.recordGroups
- .map(rg => rg.sample)
+ def extractSampleId(rdd: AlignmentRecordDataset): String = {
+ require(!rdd.readGroups.isEmpty, "Read groups are empty.")
+ val samples = rdd.readGroups.readGroups
+ .map(rg => rg.sampleId)
.distinct
require(samples.size == 1,
"Had multiple sample names (%s) attached to reads.".format(
@@ -57,18 +57,18 @@ object TrioCaller extends Serializable {
/**
* Trio calls genotypes in a pedigree with two parents and one child.
*
- * @param rdd RDD of base genotypes.
+ * @param genotypes Dataset of base genotypes.
* @param firstParentId The sample ID for the first parent.
* @param secondParentId The sample ID for the second parent.
* @param childId The sample ID for the child.
* @return Returns the final genotypes.
*/
- def apply(rdd: GenotypeRDD,
+ def apply(genotypes: GenotypeDataset,
firstParentId: String,
secondParentId: String,
- childId: String): GenotypeRDD = {
+ childId: String): GenotypeDataset = {
- apply(rdd.toVariantContexts,
+ apply(genotypes.toVariantContexts,
firstParentId,
secondParentId,
childId).toGenotypes
@@ -77,17 +77,17 @@ object TrioCaller extends Serializable {
/**
* Trio calls genotypes in a pedigree with two parents and one child.
*
- * @param rdd RDD of base genotypes.
+ * @param genotypes Dataset of base genotypes.
* @param firstParentId The sample ID for the first parent.
* @param secondParentId The sample ID for the second parent.
* @param childId The sample ID for the child.
* @return Returns the final genotypes.
*/
- private[genotyping] def apply(rdd: VariantContextRDD,
+ private[genotyping] def apply(genotypes: VariantContextDataset,
firstParentId: String,
secondParentId: String,
- childId: String): VariantContextRDD = {
- rdd.transform(rdd => {
+ childId: String): VariantContextDataset = {
+ genotypes.transform(rdd => {
rdd.filter(!filterRef(_))
.map(processVariant(_, firstParentId, secondParentId, childId))
.filter(!filterRef(_))
@@ -124,7 +124,7 @@ object TrioCaller extends Serializable {
def makeNoCall(sampleId: String): Genotype = {
Genotype.newBuilder
- .setContigName(vc.variant.variant.getContigName)
+ .setReferenceName(vc.variant.variant.getReferenceName)
.setStart(vc.variant.variant.getStart)
.setEnd(vc.variant.variant.getEnd)
.setVariant(vc.variant.variant)
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/models/CopyNumberMap.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/models/CopyNumberMap.scala
index fd74b7ef..80c8052b 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/models/CopyNumberMap.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/models/CopyNumberMap.scala
@@ -19,7 +19,7 @@ package org.bdgenomics.avocado.models
import org.apache.spark.SparkContext._
import org.bdgenomics.adam.models.ReferenceRegion
-import org.bdgenomics.adam.rdd.feature.FeatureRDD
+import org.bdgenomics.adam.rdd.feature.FeatureDataset
import scala.math.{ max, min }
private[avocado] object CopyNumberMap extends Serializable {
@@ -40,10 +40,11 @@ private[avocado] object CopyNumberMap extends Serializable {
* Creates a copy number variant map from CNVs stored as features.
*
* @param basePloidy The ploidy of this sample.
+ * @param features Dataset of features.
* @return Returns a map containing copy number variants.
*/
def apply(basePloidy: Int,
- features: FeatureRDD): CopyNumberMap = {
+ features: FeatureDataset): CopyNumberMap = {
val cnvMap = features.rdd
.flatMap(f => f.getFeatureType match {
@@ -68,19 +69,19 @@ private[avocado] object CopyNumberMap extends Serializable {
* An object that stores copy number variation.
*
* @param basePloidy The ploidy of this sample.
- * @param variantsByContig A map mapping contig names to the regions containing
- * copy number variants. These regions are sorted per contig, and are in
+ * @param variantsByReference A map mapping reference names to the regions containing
+ * copy number variants. These regions are sorted per reference, and are in
* tuples with the observed copy number over that region.
*/
private[avocado] case class CopyNumberMap private (
val basePloidy: Int,
- private[models] val variantsByContig: Map[String, Seq[(ReferenceRegion, Int)]]) {
+ private[models] val variantsByReference: Map[String, Seq[(ReferenceRegion, Int)]]) {
/**
* @return The lowest copy number seen over all regions.
*/
def minPloidy: Int = {
- variantsByContig.values
+ variantsByReference.values
.flatMap(s => s.map(_._2))
.fold(basePloidy)(_ min _)
}
@@ -89,7 +90,7 @@ private[avocado] case class CopyNumberMap private (
* @return The highest copy number seen over all regions.
*/
def maxPloidy: Int = {
- variantsByContig.values
+ variantsByReference.values
.flatMap(s => s.map(_._2))
.fold(basePloidy)(_ max _)
}
@@ -103,7 +104,7 @@ private[avocado] case class CopyNumberMap private (
def overlappingVariants(
rr: ReferenceRegion): Iterable[(ReferenceRegion, Int)] = {
- variantsByContig.get(rr.referenceName)
+ variantsByReference.get(rr.referenceName)
.fold(Iterable.empty[(ReferenceRegion, Int)])(i => {
i.dropWhile(!_._1.overlaps(rr))
.takeWhile(_._1.overlaps(rr))
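A small sketch of building a copy number map from the renamed FeatureDataset, assuming the three-argument FeatureDataset constructor used in the test changes later in this diff; CopyNumberMap is private[avocado], so this lives inside the avocado packages, and the DUP feature and coordinates are hypothetical:

    import org.bdgenomics.adam.models.SequenceDictionary
    import org.bdgenomics.adam.rdd.feature.FeatureDataset
    import org.bdgenomics.formats.avro.Feature

    val dup = Feature.newBuilder
      .setReferenceName("chr1")
      .setStart(100L)
      .setEnd(200L)
      .setFeatureType("DUP")
      .build

    // base ploidy of 2, with one duplication over chr1:100-200
    val cnvMap = CopyNumberMap(2,
      FeatureDataset(sc.parallelize(Seq(dup)), SequenceDictionary.empty, Seq.empty))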
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/realigner/Realigner.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/realigner/Realigner.scala
index f007b815..529363b3 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/realigner/Realigner.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/realigner/Realigner.scala
@@ -18,7 +18,7 @@
package org.bdgenomics.avocado.realigner
import org.apache.spark.rdd.RDD
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
import org.bdgenomics.avocado.Timers._
import org.bdgenomics.avocado.models.{
Clipped,
@@ -43,8 +43,8 @@ object Realigner extends Logging {
* @param kmerLength The length k of the k-mers.
* @return Returns the realigned reads.
*/
- def realign(reads: AlignmentRecordRDD,
- kmerLength: Int): AlignmentRecordRDD = {
+ def realign(reads: AlignmentRecordDataset,
+ kmerLength: Int): AlignmentRecordDataset = {
reads.transform(realignRdd(_, kmerLength))
}
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardFilterGenotypes.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardFilterGenotypes.scala
index e1efe565..7d218660 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardFilterGenotypes.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardFilterGenotypes.scala
@@ -18,7 +18,7 @@
package org.bdgenomics.avocado.util
import htsjdk.variant.vcf.{ VCFFilterHeaderLine, VCFHeaderLine }
-import org.bdgenomics.adam.rdd.variant.GenotypeRDD
+import org.bdgenomics.adam.rdd.variant.GenotypeDataset
import org.bdgenomics.formats.avro.{
Genotype,
GenotypeAllele,
@@ -166,17 +166,17 @@ private[avocado] trait HardFilterGenotypesArgs extends Serializable {
private[avocado] object HardFilterGenotypes extends Serializable {
/**
- * Applies hard filters to a GenotypeRDD.
+ * Applies hard filters to a GenotypeDataset.
*
- * @param grdd GenotypeRDD to filter.
+ * @param genotypes GenotypeDataset to filter.
* @param args The hard filter configuration to apply.
* @param filterRefGenotypes If true, discards homozygous ref calls.
- * @return A new GenotypeRDD of hard filtered genotypes.
+ * @return A new GenotypeDataset of hard filtered genotypes.
*/
- def apply(grdd: GenotypeRDD,
+ def apply(genotypes: GenotypeDataset,
args: HardFilterGenotypesArgs,
filterRefGenotypes: Boolean = true,
- emitAllGenotypes: Boolean = false): GenotypeRDD = {
+ emitAllGenotypes: Boolean = false): GenotypeDataset = {
// make snp and indel filters
val snpFilters = buildSnpHardFilters(args)
@@ -242,7 +242,7 @@ private[avocado] object HardFilterGenotypes extends Serializable {
// flat map the filters over the genotype rdd
val minQuality = args.minQuality
- grdd.transform(rdd => {
+ genotypes.transform(rdd => {
rdd.flatMap(filterGenotype(_,
minQuality,
snpFilters,
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardLimiter.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardLimiter.scala
index d24149ba..f8e967e5 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardLimiter.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/util/HardLimiter.scala
@@ -109,7 +109,7 @@ private[avocado] object HardLimiter extends Serializable {
val (lastRead, _) = kv
assert(lastRead.getStart <= readStart,
"New read (%s) is before last read (%s).".format(read, lastRead))
- assert(lastRead.getContigName == read._1.getContigName)
+ assert(lastRead.getReferenceName == read._1.getReferenceName)
})
// any read that ends before this new read starts can be flushed
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/util/PrefilterReads.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/util/PrefilterReads.scala
index bd739081..2deef675 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/util/PrefilterReads.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/util/PrefilterReads.scala
@@ -18,7 +18,7 @@
package org.bdgenomics.avocado.util
import org.bdgenomics.adam.models.SequenceDictionary
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
import org.bdgenomics.formats.avro.AlignmentRecord
trait PrefilterReadsArgs extends Serializable {
@@ -50,36 +50,36 @@ trait PrefilterReadsArgs extends Serializable {
}
/**
- * Reifies an input AlignmentRecordRDD down to the contigs and reads we
+ * Reifies an input AlignmentRecordDataset down to the references and reads we
* want to genotype.
*/
object PrefilterReads extends Serializable {
/**
- * Filters out reads and contigs that should not be processed.
+ * Filters out reads and references that should not be processed.
*
- * @param rdd RDD of reads and associated metadata.
+ * @param reads Dataset of reads and associated metadata.
* @param args Arguments specifying the filters to apply.
- * @return Returns a new AlignmentRecordRDD where reads that we don't want
- * to use in genotyping have been discarded, and where contigs that we
+ * @return Returns a new AlignmentRecordDataset where reads that we don't want
+ * to use in genotyping have been discarded, and where references that we
* don't want to genotype have been removed.
*/
- def apply(rdd: AlignmentRecordRDD,
- args: PrefilterReadsArgs): AlignmentRecordRDD = {
+ def apply(reads: AlignmentRecordDataset,
+ args: PrefilterReadsArgs): AlignmentRecordDataset = {
// get filter functions
- val contigFn = contigFilterFn(args)
- val readFn = readFilterFn(args, contigFn)
+ val referenceFn = referenceFilterFn(args)
+ val readFn = readFilterFn(args, referenceFn)
- // filter contigs and construct a new sequence dictionary
- val sequences = new SequenceDictionary(rdd.sequences
+ // filter references and construct a new sequence dictionary
+ val sequences = new SequenceDictionary(reads.sequences
.records
- .filter(r => contigFn(r.name)))
+ .filter(r => referenceFn(r.name)))
- // filter reads and construct a new rdd
- rdd.transform(r => {
+ // filter reads and construct a new dataset
+ reads.transform(r => {
r.filter(readFn)
- .map(maybeNullifyMate(_, contigFn))
+ .map(maybeNullifyMate(_, referenceFn))
}).replaceSequences(sequences)
}
@@ -87,13 +87,13 @@ object PrefilterReads extends Serializable {
* Nullifies the mate mapping info for reads whose mate is filtered.
*
* Needed to generate SAM/BAM/CRAM files containing filtered reads.
- * If this isn't run, the conversion will error as the mate contig
+ * If this isn't run, the conversion will error as the mate reference
* names are not found in the sequence dictionary.
*
* @param read Read to check for filtered mate.
- * @param filterFn The function to use to filter contig names.
+ * @param filterFn The function to use to filter reference names.
* @return Returns a read whose mate mapping info has been nullified if the
- * mate mapping fields indicate that the mate is mapped to a contig that has
+ * mate mapping fields indicate that the mate is mapped to a reference that has
* been filtered out.
*/
private[util] def maybeNullifyMate(
@@ -102,12 +102,12 @@ object PrefilterReads extends Serializable {
if (read.getReadPaired &&
read.getMateMapped) {
- if (filterFn(read.getMateContigName)) {
+ if (filterFn(read.getMateReferenceName)) {
read
} else {
AlignmentRecord.newBuilder(read)
.setMateMapped(false)
- .setMateContigName(null)
+ .setMateReferenceName(null)
.build
}
} else {
@@ -116,11 +116,11 @@ object PrefilterReads extends Serializable {
}
/**
- * @param args The arguments specifying which contigs to keep.
- * @return Returns a function that returns true if a contig with a given name
+ * @param args The arguments specifying which references to keep.
+ * @return Returns a function that returns true if a reference with a given name
* should be kept.
*/
- protected[util] def contigFilterFn(args: PrefilterReadsArgs): (String => Boolean) = {
+ protected[util] def referenceFilterFn(args: PrefilterReadsArgs): (String => Boolean) = {
val fns = Iterable(filterNonGrcAutosome(_), filterNonGrcSex(_), filterNonGrcMitochondrial(_),
filterGrcAutosome(_), filterGrcSex(_), filterGrcMitochondrial(_))
val filteredFns = Iterable(true, !args.autosomalOnly, args.keepMitochondrialChromosome,
@@ -140,18 +140,18 @@ object PrefilterReads extends Serializable {
/**
* @param args The arguments specifying which reads to keep.
- * @param contigFilterFn A function that determines which contigs should be
- * kept, given the contig name.
+ * @param referenceFilterFn A function that determines which references should be
+ * kept, given the reference name.
* @return Returns a function that returns true if a read should be kept.
*/
protected[util] def readFilterFn(
args: PrefilterReadsArgs,
- contigFilterFn: (String => Boolean)): (AlignmentRecord => Boolean) = {
+ referenceFilterFn: (String => Boolean)): (AlignmentRecord => Boolean) = {
def baseFilterFn(r: AlignmentRecord): Boolean = {
(filterMapped(r, args.keepNonPrimary) &&
filterMappingQuality(r, args.minMappingQuality) &&
- contigFilterFn(r.getContigName))
+ referenceFilterFn(r.getReferenceName))
}
if (args.keepDuplicates) {
@@ -190,76 +190,76 @@ object PrefilterReads extends Serializable {
*/
protected[util] def filterMappingQuality(read: AlignmentRecord,
minMappingQuality: Int): Boolean = {
- // if mapq is not set, ignore
- if (read.getMapq == null) {
+ // if mappingQuality is not set, ignore
+ if (read.getMappingQuality == null) {
true
} else {
- read.getMapq > minMappingQuality
+ read.getMappingQuality > minMappingQuality
}
}
/**
- * @param contigName Contig name to test for filtration.
- * @return Returns true if the contig matches the naming scheme for GRCh
+ * @param referenceName Reference name to test for filtration.
+ * @return Returns true if the reference matches the naming scheme for GRCh
* autosomal chromosomes.
*/
- protected[util] def filterGrcAutosome(contigName: String): Boolean = {
- contigName != null &&
- contigName.size >= 4 &&
- contigName.startsWith("chr") && contigName.drop(3).forall(_.isDigit)
+ protected[util] def filterGrcAutosome(referenceName: String): Boolean = {
+ referenceName != null &&
+ referenceName.size >= 4 &&
+ referenceName.startsWith("chr") && referenceName.drop(3).forall(_.isDigit)
}
/**
- * @param contigName Contig name to test for filtration.
- * @return Returns true if the contig matches the naming scheme for GRCh
+ * @param referenceName Reference name to test for filtration.
+ * @return Returns true if the reference matches the naming scheme for GRCh
* sex chromosomes.
*/
- protected[util] def filterGrcSex(contigName: String): Boolean = {
- if (contigName != null &&
- contigName.length == 4 &&
- contigName.startsWith("chr")) {
- contigName(3) == 'X' || contigName(3) == 'Y' ||
- contigName(3) == 'Z' || contigName(3) == 'W'
+ protected[util] def filterGrcSex(referenceName: String): Boolean = {
+ if (referenceName != null &&
+ referenceName.length == 4 &&
+ referenceName.startsWith("chr")) {
+ referenceName(3) == 'X' || referenceName(3) == 'Y' ||
+ referenceName(3) == 'Z' || referenceName(3) == 'W'
} else {
false
}
}
/**
- * @param contigName Contig name to test for filtration.
- * @return Returns true if the contig matches the GRCh mitochondrial
+ * @param referenceName Reference name to test for filtration.
+ * @return Returns true if the reference matches the GRCh mitochondrial
* chromosome name.
*/
- protected[util] def filterGrcMitochondrial(contigName: String): Boolean = {
- contigName != null && contigName == "chrM"
+ protected[util] def filterGrcMitochondrial(referenceName: String): Boolean = {
+ referenceName != null && referenceName == "chrM"
}
/**
- * @param contigName Contig name to test for filtration.
- * @return Returns true if the contig matches the naming scheme for HG/UCSC
+ * @param referenceName Reference name to test for filtration.
+ * @return Returns true if the reference matches the naming scheme for HG/UCSC
* autosomal chromosomes.
*/
- protected[util] def filterNonGrcAutosome(contigName: String): Boolean = {
- contigName != null && contigName.forall(_.isDigit)
+ protected[util] def filterNonGrcAutosome(referenceName: String): Boolean = {
+ referenceName != null && referenceName.forall(_.isDigit)
}
/**
- * @param contigName Contig name to test for filtration.
- * @return Returns true if the contig matches the naming scheme for HG/UCSC
+ * @param referenceName Reference name to test for filtration.
+ * @return Returns true if the reference matches the naming scheme for HG/UCSC
* sex chromosomes.
*/
- protected[util] def filterNonGrcSex(contigName: String): Boolean = {
- contigName != null &&
- (contigName == "X" || contigName == "Y" ||
- contigName == "Z" || contigName == "W")
+ protected[util] def filterNonGrcSex(referenceName: String): Boolean = {
+ referenceName != null &&
+ (referenceName == "X" || referenceName == "Y" ||
+ referenceName == "Z" || referenceName == "W")
}
/**
- * @param contigName Contig name to test for filtration.
- * @return Returns true if the contig matches the HG/UCSC mitochondrial
+ * @param referenceName Reference name to test for filtration.
+ * @return Returns true if the reference matches the HG/UCSC mitochondrial
* chromosome name.
*/
- protected[util] def filterNonGrcMitochondrial(contigName: String): Boolean = {
- contigName != null && contigName == "MT"
+ protected[util] def filterNonGrcMitochondrial(referenceName: String): Boolean = {
+ referenceName != null && referenceName == "MT"
}
}
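A quick sketch of the renamed reference-name filter helpers; they are protected[util], so the calls below assume code within org.bdgenomics.avocado.util. In this codebase the "Grc" variants match "chr"-prefixed names and the "NonGrc" variants match bare names:

    PrefilterReads.filterGrcAutosome("chr12")        // true: "chr" prefix plus digits
    PrefilterReads.filterGrcSex("chrX")              // true: "chr" plus X/Y/Z/W
    PrefilterReads.filterGrcMitochondrial("chrM")    // true
    PrefilterReads.filterNonGrcAutosome("12")        // true: all digits
    PrefilterReads.filterNonGrcMitochondrial("MT")   // true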
diff --git a/avocado-core/src/main/scala/org/bdgenomics/avocado/util/RewriteHets.scala b/avocado-core/src/main/scala/org/bdgenomics/avocado/util/RewriteHets.scala
index 14e899a8..ad991ef6 100644
--- a/avocado-core/src/main/scala/org/bdgenomics/avocado/util/RewriteHets.scala
+++ b/avocado-core/src/main/scala/org/bdgenomics/avocado/util/RewriteHets.scala
@@ -17,7 +17,7 @@
*/
package org.bdgenomics.avocado.util
-import org.bdgenomics.adam.rdd.variant.GenotypeRDD
+import org.bdgenomics.adam.rdd.variant.GenotypeDataset
import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele }
import scala.collection.JavaConversions._
@@ -54,15 +54,15 @@ private[avocado] trait RewriteHetsArgs extends Serializable {
object RewriteHets extends Serializable {
/**
- * Identifies high allelic fraction het calls in an RDD of genotypes and
+ * Identifies high allelic fraction het calls in a dataset of genotypes and
* rewrites them as homozygous alt calls.
*
- * @param rdd The RDD of genotypes to filter.
+ * @param genotypes The dataset of genotypes to filter.
* @param args The arguments to configure the rewriter.
- * @return Returns a new RDD of genotypes.
+ * @return Returns a new dataset of genotypes.
*/
- def apply(rdd: GenotypeRDD,
- args: RewriteHetsArgs): GenotypeRDD = {
+ def apply(genotypes: GenotypeDataset,
+ args: RewriteHetsArgs): GenotypeDataset = {
val maxSnpAllelicFraction = args.maxHetSnpAltAllelicFraction
val maxIndelAllelicFraction = args.maxHetIndelAltAllelicFraction
@@ -70,13 +70,13 @@ object RewriteHets extends Serializable {
val rewriteHetIndels = !args.disableHetIndelRewriting
if (rewriteHetSnps || rewriteHetIndels) {
- rdd.transform(gtRdd => gtRdd.map(processGenotype(_,
+ genotypes.transform(gtRdd => gtRdd.map(processGenotype(_,
maxSnpAllelicFraction,
maxIndelAllelicFraction,
rewriteHetSnps,
rewriteHetIndels)))
} else {
- rdd
+ genotypes
}
}
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyperSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyperSuite.scala
index 89207621..4676d6ce 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyperSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/BiallelicGenotyperSuite.scala
@@ -18,15 +18,15 @@
package org.bdgenomics.avocado.genotyping
import org.bdgenomics.adam.models.{
- RecordGroup,
- RecordGroupDictionary,
+ ReadGroup,
+ ReadGroupDictionary,
SequenceDictionary,
SequenceRecord
}
import org.bdgenomics.adam.rdd.ADAMContext._
-import org.bdgenomics.adam.rdd.feature.FeatureRDD
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
-import org.bdgenomics.adam.rdd.variant.VariantRDD
+import org.bdgenomics.adam.rdd.feature.FeatureDataset
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
+import org.bdgenomics.adam.rdd.variant.VariantDataset
import org.bdgenomics.avocado.AvocadoFunSuite
import org.bdgenomics.avocado.models.{ CopyNumberMap, Observation }
import org.bdgenomics.avocado.util.{
@@ -51,26 +51,26 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
.toSeq
val perfectRead = AlignmentRecord.newBuilder
- .setContigName("1")
+ .setReferenceName("1")
.setStart(10L)
.setEnd(25L)
.setCigar("15M")
.setMismatchingPositions("15")
.setSequence("ATGGTCCACGAATAA")
- .setQual("DEFGHIIIIIHGFED")
- .setMapq(50)
+ .setQuality("DEFGHIIIIIHGFED")
+ .setMappingQuality(50)
.setReadMapped(true)
.build
val snpRead = AlignmentRecord.newBuilder(perfectRead)
.setMismatchingPositions("6C8")
.setSequence("ATGGTCAACGAATAA")
- .setMapq(40)
+ .setMappingQuality(40)
.setReadNegativeStrand(true)
.build
val snp = Variant.newBuilder
- .setContigName("1")
+ .setReferenceName("1")
.setStart(16L)
.setEnd(17L)
.setReferenceAllele("C")
@@ -78,7 +78,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
.build
val cnSnp = Variant.newBuilder
- .setContigName("1")
+ .setReferenceName("1")
.setStart(17L)
.setEnd(18L)
.setReferenceAllele("A")
@@ -86,14 +86,14 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
.build
val cnvDup = Feature.newBuilder
- .setContigName("1")
+ .setReferenceName("1")
.setStart(17L)
.setEnd(18L)
.setFeatureType("DUP")
.build
val cnvDel = Feature.newBuilder
- .setContigName("1")
+ .setReferenceName("1")
.setStart(17L)
.setEnd(18L)
.setFeatureType("DEL")
@@ -144,16 +144,16 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
sparkTest("score snps in a read overlapping a copy number dup boundary") {
val genotypes = BiallelicGenotyper.call(
- AlignmentRecordRDD(sc.parallelize(Seq(snpRead)),
+ AlignmentRecordDataset(sc.parallelize(Seq(snpRead)),
SequenceDictionary.empty,
- RecordGroupDictionary(Seq(RecordGroup("rg1", "rg1"))),
+ ReadGroupDictionary(Seq(ReadGroup("rg1", "rg1"))),
Seq.empty),
- VariantRDD(sc.parallelize(Seq(snp, cnSnp)),
+ VariantDataset(sc.parallelize(Seq(snp, cnSnp)),
SequenceDictionary.empty,
Seq.empty),
CopyNumberMap(2,
- FeatureRDD(sc.parallelize(Seq(cnvDup)),
- SequenceDictionary.empty)),
+ FeatureDataset(sc.parallelize(Seq(cnvDup)),
+ SequenceDictionary.empty, Seq.empty)),
false,
maxQuality = 40,
maxMapQ = 40)
@@ -180,16 +180,16 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
sparkTest("score snps in a read overlapping a copy number del boundary") {
val genotypes = BiallelicGenotyper.call(
- AlignmentRecordRDD(sc.parallelize(Seq(snpRead)),
+ AlignmentRecordDataset(sc.parallelize(Seq(snpRead)),
SequenceDictionary.empty,
- RecordGroupDictionary(Seq(RecordGroup("rg1", "rg1"))),
+ ReadGroupDictionary(Seq(ReadGroup("rg1", "rg1"))),
Seq.empty),
- VariantRDD(sc.parallelize(Seq(snp, cnSnp)),
+ VariantDataset(sc.parallelize(Seq(snp, cnSnp)),
SequenceDictionary.empty,
Seq.empty),
CopyNumberMap(2,
- FeatureRDD(sc.parallelize(Seq(cnvDel)),
- SequenceDictionary.empty)),
+ FeatureDataset(sc.parallelize(Seq(cnvDel)),
+ SequenceDictionary.empty, Seq.empty)),
false,
maxQuality = 40,
maxMapQ = 40)
@@ -307,7 +307,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
assert(genotype.getVariant === snp)
assert(genotype.getStart === snp.getStart)
assert(genotype.getEnd === snp.getEnd)
- assert(genotype.getContigName === snp.getContigName)
+ assert(genotype.getReferenceName === snp.getReferenceName)
assert(genotype.getSampleId === "sample")
assert(genotype.getGenotypeQuality === 73)
assert(genotype.getAlleles.size === 2)
@@ -327,7 +327,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.104160.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val variants = DiscoverVariants(reads)
@@ -384,7 +384,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878_snp_A2G_chr20_225058.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val genotypes = BiallelicGenotyper.discoverAndCall(reads,
@@ -413,7 +413,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878_snp_A2G_chr20_225058.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val genotypes = BiallelicGenotyper.discoverAndCall(reads,
@@ -446,7 +446,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.832736.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val genotypes = BiallelicGenotyper.discoverAndCall(reads,
@@ -480,7 +480,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.839395.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val genotypes = BiallelicGenotyper.discoverAndCall(reads,
@@ -548,7 +548,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.567239.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val variants = DiscoverVariants(reads)
@@ -575,7 +575,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.875159.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val variants = DiscoverVariants(reads)
@@ -601,7 +601,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.1_1777263.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val gts = BiallelicGenotyper.discoverAndCall(reads,
@@ -637,7 +637,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.877715.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val gts = BiallelicGenotyper.discoverAndCall(reads, CopyNumberMap.empty(2), false)
@@ -655,7 +655,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.886049.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val gts = BiallelicGenotyper.discoverAndCall(reads, CopyNumberMap.empty(2), false)
@@ -673,7 +673,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.889159.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val gts = BiallelicGenotyper.discoverAndCall(reads,
@@ -705,7 +705,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.866511.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
}).realignIndels()
val gts = BiallelicGenotyper.discoverAndCall(reads,
@@ -720,7 +720,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.905130.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val gts = BiallelicGenotyper.discoverAndCall(reads,
@@ -735,7 +735,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
assert(gts.size === 1)
val gt = gts.head
- assert(gt.getVariant.getContigName === "1")
+ assert(gt.getVariant.getReferenceName === "1")
assert(gt.getVariant.getStart === 905129L)
assert(gt.getVariant.getEnd === 905132L)
assert(gt.getVariant.getReferenceAllele === "ATG")
@@ -747,7 +747,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.905130.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val genotypes = BiallelicGenotyper.discoverAndCall(reads,
@@ -770,7 +770,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
assert(refCountByGt.count(_ == 0) === 2)
val gt = gts.filter(_.getVariant.getStart == 905129L).head
- assert(gt.getVariant.getContigName === "1")
+ assert(gt.getVariant.getReferenceName === "1")
assert(gt.getVariant.getEnd === 905132L)
assert(gt.getVariant.getReferenceAllele === "ATG")
assert(gt.getVariant.getAlternateAllele === "A")
@@ -781,7 +781,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.907170.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 0)
+ rdd.filter(_.getMappingQuality > 0)
})
val gts = BiallelicGenotyper.discoverAndCall(reads,
@@ -793,7 +793,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
assert(gts.size === 1)
val gt = gts.head
- assert(gt.getVariant.getContigName === "1")
+ assert(gt.getVariant.getReferenceName === "1")
assert(gt.getVariant.getStart === 907169L)
assert(gt.getVariant.getEnd === 907171L)
assert(gt.getVariant.getReferenceAllele === "AG")
@@ -805,7 +805,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val readPath = resourceUrl("NA12878.chr1.240898.sam")
val reads = sc.loadAlignments(readPath.toString)
.transform(rdd => {
- rdd.filter(_.getMapq > 10)
+ rdd.filter(_.getMappingQuality > 10)
})
val gts = BiallelicGenotyper.discoverAndCall(reads,
@@ -817,7 +817,7 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
assert(gts.size === 1)
val gt = gts.head
- assert(gt.getVariant.getContigName === "1")
+ assert(gt.getVariant.getReferenceName === "1")
assert(gt.getVariant.getStart === 240897L)
assert(gt.getVariant.getEnd === 240898L)
assert(gt.getVariant.getReferenceAllele === "T")
@@ -829,14 +829,14 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
def makeRead(allele: Char): AlignmentRecord = {
assert(allele != 'T')
AlignmentRecord.newBuilder
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setStart(10L)
.setEnd(15L)
.setSequence("AC%sTG".format(allele))
.setCigar("5M")
.setMismatchingPositions("2T2")
- .setQual(Seq(50, 50, 50, 50, 50).map(q => (q + 33).toInt).mkString)
- .setMapq(50)
+ .setQuality(Seq(50, 50, 50, 50, 50).map(q => (q + 33).toInt).mkString)
+ .setMappingQuality(50)
.setReadMapped(true)
.setPrimaryAlignment(true)
.build
@@ -844,10 +844,10 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
val reads = Seq(makeRead('A'), makeRead('A'), makeRead('A'), makeRead('A'),
makeRead('C'), makeRead('C'), makeRead('C'), makeRead('C'))
- val readRdd = AlignmentRecordRDD(
+ val readRdd = AlignmentRecordDataset(
sc.parallelize(reads),
SequenceDictionary(SequenceRecord("ctg", 16L)),
- RecordGroupDictionary(Seq(RecordGroup("rg1", "rg1"))),
+ ReadGroupDictionary(Seq(ReadGroup("rg1", "rg1"))),
Seq.empty)
val gts = BiallelicGenotyper.discoverAndCall(readRdd,
@@ -876,12 +876,12 @@ class BiallelicGenotyperSuite extends AvocadoFunSuite {
assert(gts.size === 2)
val taaaGt = gts.filter(_.getVariant.getAlternateAllele === "TAAA").head
- assert(taaaGt.getVariant.getContigName === "1")
+ assert(taaaGt.getVariant.getReferenceName === "1")
assert(taaaGt.getVariant.getReferenceAllele === "T")
assert(taaaGt.getVariant.getAlternateAllele === "TAAA")
assert(taaaGt.getAlleles.count(_ == GenotypeAllele.ALT) === 2)
val caaaGt = gts.filter(_.getVariant.getAlternateAllele === "CAAA").head
- assert(caaaGt.getVariant.getContigName === "1")
+ assert(caaaGt.getVariant.getReferenceName === "1")
assert(caaaGt.getVariant.getReferenceAllele === "T")
assert(caaaGt.getVariant.getAlternateAllele === "CAAA")
assert(caaaGt.getAlleles.count(_ == GenotypeAllele.OTHER_ALT) === 2)
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/DiscoverVariantsSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/DiscoverVariantsSuite.scala
index 17332534..f61a0735 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/DiscoverVariantsSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/DiscoverVariantsSuite.scala
@@ -18,11 +18,11 @@
package org.bdgenomics.avocado.genotyping
import org.bdgenomics.adam.models.{
- RecordGroupDictionary,
+ ReadGroupDictionary,
SequenceDictionary,
SequenceRecord
}
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
import org.bdgenomics.avocado.AvocadoFunSuite
import org.bdgenomics.formats.avro.{ AlignmentRecord, Variant }
@@ -31,104 +31,104 @@ class DiscoverVariantsSuite extends AvocadoFunSuite {
val unalignedRead = AlignmentRecord.newBuilder()
.setReadMapped(false)
.setSequence("ACACATGA")
- .setQual("!!!!!!!!")
+ .setQuality("!!!!!!!!")
.build
val perfectReadMCigar = AlignmentRecord.newBuilder()
.setReadMapped(true)
- .setContigName("1")
+ .setReferenceName("1")
.setStart(10L)
.setEnd(18L)
.setSequence("ACACATGA")
- .setQual("!!!!!!!!")
+ .setQuality("!!!!!!!!")
.setCigar("8M")
.setMismatchingPositions("8")
.build
val perfectReadEqCigar = AlignmentRecord.newBuilder()
.setReadMapped(true)
- .setContigName("1")
+ .setReferenceName("1")
.setStart(10L)
.setEnd(18L)
.setSequence("ACACATGA")
- .setQual("!!!!!!!!")
+ .setQuality("!!!!!!!!")
.setCigar("8=")
.setMismatchingPositions("8")
.build
val snpReadMCigar = AlignmentRecord.newBuilder()
.setReadMapped(true)
- .setContigName("1")
+ .setReferenceName("1")
.setStart(10L)
.setEnd(18L)
.setSequence("ACACATGA")
- .setQual("!!!!!!!!")
+ .setQuality("!!!!!!!!")
.setCigar("8M")
.setMismatchingPositions("4C3")
.build
val snpReadEqCigar = AlignmentRecord.newBuilder()
.setReadMapped(true)
- .setContigName("1")
+ .setReferenceName("1")
.setStart(10L)
.setEnd(18L)
.setSequence("ACACATGA")
- .setQual("!!!!!!!!")
+ .setQuality("!!!!!!!!")
.setCigar("4=1X3=")
.setMismatchingPositions("4C3")
.build
val snpReadHardClip = AlignmentRecord.newBuilder()
.setReadMapped(true)
- .setContigName("1")
+ .setReferenceName("1")
.setStart(10L)
.setEnd(18L)
.setSequence("ACACATGA")
- .setQual("!!!!!!!!")
+ .setQuality("!!!!!!!!")
.setCigar("2H8M")
.setMismatchingPositions("4C3")
.build
val snpReadSoftClip = AlignmentRecord.newBuilder()
.setReadMapped(true)
- .setContigName("1")
+ .setReferenceName("1")
.setStart(10L)
.setEnd(18L)
.setSequence("TGACACATGA")
- .setQual("!!!!!!!!!!")
+ .setQuality("!!!!!!!!!!")
.setCigar("2S8M")
.setMismatchingPositions("4C3")
.build
val insertRead = AlignmentRecord.newBuilder()
.setReadMapped(true)
- .setContigName("2")
+ .setReferenceName("2")
.setStart(10L)
.setEnd(18L)
.setSequence("ACACTTATGA")
- .setQual("!!!!!!!!!!")
+ .setQuality("!!!!!!!!!!")
.setCigar("4M2I4M")
.setMismatchingPositions("8")
.build
val deleteRead = AlignmentRecord.newBuilder()
.setReadMapped(true)
- .setContigName("3")
+ .setReferenceName("3")
.setStart(10L)
.setEnd(20L)
.setSequence("ACACATGA")
- .setQual("!!!!!!!!")
+ .setQuality("!!!!!!!!")
.setCigar("4M2D4M")
.setMismatchingPositions("4^TT4")
.build
val mnpRead = AlignmentRecord.newBuilder()
.setReadMapped(true)
- .setContigName("3")
+ .setReferenceName("3")
.setStart(10L)
.setEnd(18L)
.setSequence("ACACATGA")
- .setQual("!!!!!!!!")
+ .setQuality("!!!!!!!!")
.setCigar("8M")
.setMismatchingPositions("3T0T3")
.build
@@ -153,7 +153,7 @@ class DiscoverVariantsSuite extends AvocadoFunSuite {
}
def validateSnp(snp: Variant) {
- assert(snp.getContigName() === "1")
+ assert(snp.getReferenceName() === "1")
assert(snp.getStart() === 14L)
assert(snp.getEnd() === 15L)
assert(snp.getReferenceAllele === "C")
@@ -185,7 +185,7 @@ class DiscoverVariantsSuite extends AvocadoFunSuite {
}
def validateInsertion(ins: Variant) {
- assert(ins.getContigName() === "2")
+ assert(ins.getReferenceName() === "2")
assert(ins.getStart() === 13L)
assert(ins.getEnd() === 14L)
assert(ins.getReferenceAllele() === "C")
@@ -206,7 +206,7 @@ class DiscoverVariantsSuite extends AvocadoFunSuite {
}
def validateDeletion(del: Variant) {
- assert(del.getContigName() === "3")
+ assert(del.getReferenceName() === "3")
assert(del.getStart() === 13L)
assert(del.getEnd() === 16L)
assert(del.getReferenceAllele() === "CTT")
@@ -233,12 +233,12 @@ class DiscoverVariantsSuite extends AvocadoFunSuite {
snpReadMCigar, snpReadEqCigar,
insertRead,
deleteRead))
- val readRdd = AlignmentRecordRDD(rdd,
+ val readRdd = AlignmentRecordDataset(rdd,
SequenceDictionary(
SequenceRecord("1", 50L),
SequenceRecord("2", 40L),
SequenceRecord("3", 30L)),
- RecordGroupDictionary.empty,
+ ReadGroupDictionary.empty,
Seq.empty)
val variantRdd = DiscoverVariants(readRdd)
@@ -250,7 +250,7 @@ class DiscoverVariantsSuite extends AvocadoFunSuite {
test("break TT->CA mnp into two snps") {
val variants = DiscoverVariants.variantsInRead(mnpRead, 0)
assert(variants.size === 2)
- assert(variants.forall(_.contigName == "3"))
+ assert(variants.forall(_.referenceName == "3"))
assert(variants.forall(_.referenceAllele == "T"))
val optC = variants.find(_.alternateAllele == Some("C"))
assert(optC.isDefined)
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/DiscoveredVariantSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/DiscoveredVariantSuite.scala
index b3e7a90f..003c2e80 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/DiscoveredVariantSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/DiscoveredVariantSuite.scala
@@ -24,7 +24,7 @@ class DiscoveredVariantSuite extends FunSuite {
test("round trip conversion to/from variant") {
val variant = Variant.newBuilder
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setStart(100L)
.setEnd(101L)
.setReferenceAllele("A")
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/JointAnnotatorCallerSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/JointAnnotatorCallerSuite.scala
index f8df3644..3410d451 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/JointAnnotatorCallerSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/JointAnnotatorCallerSuite.scala
@@ -31,7 +31,7 @@ import scala.collection.JavaConversions._
class JointAnnotatorCallerSuite extends AvocadoFunSuite {
val baseGt = Genotype.newBuilder
- .setContigName("chr1")
+ .setReferenceName("chr1")
.setStart(1000)
.setEnd(1001)
.setVariant(Variant.newBuilder
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/ObserverSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/ObserverSuite.scala
index d5743a0a..3f0a5307 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/ObserverSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/ObserverSuite.scala
@@ -33,10 +33,10 @@ class ObserverSuite extends AvocadoFunSuite {
val read = AlignmentRecord.newBuilder
.setStart(10L)
.setEnd(11L)
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setSequence("AAAA")
- .setQual("****")
- .setMapq(0)
+ .setQuality("****")
+ .setMappingQuality(0)
.setReadMapped(true)
.setCigar("4S")
.setMismatchingPositions("0")
@@ -68,17 +68,17 @@ class ObserverSuite extends AvocadoFunSuite {
val read = AlignmentRecord.newBuilder
.setStart(10L)
.setEnd(15L)
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setSequence("ACGT")
- .setQual(Array(20, 30, 40, 50)
+ .setQuality(Array(20, 30, 40, 50)
.map(v => (v + 33).toChar)
.mkString)
.setReadMapped(true)
.setReadNegativeStrand(false)
.setCigar("4M")
.setMismatchingPositions("4")
- .setMapq(50)
- .setRecordGroupSample("sample")
+ .setMappingQuality(50)
+ .setReadGroupSampleId("sample")
.build()
val obs = Observer.observeRead(read)
@@ -115,17 +115,17 @@ class ObserverSuite extends AvocadoFunSuite {
val read = AlignmentRecord.newBuilder
.setStart(10L)
.setEnd(12L)
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setSequence("ACGT")
- .setQual(Array(20, 30, 40, 50)
+ .setQuality(Array(20, 30, 40, 50)
.map(v => (v + 33).toChar)
.mkString)
.setReadMapped(true)
.setReadNegativeStrand(false)
.setCigar("1M2I1M")
.setMismatchingPositions("2")
- .setMapq(50)
- .setRecordGroupSample("sample")
+ .setMappingQuality(50)
+ .setReadGroupSampleId("sample")
.build()
val obs = Observer.observeRead(read)
@@ -174,17 +174,17 @@ class ObserverSuite extends AvocadoFunSuite {
val read = AlignmentRecord.newBuilder
.setStart(10L)
.setEnd(17L)
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setSequence("ACGT")
- .setQual(Array(20, 30, 40, 50)
+ .setQuality(Array(20, 30, 40, 50)
.map(v => (v + 33).toChar)
.mkString)
.setReadMapped(true)
.setReadNegativeStrand(false)
.setCigar("2M2D2M")
.setMismatchingPositions("2^NN2")
- .setMapq(50)
- .setRecordGroupSample("sample")
+ .setMappingQuality(50)
+ .setReadGroupSampleId("sample")
.build()
val obs = Observer.observeRead(read)
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/SquareOffReferenceModelSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/SquareOffReferenceModelSuite.scala
index 485e9152..f379dfb7 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/SquareOffReferenceModelSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/SquareOffReferenceModelSuite.scala
@@ -58,7 +58,7 @@ class SquareOffReferenceModelSuite extends AvocadoFunSuite {
val variants = discoveredVariants.rdd.collect
assert(variants.size === 3)
- assert(variants.forall(_.getContigName == "chr22"))
+ assert(variants.forall(_.getReferenceName == "chr22"))
val s602 = variants.filter(_.getStart == 16157602L)
assert(s602.size === 1)
assert(s602.forall(_.getReferenceAllele == "G"))
@@ -77,14 +77,14 @@ class SquareOffReferenceModelSuite extends AvocadoFunSuite {
test("find genotype if variant is present") {
val variant = Variant.newBuilder()
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setStart(1L)
.setEnd(2L)
.setReferenceAllele("A")
.setAlternateAllele("T")
.build
val genotypes = Iterable(Genotype.newBuilder
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setStart(1L)
.setEnd(2L)
.setVariant(Variant.newBuilder()
@@ -101,14 +101,14 @@ class SquareOffReferenceModelSuite extends AvocadoFunSuite {
test("don't find genotype if variant is not present") {
val variant = Variant.newBuilder()
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setStart(1L)
.setEnd(2L)
.setReferenceAllele("A")
.setAlternateAllele("T")
.build
val genotypes = Iterable(Genotype.newBuilder
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setStart(1L)
.setEnd(10L)
.setVariant(Variant.newBuilder()
@@ -126,14 +126,14 @@ class SquareOffReferenceModelSuite extends AvocadoFunSuite {
val variant = Variant.newBuilder
.setStart(100L)
.setEnd(101L)
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setReferenceAllele("A")
.setAlternateAllele("G")
.build
val genotypes = Iterable(Genotype.newBuilder
.setStart(90L)
.setEnd(110L)
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setNonReferenceLikelihoods(Seq(0.0, -1.0, -2.0)
.map(d => d: java.lang.Double))
.build)
@@ -146,7 +146,7 @@ class SquareOffReferenceModelSuite extends AvocadoFunSuite {
optExcisedGenotype.foreach(gt => {
assert(gt.getStart === 100L)
assert(gt.getEnd === 101L)
- assert(gt.getContigName === "ctg")
+ assert(gt.getReferenceName === "ctg")
assert(gt.getVariant.getReferenceAllele === "A")
assert(gt.getVariant.getAlternateAllele === "G")
assert(gt.getGenotypeLikelihoods.size === 3)
@@ -160,12 +160,12 @@ class SquareOffReferenceModelSuite extends AvocadoFunSuite {
val variant = Variant.newBuilder
.setStart(100L)
.setEnd(101L)
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setReferenceAllele("A")
.setAlternateAllele("G")
.build
val genotypes = Iterable(Genotype.newBuilder
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setStart(100L)
.setEnd(101L)
.setVariant(Variant.newBuilder()
@@ -176,7 +176,7 @@ class SquareOffReferenceModelSuite extends AvocadoFunSuite {
.build, Genotype.newBuilder
.setStart(90L)
.setEnd(110L)
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setNonReferenceLikelihoods(Seq(0.0, -1.0, -2.0)
.map(d => d: java.lang.Double))
.setSampleId("sample2")
@@ -189,7 +189,7 @@ class SquareOffReferenceModelSuite extends AvocadoFunSuite {
assert(vc.genotypes.size === 2)
assert(vc.genotypes.forall(_.getStart == 100L))
assert(vc.genotypes.forall(_.getEnd == 101L))
- assert(vc.genotypes.forall(_.getContigName == "ctg"))
+ assert(vc.genotypes.forall(_.getReferenceName == "ctg"))
assert(vc.genotypes.forall(_.getVariant.getReferenceAllele == "A"))
assert(vc.genotypes.forall(_.getVariant.getAlternateAllele == "G"))
assert(vc.genotypes.count(_.getSampleId == "sample1") === 1)
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/TrioCallerSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/TrioCallerSuite.scala
index 005a2402..3b6be3e6 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/TrioCallerSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/genotyping/TrioCallerSuite.scala
@@ -19,13 +19,13 @@ package org.bdgenomics.avocado.genotyping
import htsjdk.samtools.ValidationStringency
import org.bdgenomics.adam.models.{
- RecordGroup,
- RecordGroupDictionary,
+ ReadGroup,
+ ReadGroupDictionary,
SequenceDictionary,
VariantContext
}
import org.bdgenomics.adam.rdd.ADAMContext._
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
import org.bdgenomics.avocado.AvocadoFunSuite
import org.bdgenomics.formats.avro.{
AlignmentRecord,
@@ -38,42 +38,42 @@ import scala.collection.JavaConversions._
class TrioCallerSuite extends AvocadoFunSuite {
- def makeRdd(recordGroups: RecordGroupDictionary): AlignmentRecordRDD = {
- AlignmentRecordRDD(sc.emptyRDD[AlignmentRecord],
+ def makeRdd(readGroups: ReadGroupDictionary): AlignmentRecordDataset = {
+ AlignmentRecordDataset(sc.emptyRDD[AlignmentRecord],
SequenceDictionary.empty,
- recordGroups,
+ readGroups,
Seq.empty[ProcessingStep])
}
sparkTest("cannot have a sample with no record groups") {
intercept[IllegalArgumentException] {
- TrioCaller.extractSampleId(makeRdd(RecordGroupDictionary.empty))
+ TrioCaller.extractSampleId(makeRdd(ReadGroupDictionary.empty))
}
}
sparkTest("cannot have a sample with discordant sample ids") {
intercept[IllegalArgumentException] {
- TrioCaller.extractSampleId(makeRdd(RecordGroupDictionary(Seq(
- RecordGroup("sample1", "rg1"),
- RecordGroup("sample2", "rg2")))))
+ TrioCaller.extractSampleId(makeRdd(ReadGroupDictionary(Seq(
+ ReadGroup("sample1", "rg1"),
+ ReadGroup("sample2", "rg2")))))
}
}
sparkTest("extract id from a single read group") {
- val sampleId = TrioCaller.extractSampleId(makeRdd(RecordGroupDictionary(Seq(
- RecordGroup("sample1", "rg1")))))
+ val sampleId = TrioCaller.extractSampleId(makeRdd(ReadGroupDictionary(Seq(
+ ReadGroup("sample1", "rg1")))))
assert(sampleId === "sample1")
}
sparkTest("extract id from multiple read groups") {
- val sampleId = TrioCaller.extractSampleId(makeRdd(RecordGroupDictionary(Seq(
- RecordGroup("sample1", "rg1"),
- RecordGroup("sample1", "rg2")))))
+ val sampleId = TrioCaller.extractSampleId(makeRdd(ReadGroupDictionary(Seq(
+ ReadGroup("sample1", "rg1"),
+ ReadGroup("sample1", "rg2")))))
assert(sampleId === "sample1")
}
val variant = Variant.newBuilder
- .setContigName("chr")
+ .setReferenceName("chr")
.setStart(100L)
.setEnd(101L)
.setReferenceAllele("A")
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/models/CopyNumberMapSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/models/CopyNumberMapSuite.scala
index 0eec38a7..11d2cc51 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/models/CopyNumberMapSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/models/CopyNumberMapSuite.scala
@@ -18,7 +18,7 @@
package org.bdgenomics.avocado.models
import org.bdgenomics.adam.models.ReferenceRegion
-import org.bdgenomics.adam.rdd.feature.FeatureRDD
+import org.bdgenomics.adam.rdd.feature.FeatureDataset
import org.bdgenomics.avocado.AvocadoFunSuite
import org.bdgenomics.formats.avro.Feature
@@ -30,24 +30,24 @@ class CopyNumberMapSuite extends AvocadoFunSuite {
assert(emptyMap.basePloidy === 2)
assert(emptyMap.minPloidy === 2)
assert(emptyMap.maxPloidy === 2)
- assert(emptyMap.variantsByContig.isEmpty)
+ assert(emptyMap.variantsByReference.isEmpty)
}
sparkTest("create a map with only diploid features") {
val cnvs = Seq(Feature.newBuilder
.setStart(100L)
.setEnd(201L)
- .setContigName("chr1")
+ .setReferenceName("chr1")
.setFeatureType("DIP")
.build)
val emptyMap = CopyNumberMap(2,
- FeatureRDD(sc.parallelize(cnvs)))
+ FeatureDataset(sc.parallelize(cnvs)))
assert(emptyMap.basePloidy === 2)
assert(emptyMap.minPloidy === 2)
assert(emptyMap.maxPloidy === 2)
- assert(emptyMap.variantsByContig.isEmpty)
+ assert(emptyMap.variantsByReference.isEmpty)
assert(emptyMap.overlappingVariants(ReferenceRegion("chr1", 100L, 201L))
.isEmpty)
}
@@ -56,42 +56,42 @@ class CopyNumberMapSuite extends AvocadoFunSuite {
val cnvs = Seq(Feature.newBuilder
.setStart(100L)
.setEnd(201L)
- .setContigName("chr1")
+ .setReferenceName("chr1")
.setFeatureType("DIP")
.build,
Feature.newBuilder
.setStart(1000L)
.setEnd(2000L)
- .setContigName("chr1")
+ .setReferenceName("chr1")
.setFeatureType("DUP")
.build,
Feature.newBuilder
.setStart(2000L)
.setEnd(3000L)
- .setContigName("chr1")
+ .setReferenceName("chr1")
.setFeatureType("DEL")
.build,
Feature.newBuilder
.setStart(2000L)
.setEnd(3000L)
- .setContigName("chr2")
+ .setReferenceName("chr2")
.setFeatureType("DEL")
.build)
val cnvMap = CopyNumberMap(2,
- FeatureRDD(sc.parallelize(cnvs)))
+ FeatureDataset(sc.parallelize(cnvs)))
assert(cnvMap.basePloidy === 2)
assert(cnvMap.minPloidy === 1)
assert(cnvMap.maxPloidy === 3)
- assert(cnvMap.variantsByContig.size === 2)
- val chr1Cnvs = cnvMap.variantsByContig("chr1")
+ assert(cnvMap.variantsByReference.size === 2)
+ val chr1Cnvs = cnvMap.variantsByReference("chr1")
assert(chr1Cnvs.size === 2)
assert(chr1Cnvs(0)._1 === ReferenceRegion("chr1", 1000L, 2000L))
assert(chr1Cnvs(0)._2 === 3)
assert(chr1Cnvs(1)._1 === ReferenceRegion("chr1", 2000L, 3000L))
assert(chr1Cnvs(1)._2 === 1)
- val chr2Cnvs = cnvMap.variantsByContig("chr2")
+ val chr2Cnvs = cnvMap.variantsByReference("chr2")
assert(chr2Cnvs.size === 1)
assert(chr2Cnvs(0)._1 === ReferenceRegion("chr2", 2000L, 3000L))
assert(chr2Cnvs(0)._2 === 1)
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/realigner/RealignerSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/realigner/RealignerSuite.scala
index c21615be..e9cb71a0 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/realigner/RealignerSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/realigner/RealignerSuite.scala
@@ -20,11 +20,11 @@ package org.bdgenomics.avocado.realigner
import org.bdgenomics.adam.models.{
SequenceDictionary,
SequenceRecord,
- RecordGroup,
- RecordGroupDictionary
+ ReadGroup,
+ ReadGroupDictionary
}
import org.bdgenomics.adam.rdd.ADAMContext._
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
import org.bdgenomics.avocado.AvocadoFunSuite
import org.bdgenomics.avocado.models.{
Clipped,
@@ -192,9 +192,9 @@ class RealignerSuite extends AvocadoFunSuite {
def makeAndRealignRdd(reads: Seq[AlignmentRecord],
kmerLength: Int): Array[AlignmentRecord] = {
- val gRdd = AlignmentRecordRDD(sc.parallelize(reads),
+ val gRdd = AlignmentRecordDataset(sc.parallelize(reads),
SequenceDictionary(SequenceRecord("ctg", 50L)),
- RecordGroupDictionary(Seq(RecordGroup("rg", "rg"))),
+ ReadGroupDictionary(Seq(ReadGroup("rg", "rg"))),
Seq.empty)
// realign the genomic rdd
@@ -219,8 +219,8 @@ class RealignerSuite extends AvocadoFunSuite {
AlignmentRecord.newBuilder()
.setReadName(rId.toString)
- .setContigName("ctg")
- .setRecordGroupName("rg")
+ .setReferenceName("ctg")
+ .setReadGroupId("rg")
.setReadMapped(true)
.setSequence(sequence.drop(rId).take(readLength))
.setStart(rId.toLong)
@@ -262,8 +262,8 @@ class RealignerSuite extends AvocadoFunSuite {
AlignmentRecord.newBuilder()
.setReadName(rId.toString)
- .setContigName("ctg")
- .setRecordGroupName("rg")
+ .setReferenceName("ctg")
+ .setReadGroupId("rg")
.setReadMapped(true)
.setSequence(sequence.drop(rId).take(readLength))
.setStart(rId.toLong)
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/util/HardLimiterSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/util/HardLimiterSuite.scala
index e991bf87..f0fe3ff9 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/util/HardLimiterSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/util/HardLimiterSuite.scala
@@ -26,7 +26,7 @@ class HardLimiterSuite extends AvocadoFunSuite {
val reads = (0 to 5).map(i => {
AlignmentRecord.newBuilder()
- .setContigName("ctg")
+ .setReferenceName("ctg")
.setStart(i.toLong)
.setEnd(i.toLong + 3L)
.build()
@@ -139,10 +139,10 @@ class HardLimiterSuite extends AvocadoFunSuite {
}
}
- test("adding a read that is on the wrong contig should fire an assert") {
+ test("adding a read that is on the wrong reference should fire an assert") {
intercept[AssertionError] {
val randomRead = AlignmentRecord.newBuilder()
- .setContigName("random")
+ .setReferenceName("random")
.setStart(100L)
.setEnd(101L)
.build()
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/util/PrefilterReadsSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/util/PrefilterReadsSuite.scala
index 657ced72..f420518b 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/util/PrefilterReadsSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/util/PrefilterReadsSuite.scala
@@ -19,12 +19,12 @@ package org.bdgenomics.avocado.util
import org.bdgenomics.avocado.AvocadoFunSuite
import org.bdgenomics.adam.models.{
- RecordGroupDictionary,
+ ReadGroupDictionary,
SequenceDictionary,
SequenceRecord
}
import org.bdgenomics.adam.rdd.ADAMContext._
-import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
+import org.bdgenomics.adam.rdd.read.AlignmentRecordDataset
import org.bdgenomics.formats.avro.AlignmentRecord
case class TestPrefilterReadsArgs(var autosomalOnly: Boolean = false,
@@ -68,7 +68,7 @@ class PrefilterReadsSuite extends AvocadoFunSuite {
assert(!PrefilterReads.filterMapped(unmappedRead, true))
}
- val contigNames = Seq("chr1",
+ val referenceNames = Seq("chr1",
"1",
"chrX",
"X",
@@ -90,7 +90,7 @@ class PrefilterReadsSuite extends AvocadoFunSuite {
}
}
- contigNames.zipWithIndex
+ referenceNames.zipWithIndex
.foreach(p => assertIdx(p._2, p._1))
}
@@ -119,31 +119,31 @@ class PrefilterReadsSuite extends AvocadoFunSuite {
}
test("filter autosomal chromosomes from generator") {
- testChromosomeHelperSet(PrefilterReads.contigFilterFn(TestPrefilterReadsArgs(autosomalOnly = true)), Set(0, 1))
+ testChromosomeHelperSet(PrefilterReads.referenceFilterFn(TestPrefilterReadsArgs(autosomalOnly = true)), Set(0, 1))
}
test("filter autosomal + sex chromosomes from generator") {
- testChromosomeHelperSet(PrefilterReads.contigFilterFn(TestPrefilterReadsArgs()), Set(0, 1,
+ testChromosomeHelperSet(PrefilterReads.referenceFilterFn(TestPrefilterReadsArgs()), Set(0, 1,
2, 3,
4, 5))
}
test("filter all chromosomes from generator") {
- testChromosomeHelperSet(PrefilterReads.contigFilterFn(TestPrefilterReadsArgs(keepMitochondrialChromosome = true)), Set(0, 1, 2, 3, 4, 5, 6, 7))
+ testChromosomeHelperSet(PrefilterReads.referenceFilterFn(TestPrefilterReadsArgs(keepMitochondrialChromosome = true)), Set(0, 1, 2, 3, 4, 5, 6, 7))
}
- test("update a read whose mate is mapped to a filtered contig") {
+ test("update a read whose mate is mapped to a filtered reference") {
val read = AlignmentRecord.newBuilder()
.setReadPaired(true)
.setMateMapped(true)
- .setMateContigName("notARealContig")
+ .setMateReferenceName("notARealReference")
.build
- val filters = PrefilterReads.contigFilterFn(TestPrefilterReadsArgs())
+ val filters = PrefilterReads.referenceFilterFn(TestPrefilterReadsArgs())
val nullified = PrefilterReads.maybeNullifyMate(read, filters)
assert(!nullified.getMateMapped)
- assert(nullified.getMateContigName == null)
+ assert(nullified.getMateReferenceName == null)
}
val reads = Seq(AlignmentRecord.newBuilder()
@@ -154,12 +154,12 @@ class PrefilterReadsSuite extends AvocadoFunSuite {
AlignmentRecord.newBuilder()
.setReadMapped(true)
.setDuplicateRead(false)).flatMap(rb => {
- contigNames.map(cn => rb.setContigName(cn).build)
+ referenceNames.map(cn => rb.setReferenceName(cn).build)
})
def testReadHelperSet(testArgs: PrefilterReadsArgs, passIdxSet: Set[Int]) {
val testFn = PrefilterReads.readFilterFn(testArgs,
- PrefilterReads.contigFilterFn(testArgs))
+ PrefilterReads.referenceFilterFn(testArgs))
def assertIdx(idx: Int, testRead: AlignmentRecord) = {
if (passIdxSet(idx)) {
@@ -203,16 +203,16 @@ class PrefilterReadsSuite extends AvocadoFunSuite {
Set(16, 17, 18, 19, 20, 21, 22, 23))
}
- val sequences = new SequenceDictionary(contigNames.map(cn => SequenceRecord(cn, 10L))
+ val sequences = new SequenceDictionary(referenceNames.map(cn => SequenceRecord(cn, 10L))
.toVector)
- def testRdd(args: PrefilterReadsArgs, numReads: Int, numContigs: Int) {
+ def testRdd(args: PrefilterReadsArgs, numReads: Int, numReferences: Int) {
- val readRdd = AlignmentRecordRDD(sc.parallelize(reads), sequences, RecordGroupDictionary.empty, Seq.empty)
+ val readRdd = AlignmentRecordDataset(sc.parallelize(reads), sequences, ReadGroupDictionary.empty, Seq.empty)
val filteredRdd = PrefilterReads(readRdd, args)
assert(filteredRdd.rdd.count === numReads)
- assert(filteredRdd.sequences.records.size === numContigs)
+ assert(filteredRdd.sequences.records.size === numReferences)
}
sparkTest("filter rdd of reads mapped to autosomal chromosomes from generator") {
diff --git a/avocado-core/src/test/scala/org/bdgenomics/avocado/util/RewriteHetsSuite.scala b/avocado-core/src/test/scala/org/bdgenomics/avocado/util/RewriteHetsSuite.scala
index 05084351..d82451d3 100644
--- a/avocado-core/src/test/scala/org/bdgenomics/avocado/util/RewriteHetsSuite.scala
+++ b/avocado-core/src/test/scala/org/bdgenomics/avocado/util/RewriteHetsSuite.scala
@@ -19,7 +19,7 @@ package org.bdgenomics.avocado.util
import org.bdgenomics.avocado.AvocadoFunSuite
import org.bdgenomics.adam.models.SequenceDictionary
-import org.bdgenomics.adam.rdd.variant.GenotypeRDD
+import org.bdgenomics.adam.rdd.variant.GenotypeDataset
import org.bdgenomics.formats.avro.{ Genotype, GenotypeAllele, Variant }
import scala.collection.JavaConversions._
@@ -165,9 +165,9 @@ class RewriteHetsSuite extends AvocadoFunSuite {
homRefSnp, homRefIndel,
homAltSnp, homAltIndel)
- def gtRdd: GenotypeRDD = {
+ def gtRdd: GenotypeDataset = {
val rdd = sc.parallelize(genotypes)
- GenotypeRDD(rdd,
+ GenotypeDataset(rdd,
SequenceDictionary.empty,
Seq.empty,
Seq.empty)
diff --git a/pom.xml b/pom.xml
index 77de3787..761ed6d1 100644
--- a/pom.xml
+++ b/pom.xml
@@ -15,15 +15,15 @@
avocado: A Variant Caller, Distributed
- <adam.version>0.24.0</adam.version>
- <avro.version>1.8.0</avro.version>
+ <adam.version>0.26.0</adam.version>
+ <avro.version>1.8.2</avro.version>
<java.version>1.8</java.version>
- <scala.version>2.11.4</scala.version>
+ <scala.version>2.11.12</scala.version>
<scala.version.prefix>2.11</scala.version.prefix>
- <spark.version>2.2.0</spark.version>
+ <spark.version>2.3.2</spark.version>
- <hadoop.version>2.6.0</hadoop.version>
- <utils.version>0.2.11</utils.version>
+ <hadoop.version>2.7.5</hadoop.version>
+ <utils.version>0.2.14</utils.version>
<scoverage.plugin.version>1.1.1</scoverage.plugin.version>
@@ -71,7 +71,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-assembly-plugin</artifactId>
- <version>2.4.1</version>
+ <version>3.1.0</version>
<groupId>org.apache.maven.plugins</groupId>
@@ -86,7 +86,7 @@
<groupId>pl.project13.maven</groupId>
<artifactId>git-commit-id-plugin</artifactId>
- <version>2.2.1</version>
+ <version>2.2.2</version>
true
@@ -122,7 +122,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
- <version>3.5.1</version>
+ <version>3.8.0</version>
<source>${java.version}</source>
@@ -137,7 +137,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
- <version>2.7</version>
+ <version>2.22.1</version>
@@ -153,12 +153,12 @@
<artifactId>exec-maven-plugin</artifactId>
<groupId>org.codehaus.mojo</groupId>
- <version>1.3.2</version>
+ <version>1.5.0</version>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
- <version>1.10</version>
+ <version>1.12</version>
<groupId>org.apache.maven.plugins</groupId>
@@ -170,6 +170,11 @@
<arguments>-Psonatype-oss-release -DskipTests</arguments>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>3.1.0</version>
+ </plugin>
@@ -276,24 +281,34 @@
- <repositories>
- <repository>
- <id>Sonatype</id>
- <url>http://oss.sonatype.org/content/repositories/snapshots/</url>
- </repository>
- <repository>
- <id>Apache</id>
- <url>http://people.apache.org/repo/m2-snapshot-repository</url>
- </repository>
- </repositories>
-
+ <dependency>
+ <groupId>org.seqdoop</groupId>
+ <artifactId>hadoop-bam</artifactId>
+ <version>7.9.2</version>
+ <exclusions>
+ <exclusion>
+ <groupId>org.seqdoop</groupId>
+ <artifactId>htsjdk</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<groupId>org.bdgenomics.utils</groupId>
<artifactId>utils-cli-spark2_2.11</artifactId>
<version>${utils.version}</version>
+ <dependency>
+ <groupId>org.bdgenomics.utils</groupId>
+ <artifactId>utils-intervalrdd-spark2_2.11</artifactId>
+ <version>${utils.version}</version>
+ </dependency>
+ <dependency>
+ <groupId>org.bdgenomics.utils</groupId>
+ <artifactId>utils-io-spark2_2.11</artifactId>
+ <version>${utils.version}</version>
+ </dependency>
<groupId>org.bdgenomics.avocado</groupId>
<artifactId>avocado-core_2.11</artifactId>
@@ -312,6 +327,12 @@
<version>${utils.version}</version>
<type>test-jar</type>
<scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>*</artifactId>
+ </exclusion>
+ </exclusions>
<groupId>org.bdgenomics.utils</groupId>
@@ -368,7 +389,7 @@
<groupId>org.scalatest</groupId>
<artifactId>scalatest_${scala.version.prefix}</artifactId>
- <version>2.2.6</version>
+ <version>3.0.6</version>
<scope>test</scope>
@@ -379,12 +400,12 @@
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
- <version>1.3.2</version>
+ <version>2.6</version>
<groupId>args4j</groupId>
<artifactId>args4j</artifactId>
- <version>2.0.23</version>
+ <version>2.0.31</version>
<groupId>org.apache.spark</groupId>
@@ -426,7 +447,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
- <version>2.2.1</version>
+ <version>3.0.1</version>
<id>attach-sources</id>
@@ -440,7 +461,7 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
- <version>2.9.1</version>
+ <version>3.0.1</version>
<id>attach-javadocs</id>
@@ -495,7 +516,7 @@
<artifactId>scoverage-maven-plugin</artifactId>
<version>${scoverage.plugin.version}</version>
- <scalaVersion>2.11.4</scalaVersion>
+ <scalaVersion>2.11.12</scalaVersion>
org.bdgenomics.avocado.Timers
true
90