diff --git a/adam-apis/pom.xml b/adam-apis/pom.xml index 261658d014..4039885eb0 100644 --- a/adam-apis/pom.xml +++ b/adam-apis/pom.xml @@ -106,6 +106,7 @@ org.bdgenomics.bdg-formats bdg-formats + 0.7.2-SNAPSHOT org.bdgenomics.adam diff --git a/adam-cli/pom.xml b/adam-cli/pom.xml index 14242a19df..283a555469 100644 --- a/adam-cli/pom.xml +++ b/adam-cli/pom.xml @@ -167,6 +167,7 @@ org.bdgenomics.bdg-formats bdg-formats + 0.7.2-SNAPSHOT org.bdgenomics.adam diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fragments2Reads.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fragments2Reads.scala index a14bc04153..c26a2b3808 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fragments2Reads.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Fragments2Reads.scala @@ -54,4 +54,4 @@ class Fragments2Reads(protected val args: Fragments2ReadsArgs) extends BDGSparkC SequenceDictionary.empty, RecordGroupDictionary.empty) } -} +} \ No newline at end of file diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala index 629ff05579..66f5f25285 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/Transform.scala @@ -32,7 +32,7 @@ import org.bdgenomics.adam.models.{ import org.bdgenomics.adam.projections.{ AlignmentRecordField, Filter } import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rdd.ADAMSaveAnyArgs -import org.bdgenomics.adam.rdd.read.MDTagging +import org.bdgenomics.adam.rdd.read.{ AlignmentRecordRDD, MDTagging } import org.bdgenomics.adam.rich.RichVariant import org.bdgenomics.formats.avro.AlignmentRecord import org.bdgenomics.utils.cli._ @@ -267,7 +267,7 @@ class Transform(protected val args: TransformArgs) extends BDGSparkCommand[Trans ) } - val aRdd = + val aRdd: AlignmentRecordRDD = if (args.forceLoadBam) { sc.loadBam(args.inputPath) } else if 
(args.forceLoadFastq) { diff --git a/adam-core/pom.xml b/adam-core/pom.xml index 6754bb5e54..40a5cc7a6e 100644 --- a/adam-core/pom.xml +++ b/adam-core/pom.xml @@ -118,6 +118,7 @@ org.bdgenomics.bdg-formats bdg-formats + 0.7.2-SNAPSHOT commons-io diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromKnowns.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromKnowns.scala index d7ba7392d4..dfddb803ec 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromKnowns.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromKnowns.scala @@ -70,7 +70,7 @@ class ConsensusGeneratorFromKnowns(file: String, @transient sc: SparkContext) ex // get region val start = reads.map(_.record.getStart).min val end = reads.map(_.getEnd).max - val refId = reads.head.record.getContig.getContigName + val refId = reads.head.record.getContigName val region = ReferenceRegion(refId, start, end + 1) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromReads.scala b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromReads.scala index 0fbf45ea50..89e0769eee 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromReads.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/algorithms/consensus/ConsensusGeneratorFromReads.scala @@ -78,7 +78,7 @@ class ConsensusGeneratorFromReads extends ConsensusGenerator { Consensus.generateAlternateConsensus( r.getSequence, ReferencePosition( - r.getContig.getContigName, + r.getContigName, r.getStart ), r.samtoolsCigar diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/AlignmentRecordConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/AlignmentRecordConverter.scala index a7bc70900c..0f39758a3b 100644 --- 
a/adam-core/src/main/scala/org/bdgenomics/adam/converters/AlignmentRecordConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/AlignmentRecordConverter.scala @@ -118,8 +118,7 @@ class AlignmentRecordConverter extends Serializable { }) // set the reference name, and alignment position, for mate - Option(adamRecord.getMateContig) - .map(_.getContigName) + Option(adamRecord.getMateContigName) .foreach(builder.setMateReferenceName) Option(adamRecord.getMateAlignmentStart) .foreach(s => builder.setMateAlignmentStart(s.toInt + 1)) @@ -155,8 +154,8 @@ class AlignmentRecordConverter extends Serializable { // only set alignment flags if read is aligned if (m) { // if we are aligned, we must have a reference - assert(adamRecord.getContig != null, "Cannot have null contig if aligned.") - builder.setReferenceName(adamRecord.getContig.getContigName) + assert(adamRecord.getContigName != null, "Cannot have null contig if aligned.") + builder.setReferenceName(adamRecord.getContigName) // set the cigar, if provided Option(adamRecord.getCigar).foreach(builder.setCigarString) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala index b0d8b1d8e2..6583de2438 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala @@ -93,7 +93,7 @@ object FragmentConverter extends Serializable { // build record AlignmentRecord.newBuilder() - .setContig(contig) + .setContigName(contig.getContigName) .setStart(fragmentRegion.start) .setEnd(fragmentRegion.end) .setSequence(fragmentString) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala b/adam-core/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala index afec7c03b6..a4456187b2 100644 --- 
a/adam-core/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala @@ -81,7 +81,7 @@ class SAMRecordConverter extends Serializable with Logging { val readReference: Int = samRecord.getReferenceIndex if (readReference != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { dict(samRecord.getReferenceName).foreach { (rec) => - builder.setContig(SequenceRecord.toADAMContig(rec)) + builder.setContigName(SequenceRecord.toADAMContig(rec).getContigName) } // set read alignment flag @@ -128,7 +128,7 @@ class SAMRecordConverter extends Serializable with Logging { if (mateReference != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) { dict(samRecord.getMateReferenceName).foreach { (rec) => - builder.setMateContig(SequenceRecord.toADAMContig(rec)) + builder.setMateContigName(SequenceRecord.toADAMContig(rec).getContigName) } val mateStart = samRecord.getMateAlignmentStart diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala index db2598df6d..7c57a10b62 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala @@ -49,7 +49,7 @@ object ReferencePosition extends Serializable { * @see fivePrime */ def apply(record: AlignmentRecord): ReferencePosition = { - new ReferencePosition(record.getContig.getContigName, record.getStart) + new ReferencePosition(record.getContigName, record.getStart) } /** diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala index f3d042ed4a..75906a7f8e 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala @@ -82,7 +82,7 @@ object 
ReferenceRegion { if (record.getReadMapped) { Some( ReferenceRegion( - record.getContig.getContigName, + record.getContigName, record.getStart, record.getEnd ) @@ -92,7 +92,7 @@ object ReferenceRegion { } def apply(record: AlignmentRecord): ReferenceRegion = { - ReferenceRegion(record.getContig.getContigName, record.getStart, record.getEnd) + ReferenceRegion(record.getContigName, record.getStart, record.getEnd) } /** diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala b/adam-core/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala index d26ee46a1c..205e44975c 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala @@ -345,27 +345,6 @@ object SequenceRecord { fromADAMContig(fragment.getContig) } - /** - * Convert an Read into one or more SequenceRecords. - * The reason that we can't simply use the "fromSpecificRecord" method, below, is that each Read - * can (through the fact that it could be a pair of reads) contain 1 or 2 possible SequenceRecord entries - * for the SequenceDictionary itself. Both have to be extracted, separately. - * - * @param rec The Read from which to extract the SequenceRecord entries - * @return a list of all SequenceRecord entries derivable from this record. 
- */ - def fromADAMRecord(rec: AlignmentRecord): Set[SequenceRecord] = { - assert(rec != null, "Read was null") - if (rec.getContig != null || rec.getMateContig != null) { - // The contig should be null for unmapped read - List(Option(rec.getContig), Option(rec.getMateContig)) - .flatten - .map(fromADAMContig) - .toSet - } else - Set() - } - def fromSpecificRecord(rec: IndexedRecord): SequenceRecord = { val schema = rec.getSchema if (schema.getField("referenceId") != null) { @@ -381,4 +360,6 @@ object SequenceRecord { throw new AssertionError("Missing information to generate SequenceRecord") } } + } + diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala index f9db02e13d..8f58ccdd08 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala @@ -229,9 +229,7 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log val projected: RDD[T] = loadParquet[T](filePath, None, projection = Some(projection)) val recs: RDD[SequenceRecord] = - if (isADAMRecord) { - projected.asInstanceOf[RDD[AlignmentRecord]].distinct().flatMap(rec => SequenceRecord.fromADAMRecord(rec)) - } else if (isADAMContig) { + if (isADAMContig) { projected.asInstanceOf[RDD[NucleotideContigFragment]].distinct().map(ctg => SequenceRecord.fromADAMContigFragment(ctg)) } else { projected.distinct().map(SequenceRecord.fromSpecificRecord(_)) diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/fragment/FragmentRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/fragment/FragmentRDDFunctions.scala index dc46354220..e090ec2057 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/fragment/FragmentRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/fragment/FragmentRDDFunctions.scala @@ -21,18 +21,14 @@ import org.apache.spark.rdd.RDD import 
org.bdgenomics.adam.converters.AlignmentRecordConverter import org.bdgenomics.adam.models.SequenceRecord import org.bdgenomics.adam.rdd.ADAMSequenceDictionaryRDDAggregator +import org.apache.spark.Logging import org.bdgenomics.formats.avro._ import scala.collection.JavaConversions._ -class FragmentRDDFunctions(rdd: RDD[Fragment]) extends ADAMSequenceDictionaryRDDAggregator[Fragment](rdd) { +class FragmentRDDFunctions(rdd: RDD[Fragment]) extends Serializable with Logging { def toReads: RDD[AlignmentRecord] = { val converter = new AlignmentRecordConverter rdd.flatMap(converter.convertFragment) } - - def getSequenceRecordsFromElement(elem: Fragment): Set[SequenceRecord] = { - val alignments = asScalaBuffer(elem.getAlignments) - alignments.flatMap(SequenceRecord.fromADAMRecord).toSet - } } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala index b39d4319e1..b70efc3d47 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctions.scala @@ -32,7 +32,7 @@ import org.apache.avro.specific.{ SpecificDatumWriter, SpecificRecordBase } import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{ FileSystem, FileUtil, Path } import org.apache.hadoop.io.LongWritable -import org.apache.spark.SparkContext +import org.apache.spark.{ Logging, SparkContext } import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.MetricsContext._ import org.apache.spark.rdd.RDD @@ -53,9 +53,7 @@ import scala.annotation.tailrec import scala.language.implicitConversions import scala.reflect.ClassTag -class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) - extends ADAMSequenceDictionaryRDDAggregator[AlignmentRecord](rdd) { - +class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) extends Serializable 
with Logging { /** * Calculates the subset of the RDD whose AlignmentRecords overlap the corresponding * query ReferenceRegion. Equality of the reference sequence (to which these are aligned) @@ -72,7 +70,7 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) def filterByOverlappingRegion(query: ReferenceRegion): RDD[AlignmentRecord] = { def overlapsQuery(rec: AlignmentRecord): Boolean = rec.getReadMapped && - rec.getContig.getContigName == query.referenceName && + rec.getContigName == query.referenceName && rec.getStart < query.end && rec.getEnd > query.start rdd.filter(overlapsQuery) @@ -126,7 +124,6 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * As such, we must force the user to pass in the schema. * * @tparam T The type of the specific record we are saving. - * * @param filename Path to save records to. * @param sc SparkContext used for identifying underlying file system. * @param schema Schema of records we are saving. @@ -171,7 +168,6 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * aligned to. * @param rgd Record group dictionary describing the record groups these * reads are from. - * * @see adamSave * @see adamAlignedRecordSave */ @@ -212,7 +208,6 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * aligned to. * @param rgd Record group dictionary describing the record groups these * reads are from. - * * @see adamSave * @see adamSAMSave * @see saveAsParquet @@ -235,7 +230,6 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * aligned to. * @param rgd Record group dictionary describing the record groups these * reads are from. - * * @see adamAlignedRecordSave * @see adamSAMSave * @see saveAsParquet @@ -263,9 +257,7 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * aligned to. * @param rgd Record group dictionary describing the record groups these * reads are from. - * * @return A string on the driver representing this RDD of reads in SAM format. 
- * * @see adamConvertToSAM */ def adamSAMString(sd: SequenceDictionary, @@ -593,10 +585,6 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) } } - def getSequenceRecordsFromElement(elem: AlignmentRecord): Set[SequenceRecord] = { - SequenceRecord.fromADAMRecord(elem).toSet - } - /** * Converts an RDD of ADAM read records into SAM records. * @@ -635,7 +623,6 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * * @param kmerLength The value of _k_ to use for cutting _k_-mers. * @return Returns an RDD containing k-mer/count pairs. - * * @see adamCountQmers */ def adamCountKmers(kmerLength: Int): RDD[(String, Long)] = { @@ -696,7 +683,6 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * Realigns indels using a concensus-based heuristic. * * @see RealignIndels - * * @param isSorted If the input data is sorted, setting this parameter to true avoids a second sort. * @param maxIndelSize The size of the largest indel to use for realignment. * @param maxConsensusNumber The maximum number of consensus sequences to realign against per @@ -704,7 +690,6 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * @param lodThreshold Log-odds threhold to use when realigning; realignments are only finalized * if the log-odds threshold is exceeded. * @param maxTargetSize The maximum width of a single target region for realignment. - * * @return Returns an RDD of mapped reads which have been realigned. */ def adamRealignIndels( @@ -724,6 +709,7 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) /** * Groups all reads by record group and read name + * * @return SingleReadBuckets with primary, secondary and unmapped reads */ def adamSingleReadBuckets(): RDD[SingleReadBucket] = { @@ -759,6 +745,7 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) /** * Returns the subset of the ADAMRecords which have an attribute with the given name. 
+ * * @param tagName The name of the attribute to filter on (should be length 2) * @return An RDD[Read] containing the subset of records with a tag that matches the given name. */ @@ -928,7 +915,6 @@ class AlignmentRecordRDDFunctions(rdd: RDD[AlignmentRecord]) * were _originally_ paired together. * * @note The RDD that this is called on should be the RDD with the first read from the pair. - * * @param secondPairRdd The rdd containing the second read from the pairs. * @param validationStringency How stringently to validate the reads. * @return Returns an RDD with the pair information recomputed. diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/FlagStat.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/FlagStat.scala index 6fcc690fb4..d2151480ef 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/FlagStat.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/FlagStat.scala @@ -44,7 +44,7 @@ object DuplicateMetrics { b2i(f(record)), b2i(f(record) && record.getReadMapped && record.getMateMapped), b2i(f(record) && record.getReadMapped && !record.getMateMapped), - b2i(f(record) && (!isSameContig(record.getContig, record.getMateContig))) + b2i(f(record) && (!isSameContig(record.getContigName, record.getMateContigName))) ) } (duplicateMetrics(isPrimary), duplicateMetrics(isSecondary)) @@ -97,7 +97,7 @@ object FlagStat { rdd.map { p => val mateMappedToDiffChromosome = - p.getReadPaired && p.getReadMapped && p.getMateMapped && !isSameContig(p.getContig, p.getMateContig) + p.getReadPaired && p.getReadMapped && p.getMateMapped && !isSameContig(p.getContigName, p.getMateContigName) val (primaryDuplicates, secondaryDuplicates) = DuplicateMetrics(p) new FlagStatMetrics( 1, diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala index 0d86459650..9f171c287c 100644 --- 
a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala @@ -72,8 +72,7 @@ case class MDTagging( val referenceFileB = sc.broadcast(referenceFile) reads.map(read => { (for { - contig <- Option(read.getContig) - contigName <- Option(contig.getContigName) + contig <- Option(read.getContigName) if read.getReadMapped } yield { maybeMDTagRead(read, referenceFileB.value.extract(ReferenceRegion(read))) @@ -105,5 +104,5 @@ object MDTagging { case class IncorrectMDTagException(read: AlignmentRecord, mdTag: String) extends Exception { override def getMessage: String = - s"Read: ${read.getReadName}, pos: ${read.getContig.getContigName}:${read.getStart}, cigar: ${read.getCigar}, existing MD tag: ${read.getMismatchingPositions}, correct MD tag: $mdTag" + s"Read: ${read.getReadName}, pos: ${read.getContigName}:${read.getStart}, cigar: ${read.getCigar}, existing MD tag: ${read.getMismatchingPositions}, correct MD tag: $mdTag" } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala index c20f6ed386..e0c1b440ad 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala @@ -102,7 +102,7 @@ object IndelRealignmentTarget { maxIndelSize: Int): Seq[IndelRealignmentTarget] = CreateIndelRealignmentTargets.time { val region = ReferenceRegion(read.record) - val refId = read.record.getContig.getContigName + val refId = read.record.getContigName var pos = List[ReferenceRegion]() var referencePos = read.record.getStart val cigar = read.samtoolsCigar diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala 
b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala index d0995d8fd1..d98d6101c5 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala @@ -251,7 +251,7 @@ private[rdd] class RealignIndels( // get reference from reads val (reference, refStart, refEnd) = getReferenceFromReads(reads.map(r => new RichAlignmentRecord(r))) - val refRegion = ReferenceRegion(reads.head.record.getContig.getContigName, refStart, refEnd) + val refRegion = ReferenceRegion(reads.head.record.getContigName, refStart, refEnd) // preprocess reads and get consensus val readsToClean = consensusModel.preprocessReadsForRealignment( diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala index e6bcf46eac..3f07fdfd27 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala @@ -132,7 +132,7 @@ class RichAlignmentRecord(val record: AlignmentRecord) { } else { Strand.Forward } - ReferencePosition(record.getContig.getContigName, fivePrimePosition, strand) + ReferencePosition(record.getContigName, fivePrimePosition, strand) } catch { case e: Throwable => { println("caught " + e + " when trying to get position for " + record) @@ -167,7 +167,7 @@ class RichAlignmentRecord(val record: AlignmentRecord) { def getReferenceContext(readOffset: Int, referencePosition: Long, cigarElem: CigarElement, elemOffset: Int): ReferenceSequenceContext = { val position = if (record.getReadMapped) { - Some(ReferencePosition(record.getContig.getContigName, referencePosition)) + Some(ReferencePosition(record.getContigName, referencePosition)) } else { None } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/util/Util.scala 
b/adam-core/src/main/scala/org/bdgenomics/adam/util/Util.scala index c918dcb711..57f2bbf728 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/util/Util.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/util/Util.scala @@ -16,9 +16,13 @@ * limitations under the License. */ package org.bdgenomics.adam.util - import org.bdgenomics.formats.avro.Contig +// We may want to reintroduce in the future MD5 concordance test for isSameContig as +// was done previously in the commented out code below +// With change to factoring Contig out of AlignmentRecord it was most +// straightforward to simply check for contig name equivalence for now +/* object Util { def isSameContig(left: Contig, right: Contig): Boolean = { val leftName = Option(left).map(_.getContigName) @@ -27,4 +31,11 @@ object Util { val rightMD5 = Option(right).map(_.getContigMD5) leftName == rightName && (leftMD5.isEmpty || rightMD5.isEmpty || leftMD5 == rightMD5) } +*/ + +object Util { + def isSameContig(left: String, right: String): Boolean = { + left == right + } + } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/converters/AlignmentRecordConverterSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/converters/AlignmentRecordConverterSuite.scala index e0ff82056b..c106d4f958 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/converters/AlignmentRecordConverterSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/converters/AlignmentRecordConverterSuite.scala @@ -66,14 +66,8 @@ class AlignmentRecordConverterSuite extends FunSuite { // add reference details adamRead.setRecordGroupName("record_group") adamRead.setRecordGroupSample("sample") - adamRead.setContig(Contig.newBuilder() - .setContigName("referencetest") - .build()) - adamRead.setMateContig(Contig.newBuilder() - .setContigName("matereferencetest") - .setContigLength(6L) - .setReferenceURL("test://chrom1") - .build()) + adamRead.setContigName("referencetest") + adamRead.setMateContigName("matereferencetest")
adamRead.setMateAlignmentStart(6L) // make sequence dictionary @@ -115,14 +109,8 @@ class AlignmentRecordConverterSuite extends FunSuite { // add reference details adamRead.setRecordGroupName("record_group") adamRead.setRecordGroupSample("sample") - adamRead.setContig(Contig.newBuilder() - .setContigName("referencetest") - .build()) - adamRead.setMateContig(Contig.newBuilder() - .setContigName("matereferencetest") - .setContigLength(6L) - .setReferenceURL("test://chrom1") - .build()) + adamRead.setContigName("referencetest") + adamRead.setMateContigName("matereferencetest") adamRead.setMateAlignmentStart(6L) // make sequence dictionary @@ -312,9 +300,7 @@ class AlignmentRecordConverterSuite extends FunSuite { test("converting a fragment with alignments should restore the alignments") { val alignments = List(AlignmentRecord.newBuilder() .setReadMapped(true) - .setContig(Contig.newBuilder() - .setContigName("1") - .build()) + .setContigName("1") .setStart(10L) .setEnd(20L) .setReadName("testRead") @@ -341,7 +327,7 @@ class AlignmentRecordConverterSuite extends FunSuite { assert(read.getCigar === "10M") assert(read.getSequence === "TACTGTGGGT") assert(read.getQual === "?????*****") - assert(read.getContig.getContigName === "1") + assert(read.getContigName === "1") } } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/converters/FragmentConverterSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/converters/FragmentConverterSuite.scala index d52e687e68..c72913e645 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/converters/FragmentConverterSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/converters/FragmentConverterSuite.scala @@ -41,7 +41,7 @@ class FragmentConverterSuite extends ADAMFunSuite { val convertedRead = convertedReads.head assert(convertedRead.getSequence === "ACACACAC") - assert(convertedRead.getContig.getContigName === "ctg") + assert(convertedRead.getContigName === "ctg") assert(convertedRead.getStart === 0L) 
assert(convertedRead.getEnd === 8L) } @@ -65,11 +65,11 @@ class FragmentConverterSuite extends ADAMFunSuite { val secondRead = reads.filter(_.getStart != 0L).head assert(firstRead.getSequence === "ACACACAC") - assert(firstRead.getContig.getContigName === "ctg") + assert(firstRead.getContigName === "ctg") assert(firstRead.getStart === 0L) assert(firstRead.getEnd === 8L) assert(secondRead.getSequence === "AATTCCGGCCTTAA") - assert(secondRead.getContig.getContigName === "ctg") + assert(secondRead.getContigName === "ctg") assert(secondRead.getStart === 14L) assert(secondRead.getEnd === 28L) } @@ -95,7 +95,7 @@ class FragmentConverterSuite extends ADAMFunSuite { assert(reads.length === 1) val read = reads(0) assert(read.getSequence === "ACACACACTGTGTGAATTCCGGCCTTAA") - assert(read.getContig.getContigName === "ctg") + assert(read.getContigName === "ctg") assert(read.getStart === 0L) assert(read.getEnd === 28L) } @@ -132,36 +132,36 @@ class FragmentConverterSuite extends ADAMFunSuite { assert(reads.length === 4) - val ctg1Reads = reads.filter(_.getContig.getContigName == "ctg1") + val ctg1Reads = reads.filter(_.getContigName == "ctg1") assert(ctg1Reads.length === 1) val ctg1Read = ctg1Reads.head assert(ctg1Read.getSequence === "ACACACACTGTGTGAATTCCGGCCTTAA") - assert(ctg1Read.getContig.getContigName === "ctg1") + assert(ctg1Read.getContigName === "ctg1") assert(ctg1Read.getStart === 0L) assert(ctg1Read.getEnd === 28L) - val ctg2Reads = reads.filter(_.getContig.getContigName == "ctg2") + val ctg2Reads = reads.filter(_.getContigName == "ctg2") assert(ctg2Reads.length === 2) val firstCtg2Read = ctg2Reads.filter(_.getStart == 0L).head val secondCtg2Read = ctg2Reads.filter(_.getStart != 0L).head assert(firstCtg2Read.getSequence === "ACACACAC") - assert(firstCtg2Read.getContig.getContigName === "ctg2") + assert(firstCtg2Read.getContigName === "ctg2") assert(firstCtg2Read.getStart === 0L) assert(firstCtg2Read.getEnd === 8L) assert(secondCtg2Read.getSequence === "AATTCCGGCCTTAA") 
- assert(secondCtg2Read.getContig.getContigName === "ctg2") + assert(secondCtg2Read.getContigName === "ctg2") assert(secondCtg2Read.getStart === 14L) assert(secondCtg2Read.getEnd === 28L) - val ctg3Reads = reads.filter(_.getContig.getContigName == "ctg3") + val ctg3Reads = reads.filter(_.getContigName == "ctg3") assert(ctg3Reads.length === 1) val ctg3Read = ctg3Reads.head assert(ctg3Read.getSequence === "AATTCCGGCCTTAA") - assert(ctg3Read.getContig.getContigName === "ctg3") + assert(ctg3Read.getContigName === "ctg3") assert(ctg3Read.getStart === 14L) assert(ctg3Read.getEnd === 28L) } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala index b2c35c66d2..8fdae451b6 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferencePositionSuite.scala @@ -28,7 +28,7 @@ class ReferencePositionSuite extends FunSuite { .build val read = AlignmentRecord.newBuilder() - .setContig(contig) + .setContigName(contig.getContigName) .setStart(1L) .setReadMapped(true) .build() diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala index 8afd815bd7..b0bd31c5d0 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala @@ -137,10 +137,7 @@ class ReferenceRegionSuite extends FunSuite { .setStart(1L) .setCigar("5M") .setEnd(6L) - .setContig(Contig.newBuilder - .setContigName("chr1") - .setContigLength(10L) - .build) + .setContigName("chr1") .build() assert(ReferenceRegion(read).contains(point("chr1", 1L))) @@ -192,7 +189,7 @@ class ReferenceRegionSuite extends FunSuite { val read = AlignmentRecord.newBuilder() .setStart(5L) 
.setSequence("ACGT") - .setContig(contig) + .setContigName(contig.getContigName) .setReadMapped(true) .setCigar("5M") .setEnd(10L) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/models/SingleReadBucketSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/models/SingleReadBucketSuite.scala index 72ef98d5d0..758872b7a6 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/models/SingleReadBucketSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/models/SingleReadBucketSuite.scala @@ -53,9 +53,7 @@ class SingleReadBucketSuite extends FunSuite { .setPrimaryAlignment(true) .setReadNegativeStrand(false) .setCigar("8M") - .setContig(Contig.newBuilder() - .setContigName("1") - .build()) + .setContigName("1") .setStart(10L) .setEnd(18L) .setInferredInsertSize(8L) @@ -69,9 +67,7 @@ class SingleReadBucketSuite extends FunSuite { .setPrimaryAlignment(true) .setReadNegativeStrand(true) .setCigar("8M") - .setContig(Contig.newBuilder() - .setContigName("1") - .build()) + .setContigName("1") .setStart(22L) .setEnd(30L) .setInferredInsertSize(8L) @@ -95,9 +91,7 @@ class SingleReadBucketSuite extends FunSuite { .setPrimaryAlignment(true) .setCigar("8M6H") .setBasesTrimmedFromEnd(6) - .setContig(Contig.newBuilder() - .setContigName("1") - .build()) + .setContigName("1") .setStart(10L) .setEnd(18L) .build(), AlignmentRecord.newBuilder() @@ -110,9 +104,7 @@ class SingleReadBucketSuite extends FunSuite { .setPrimaryAlignment(true) .setReadNegativeStrand(true) .setCigar("8M") - .setContig(Contig.newBuilder() - .setContigName("1") - .build()) + .setContigName("1") .setStart(22L) .setEnd(30L) .build()) @@ -127,9 +119,7 @@ class SingleReadBucketSuite extends FunSuite { .setSupplementaryAlignment(true) .setBasesTrimmedFromStart(8) .setCigar("8H6M") - .setContig(Contig.newBuilder() - .setContigName("2") - .build()) + .setContigName("2") .setStart(100L) .setEnd(106L) .build()) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala 
b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala index 4eea2176c8..0c9185e6c9 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala @@ -165,7 +165,7 @@ class ADAMContextSuite extends ADAMFunSuite { val a0 = AlignmentRecord.newBuilder() .setRecordGroupName("group0") .setReadName("read0") - .setContig(contig) + .setContigName(contig.getContigName) .setStart(100) .setPrimaryAlignment(true) .setReadPaired(false) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoinSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoinSuite.scala index 76fc5a2070..4e77e31ab4 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoinSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoinSuite.scala @@ -90,7 +90,7 @@ class BroadcastRegionJoinSuite extends ADAMFunSuite { .build val built = AlignmentRecord.newBuilder() - .setContig(contig) + .setContigName(contig.getContigName) .setStart(1L) .setReadMapped(true) .setCigar("1M") @@ -117,7 +117,7 @@ class BroadcastRegionJoinSuite extends ADAMFunSuite { .build val builder = AlignmentRecord.newBuilder() - .setContig(contig) + .setContigName(contig.getContigName) .setStart(1L) .setReadMapped(true) .setCigar("1M") @@ -154,7 +154,7 @@ class BroadcastRegionJoinSuite extends ADAMFunSuite { .build val built = AlignmentRecord.newBuilder() - .setContig(contig) + .setContigName(contig.getContigName) .setStart(1L) .setReadMapped(true) .setCigar("1M") @@ -194,14 +194,14 @@ class BroadcastRegionJoinSuite extends ADAMFunSuite { .build val builtRef1 = AlignmentRecord.newBuilder() - .setContig(contig1) + .setContigName(contig1.getContigName) .setStart(1L) .setReadMapped(true) .setCigar("1M") .setEnd(2L) .build() val builtRef2 = AlignmentRecord.newBuilder() - .setContig(contig2) + .setContigName(contig2.getContigName) 
.setStart(1) .setReadMapped(true) .setCigar("1M") @@ -243,14 +243,14 @@ class BroadcastRegionJoinSuite extends ADAMFunSuite { .build val builtRef1 = AlignmentRecord.newBuilder() - .setContig(contig1) + .setContigName(contig1.getContigName) .setStart(1L) .setReadMapped(true) .setCigar("1M") .setEnd(2L) .build() val builtRef2 = AlignmentRecord.newBuilder() - .setContig(contig2) + .setContigName(contig2.getContigName) .setStart(1L) .setReadMapped(true) .setCigar("1M") diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/GenomicPositionPartitionerSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/GenomicPositionPartitionerSuite.scala index d221930a8c..cffe849d4b 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/GenomicPositionPartitionerSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/GenomicPositionPartitionerSuite.scala @@ -82,7 +82,7 @@ class GenomicPositionPartitionerSuite extends ADAMFunSuite { val count = 1000 val pos = sc.parallelize((1 to count).map(i => adamRecord("chr1", "read_%d".format(i), rand.nextInt(100), readMapped = true)), 1) val parts = 200 - val pairs = pos.map(p => (ReferencePosition(p.getContig.getContigName, p.getStart), p)) + val pairs = pos.map(p => (ReferencePosition(p.getContigName, p.getStart), p)) val parter = new RangePartitioner(parts, pairs) val partitioned = pairs.sortByKey().partitionBy(parter) @@ -109,7 +109,7 @@ class GenomicPositionPartitionerSuite extends ADAMFunSuite { assert(rdd.count() === 200) val keyed = - rdd.map(rec => (ReferencePosition(rec.getContig.getContigName, rec.getStart), rec)).sortByKey() + rdd.map(rec => (ReferencePosition(rec.getContigName, rec.getStart), rec)).sortByKey() val keys = keyed.map(_._1).collect() assert(!keys.exists(rp => parter.getPartition(rp) < 0 || parter.getPartition(rp) >= parts)) @@ -131,7 +131,7 @@ class GenomicPositionPartitionerSuite extends ADAMFunSuite { .build AlignmentRecord.newBuilder() - .setContig(contig) + 
.setContigName(contig.getContigName) .setReadName(readName) .setReadMapped(readMapped) .setStart(start) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoinSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoinSuite.scala index f13adcf329..283c13191a 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoinSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoinSuite.scala @@ -40,7 +40,7 @@ class ShuffleRegionJoinSuite extends ADAMFunSuite { .build val built = AlignmentRecord.newBuilder() - .setContig(contig) + .setContigName(contig.getContigName) .setStart(1L) .setReadMapped(true) .setCigar("1M") @@ -90,14 +90,14 @@ class ShuffleRegionJoinSuite extends ADAMFunSuite { .build val builtRef1 = AlignmentRecord.newBuilder() - .setContig(contig1) + .setContigName(contig1.getContigName) .setStart(1L) .setReadMapped(true) .setCigar("1M") .setEnd(2L) .build() val builtRef2 = AlignmentRecord.newBuilder() - .setContig(contig2) + .setContigName(contig2.getContigName) .setStart(1) .setReadMapped(true) .setCigar("1M") @@ -149,14 +149,14 @@ class ShuffleRegionJoinSuite extends ADAMFunSuite { .build val builtRef1 = AlignmentRecord.newBuilder() - .setContig(contig1) + .setContigName(contig1.getContigName) .setStart(1L) .setReadMapped(true) .setCigar("1M") .setEnd(2L) .build() val builtRef2 = AlignmentRecord.newBuilder() - .setContig(contig2) + .setContigName(contig2.getContigName) .setStart(1L) .setReadMapped(true) .setCigar("1M") diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctionsSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctionsSuite.scala index 32c3b7a2f7..6a2a2e2b0b 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctionsSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDFunctionsSuite.scala @@ -41,7 +41,7 @@ class 
AlignmentRecordRDDFunctionsSuite extends ADAMFunSuite { .setContigName(random.nextInt(numReadsToCreate / 10).toString) .build val start = random.nextInt(1000000) - builder.setContig(contig).setStart(start).setEnd(start) + builder.setContigName(contig.getContigName).setStart(start).setEnd(start) } builder.setReadName((0 until 20).map(i => (random.nextInt(100) + 64)).mkString) builder.build() @@ -53,7 +53,7 @@ class AlignmentRecordRDDFunctionsSuite extends ADAMFunSuite { assert(unmapped.forall(p => p._2 > mapped.takeRight(1)(0)._2)) // Make sure that we appropriately sorted the reads val expectedSortedReads = mapped.sortWith( - (a, b) => a._1.getContig.getContigName.toString < b._1.getContig.getContigName.toString && a._1.getStart < b._1.getStart) + (a, b) => a._1.getContigName.toString < b._1.getContigName.toString && a._1.getStart < b._1.getStart) assert(expectedSortedReads === mapped) } @@ -350,7 +350,7 @@ class AlignmentRecordRDDFunctionsSuite extends ADAMFunSuite { // that some fields should be disregarded if the read is not mapped if (p1.getReadMapped && p2.getReadMapped) { assert(p1.getDuplicateRead === p2.getDuplicateRead) - assert(p1.getContig.getContigName === p2.getContig.getContigName) + assert(p1.getContigName === p2.getContigName) assert(p1.getStart === p2.getStart) assert(p1.getEnd === p2.getEnd) assert(p1.getCigar === p2.getCigar) @@ -371,7 +371,7 @@ class AlignmentRecordRDDFunctionsSuite extends ADAMFunSuite { assert(p1.getMateMapped === p2.getMateMapped) if (p1.getMateMapped && p2.getMateMapped) { assert(p1.getMateNegativeStrand === p2.getMateNegativeStrand) - assert(p1.getMateContig.getContigName === p2.getMateContig.getContigName) + assert(p1.getMateContigName === p2.getMateContigName) assert(p1.getMateAlignmentStart === p2.getMateAlignmentStart) assert(p1.getMateAlignmentEnd === p2.getMateAlignmentEnd) } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MDTaggingSuite.scala 
b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MDTaggingSuite.scala index e6c0929b82..840c35b764 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MDTaggingSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MDTaggingSuite.scala @@ -45,7 +45,7 @@ class MDTaggingSuite extends ADAMFunSuite { id -> mdTag, AlignmentRecord .newBuilder - .setContig(contig) + .setContigName(contig.getContigName) .setStart(start.toLong) .setEnd(end.toLong) .setSequence(seq) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala index 2f68b8bbfb..6fab0b1676 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala @@ -49,7 +49,7 @@ class MarkDuplicatesSuite extends ADAMFunSuite { .build AlignmentRecord.newBuilder() - .setContig(contig) + .setContigName(contig.getContigName) .setStart(start) .setQual(qual) .setCigar(cigar) @@ -79,14 +79,14 @@ class MarkDuplicatesSuite extends ADAMFunSuite { readName = readName, avgPhredScore = avgPhredScore) firstOfPair.setReadInFragment(0) firstOfPair.setMateMapped(true) - firstOfPair.setMateContig(secondContig) + firstOfPair.setMateContigName(secondContig.getContigName) firstOfPair.setMateAlignmentStart(secondStart) firstOfPair.setReadPaired(true) val secondOfPair = createMappedRead(secondReferenceName, secondStart, secondEnd, readName = readName, avgPhredScore = avgPhredScore, isNegativeStrand = true) secondOfPair.setReadInFragment(1) secondOfPair.setMateMapped(true) - secondOfPair.setMateContig(firstContig) + secondOfPair.setMateContigName(firstContig.getContigName) secondOfPair.setMateAlignmentStart(firstStart) secondOfPair.setReadPaired(true) Seq(firstOfPair, secondOfPair) diff --git 
a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala index 04b34719bc..e158128afa 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala @@ -49,9 +49,7 @@ class IndelRealignmentTargetSuite extends ADAMFunSuite { .setReadNegativeStrand(false) .setMapq(60) .setQual(sequence) // no typo, we just don't care - .setContig(Contig.newBuilder() - .setContigName("1") - .build()) + .setContigName("1") .setMismatchingPositions(mdtag) .build()) } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala index c1ed855105..63d0477b85 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndelsSuite.scala @@ -182,7 +182,7 @@ class RealignIndelsSuite extends ADAMFunSuite { .setContigName("chr1") .build() val reads = Seq(AlignmentRecord.newBuilder() - .setContig(ctg) + .setContigName(ctg.getContigName) .setStart(1L) .setEnd(4L) .setSequence("AAA") @@ -191,7 +191,7 @@ class RealignIndelsSuite extends ADAMFunSuite { .setReadMapped(true) .setMismatchingPositions("3") .build(), AlignmentRecord.newBuilder() - .setContig(ctg) + .setContigName(ctg.getContigName) .setStart(9L) .setEnd(12L) .setSequence("AAA") @@ -213,7 +213,7 @@ class RealignIndelsSuite extends ADAMFunSuite { .setContigName("chr1") .build() val reads = sc.parallelize(Seq(AlignmentRecord.newBuilder() - .setContig(ctg) + .setContigName(ctg.getContigName) .setStart(1L) .setEnd(4L) .setSequence("AAA") @@ -222,7 +222,7 @@ class RealignIndelsSuite extends 
ADAMFunSuite { .setReadMapped(true) .setMismatchingPositions("3") .build(), AlignmentRecord.newBuilder() - .setContig(ctg) + .setContigName(ctg.getContigName) .setStart(10L) .setEnd(13L) .setSequence("AAA") @@ -231,7 +231,7 @@ class RealignIndelsSuite extends ADAMFunSuite { .setReadMapped(true) .setMismatchingPositions("3") .build(), AlignmentRecord.newBuilder() - .setContig(ctg) + .setContigName(ctg.getContigName) .setStart(4L) .setEnd(7L) .setSequence("AAA") @@ -240,7 +240,7 @@ class RealignIndelsSuite extends ADAMFunSuite { .setReadMapped(true) .setMismatchingPositions("3") .build(), AlignmentRecord.newBuilder() - .setContig(ctg) + .setContigName(ctg.getContigName) .setStart(7L) .setEnd(10L) .setSequence("AAA") diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rich/DecadentReadSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rich/DecadentReadSuite.scala index 960672382c..5ae3155d12 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rich/DecadentReadSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rich/DecadentReadSuite.scala @@ -36,7 +36,7 @@ class DecadentReadSuite extends ADAMFunSuite { .newBuilder() .setReadMapped(true) .setStart(1000) - .setContig(contig) + .setContigName(contig.getContigName) .setMismatchingPositions("10") .setSequence("AACCTTGGC") .setQual("FFFFFFFFF") @@ -58,7 +58,7 @@ class DecadentReadSuite extends ADAMFunSuite { .newBuilder() .setReadMapped(true) .setStart(1000) - .setContig(contig) + .setContigName(contig.getContigName) .setMismatchingPositions("1TT10") .setSequence("ATTGGGGGGGGGG") .setQual("FFFFFFFFFFFFF") @@ -80,7 +80,7 @@ class DecadentReadSuite extends ADAMFunSuite { .newBuilder() .setReadMapped(true) .setStart(1000) - .setContig(contig) + .setContigName(contig.getContigName) .setMismatchingPositions("10") .setSequence("AACCTTGGC") .setCigar("9M1H").build()) @@ -91,9 +91,7 @@ class DecadentReadSuite extends ADAMFunSuite { test("converting bad read should fail") { val readBad = 
AlignmentRecord.newBuilder() - .setContig(Contig.newBuilder() - .setContigName("1") - .build()) + .setContigName("1") .setStart(248262648L) .setEnd(248262721L) .setMapq(23) @@ -111,9 +109,7 @@ class DecadentReadSuite extends ADAMFunSuite { def badGoodReadRDD: RDD[AlignmentRecord] = { val readBad = AlignmentRecord.newBuilder() - .setContig(Contig.newBuilder() - .setContigName("1") - .build()) + .setContigName("1") .setStart(248262648L) .setEnd(248262721L) .setMapq(23) @@ -124,9 +120,7 @@ class DecadentReadSuite extends ADAMFunSuite { .setMismatchingPositions("3^C71") .build() val readGood = AlignmentRecord.newBuilder() - .setContig(Contig.newBuilder() - .setContigName("1") - .build()) + .setContigName("1") .setStart(248262648L) .setEnd(248262721L) .setMapq(23) diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/rich/RichAlignmentRecordSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/rich/RichAlignmentRecordSuite.scala index 804f967fa9..ca90d2f6c8 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/rich/RichAlignmentRecordSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/rich/RichAlignmentRecordSuite.scala @@ -69,14 +69,14 @@ class RichAlignmentRecordSuite extends FunSuite { test("Cigar Clipping Sequence") { val contig = Contig.newBuilder.setContigName("chr1").build - val softClippedRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(100).setCigar("10S90M").setContig(contig).build() + val softClippedRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(100).setCigar("10S90M").setContigName(contig.getContigName).build() assert(softClippedRead.referencePositions(0).map(_.pos) == Some(90L)) } test("tags contains optional fields") { val contig = Contig.newBuilder.setContigName("chr1").build - val rec = AlignmentRecord.newBuilder().setAttributes("XX:i:3\tYY:Z:foo").setContig(contig).build() + val rec = AlignmentRecord.newBuilder().setAttributes("XX:i:3\tYY:Z:foo").setContigName(contig.getContigName).build() 
assert(rec.tags.size === 2) assert(rec.tags(0) === Attribute("XX", TagType.Integer, 3)) assert(rec.tags(1) === Attribute("YY", TagType.String, "foo")) @@ -86,36 +86,36 @@ class RichAlignmentRecordSuite extends FunSuite { val contig = Contig.newBuilder.setContigName("chr1").build - val hardClippedRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("90M10H").setContig(contig).build() + val hardClippedRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("90M10H").setContigName(contig.getContigName).build() assert(hardClippedRead.referencePositions.length == 90) assert(hardClippedRead.referencePositions(0).map(_.pos) == Some(1000L)) - val softClippedRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("10S90M").setContig(contig).build() + val softClippedRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("10S90M").setContigName(contig.getContigName).build() assert(softClippedRead.referencePositions.length == 100) assert(softClippedRead.referencePositions(0).map(_.pos) == Some(990L)) assert(softClippedRead.referencePositions(10).map(_.pos) == Some(1000L)) - val doubleMatchNonsenseRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("10M10M").setContig(contig).build() + val doubleMatchNonsenseRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("10M10M").setContigName(contig.getContigName).build() Range(0, 20).foreach(i => assert(doubleMatchNonsenseRead.referencePositions(i).map(_.pos) == Some(1000 + i))) - val deletionRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("5M5D10M").setContig(contig).build() + val deletionRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("5M5D10M").setContigName(contig.getContigName).build() assert(deletionRead.referencePositions.length == 15) assert(deletionRead.referencePositions(0).map(_.pos) == Some(1000L)) 
assert(deletionRead.referencePositions(5).map(_.pos) == Some(1010L)) - val insertionRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("10M2I10M").setContig(contig).build() + val insertionRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("10M2I10M").setContigName(contig.getContigName).build() assert(insertionRead.referencePositions.length == 22) assert(insertionRead.referencePositions(0).map(_.pos) == Some(1000L)) assert(insertionRead.referencePositions(10).map(_.pos) == None) assert(insertionRead.referencePositions(12).map(_.pos) == Some(1010L)) - val indelRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("10M3D10M2I").setContig(contig).build() + val indelRead = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("10M3D10M2I").setContigName(contig.getContigName).build() assert(indelRead.referencePositions.length == 22) assert(indelRead.referencePositions(0).map(_.pos) == Some(1000L)) assert(indelRead.referencePositions(10).map(_.pos) == Some(1013L)) assert(indelRead.referencePositions(20).map(_.pos) == None) - val hg00096read = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("1S28M1D32M1I15M1D23M").setContig(contig).build() + val hg00096read = AlignmentRecord.newBuilder().setReadMapped(true).setStart(1000).setCigar("1S28M1D32M1I15M1D23M").setContigName(contig.getContigName).build() assert(hg00096read.referencePositions.length == 100) assert(hg00096read.referencePositions(0).map(_.pos) == Some(999L)) assert(hg00096read.referencePositions(1).map(_.pos) == Some(1000L)) @@ -137,7 +137,7 @@ class RichAlignmentRecordSuite extends FunSuite { test("read overlap reference position") { val contig = Contig.newBuilder.setContigName("chr1").build - val record = RichAlignmentRecord(AlignmentRecord.newBuilder().setReadMapped(true).setCigar("10M").setStart(10L).setEnd(20L).setContig(contig).build()) + val record = 
RichAlignmentRecord(AlignmentRecord.newBuilder().setReadMapped(true).setCigar("10M").setStart(10L).setEnd(20L).setContigName(contig.getContigName).build()) assert(record.overlapsReferencePosition(ReferencePosition("chr1", 10)) == true) assert(record.overlapsReferencePosition(ReferencePosition("chr1", 14)) == true) @@ -148,7 +148,7 @@ class RichAlignmentRecordSuite extends FunSuite { test("read overlap same position different contig") { val contig = Contig.newBuilder.setContigName("chr1").build - val record = RichAlignmentRecord(AlignmentRecord.newBuilder().setReadMapped(true).setCigar("10M").setStart(10L).setEnd(20L).setContig(contig).build()) + val record = RichAlignmentRecord(AlignmentRecord.newBuilder().setReadMapped(true).setCigar("10M").setStart(10L).setEnd(20L).setContigName(contig.getContigName).build()) assert(record.overlapsReferencePosition(ReferencePosition("chr2", 10)) == false) } diff --git a/adam-core/src/test/scala/org/bdgenomics/adam/util/UtilSuite.scala b/adam-core/src/test/scala/org/bdgenomics/adam/util/UtilSuite.scala index 0d4069d784..c222e1d8aa 100644 --- a/adam-core/src/test/scala/org/bdgenomics/adam/util/UtilSuite.scala +++ b/adam-core/src/test/scala/org/bdgenomics/adam/util/UtilSuite.scala @@ -24,20 +24,23 @@ class UtilSuite extends ADAMFunSuite { test("isSameConfig") { val a = Contig.newBuilder().setContigName("foo") val b = Contig.newBuilder().setContigName("bar") - assert(!Util.isSameContig(a.build(), b.build())) + assert(!Util.isSameContig(a.build().getContigName, b.build().getContigName)) b.setContigName("foo") - assert(Util.isSameContig(a.build(), b.build())) + assert(Util.isSameContig(a.build().getContigName, b.build().getContigName)) // proper null handling assert(Util.isSameContig(null, null)) - assert(!Util.isSameContig(null, b.build())) - assert(!Util.isSameContig(a.build(), null)) + assert(!Util.isSameContig(null, b.build().getContigName)) + assert(!Util.isSameContig(a.build().getContigName, null)) - a.setContigMD5("md5") + // 
Prior to removing Contig from AlignmentRecord the code below + // tested for MD5 concordance. We may want to revisit how contig MD5 concordance + // is checked in future + //a.setContigMD5("md5") // both md5s need to be set to change equality - assert(!Util.isSameContig(a.build(), b.build())) - b.setContigMD5("md5") - assert(Util.isSameContig(a.build(), b.build())) + //assert(!Util.isSameContig(a.build().getContigName, b.build().getContigName)) + //b.setContigMD5("md5") + //assert(Util.isSameContig(a.build().getContigName, b.build().getContigName)) } } diff --git a/pom.xml b/pom.xml index 274496a2ba..752b652656 100644 --- a/pom.xml +++ b/pom.xml @@ -316,7 +316,7 @@ org.bdgenomics.bdg-formats bdg-formats - 0.7.0 + 0.7.2-SNAPSHOT commons-io