Skip to content

Commit

Permalink
Remove back reference between VariantAnnotation and Variant
Browse files Browse the repository at this point in the history
  • Loading branch information
heuermh authored and fnothaft committed Dec 9, 2016
1 parent fa83f4b commit 2398aa2
Show file tree
Hide file tree
Showing 20 changed files with 41 additions and 446 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ ADAM ACTIONS
CONVERSION OPERATIONS
vcf2adam : Convert a VCF file to the corresponding ADAM format
adam2vcf : Convert ADAM variants to the VCF format
anno2adam : Convert an annotation file (in VCF format) to the corresponding ADAM format
fasta2adam : Converts a text FASTA sequence file into an ADAMNucleotideContig Parquet file which represents assembled sequences.
adam2fasta : Convert ADAM nucleotide contig fragments to FASTA files
transformFeatures : Convert a file with sequence features into corresponding ADAM format and vice versa
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,7 @@ import org.bdgenomics.adam.rdd.fragment.FragmentRDD
import org.bdgenomics.adam.rdd.read.AlignmentRecordRDD
import org.bdgenomics.adam.rdd.variant.{
GenotypeRDD,
VariantRDD,
VariantAnnotationRDD
VariantRDD
}

object JavaADAMContext {
Expand Down Expand Up @@ -84,16 +83,6 @@ class JavaADAMContext(val ac: ADAMContext) extends Serializable {
ac.loadFeatures(filePath)
}

/**
 * Loads variant annotations from the given path.
 *
 * This is a thin wrapper that forwards to loadVariantAnnotations on the
 * wrapped ADAMContext (`ac`).
 *
 * @param filePath The path to load the file from.
 * @return Returns a VariantAnnotationRDD.
 */
def loadVariantAnnotations(filePath: java.lang.String): VariantAnnotationRDD =
  ac.loadVariantAnnotations(filePath)

/**
* Loads in genotypes.
*
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -86,14 +86,4 @@ class JavaADAMContextSuite extends ADAMFunSuite {

assert(newRdd.jrdd.count() === 6)
}

ignore("can read and write a small .vcf as annotations") {
  // read the annotations from the path that copyResource returns for small.vcf
  val vcfPath = copyResource("small.vcf")
  val annotations = sc.loadVariantAnnotations(vcfPath)
  assert(annotations.jrdd.count() === 6)

  // run the annotations through the conduit and check the record count again
  val conduited = JavaADAMAnnotationConduit.conduit(annotations, sc)
  assert(conduited.jrdd.count() === 6)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ object ADAMMain {
List(
Vcf2ADAM,
ADAM2Vcf,
VcfAnnotation2ADAM,
Fasta2ADAM,
ADAM2Fasta,
ADAM2Fastq,
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -322,10 +322,7 @@ private[adam] class VariantContextConverter(
builder.setFiltersPassed(!vc.isFiltered)
}
if (vc.isFiltered) {
builder.setFiltersFailed(new java.util.ArrayList(vc.getFilters));
}
if (vc.getAttributeAsBoolean("SOMATIC", false)) {
builder.setSomatic(true)
builder.setFiltersFailed(new java.util.ArrayList(vc.getFilters))
}
builder.build
}
Expand Down Expand Up @@ -1234,12 +1231,6 @@ private[adam] class VariantContextConverter(
case (true, true) => vcb.passFilters
}

val somatic: java.lang.Boolean = Option(variant.getSomatic).getOrElse(false)
if (somatic) {
vcb.attribute("SOMATIC", true)
}

// attach genotypes
try {
Some(vcb.genotypes(vc.genotypes.map(g => bdgConvFn(g)))
.make)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -169,16 +169,6 @@ object ReferenceRegion {
ReferenceRegion(variant.getContigName, variant.getStart, variant.getEnd)
}

/**
 * Builds a reference region from a variant annotation.
 *
 * The region is built from the variant returned by the annotation's
 * getVariant accessor.
 *
 * @param annotation VariantAnnotation to extract region from.
 * @return The region covered by the variant attached to the given annotation.
 */
def apply(annotation: VariantAnnotation): ReferenceRegion =
  ReferenceRegion(annotation.getVariant)

private def checkRead(record: AlignmentRecord) {
require(record.getReadMapped,
"Cannot build reference region for unmapped read %s.".format(record))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ import org.bdgenomics.formats.avro.VariantAnnotation
*/
object VariantAnnotationField extends FieldEnumeration(VariantAnnotation.SCHEMA$) {

val variant, ancestralAllele, alleleCount, readDepth, forwardReadDepth, reverseReadDepth, alleleFrequency, cigar, dbSnp, hapMap2, hapMap3, validated, thousandGenomes, transcriptEffects, attributes = SchemaValue
val ancestralAllele, alleleCount, readDepth, forwardReadDepth, reverseReadDepth, alleleFrequency, cigar, dbSnp, hapMap2, hapMap3, validated, thousandGenomes, somatic, transcriptEffects, attributes = SchemaValue
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@ import org.bdgenomics.formats.avro.Variant
*/
object VariantField extends FieldEnumeration(Variant.SCHEMA$) {

val contigName, start, end, names, referenceAllele, alternateAllele, filtersApplied, filtersPassed, filtersFailed, somatic = SchemaValue
val contigName, start, end, names, referenceAllele, alternateAllele, filtersApplied, filtersPassed, filtersFailed = SchemaValue
}
58 changes: 0 additions & 58 deletions adam-core/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1267,64 +1267,6 @@ class ADAMContext(@transient val sc: SparkContext) extends Serializable with Log
FragmentRDD(rdd, sd, rgd)
}

/**
 * Loads variant annotations stored in VCF format.
 *
 * The path is read with loadVcf, and the result is then converted with
 * toVariantAnnotationRDD.
 *
 * @param filePath The path to the VCF file(s) to load annotations from.
 * @return Returns VariantAnnotationRDD.
 */
def loadVcfAnnotations(filePath: String): VariantAnnotationRDD = {
  val variantContexts = loadVcf(filePath)
  variantContexts.toVariantAnnotationRDD
}

/**
 * Loads VariantAnnotations stored in Parquet, with metadata.
 *
 * Header lines and sequences are loaded from the same path as the records
 * and are attached to the returned VariantAnnotationRDD.
 *
 * @param filePath The path to load files from.
 * @param predicate An optional predicate to push down into the file.
 * @param projection An optional projection to use for reading.
 * @return Returns VariantAnnotationRDD.
 */
def loadParquetVariantAnnotations(
  filePath: String,
  predicate: Option[FilterPredicate] = None,
  projection: Option[Schema] = None): VariantAnnotationRDD = {

  // metadata first: header lines, then sequences
  val headerLines = loadHeaderLines(filePath)
  val sequences = loadAvroSequences(filePath)

  // the records themselves, with the optional predicate and projection applied
  val annotations = loadParquet[VariantAnnotation](filePath, predicate, projection)

  VariantAnnotationRDD(annotations, sequences, headerLines)
}

/**
 * Loads VariantAnnotations into an RDD, choosing the storage format from
 * the file path.
 *
 * A path ending in ".vcf" is loaded as VCF; any other path is loaded as
 * Parquet. The projection is only passed along on the Parquet route and is
 * ignored for VCF.
 *
 * @see loadVcfAnnotations
 * @see loadParquetVariantAnnotations
 *
 * @param filePath The path to load files from.
 * @param projection An optional projection to use for reading.
 * @return Returns VariantAnnotationRDD.
 */
def loadVariantAnnotations(
  filePath: String,
  projection: Option[Schema] = None): VariantAnnotationRDD = {
  val isVcf = filePath.endsWith(".vcf")
  if (!isVcf) {
    log.info(s"Loading $filePath as Parquet containing VariantAnnotations.")
    loadParquetVariantAnnotations(filePath, None, projection)
  } else {
    log.info(s"Loading $filePath as VCF, and converting to variant annotations. Projection is ignored.")
    loadVcfAnnotations(filePath)
  }
}

/**
* Loads Features from a file, autodetecting the file type.
*
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -60,29 +60,6 @@ case class VariantContextRDD(rdd: RDD[VariantContext],
@transient headerLines: Seq[VCFHeaderLine] = SupportedHeaderLines.allHeaderLines) extends MultisampleGenomicRDD[VariantContext, VariantContextRDD]
with Logging {

/**
 * Left outer joins this RDD against database variant annotations.
 *
 * Both sides are keyed by variant. Because the join is a left outer join,
 * every variant context in this RDD is kept, whether or not a matching
 * annotation exists.
 *
 * @param ann Annotation RDD to join against.
 * @return Returns a VariantContextRDD where annotations have been filled in.
 */
def joinVariantAnnotations(ann: VariantAnnotationRDD): VariantContextRDD = {
  val contextsByVariant = rdd.keyBy(_.variant)
  val annotationsByVariant = ann.rdd.keyBy(dba => RichVariant(dba.getVariant))

  val joined = contextsByVariant
    .leftOuterJoin(annotationsByVariant)
    .values
    .map { case (context, maybeAnnotation) => VariantContext(context, maybeAnnotation) }

  replaceRdd(joined)
}

/**
 * Extracts the variant annotations carried by the variant contexts in this
 * RDD, keeping this RDD's sequences and header lines.
 *
 * @return Returns a VariantAnnotationRDD containing the variant
 *   annotations attached to this VariantContextRDD.
 */
def toVariantAnnotationRDD: VariantAnnotationRDD =
  VariantAnnotationRDD(
    rdd.flatMap(context => context.annotations),
    sequences,
    headerLines)

/**
* @return Returns a GenotypeRDD containing the Genotypes in this RDD.
*/
Expand Down
Loading

0 comments on commit 2398aa2

Please sign in to comment.