From e3540ac69146a122933c79152b93c7a4cbf7875d Mon Sep 17 00:00:00 2001 From: Alyssa Morrow Date: Mon, 29 Jan 2018 16:44:44 -0800 Subject: [PATCH] added java friendly version of filterByOverlappingRegions --- .../adam/cli/TransformAlignments.scala | 3 +-- .../org/bdgenomics/adam/rdd/GenomicRDD.scala | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformAlignments.scala b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformAlignments.scala index c685b3c1f2..dfc332d081 100644 --- a/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformAlignments.scala +++ b/adam-cli/src/main/scala/org/bdgenomics/adam/cli/TransformAlignments.scala @@ -17,7 +17,6 @@ */ package org.bdgenomics.adam.cli -import scala.collection.JavaConverters._ import htsjdk.samtools.ValidationStringency import java.time.Instant import org.apache.parquet.filter2.dsl.Dsl._ @@ -488,7 +487,7 @@ class TransformAlignments(protected val args: TransformAlignmentsArgs) extends B if (args.regionPredicate != null) { val loci = ReferenceRegion.fromString(args.regionPredicate) - loadedReads.filterByOverlappingRegions(loci.asJava) + loadedReads.filterByOverlappingRegions(loci) } else { loadedReads } diff --git a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala index 1ab732f71e..90b0fa94e4 100644 --- a/adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala +++ b/adam-core/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala @@ -725,6 +725,25 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] extends Logging { * @return Returns a new GenomicRDD containing only data that overlaps the * querys region. 
*/ + def filterByOverlappingRegions(querys: Iterable[ReferenceRegion]): U = { + replaceRdd(rdd.filter(elem => { + + val regions = getReferenceRegions(elem) + + querys.exists(query => { /* true when any region of elem overlaps any query region */ + regions.exists(_.overlaps(query)) + }) + }), optPartitionMap) + } + + /** + * Runs a filter that selects data in the underlying RDD that overlaps + * several genomic regions. Java friendly version. + * + * @param querys The regions to query for. + * @return Returns a new GenomicRDD containing only data that overlaps the + * query regions. + */ def filterByOverlappingRegions(querys: java.lang.Iterable[ReferenceRegion]): U = { replaceRdd(rdd.filter(elem => {