diff --git a/build.sbt b/build.sbt index 192233aa58..abb411cc40 100644 --- a/build.sbt +++ b/build.sbt @@ -1,57 +1,41 @@ organization := "org.hammerlab.adam" -name := ParentPlugin.sparkName("adam-core") +name := sparkName("adam-core") version := "0.20.4-SNAPSHOT" -sonatypeProfileName := "org.hammerlab" - -val utilsVersion = "0.2.9" - hadoopVersion := "2.7.3" scalatestVersion := "2.2.1" -testDeps ++= Seq( - "org.bdgenomics.utils" %% "utils-misc" % utilsVersion classifier("tests") exclude("org.apache.spark", "*"), +addSparkDeps +publishTestJar +enableScalariform + +// Using ":=" here to clobber the usual default hammerlab-test-libs that are added by parent-plugin, which use +// Scalatest 3.0.0. +testDeps := Seq( "org.mockito" % "mockito-core" % "1.10.19" ) -providedDeps ++= Seq( - libraries.value('hadoop), - libraries.value('spark) -) +testJarTestDeps += (libraries.value('bdg_utils_misc) exclude("org.apache.spark", "*")) libraryDependencies ++= Seq( - "org.bdgenomics.utils" %% "utils-metrics" % utilsVersion, - "org.bdgenomics.utils" %% "utils-misc" % utilsVersion, - "org.bdgenomics.utils" %% "utils-io" % utilsVersion exclude("com.fasterxml.jackson.core", "*"), - "org.bdgenomics.utils" %% "utils-cli" % utilsVersion, - "org.bdgenomics.utils" %% "utils-intervalrdd" % utilsVersion, - "com.esotericsoftware.kryo" % "kryo" % "2.24.0", - "org.bdgenomics.bdg-formats" % "bdg-formats" % "0.10.0", - "commons-io" % "commons-io" % "2.4", + libraries.value('bdg_formats), + libraries.value('bdg_utils_cli), + libraries.value('bdg_utils_intervalrdd), + libraries.value('bdg_utils_io), + libraries.value('bdg_utils_metrics), + libraries.value('bdg_utils_misc), + libraries.value('commons_io), + libraries.value('hadoop_bam) exclude("com.github.samtools", "htsjdk"), + libraries.value('log4j), + "com.github.samtools" % "htsjdk" % "2.5.0", + "com.netflix.servo" % "servo-core" % "0.10.0", "it.unimi.dsi" % "fastutil" % "6.6.5", "org.apache.avro" % "avro" % "1.8.0", - "org.slf4j" % "slf4j-log4j12" % "1.7.21", + "org.apache.httpcomponents" % "httpclient" % "4.5.2", "org.apache.parquet" % "parquet-avro" % "1.8.1", "org.apache.parquet" % "parquet-scala_2.10" % "1.8.1" exclude("org.scala-lang", "scala-library"), - libraries.value('hadoop_bam), - "com.github.samtools" % "htsjdk" % "2.5.0", - "org.apache.httpcomponents" % "httpclient" % "4.5.2", - "com.netflix.servo" % "servo-core" % "0.10.0", - "org.hammerlab" %% "genomic-loci" % "1.4.2" exclude("com.github.samtools", "htsjdk") + "org.hammerlab" %% "genomic-loci" % "1.4.4" exclude("com.github.samtools", "htsjdk") ) - -import scalariform.formatter.preferences._ -import com.typesafe.sbt.SbtScalariform -import com.typesafe.sbt.SbtScalariform.ScalariformKeys - -SbtScalariform.defaultScalariformSettings - -ScalariformKeys.preferences := ScalariformKeys.preferences.value - .setPreference(AlignParameters, true) - .setPreference(CompactStringConcatenation, false) - .setPreference(AlignSingleLineCaseStatements, true) - .setPreference(DoubleIndentClassDeclaration, true) - .setPreference(PreserveDanglingCloseParenthesis, true) diff --git a/project/plugins.sbt b/project/plugins.sbt index c7a556c033..37f576a434 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -1,2 +1 @@ -addSbtPlugin("org.hammerlab" % "sbt-parent" % "1.2.3") -addSbtPlugin("com.typesafe.sbt" % "sbt-scalariform" % "1.3.0") +addSbtPlugin("org.hammerlab" % "sbt-parent" % "1.4.0") diff --git a/src/main/java/org/bdgenomics/adam/io/FastqRecordReader.java b/src/main/java/org/bdgenomics/adam/io/FastqRecordReader.java 
index ba79724427..37fdc1a705 100644 --- a/src/main/java/org/bdgenomics/adam/io/FastqRecordReader.java +++ b/src/main/java/org/bdgenomics/adam/io/FastqRecordReader.java @@ -40,7 +40,7 @@ * a single Text output. This is then fed into the FastqConverter, which * converts the single Text instance into two AlignmentRecords. */ -abstract class FastqRecordReader extends RecordReader { +abstract class FastqRecordReader extends RecordReader { /* * fastq format: * := + @@ -57,53 +57,53 @@ abstract class FastqRecordReader extends RecordReader { * For now I'm going to assume single-line sequences. This works for our sequencing * application. We'll see if someone complains in other applications. */ - + /** * First valid data index in the stream. */ private long start; - + /** * First index value beyond the slice, i.e. slice is in range [start,end). */ protected long end; - + /** * Current position in file. */ protected long pos; - + /** * Path of the file being parsed. */ private Path file; - + /** * The line reader we are using to read the file. */ private LineReader lineReader; - + /** * The input stream we are using to read the file. */ private InputStream inputStream; - + /** * The text for a single record pair we have parsed out. * Hadoop's RecordReader contract requires us to save this as state. */ private Text currentValue; - + /** * Newline string for matching on. */ private static final byte[] newline = "\n".getBytes(); - + /** * Maximum length for a read string. */ private static final int MAX_LINE_LENGTH = 10000; - + /** * Builds a new record reader given a config file and an input split. * @@ -115,13 +115,13 @@ protected FastqRecordReader(final Configuration conf, final FileSplit split) thr file = split.getPath(); start = split.getStart(); end = start + split.getLength(); - + FileSystem fs = file.getFileSystem(conf); FSDataInputStream fileIn = fs.open(file); - + CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); CompressionCodec codec = codecFactory.getCodec(file); - + if (codec == null) { // no codec. Uncompressed file. positionAtFirstRecord(fileIn); inputStream = fileIn; @@ -130,14 +130,12 @@ protected FastqRecordReader(final Configuration conf, final FileSplit split) thr if (start != 0) { throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")"); } - inputStream = codec.createInputStream(fileIn); end = Long.MAX_VALUE; // read until the end of the file } - lineReader = new LineReader(inputStream); } - + /** * Checks to see whether the buffer is positioned at a valid record. * @@ -154,19 +152,19 @@ protected FastqRecordReader(final Configuration conf, final FileSplit split) thr * * @param stream The stream to reposition. */ - protected void positionAtFirstRecord(FSDataInputStream stream) throws IOException { + protected final void positionAtFirstRecord(final FSDataInputStream stream) throws IOException { Text buffer = new Text(); - + if (true) { // (start > 0) // use start>0 to assume that files start with valid data // Advance to the start of the first record that ends with /1 // We use a temporary LineReader to read lines until we find the // position of the right one. We then seek the file to that position. 
stream.seek(start); LineReader reader = new LineReader(stream); - + int bytesRead = 0; do { - bytesRead = reader.readLine(buffer, (int)Math.min(MAX_LINE_LENGTH, end - start)); + bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); int bufferLength = buffer.getLength(); if (bytesRead > 0 && !checkBuffer(bufferLength, buffer)) { start += bytesRead; @@ -175,9 +173,9 @@ protected void positionAtFirstRecord(FSDataInputStream stream) throws IOExceptio // // If this isn't the start of a record, we want to backtrack to its end long backtrackPosition = start + bytesRead; - - bytesRead = reader.readLine(buffer, (int)Math.min(MAX_LINE_LENGTH, end - start)); - bytesRead = reader.readLine(buffer, (int)Math.min(MAX_LINE_LENGTH, end - start)); + + bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); + bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); if (bytesRead > 0 && buffer.getLength() > 0 && buffer.getBytes()[0] == '+') { break; // all good! } else { @@ -188,39 +186,39 @@ protected void positionAtFirstRecord(FSDataInputStream stream) throws IOExceptio } } } while (bytesRead > 0); - + stream.seek(start); } - pos = start; } - + /** * Method is a no-op. * * @param split The input split that we will parse. * @param context The Hadoop task context. */ - public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {} - + public final void initialize(final InputSplit split, final TaskAttemptContext context) + throws IOException, InterruptedException {} + /** * FASTQ has no keys, so we return null. * * @return Always returns null. */ - public Void getCurrentKey() { + public final Void getCurrentKey() { return null; } - + /** * Returns the last interleaved FASTQ record. * * @return The text corresponding to the last read pair. */ - public Text getCurrentValue() { + public final Text getCurrentValue() { return currentValue; } - + /** * Seeks ahead in our split to the next key-value pair. * @@ -229,42 +227,41 @@ public Text getCurrentValue() { * * @return True if reading the next read pair succeeded. */ - public boolean nextKeyValue() throws IOException, InterruptedException { + public final boolean nextKeyValue() throws IOException, InterruptedException { currentValue = new Text(); - return next(currentValue); } - + /** * Close this RecordReader to future operations. */ - public void close() throws IOException { + public final void close() throws IOException { inputStream.close(); } - + /** * How much of the input has the RecordReader consumed? * * @return Returns a value on [0.0, 1.0] that notes how many bytes we * have read so far out of the total bytes to read. */ - public float getProgress() { + public final float getProgress() { if (start == end) { return 1.0f; } else { return Math.min(1.0f, (pos - start) / (float)(end - start)); } } - + /** * Produces a debugging message with the file position. * * @return Returns a string containing {filename}:{index}. */ - protected String makePositionMessage() { + protected final String makePositionMessage() { return file.toString() + ":" + pos; } - + /** * Parses a read from an interleaved FASTQ file. * @@ -277,7 +274,9 @@ protected String makePositionMessage() { * @throws RuntimeException Throws exception if FASTQ record doesn't * have proper formatting (e.g., record doesn't start with @). 
*/ - protected boolean lowLevelFastqRead(Text readName, Text value) throws IOException { + protected final boolean lowLevelFastqRead(final Text readName, final Text value) + throws IOException { + // ID line readName.clear(); long skipped = appendLineInto(readName, true); @@ -292,31 +291,30 @@ protected boolean lowLevelFastqRead(Text readName, Text value) throws IOExceptio ". Line: " + readName + ". \n"); } - value.append(readName.getBytes(), 0, readName.getLength()); - + // sequence appendLineInto(value, false); - + // separator line appendLineInto(value, false); - + // quality appendLineInto(value, false); - + return true; } - + /** * Reads from the input split. * * @param value Text record to write input value into. * @return Returns whether this read was successful or not. * - * @see lowLevelFastqRead + * @see #lowLevelFastqRead(Text, Text) */ abstract protected boolean next(Text value) throws IOException; - + /** * Reads a newline into a text record from the underlying line reader. * @@ -330,14 +328,15 @@ protected boolean lowLevelFastqRead(Text readName, Text value) throws IOExceptio private int appendLineInto(final Text dest, final boolean eofOk) throws EOFException, IOException { Text buf = new Text(); int bytesRead = lineReader.readLine(buf, MAX_LINE_LENGTH); - - if (bytesRead < 0 || (bytesRead == 0 && !eofOk)) + + if (bytesRead < 0 || (bytesRead == 0 && !eofOk)) { throw new EOFException(); - + } + dest.append(buf.getBytes(), 0, buf.getLength()); dest.append(newline, 0, 1); pos += bytesRead; - + return bytesRead; } } \ No newline at end of file diff --git a/src/main/java/org/bdgenomics/adam/io/InterleavedFastqInputFormat.java b/src/main/java/org/bdgenomics/adam/io/InterleavedFastqInputFormat.java index 05206cd084..b9c726ef72 100755 --- a/src/main/java/org/bdgenomics/adam/io/InterleavedFastqInputFormat.java +++ b/src/main/java/org/bdgenomics/adam/io/InterleavedFastqInputFormat.java @@ -41,7 +41,7 @@ * This reader is based on the FastqInputFormat that's part of Hadoop-BAM, * found at https://github.com/HadoopGenomics/Hadoop-BAM/blob/master/src/main/java/org/seqdoop/hadoop_bam/FastqInputFormat.java */ -public class InterleavedFastqInputFormat extends FileInputFormat { +public final class InterleavedFastqInputFormat extends FileInputFormat { /** * A record reader for the interleaved FASTQ format. @@ -70,7 +70,7 @@ private static class InterleavedFastqRecordReader extends FastqRecordReader { * @return Returns true if the buffer contains the first line of a properly * formatted pair of FASTQ records. */ - protected boolean checkBuffer(int bufferLength, Text buffer) { + protected boolean checkBuffer(final int bufferLength, final Text buffer) { return (bufferLength >= 2 && buffer.getBytes()[0] == '@' && buffer.getBytes()[bufferLength - 2] == '/' && @@ -87,28 +87,22 @@ protected boolean checkBuffer(int bufferLength, Text buffer) { * middle of a read, or if we have a read that is incorrectly * formatted (missing readname delimiters). 
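The `lowLevelFastqRead` helper above assembles one FASTQ record by appending four lines (ID, sequence, separator, quality) into the output `Text`, after checking that the ID line starts with `@`. A minimal Scala sketch of that per-record shape, using a plain `Iterator[String]` instead of Hadoop's `LineReader`; the `FastqRecord` case class and `readRecord` helper are illustrative only, not part of this patch:

```scala
// Illustrative only: mirrors the four appendLineInto calls in lowLevelFastqRead.
case class FastqRecord(id: String, sequence: String, separator: String, quality: String) {
  // Re-join the record newline-terminated, as the reader does when building its Text value.
  def asText: String = Seq(id, sequence, separator, quality).mkString("", "\n", "\n")
}

def readRecord(lines: Iterator[String]): Option[FastqRecord] =
  if (!lines.hasNext) {
    None
  } else {
    val id = lines.next()
    require(id.startsWith("@"), s"unexpected fastq record didn't start with '@': $id")
    Some(FastqRecord(id, lines.next(), lines.next(), lines.next()))
  }
```

In the reader itself each appended line also advances `pos`, which is what the `pos >= end` split check and `getProgress` rely on.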
*/ - protected boolean next(Text value) throws IOException { + protected boolean next(final Text value) throws IOException { if (pos >= end) return false; // past end of slice try { Text readName1 = new Text(); Text readName2 = new Text(); - value.clear(); // first read of the pair - boolean gotData = lowLevelFastqRead(readName1, value); - - if (!gotData) + if (!lowLevelFastqRead(readName1, value)) { return false; + } // second read of the pair - gotData = lowLevelFastqRead(readName2, value); - - if (!gotData) - return false; + return lowLevelFastqRead(readName2, value); - return true; } catch (EOFException e) { throw new RuntimeException("unexpected end of file in fastq record at " + makePositionMessage()); } @@ -123,12 +117,11 @@ protected boolean next(Text value) throws IOException { * @return Returns the interleaved FASTQ record reader. */ public RecordReader createRecordReader( - InputSplit genericSplit, - TaskAttemptContext context) throws IOException, InterruptedException { + final InputSplit genericSplit, + final TaskAttemptContext context) throws IOException, InterruptedException { context.setStatus(genericSplit.toString()); // cast as per example in TextInputFormat - return new InterleavedFastqRecordReader(context.getConfiguration(), - (FileSplit)genericSplit); + return new InterleavedFastqRecordReader(context.getConfiguration(), (FileSplit) genericSplit); } } diff --git a/src/main/java/org/bdgenomics/adam/io/SingleFastqInputFormat.java b/src/main/java/org/bdgenomics/adam/io/SingleFastqInputFormat.java index 762e91b57f..aa6a754b23 100644 --- a/src/main/java/org/bdgenomics/adam/io/SingleFastqInputFormat.java +++ b/src/main/java/org/bdgenomics/adam/io/SingleFastqInputFormat.java @@ -33,8 +33,8 @@ * This reader is based on the FastqInputFormat that's part of Hadoop-BAM, * found at https://github.com/HadoopGenomics/Hadoop-BAM/blob/master/src/main/java/org/seqdoop/hadoop_bam/FastqInputFormat.java */ -public class SingleFastqInputFormat extends FileInputFormat { - +public final class SingleFastqInputFormat extends FileInputFormat { + /** * A record reader for the standard FASTQ format. * @@ -61,7 +61,7 @@ private static class SingleFastqRecordReader extends FastqRecordReader { * @return Returns true if the buffer contains the first line of a properly * formatted pair of FASTQ records. */ - protected boolean checkBuffer(int bufferLength, Text buffer) { + protected boolean checkBuffer(final int bufferLength, final Text buffer) { return (bufferLength >= 0 && buffer.getBytes()[0] == '@'); } @@ -76,18 +76,15 @@ protected boolean checkBuffer(int bufferLength, Text buffer) { * middle of a read, or if we have a read that is incorrectly * formatted (missing readname delimiters). */ - protected boolean next(Text value) throws IOException { + protected boolean next(final Text value) throws IOException { if (pos >= end) return false; // past end of slice try { Text readName = new Text(); - value.clear(); // first read of the pair - boolean gotData = lowLevelFastqRead(readName, value); - - return gotData; + return lowLevelFastqRead(readName, value); } catch (EOFException e) { throw new RuntimeException("unexpected end of file in fastq record at " + makePositionMessage()); } @@ -102,12 +99,11 @@ protected boolean next(Text value) throws IOException { * @return Returns the interleaved FASTQ record reader. 
*/ public RecordReader createRecordReader( - InputSplit genericSplit, - TaskAttemptContext context) throws IOException, InterruptedException { + final InputSplit genericSplit, + final TaskAttemptContext context) throws IOException, InterruptedException { context.setStatus(genericSplit.toString()); // cast as per example in TextInputFormat - return new SingleFastqRecordReader(context.getConfiguration(), - (FileSplit)genericSplit); + return new SingleFastqRecordReader(context.getConfiguration(), (FileSplit) genericSplit); } } diff --git a/src/main/scala/org/bdgenomics/adam/converters/FastaConverter.scala b/src/main/scala/org/bdgenomics/adam/converters/FastaConverter.scala index 79683c8826..aec39fc3e3 100644 --- a/src/main/scala/org/bdgenomics/adam/converters/FastaConverter.scala +++ b/src/main/scala/org/bdgenomics/adam/converters/FastaConverter.scala @@ -17,7 +17,6 @@ */ package org.bdgenomics.adam.converters -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.bdgenomics.formats.avro.{ Contig, NucleotideContigFragment } import scala.collection.mutable diff --git a/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala b/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala index 0defab2f36..44ac65a0ea 100644 --- a/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala +++ b/src/main/scala/org/bdgenomics/adam/converters/FragmentConverter.scala @@ -17,7 +17,6 @@ */ package org.bdgenomics.adam.converters -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.ReferenceRegion import org.bdgenomics.formats.avro._ @@ -92,7 +91,7 @@ private[adam] object FragmentConverter extends Serializable { // are our fragments adjacent? val newLastFragment = if (lastRegion.isAdjacent(thisRegion)) { // if they are, merge the fragments - // we have sorted these fragments before we started, so the orientation is already known + // we have sorted these fragments before we started, so the strand is already known (lastRegion.hull(thisRegion), lastString + thisString) } else { // if they aren't, prepend the last fragment to the list diff --git a/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala b/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala index d54d3636f3..9e72c8d000 100644 --- a/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala +++ b/src/main/scala/org/bdgenomics/adam/converters/SAMRecordConverter.scala @@ -18,13 +18,12 @@ package org.bdgenomics.adam.converters import htsjdk.samtools.{ - CigarElement, SAMReadGroupRecord, SAMRecord, SAMUtils } import org.bdgenomics.utils.misc.Logging -import org.bdgenomics.adam.models.{ Attribute, TagType } +import org.bdgenomics.adam.models.Attribute import org.bdgenomics.adam.util.AttributeUtils import org.bdgenomics.formats.avro.AlignmentRecord import scala.collection.JavaConverters._ @@ -107,7 +106,7 @@ private[adam] class SAMRecordConverter extends Serializable with Logging { // set read alignment flag val start: Int = samRecord.getAlignmentStart assert(start != 0, "Start cannot equal 0 if contig is set.") - builder.setStart((start - 1)) + builder.setStart(start - 1L) // set OP and OC flags, if applicable if (samRecord.getAttribute("OP") != null) { @@ -152,7 +151,7 @@ private[adam] class SAMRecordConverter extends Serializable with Logging { val mateStart = samRecord.getMateAlignmentStart if (mateStart > 0) { // We subtract one here to be 0-based offset - 
builder.setMateAlignmentStart(mateStart - 1) + builder.setMateAlignmentStart(mateStart - 1L) } } @@ -188,7 +187,7 @@ private[adam] class SAMRecordConverter extends Serializable with Logging { var tags = List[Attribute]() val tlen = samRecord.getInferredInsertSize if (tlen != 0) { - builder.setInferredInsertSize(tlen) + builder.setInferredInsertSize(tlen.toLong) } if (samRecord.getAttributes != null) { samRecord.getAttributes.asScala.foreach { diff --git a/src/main/scala/org/bdgenomics/adam/converters/TranscriptEffectConverter.scala b/src/main/scala/org/bdgenomics/adam/converters/TranscriptEffectConverter.scala index 5e6eba7bd0..ec4101c84b 100644 --- a/src/main/scala/org/bdgenomics/adam/converters/TranscriptEffectConverter.scala +++ b/src/main/scala/org/bdgenomics/adam/converters/TranscriptEffectConverter.scala @@ -24,7 +24,6 @@ import org.bdgenomics.formats.avro.VariantAnnotationMessage._ import org.bdgenomics.formats.avro.{ TranscriptEffect, Variant, - VariantAnnotation, VariantAnnotationMessage } import org.bdgenomics.utils.misc.Logging @@ -165,7 +164,7 @@ private[adam] object TranscriptEffectConverter extends Serializable with Logging proteinPosition.foreach(te.setProteinPosition(_)) proteinLength.foreach(te.setProteinLength(_)) setIfNotEmpty(distance, (s: String) => te.setDistance(Integer.parseInt(s))) - if (!messages.isEmpty) te.setMessages(messages.asJava) + if (messages.nonEmpty) te.setMessages(messages.asJava) Seq(te.build()) } diff --git a/src/main/scala/org/bdgenomics/adam/instrumentation/Timers.scala b/src/main/scala/org/bdgenomics/adam/instrumentation/Timers.scala index 4b34b5c977..b613dc7f7c 100644 --- a/src/main/scala/org/bdgenomics/adam/instrumentation/Timers.scala +++ b/src/main/scala/org/bdgenomics/adam/instrumentation/Timers.scala @@ -80,4 +80,11 @@ object Timers extends Metrics { val WriteBAMRecord = timer("Write BAM Record") val WriteSAMRecord = timer("Write SAM Record") val WriteCRAMRecord = timer("Write CRAM Record") + + // org.bdgenomics.adam.rdd.TreeRegionJoin + val TreeJoin = timer("Running broadcast join with interval tree") + val BuildingTrees = timer("Building interval tree") + val SortingRightSide = timer("Sorting right side of join") + val GrowingTrees = timer("Growing forest of trees") + val RunningMapSideJoin = timer("Running map-side join") } diff --git a/src/main/scala/org/bdgenomics/adam/models/IndelTable.scala b/src/main/scala/org/bdgenomics/adam/models/IndelTable.scala index 2b91fcaac2..242195329d 100644 --- a/src/main/scala/org/bdgenomics/adam/models/IndelTable.scala +++ b/src/main/scala/org/bdgenomics/adam/models/IndelTable.scala @@ -18,7 +18,6 @@ package org.bdgenomics.adam.models import org.apache.spark.SparkContext -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.algorithms.consensus.Consensus import org.bdgenomics.adam.rdd.ADAMContext._ diff --git a/src/main/scala/org/bdgenomics/adam/models/NonoverlappingRegions.scala b/src/main/scala/org/bdgenomics/adam/models/NonoverlappingRegions.scala index c1a6523aef..693fab7968 100644 --- a/src/main/scala/org/bdgenomics/adam/models/NonoverlappingRegions.scala +++ b/src/main/scala/org/bdgenomics/adam/models/NonoverlappingRegions.scala @@ -40,7 +40,7 @@ private[adam] class NonoverlappingRegions(regions: Iterable[ReferenceRegion]) ex // checks to all the methods below to make sure that 'endpoints' isn't empty. Also, it shouldn't // make any sense to have a set of non-overlapping regions for ... no regions. 
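The `setStart(start - 1L)`, `setMateAlignmentStart(mateStart - 1L)`, and `setInferredInsertSize(tlen.toLong)` changes in the SAMRecordConverter hunks above make the int-to-long widening explicit: htsjdk returns `Int` coordinates while the corresponding Avro fields are longs, so a `Long` literal (or `toLong`) performs the arithmetic in `Long` rather than leaving the conversion implicit at the call site. A small illustration with made-up values:

```scala
// Hypothetical values, only to show the widening behaviour.
val start: Int = 101          // 1-based SAM alignment start
val asInt: Int = start - 1    // Int arithmetic, stays Int
val asLong: Long = start - 1L // Long literal widens the whole expression to Long
assert(asInt.toLong == asLong)
```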
// Also, without this check, we can't tell which chromosome this NonoverlappingRegions object is for. - assert(regions.size > 0, "regions list must be non-empty") + assert(regions.nonEmpty, "regions list must be non-empty") assert(regions.head != null, "regions must have at least one non-null entry") /** @@ -235,7 +235,7 @@ private[adam] class MultiContigNonoverlappingRegions( * nonoverlapping-regions. Basically, reject the input value if its corresponding region is * completely outside the hull of all the input-set regions. * - * @param regionable The input value + * @param value The input value * @tparam U * @return a boolean -- the input value should only participate in the regionJoin if the return value * here is 'true'. diff --git a/src/main/scala/org/bdgenomics/adam/models/ProgramRecord.scala b/src/main/scala/org/bdgenomics/adam/models/ProgramRecord.scala index 029ab6e736..15d1bc750b 100644 --- a/src/main/scala/org/bdgenomics/adam/models/ProgramRecord.scala +++ b/src/main/scala/org/bdgenomics/adam/models/ProgramRecord.scala @@ -18,7 +18,6 @@ package org.bdgenomics.adam.models import htsjdk.samtools.SAMProgramRecord -import org.bdgenomics.adam.rdd.ADAMContext._ private[models] object ProgramRecord { diff --git a/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala b/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala index 080f30213a..9ccf15b409 100644 --- a/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala +++ b/src/main/scala/org/bdgenomics/adam/models/ReferencePosition.scala @@ -116,10 +116,10 @@ object ReferencePosition extends Serializable { * * @param referenceName The name of the reference contig this locus exists on. * @param pos The position of this locus. - * @param orientation The strand that this locus is on. + * @param strand The strand that this locus is on. */ - def apply(referenceName: String, pos: Long, orientation: Strand): ReferencePosition = { - new ReferencePosition(referenceName, pos, orientation) + def apply(referenceName: String, pos: Long, strand: Strand): ReferencePosition = { + new ReferencePosition(referenceName, pos, strand) } } @@ -128,13 +128,13 @@ object ReferencePosition extends Serializable { * * @param referenceName The name of the reference contig this locus exists on. * @param pos The position of this locus. - * @param orientation The strand that this locus is on. + * @param strand The strand that this locus is on. 
*/ class ReferencePosition( override val referenceName: String, val pos: Long, - override val orientation: Strand = Strand.INDEPENDENT) - extends ReferenceRegion(referenceName, pos, pos + 1, orientation) + override val strand: Strand = Strand.INDEPENDENT) + extends ReferenceRegion(referenceName, pos, pos + 1, strand) class ReferencePositionSerializer extends Serializer[ReferencePosition] { private val enumValues = Strand.values() @@ -142,13 +142,13 @@ class ReferencePositionSerializer extends Serializer[ReferencePosition] { def write(kryo: Kryo, output: Output, obj: ReferencePosition) = { output.writeString(obj.referenceName) output.writeLong(obj.pos) - output.writeInt(obj.orientation.ordinal) + output.writeInt(obj.strand.ordinal) } def read(kryo: Kryo, input: Input, klazz: Class[ReferencePosition]): ReferencePosition = { val refName = input.readString() val pos = input.readLong() - val orientation = input.readInt() - new ReferencePosition(refName, pos, enumValues(orientation)) + val strand = input.readInt() + new ReferencePosition(refName, pos, enumValues(strand)) } } diff --git a/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala b/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala index 454cb6b5e8..ec57de1e41 100644 --- a/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala +++ b/src/main/scala/org/bdgenomics/adam/models/ReferenceRegion.scala @@ -20,7 +20,7 @@ package org.bdgenomics.adam.models import com.esotericsoftware.kryo.io.{ Input, Output } import com.esotericsoftware.kryo.{ Kryo, Serializer } import org.bdgenomics.formats.avro._ -import org.bdgenomics.utils.intervaltree.Interval +import org.bdgenomics.utils.intervalarray.Interval import org.hammerlab.genomics.reference.Region import scala.math.{ max, min } @@ -43,7 +43,7 @@ trait ReferenceOrdering[T <: ReferenceRegion] extends Ordering[T] { b: T): Int = { val rc = regionCompare(a, b) if (rc == 0) { - a.orientation.ordinal compare b.orientation.ordinal + a.strand.ordinal compare b.strand.ordinal } else { rc } @@ -99,12 +99,14 @@ object ReferenceRegion { * @param referenceName The name of the reference contig that this region is * on. * @param end The end position for this region. + * @param strand The strand of the genome that this region exists on. * @return Returns a reference region that goes from the start of a contig to * a user provided end point. */ def fromStart(referenceName: String, - end: Long): ReferenceRegion = { - ReferenceRegion(referenceName, 0L, end) + end: Long, + strand: Strand = Strand.INDEPENDENT): ReferenceRegion = { + ReferenceRegion(referenceName, 0L, end, strand = strand) } /** @@ -113,22 +115,26 @@ object ReferenceRegion { * @param referenceName The name of the reference contig that this region is * on. * @param start The start position for this region. + * @param strand The strand of the genome that this region exists on. * @return Returns a reference region that goes from a user provided starting * point to the end of a contig. */ def toEnd(referenceName: String, - start: Long): ReferenceRegion = { - ReferenceRegion(referenceName, start, Long.MaxValue) + start: Long, + strand: Strand = Strand.INDEPENDENT): ReferenceRegion = { + ReferenceRegion(referenceName, start, Long.MaxValue, strand = strand) } /** * Creates a reference region that covers the entirety of a contig. * * @param referenceName The name of the reference contig to cover. + * @param strand The strand of the genome that this region exists on. 
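The `ReferencePositionSerializer` above persists the strand as its enum ordinal (`output.writeInt(obj.strand.ordinal)`) and recovers it by indexing into the cached `Strand.values()` array. A hedged round-trip sketch of just that encoding step, outside of kryo:

```scala
import org.bdgenomics.formats.avro.Strand

// Strand is stored as an Int ordinal and restored by indexing Strand.values().
val enumValues = Strand.values()
val written = Strand.REVERSE.ordinal // what output.writeInt(...) persists
val restored = enumValues(written)   // what read(...) reconstructs
assert(restored == Strand.REVERSE)
```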
* @return Returns a reference region that covers the entirety of a contig. */ - def all(referenceName: String): ReferenceRegion = { - ReferenceRegion(referenceName, 0L, Long.MaxValue) + def all(referenceName: String, + strand: Strand = Strand.INDEPENDENT): ReferenceRegion = { + ReferenceRegion(referenceName, 0L, Long.MaxValue, strand = strand) } /** @@ -140,7 +146,7 @@ object ReferenceRegion { */ def opt(record: AlignmentRecord): Option[ReferenceRegion] = { if (record.getReadMapped) { - Some(apply(record)) + Some(unstranded(record)) } else { None } @@ -153,7 +159,9 @@ object ReferenceRegion { * @return The site where this genotype lives. */ def apply(genotype: Genotype): ReferenceRegion = { - ReferenceRegion(genotype.getVariant) + ReferenceRegion(genotype.getContigName, + genotype.getStart, + genotype.getEnd) } /** @@ -176,22 +184,28 @@ object ReferenceRegion { ReferenceRegion(annotation.getVariant) } + private def checkRead(record: AlignmentRecord) { + require(record.getReadMapped, + "Cannot build reference region for unmapped read %s.".format(record)) + require(record.getContigName != null && + record.getStart != null && + record.getEnd != null, + "Read %s contains required fields that are null.".format(record)) + } + /** - * Builds a reference region for an aligned read. + * Builds a reference region with independent strand for an aligned read. * * @throws IllegalArgumentException If this read is not aligned or alignment * data is null. * * @param record The read to extract the reference region from. * @return Returns the reference region covered by this read's alignment. + * + * @see stranded */ - def apply(record: AlignmentRecord): ReferenceRegion = { - require(record.getReadMapped, - "Cannot build reference region for unmapped read %s.".format(record)) - require(record.getContigName != null && - record.getStart != null && - record.getEnd != null, - "Read %s contains required fields that are null.".format(record)) + def unstranded(record: AlignmentRecord): ReferenceRegion = { + checkRead(record) ReferenceRegion(record.getContigName, record.getStart, record.getEnd) } @@ -215,13 +229,44 @@ object ReferenceRegion { else None + /** + * Builds a reference region for an aligned read with strand set. + * + * @throws IllegalArgumentException If this read is not aligned, alignment + * data is null, or strand is not set. + * + * @param record The read to extract the reference region from. + * @return Returns the reference region covered by this read's alignment. + * + * @see unstranded + */ + def stranded(record: AlignmentRecord): ReferenceRegion = { + checkRead(record) + + val strand = Option(record.getReadNegativeStrand) + .map(b => b: Boolean) match { + case None => throw new IllegalArgumentException( + "Alignment strand not set for %s".format(record)) + case Some(true) => Strand.REVERSE + case Some(false) => Strand.FORWARD + } + new ReferenceRegion(record.getContigName, + record.getStart, + record.getEnd, + strand = strand) + } + /** * Generates a region from a given position -- the region will have a length of 1. * @param pos The position to convert * @return A 1-wide region at the same location as pos */ - def apply(pos: ReferencePosition): ReferenceRegion = - ReferenceRegion(pos.referenceName, pos.pos, pos.pos + 1) + def apply(pos: ReferencePosition): ReferenceRegion = { + ReferenceRegion(pos.referenceName, + pos.pos, + pos.pos + 1, + strand = pos.strand) + } /** * Generates a reference region from assembly data. 
Returns None if the assembly does not @@ -245,16 +290,47 @@ object ReferenceRegion { } } + private def checkFeature(record: Feature) { + require(record.getContigName != null && + record.getStart != null && + record.getEnd != null, + "Feature %s contains required fields that are null.".format(record)) + } + /** - * Builds a reference region for a feature. + * Builds a reference region for a feature without strand information. * - * @param feature Feature to extract ReferenceRegion from + * @param feature Feature to extract ReferenceRegion from. * @return Extracted ReferenceRegion + * + * @see stranded */ - def apply(feature: Feature): ReferenceRegion = { + def unstranded(feature: Feature): ReferenceRegion = { + checkFeature(feature) new ReferenceRegion(feature.getContigName, feature.getStart, feature.getEnd) } + /** + * Builds a reference region for a feature with strand set. + * + * @param feature Feature to extract ReferenceRegion from. + * @return Extracted ReferenceRegion + * + * @throws IllegalArgumentException Throws an exception if the strand is null + * in the provided feature. + * + * @see unstranded + */ + def stranded(feature: Feature): ReferenceRegion = { + checkFeature(feature) + require(feature.getStrand != null, + "Strand is not defined in feature %s.".format(feature)) + new ReferenceRegion(feature.getContigName, + feature.getStart, + feature.getEnd, + strand = feature.getStrand) + } + /** * Builds a reference region for a coverage site. * @@ -274,18 +350,19 @@ object ReferenceRegion { * @param end The 0-based residue-coordinate for the first residue after the start * which is not in the region -- i.e. [start, end) define a 0-based * half-open interval. + * @param strand The strand of the genome that this region exists on. */ case class ReferenceRegion( referenceName: String, start: Long, end: Long, - orientation: Strand = Strand.INDEPENDENT) + strand: Strand = Strand.INDEPENDENT) extends Comparable[ReferenceRegion] - with Interval { + with Interval[ReferenceRegion] { assert(start >= 0 && end >= start, "Failed when trying to create region %s %d %d on %s strand.".format( - referenceName, start, end, orientation)) + referenceName, start, end, strand)) /** * @return Returns a copy of this reference region that is on the independent @@ -331,7 +408,7 @@ case class ReferenceRegion( * @see merge */ def hull(region: ReferenceRegion): ReferenceRegion = { - assert(orientation == region.orientation, "Cannot compute convex hull of differently oriented regions.") + assert(strand == region.strand, "Cannot compute convex hull of differently oriented regions.") assert(referenceName == region.referenceName, "Cannot compute convex hull of regions on different references.") ReferenceRegion(referenceName, min(start, region.start), max(end, region.end)) } @@ -361,7 +438,7 @@ case class ReferenceRegion( * the point is not in our reference space, we return an empty option. */ def distance(other: ReferenceRegion): Option[Long] = { - if (referenceName == other.referenceName && orientation == other.orientation) { + if (referenceName == other.referenceName && strand == other.strand) { if (overlaps(other)) { Some(0) } else if (other.start >= end) { @@ -399,7 +476,7 @@ case class ReferenceRegion( new ReferenceRegion(referenceName, start - byStart, end + byEnd, - orientation) + strand) } /** @@ -409,7 +486,7 @@ case class ReferenceRegion( * @return True if the position is within our region. 
*/ def contains(other: ReferencePosition): Boolean = { - orientation == other.orientation && + strand == other.strand && referenceName == other.referenceName && start <= other.pos && end > other.pos } @@ -421,11 +498,23 @@ case class ReferenceRegion( * @return True if the region is wholly contained within our region. */ def contains(other: ReferenceRegion): Boolean = { - orientation == other.orientation && + strand == other.strand && referenceName == other.referenceName && start <= other.start && end >= other.end } + /** + * Checks if our region overlaps (wholly or partially) another region, + * independent of strand. + * + * @param other The region to compare against. + * @return True if any section of the two regions overlap. + */ + def covers(other: ReferenceRegion): Boolean = { + referenceName == other.referenceName && + end > other.start && start < other.end + } + /** * Checks if our region overlaps (wholly or partially) another region. * @@ -433,7 +522,7 @@ case class ReferenceRegion( * @return True if any section of the two regions overlap. */ def overlaps(other: ReferenceRegion): Boolean = { - orientation == other.orientation && + strand == other.strand && referenceName == other.referenceName && end > other.start && start < other.end } @@ -460,7 +549,7 @@ case class ReferenceRegion( result = 41 * result + (if (referenceName != null) referenceName.hashCode else 0) result = 41 * result + start.hashCode result = 41 * result + end.hashCode - result = 41 * result + (if (orientation != null) orientation.ordinal() else 0) + result = 41 * result + (if (strand != null) strand.ordinal() else 0) result } } @@ -472,14 +561,14 @@ class ReferenceRegionSerializer extends Serializer[ReferenceRegion] { output.writeString(obj.referenceName) output.writeLong(obj.start) output.writeLong(obj.end) - output.writeInt(obj.orientation.ordinal()) + output.writeInt(obj.strand.ordinal()) } def read(kryo: Kryo, input: Input, klazz: Class[ReferenceRegion]): ReferenceRegion = { val referenceName = input.readString() val start = input.readLong() val end = input.readLong() - val orientation = input.readInt() - new ReferenceRegion(referenceName, start, end, enumValues(orientation)) + val strand = input.readInt() + new ReferenceRegion(referenceName, start, end, enumValues(strand)) } } diff --git a/src/main/scala/org/bdgenomics/adam/models/SAMFileHeaderWritable.scala b/src/main/scala/org/bdgenomics/adam/models/SAMFileHeaderWritable.scala index ecf9d7bfc0..5aea05443b 100644 --- a/src/main/scala/org/bdgenomics/adam/models/SAMFileHeaderWritable.scala +++ b/src/main/scala/org/bdgenomics/adam/models/SAMFileHeaderWritable.scala @@ -17,7 +17,7 @@ */ package org.bdgenomics.adam.models -import htsjdk.samtools.{ SAMFileHeader, SAMProgramRecord } +import htsjdk.samtools.SAMFileHeader import scala.collection.JavaConversions._ private[adam] object SAMFileHeaderWritable { diff --git a/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala b/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala index 79d4d321f0..41eb4d8b95 100644 --- a/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala +++ b/src/main/scala/org/bdgenomics/adam/models/SequenceDictionary.scala @@ -23,6 +23,7 @@ import org.bdgenomics.formats.avro.{ Contig, NucleotideContigFragment } import org.hammerlab.genomics.reference.ContigLengths import scala.collection.JavaConversions._ +import scala.collection._ /** * Singleton object for creating SequenceDictionaries. 
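The new `covers` method added to `ReferenceRegion` above is the strand-independent counterpart of `overlaps`: both compare contig name and coordinates, but `overlaps` (like `contains`) also requires the strands to match. A small example of the difference, assuming `ReferenceRegion` and the Avro `Strand` enum are in scope (example coordinates only):

```scala
import org.bdgenomics.formats.avro.Strand

val fwd = ReferenceRegion("chr1", 100L, 200L, strand = Strand.FORWARD)
val rev = ReferenceRegion("chr1", 150L, 250L, strand = Strand.REVERSE)

fwd.covers(rev)   // true: the coordinate ranges intersect, strand is ignored
fwd.overlaps(rev) // false: the ranges intersect, but the strands differ
```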
@@ -119,7 +120,7 @@ class SequenceDictionary(val records: Vector[SequenceRecord]) extends Serializab private val hasSequenceOrdering = records.forall(_.referenceIndex.isDefined) - def contigLengths: ContigLengths = byName.mapValues(_.length) + def contigLengths: ContigLengths = byName.mapValues(_.length).toMap /** * @param that Sequence dictionary to compare against. diff --git a/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala b/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala index ff2de50935..4c7312e8a4 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/ADAMContext.scala @@ -144,7 +144,7 @@ private class FileFilter(private val name: String) extends PathFilter { * * @param sc The SparkContext to wrap. */ -class ADAMContext private (@transient val sc: SparkContext) extends Serializable with Logging { +class ADAMContext(@transient val sc: SparkContext) extends Serializable with Logging { /** * @param samHeader The header to extract a sequence dictionary from. diff --git a/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala b/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala index 7c79894bb9..81f5783950 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/ADAMRDDFunctions.scala @@ -17,20 +17,17 @@ */ package org.bdgenomics.adam.rdd -import java.util.logging.Level import java.io.OutputStream import org.apache.avro.Schema import org.apache.avro.file.DataFileWriter import org.apache.avro.generic.IndexedRecord import org.apache.avro.specific.{ SpecificDatumWriter, SpecificRecordBase } -import org.apache.hadoop.fs.{ FileSystem, Path } +import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{ OutputFormat => NewOutputFormat } import org.apache.spark.SparkContext import org.apache.spark.rdd.MetricsContext._ import org.apache.spark.rdd.{ InstrumentedOutputFormat, RDD } import org.bdgenomics.adam.instrumentation.Timers._ -import org.bdgenomics.adam.models._ -import org.bdgenomics.adam.util.ParquetLogger import org.bdgenomics.utils.cli.SaveArgs import org.bdgenomics.utils.misc.HadoopUtil import org.bdgenomics.utils.misc.Logging diff --git a/src/main/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoin.scala b/src/main/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoin.scala deleted file mode 100644 index 28ae7174af..0000000000 --- a/src/main/scala/org/bdgenomics/adam/rdd/BroadcastRegionJoin.scala +++ /dev/null @@ -1,190 +0,0 @@ -/** - * Licensed to Big Data Genomics (BDG) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The BDG licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.bdgenomics.adam.rdd - -import org.apache.spark.SparkContext._ -import org.apache.spark.broadcast.Broadcast -import org.apache.spark.rdd.RDD -import org.bdgenomics.adam.models.{ - MultiContigNonoverlappingRegions, - ReferenceRegion -} -import scala.Predef._ -import scala.reflect.ClassTag - -/** - * Contains multiple implementations of a 'region join', an operation that joins two sets of - * regions based on the spatial overlap between the regions. - * - * Different implementations will have different performance characteristics -- and new implementations - * will likely be added in the future, see the notes to each individual method for more details. - */ -sealed trait BroadcastRegionJoin[T, U, RT] extends RegionJoin[T, U, RT, U] { - - /** - * Performs a region join between two RDDs (broadcast join). - * - * This implementation first _collects_ the left-side RDD; therefore, if the left-side RDD is large - * or otherwise idiosyncratic in a spatial sense (i.e. contains a set of regions whose unions overlap - * a significant fraction of the genome) then the performance of this implementation will likely be - * quite bad. - * - * Once the left-side RDD is collected, its elements are reduced to their distinct unions; - * these can then be used to define the partitions over which the region-join will be computed. - * - * The regions in the left-side are keyed by their corresponding partition (each such region should have - * exactly one partition). The regions in the right-side are also keyed by their corresponding partitions - * (here there can be more than one partition for a region, since a region may cross the boundaries of - * the partitions defined by the left-side). - * - * Finally, within each separate partition, we essentially perform a cartesian-product-and-filter - * operation. The result is the region-join. - * - * @param baseRDD The 'left' side of the join - * @param joinedRDD The 'right' side of the join - * @param tManifest implicit type of baseRDD - * @param uManifest implicit type of joinedRDD - * @tparam T type of baseRDD - * @tparam U type of joinedRDD - * @return An RDD of pairs (x, y), where x is from baseRDD, y is from joinedRDD, and the region - * corresponding to x overlaps the region corresponding to y. - */ - def partitionAndJoin( - baseRDD: RDD[(ReferenceRegion, T)], - joinedRDD: RDD[(ReferenceRegion, U)])(implicit tManifest: ClassTag[T], - uManifest: ClassTag[U]): RDD[(RT, U)] = { - - val sc = baseRDD.context - - /** - * Original Join Design: - * - * Parameters: - * (1) f : (Range, Range) => T // an aggregation function - * (2) a : RDD[Range] - * (3) b : RDD[Range] - * - * Return type: RDD[(Range,T)] - * - * Algorithm: - * 1. a.collect() (where a is smaller than b) - * 2. build a non-overlapping partition on a - * 3. ak = a.map( v => (partition(v), v) ) - * 4. bk = b.flatMap( v => partitions(v).map( i=>(i,v) ) ) - * 5. joined = ak.join(bk).filter( (i, (r1, r2)) => r1.overlaps(r2) ).map( (i, (r1,r2))=>(r1, r2) ) - * 6. return: joined.reduceByKey(f) - * - * Ways in which we've generalized this plan: - * - removed the aggregation step altogether - * - carry a sequence dictionary through the computation. - */ - - // First, we group the regions in the left side of the join by their referenceName, - // and collect them. 
- val collectedLeft: Seq[(String, Iterable[ReferenceRegion])] = - baseRDD - .map(_._1) // RDD[ReferenceRegion] - .keyBy(_.referenceName) // RDD[(String,ReferenceRegion)] - .groupByKey() // RDD[(String,Seq[ReferenceRegion])] - .collect() // Iterable[(String,Seq[ReferenceRegion])] - .toSeq // Seq[(String,Seq[ReferenceRegion])] - - // Next, we turn that into a data structure that reduces those regions to their non-overlapping - // pieces, which we will use as a partition. - val multiNonOverlapping = new MultiContigNonoverlappingRegions(collectedLeft) - - // Then, we broadcast those partitions -- this will be the function that allows us to - // partition all the regions on the right side of the join. - val regions = sc.broadcast(multiNonOverlapping) - - // each element of the left-side RDD should have exactly one partition. - val smallerKeyed: RDD[(ReferenceRegion, (ReferenceRegion, T))] = - baseRDD.map(t => (regions.value.regionsFor(t).head, t)) - - // each element of the right-side RDD may have 0, 1, or more than 1 corresponding partition. - val largerKeyed: RDD[(ReferenceRegion, (ReferenceRegion, U))] = - joinedRDD.flatMap(t => regionsFor(t, regions).map((r: ReferenceRegion) => (r, t))) - - joinAndFilterFn(smallerKeyed, largerKeyed) - } - - protected def regionsFor(u: (ReferenceRegion, U), - regions: Broadcast[MultiContigNonoverlappingRegions]): Iterable[ReferenceRegion] - - protected def joinAndFilterFn(tRdd: RDD[(ReferenceRegion, (ReferenceRegion, T))], - uRdd: RDD[(ReferenceRegion, (ReferenceRegion, U))]): RDD[(RT, U)] -} - -/** - * Extends the BroadcastRegionJoin trait to implement an inner join. - */ -case class InnerBroadcastRegionJoin[T, U]() extends BroadcastRegionJoin[T, U, T] { - - protected def joinAndFilterFn(tRdd: RDD[(ReferenceRegion, (ReferenceRegion, T))], - uRdd: RDD[(ReferenceRegion, (ReferenceRegion, U))]): RDD[(T, U)] = { - // this is (essentially) performing a cartesian product within each partition... - val joined: RDD[(ReferenceRegion, ((ReferenceRegion, T), (ReferenceRegion, U)))] = - tRdd.join(uRdd) - - // ... so we need to filter the final pairs to make sure they're overlapping. - joined.flatMap(kv => { - val (_, (t: (ReferenceRegion, T), u: (ReferenceRegion, U))) = kv - - if (t._1.overlaps(u._1)) { - Some((t._2, u._2)) - } else { - None - } - }) - } - - protected def regionsFor(u: (ReferenceRegion, U), - regions: Broadcast[MultiContigNonoverlappingRegions]): Iterable[ReferenceRegion] = { - regions.value.regionsFor(u) - } -} - -/** - * Extends the BroadcastRegionJoin trait to implement a right outer join. - */ -case class RightOuterBroadcastRegionJoin[T, U]() extends BroadcastRegionJoin[T, U, Option[T]] { - - protected def joinAndFilterFn(tRdd: RDD[(ReferenceRegion, (ReferenceRegion, T))], - uRdd: RDD[(ReferenceRegion, (ReferenceRegion, U))]): RDD[(Option[T], U)] = { - // this is (essentially) performing a cartesian product within each partition... - val joined: RDD[(ReferenceRegion, (Option[(ReferenceRegion, T)], (ReferenceRegion, U)))] = - tRdd.rightOuterJoin(uRdd) - - // ... so we need to filter the final pairs to make sure they're overlapping. 
- joined.map(kv => { - val (_, (optT: Option[(ReferenceRegion, T)], u: (ReferenceRegion, U))) = kv - - (optT.filter(t => t._1.overlaps(u._1)).map(_._2), u._2) - }) - } - - protected def regionsFor(u: (ReferenceRegion, U), - regions: Broadcast[MultiContigNonoverlappingRegions]): Iterable[ReferenceRegion] = { - val reg = regions.value.regionsFor(u) - if (reg.isEmpty) { - Iterable(u._1) - } else { - reg - } - } -} diff --git a/src/main/scala/org/bdgenomics/adam/rdd/FileMerger.scala b/src/main/scala/org/bdgenomics/adam/rdd/FileMerger.scala index 9c8d6b4540..977957d51a 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/FileMerger.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/FileMerger.scala @@ -187,7 +187,7 @@ object FileMerger extends Logging { // finish the file off if (writeEmptyGzipBlock) { - os.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK); + os.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK) } else if (writeCramEOF) { CramIO.issueEOF(CramVersions.DEFAULT_CRAM_VERSION, os) } diff --git a/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala b/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala index 695af7ba09..2f41524c25 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/GenomicRDD.scala @@ -396,11 +396,12 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] { * @return Returns a new genomic RDD containing all pairs of keys that * overlapped in the genomic coordinate space. */ - def broadcastRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(T, X), Z]](genomicRdd: GenomicRDD[X, Y])( - implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(T, X), Z] = { + def broadcastRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(T, X), Z]]( + genomicRdd: GenomicRDD[X, Y])( + implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(T, X), Z] = { // key the RDDs and join - GenericGenomicRDD[(T, X)](InnerBroadcastRegionJoin[T, X]().partitionAndJoin(flattenRddByRegions(), + GenericGenomicRDD[(T, X)](InnerTreeRegionJoin[T, X]().partitionAndJoin(flattenRddByRegions(), genomicRdd.flattenRddByRegions()), sequences ++ genomicRdd.sequences, kv => { getReferenceRegions(kv._1) ++ genomicRdd.getReferenceRegions(kv._2) }) @@ -423,11 +424,12 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] { * overlapped in the genomic coordinate space, and all keys from the * right RDD that did not overlap a key in the left RDD. */ - def rightOuterBroadcastRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(Option[T], X), Z]](genomicRdd: GenomicRDD[X, Y])( - implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(Option[T], X), Z] = { + def rightOuterBroadcastRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(Option[T], X), Z]]( + genomicRdd: GenomicRDD[X, Y])( + implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(Option[T], X), Z] = { // key the RDDs and join - GenericGenomicRDD[(Option[T], X)](RightOuterBroadcastRegionJoin[T, X]().partitionAndJoin(flattenRddByRegions(), + GenericGenomicRDD[(Option[T], X)](RightOuterTreeRegionJoin[T, X]().partitionAndJoin(flattenRddByRegions(), genomicRdd.flattenRddByRegions()), sequences ++ genomicRdd.sequences, kv => { @@ -437,6 +439,96 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] { .asInstanceOf[GenomicRDD[(Option[T], X), Z]] } + /** + * Computes the partition size and final sequence dictionary for a join. + * + * @param optPartitions Optional user-requested number of partitions for the + * end of the shuffle. + * @param genomicRdd The genomic RDD we are joining against. 
+ * @return Returns a tuple containing the (partition size, final sequence + * dictionary after the join). + */ + private[rdd] def joinPartitionSizeAndSequences[X, Y <: GenomicRDD[X, Y]]( + optPartitions: Option[Int], + genomicRdd: GenomicRDD[X, Y]): (Long, SequenceDictionary) = { + + require(!(sequences.isEmpty && genomicRdd.sequences.isEmpty), + "Both RDDs at input to join have an empty sequence dictionary!") + + // what sequences do we wind up with at the end? + val finalSequences = sequences ++ genomicRdd.sequences + + // did the user provide a set partition count? + // if no, take the max partition count from our rdds + val estPartitions = optPartitions.getOrElse(Seq(rdd.partitions.length, + genomicRdd.rdd.partitions.length).max) + + // if the user provides too high of a partition count, the estimated number + // of partitions can go to 0 + val partitions = if (estPartitions >= 1) { + estPartitions + } else { + 1 + } + + (finalSequences.records.map(_.length).sum / partitions, + finalSequences) + } + + /** + * Performs a broadcast inner join between this RDD and another RDD. + * + * In a broadcast join, the left RDD (this RDD) is collected to the driver, + * and broadcast to all the nodes in the cluster. The key equality function + * used for this join is the reference region overlap function. Since this + * is an inner join, all values who do not overlap a value from the other + * RDD are dropped. + * + * @param genomicRdd The right RDD in the join. + * @return Returns a new genomic RDD containing all pairs of keys that + * overlapped in the genomic coordinate space. + */ + def broadcastRegionJoinAndGroupByRight[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(Iterable[T], X), Z]](genomicRdd: GenomicRDD[X, Y])( + implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(Iterable[T], X), Z] = { + + // key the RDDs and join + GenericGenomicRDD[(Iterable[T], X)](InnerTreeRegionJoinAndGroupByRight[T, X]().partitionAndJoin(flattenRddByRegions(), + genomicRdd.flattenRddByRegions()), + sequences ++ genomicRdd.sequences, + kv => { (kv._1.flatMap(getReferenceRegions) ++ genomicRdd.getReferenceRegions(kv._2)).toSeq }) + .asInstanceOf[GenomicRDD[(Iterable[T], X), Z]] + } + + /** + * Performs a broadcast right outer join between this RDD and another RDD. + * + * In a broadcast join, the left RDD (this RDD) is collected to the driver, + * and broadcast to all the nodes in the cluster. The key equality function + * used for this join is the reference region overlap function. Since this + * is a right outer join, all values in the left RDD that do not overlap a + * value from the right RDD are dropped. If a value from the right RDD does + * not overlap any values in the left RDD, it will be paired with a `None` + * in the product of the join. + * + * @param genomicRdd The right RDD in the join. + * @return Returns a new genomic RDD containing all pairs of keys that + * overlapped in the genomic coordinate space, and all keys from the + * right RDD that did not overlap a key in the left RDD. 
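The new `joinPartitionSizeAndSequences` helper above turns the requested (or default) partition count into a target partition size in bases: the summed lengths of the merged sequence dictionary divided by the clamped partition count, which is what the shuffle joins below now receive instead of a raw partition count. A rough worked example with made-up numbers:

```scala
// Hypothetical numbers, only to illustrate the arithmetic.
val totalLength: Long = 3200000000L     // sum of record lengths in the merged dictionary
val estPartitions = Seq(512, 256).max   // max of the two RDDs' partition counts
val partitions = if (estPartitions >= 1) estPartitions else 1
val partitionSize = totalLength / partitions // 6250000 bases of reference per partition
```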
+ */ + def rightOuterBroadcastRegionJoinAndGroupByRight[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(Iterable[T], X), Z]](genomicRdd: GenomicRDD[X, Y])( + implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(Iterable[T], X), Z] = { + + // key the RDDs and join + GenericGenomicRDD[(Iterable[T], X)](RightOuterTreeRegionJoinAndGroupByRight[T, X]().partitionAndJoin(flattenRddByRegions(), + genomicRdd.flattenRddByRegions()), + sequences ++ genomicRdd.sequences, + kv => { + Seq(kv._1.map(v => getReferenceRegions(v))).flatten.flatten ++ + genomicRdd.getReferenceRegions(kv._2) + }) + .asInstanceOf[GenomicRDD[(Iterable[T], X), Z]] + } + /** * Performs a sort-merge inner join between this RDD and another RDD. * @@ -450,22 +542,18 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] { * @return Returns a new genomic RDD containing all pairs of keys that * overlapped in the genomic coordinate space. */ - def shuffleRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(T, X), Z]](genomicRdd: GenomicRDD[X, Y], - optPartitions: Option[Int] = None)( - implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(T, X), Z] = { + def shuffleRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(T, X), Z]]( + genomicRdd: GenomicRDD[X, Y], + optPartitions: Option[Int] = None)( + implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(T, X), Z] = { - // did the user provide a set partition count? - // if no, take the max partition count from our rdds - val partitions = optPartitions.getOrElse(Seq(rdd.partitions.length, - genomicRdd.rdd.partitions.length).max) - - // what sequences do we wind up with at the end? - val endSequences = sequences ++ genomicRdd.sequences + val (partitionSize, endSequences) = joinPartitionSizeAndSequences(optPartitions, + genomicRdd) // key the RDDs and join GenericGenomicRDD[(T, X)]( InnerShuffleRegionJoin[T, X](endSequences, - partitions, + partitionSize, rdd.context).partitionAndJoin(flattenRddByRegions(), genomicRdd.flattenRddByRegions()), endSequences, @@ -489,22 +577,18 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] { * overlapped in the genomic coordinate space, and all keys from the * right RDD that did not overlap a key in the left RDD. */ - def rightOuterShuffleRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(Option[T], X), Z]](genomicRdd: GenomicRDD[X, Y], - optPartitions: Option[Int] = None)( - implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(Option[T], X), Z] = { + def rightOuterShuffleRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(Option[T], X), Z]]( + genomicRdd: GenomicRDD[X, Y], + optPartitions: Option[Int] = None)( + implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(Option[T], X), Z] = { - // did the user provide a set partition count? - // if no, take the max partition count from our rdds - val partitions = optPartitions.getOrElse(Seq(rdd.partitions.length, - genomicRdd.rdd.partitions.length).max) - - // what sequences do we wind up with at the end? - val endSequences = sequences ++ genomicRdd.sequences + val (partitionSize, endSequences) = joinPartitionSizeAndSequences(optPartitions, + genomicRdd) // key the RDDs and join GenericGenomicRDD[(Option[T], X)]( RightOuterShuffleRegionJoin[T, X](endSequences, - partitions, + partitionSize, rdd.context).partitionAndJoin(flattenRddByRegions(), genomicRdd.flattenRddByRegions()), endSequences, @@ -531,22 +615,18 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] { * overlapped in the genomic coordinate space, and all keys from the * left RDD that did not overlap a key in the right RDD. 
*/ - def leftOuterShuffleRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(T, Option[X]), Z]](genomicRdd: GenomicRDD[X, Y], - optPartitions: Option[Int] = None)( - implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(T, Option[X]), Z] = { + def leftOuterShuffleRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(T, Option[X]), Z]]( + genomicRdd: GenomicRDD[X, Y], + optPartitions: Option[Int] = None)( + implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(T, Option[X]), Z] = { - // did the user provide a set partition count? - // if no, take the max partition count from our rdds - val partitions = optPartitions.getOrElse(Seq(rdd.partitions.length, - genomicRdd.rdd.partitions.length).max) - - // what sequences do we wind up with at the end? - val endSequences = sequences ++ genomicRdd.sequences + val (partitionSize, endSequences) = joinPartitionSizeAndSequences(optPartitions, + genomicRdd) // key the RDDs and join GenericGenomicRDD[(T, Option[X])]( LeftOuterShuffleRegionJoin[T, X](endSequences, - partitions, + partitionSize, rdd.context).partitionAndJoin(flattenRddByRegions(), genomicRdd.flattenRddByRegions()), endSequences, @@ -572,22 +652,18 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] { * overlapped in the genomic coordinate space, and values that did not * overlap will be paired with a `None`. */ - def fullOuterShuffleRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(Option[T], Option[X]), Z]](genomicRdd: GenomicRDD[X, Y], - optPartitions: Option[Int] = None)( - implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(Option[T], Option[X]), Z] = { - - // did the user provide a set partition count? - // if no, take the max partition count from our rdds - val partitions = optPartitions.getOrElse(Seq(rdd.partitions.length, - genomicRdd.rdd.partitions.length).max) + def fullOuterShuffleRegionJoin[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(Option[T], Option[X]), Z]]( + genomicRdd: GenomicRDD[X, Y], + optPartitions: Option[Int] = None)( + implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(Option[T], Option[X]), Z] = { - // what sequences do we wind up with at the end? - val endSequences = sequences ++ genomicRdd.sequences + val (partitionSize, endSequences) = joinPartitionSizeAndSequences(optPartitions, + genomicRdd) // key the RDDs and join GenericGenomicRDD[(Option[T], Option[X])]( FullOuterShuffleRegionJoin[T, X](endSequences, - partitions, + partitionSize, rdd.context).partitionAndJoin(flattenRddByRegions(), genomicRdd.flattenRddByRegions()), endSequences, @@ -614,22 +690,18 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] { * overlapped in the genomic coordinate space, grouped together by * the value they overlapped in the left RDD.. */ - def shuffleRegionJoinAndGroupByLeft[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(T, Iterable[X]), Z]](genomicRdd: GenomicRDD[X, Y], - optPartitions: Option[Int] = None)( - implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(T, Iterable[X]), Z] = { - - // did the user provide a set partition count? - // if no, take the max partition count from our rdds - val partitions = optPartitions.getOrElse(Seq(rdd.partitions.length, - genomicRdd.rdd.partitions.length).max) + def shuffleRegionJoinAndGroupByLeft[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(T, Iterable[X]), Z]]( + genomicRdd: GenomicRDD[X, Y], + optPartitions: Option[Int] = None)( + implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(T, Iterable[X]), Z] = { - // what sequences do we wind up with at the end? 
- val endSequences = sequences ++ genomicRdd.sequences + val (partitionSize, endSequences) = joinPartitionSizeAndSequences(optPartitions, + genomicRdd) // key the RDDs and join GenericGenomicRDD[(T, Iterable[X])]( InnerShuffleRegionJoinAndGroupByLeft[T, X](endSequences, - partitions, + partitionSize, rdd.context).partitionAndJoin(flattenRddByRegions(), genomicRdd.flattenRddByRegions()), endSequences, @@ -658,22 +730,18 @@ trait GenomicRDD[T, U <: GenomicRDD[T, U]] { * the value they overlapped in the left RDD, and all values from the * right RDD that did not overlap an item in the left RDD. */ - def rightOuterShuffleRegionJoinAndGroupByLeft[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(Option[T], Iterable[X]), Z]](genomicRdd: GenomicRDD[X, Y], - optPartitions: Option[Int] = None)( - implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(Option[T], Iterable[X]), Z] = { + def rightOuterShuffleRegionJoinAndGroupByLeft[X, Y <: GenomicRDD[X, Y], Z <: GenomicRDD[(Option[T], Iterable[X]), Z]]( + genomicRdd: GenomicRDD[X, Y], + optPartitions: Option[Int] = None)( + implicit tTag: ClassTag[T], xTag: ClassTag[X]): GenomicRDD[(Option[T], Iterable[X]), Z] = { - // did the user provide a set partition count? - // if no, take the max partition count from our rdds - val partitions = optPartitions.getOrElse(Seq(rdd.partitions.length, - genomicRdd.rdd.partitions.length).max) - - // what sequences do we wind up with at the end? - val endSequences = sequences ++ genomicRdd.sequences + val (partitionSize, endSequences) = joinPartitionSizeAndSequences(optPartitions, + genomicRdd) // key the RDDs and join GenericGenomicRDD[(Option[T], Iterable[X])]( RightOuterShuffleRegionJoinAndGroupByLeft[T, X](endSequences, - partitions, + partitionSize, rdd.context).partitionAndJoin(flattenRddByRegions(), genomicRdd.flattenRddByRegions()), endSequences, diff --git a/src/main/scala/org/bdgenomics/adam/rdd/RegionJoin.scala b/src/main/scala/org/bdgenomics/adam/rdd/RegionJoin.scala index ffa64ed849..9a2e956367 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/RegionJoin.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/RegionJoin.scala @@ -17,11 +17,8 @@ */ package org.bdgenomics.adam.rdd -import org.bdgenomics.adam.models.{ SequenceDictionary, ReferenceRegion } +import org.bdgenomics.adam.models.ReferenceRegion import org.apache.spark.rdd.RDD -import org.apache.spark.SparkContext._ -import scala.Predef._ -import org.apache.spark.SparkContext import scala.reflect.ClassTag /** @@ -35,7 +32,7 @@ import scala.reflect.ClassTag * @tparam RU The type of data yielded by the right RDD at the output of the * join. */ -trait RegionJoin[T, U, RT, RU] { +trait RegionJoin[T, U, RT, RU] extends Serializable { /** * Performs a region join between two RDDs. 
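The shuffle joins above now hand the ShuffleRegionJoin implementations a genomic bin width rather than a raw partition count: joinPartitionSizeAndSequences divides the total length of the merged sequence dictionary by the larger of the two RDDs' partition counts (or by the caller-supplied optPartitions). A minimal, self-contained sketch of that arithmetic, using made-up contig lengths and partition counts rather than values taken from this patch:

object PartitionSizeSketch extends App {
  // Hypothetical contig lengths in bases, standing in for a merged SequenceDictionary.
  val contigLengths = Seq(248956422L, 242193529L, 198295559L)

  // Partition counts of the two RDDs being joined; the max is used when the
  // caller does not pass an explicit partition count.
  val leftPartitions = 128
  val rightPartitions = 512
  val estPartitions = Seq(leftPartitions, rightPartitions).max

  // Clamp to at least one partition, mirroring the guard in the helper.
  val partitions = math.max(estPartitions, 1)

  // Genomic span (in bases) that each partition of the shuffle join covers.
  val partitionSize = contigLengths.sum / partitions

  println(s"$partitions partitions, ~$partitionSize bases per partition")
  // 512 partitions, ~1346573 bases per partition
}

That bin width is what InnerShuffleRegionJoin, LeftOuterShuffleRegionJoin, RightOuterShuffleRegionJoin and FullOuterShuffleRegionJoin now receive as their second constructor argument in place of the old partition count.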
diff --git a/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala b/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala index 0869515de5..f63228a4dc 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/ShuffleRegionJoin.scala @@ -17,8 +17,8 @@ */ package org.bdgenomics.adam.rdd -import org.apache.spark.SparkContext._ import org.apache.spark.{ Partitioner, SparkContext } +import org.apache.spark.rdd.MetricsContext._ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.ReferenceRegion._ import org.bdgenomics.adam.models.{ SequenceDictionary, ReferenceRegion } @@ -240,7 +240,7 @@ case class RightOuterShuffleRegionJoinAndGroupByLeft[T, U](sd: SequenceDictionar protected def emptyFn(left: Iterator[((ReferenceRegion, Int), T)], right: Iterator[((ReferenceRegion, Int), U)]): Iterator[(Option[T], Iterable[U])] = { - right.map(v => (None, Iterable(v._2))) + left.map(v => (Some(v._2), Iterable.empty)) ++ right.map(v => (None, Iterable(v._2))) } } @@ -278,8 +278,9 @@ private trait SortedIntervalPartitionJoin[T, U, RT, RU] extends Iterator[(RT, RU protected def pruneCache(to: Long) private def getHits(): Unit = { + assert(!hits.hasNext) // if there is nothing more in left, then I'm done - while (left.hasNext && hits.isEmpty) { + while (left.hasNext) { // there is more in left... val nl = left.next val ((nextLeftRegion, _), nextLeft) = nl @@ -298,7 +299,11 @@ private trait SortedIntervalPartitionJoin[T, U, RT, RU] extends Iterator[(RT, RU // be improved by making cache a fancier data structure than just a list // we filter for things that overlap, where at least one side of the join has a start position // in this partition - hits = processHits(nextLeft, nextLeftRegion) + // + // also, see note "important: fun times with iterators" in this file, which explains + // that these must apparently be two lines + val newHits = processHits(nextLeft, nextLeftRegion) + hits = hits ++ newHits assert(prevLeftRegion == null || (prevLeftRegion.referenceName == nextLeftRegion.referenceName && @@ -318,6 +323,11 @@ private trait SortedIntervalPartitionJoin[T, U, RT, RU] extends Iterator[(RT, RU if (hits.isEmpty) { getHits() } + // if that fails, try advancing and pruning the cache + if (hits.isEmpty) { + advanceCache(binRegion.end) + pruneCache(binRegion.end) + } // if hits is still empty, I must really be at the end hits.hasNext } @@ -377,7 +387,11 @@ private case class SortedIntervalPartitionJoinAndGroupByLeft[T, U]( protected def postProcessHits(iter: Iterator[(T, U)], currentLeft: T): Iterator[(T, Iterable[U])] = { - Iterator((currentLeft, iter.map(_._2).toIterable)) + if (iter.hasNext) { + Iterator((currentLeft, iter.map(_._2).toIterable)) + } else { + Iterator.empty + } } } @@ -425,8 +439,40 @@ private trait SortedIntervalPartitionJoinWithVictims[T, U, RT, RU] extends Sorte } protected def pruneCache(to: Long) { + cache = cache.dropWhile(_._1.end <= to) - hits = hits ++ (victimCache.takeWhile(_._1.end <= to).map(u => postProcessPruned(u._2))) + cache = cache ++ victimCache.takeWhile(_._1.end > to) + victimCache = victimCache.dropWhile(_._1.end > to) + + // important: fun times with iterators + // + // for reasons known only to God, if you combine these two lines down to a + // single line, it causes the hits iterator to be invalidated and become + // empty. + // + // MORE: it seems like there's some funniness with the underlying scala imp'l + // of append on two iterators. 
if the second line is written as: + // + // hits = hits ++ pped + // + // the line works as expected on scala 2.10. on scala 2.11, it occasionally + // fails. oddly enough, if you write the line above and then do a duplicate + // on the hits iterator (which you then reassign to hits), it works. i.e., + // + // hits = hits ++ pped + // val (d, _) = hits.duplicate + // hits = d + // + // works on both scala 2.10 and 2.11 across all unit tests + // + // rewriting it as (pped ++ hits).toIterator seems to work all the time. + // that appends the hits iterator to a ListBuffer, and then returns an iterator + // over the list buffer. essentially, i think there's a bug in the Iterator.++ + // method in scala that occasionally causes it to return an empty iterator, but + // i'm not sure why that is + val pped = (victimCache.takeWhile(_._1.end <= to).map(u => postProcessPruned(u._2))) + hits = (pped ++ hits).toIterator + victimCache = victimCache.dropWhile(_._1.end <= to) } diff --git a/src/main/scala/org/bdgenomics/adam/rdd/TreeRegionJoin.scala b/src/main/scala/org/bdgenomics/adam/rdd/TreeRegionJoin.scala new file mode 100644 index 0000000000..f0c60b0971 --- /dev/null +++ b/src/main/scala/org/bdgenomics/adam/rdd/TreeRegionJoin.scala @@ -0,0 +1,202 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.adam.rdd + +import com.esotericsoftware.kryo.io.{ Input, Output } +import com.esotericsoftware.kryo.{ Kryo, Serializer } +import org.apache.spark.SparkContext._ +import org.apache.spark.rdd.RDD +import org.bdgenomics.adam.instrumentation.Timers._ +import org.bdgenomics.adam.models.ReferenceRegion +import org.bdgenomics.utils.intervalarray.IntervalArray +import scala.annotation.tailrec +import scala.reflect.ClassTag + +/** + * Implements a shuffle free (broadcast) region join. + * + * The broadcast values are stored in a sorted array. It was going to be an + * ensemble of interval trees, but, that didn't work out. + */ +trait TreeRegionJoin[T, U] { + + /** + * Performs an inner region join between two RDDs, and groups by the + * value on the right side of the join. + * + * @param leftRdd RDD on the left side of the join. Will be collected to the + * driver and broadcast. + * @param rightRdd RDD on the right side of the join. + * @return Returns an RDD where each element is a value from the right RDD, + * along with all values from the left RDD that it overlapped. 
+ */ + private[rdd] def runJoinAndGroupByRight( + leftRdd: RDD[(ReferenceRegion, T)], + rightRdd: RDD[(ReferenceRegion, U)])( + implicit tTag: ClassTag[T]): RDD[(Iterable[T], U)] = TreeJoin.time { + + // build the tree from the left RDD + val tree = IntervalArray(leftRdd) + + RunningMapSideJoin.time { + // broadcast this tree + val broadcastTree = leftRdd.context + .broadcast(tree) + + // map and join + rightRdd.map(kv => { + val (rr, u) = kv + + // what values keys does this overlap in the tree? + val overlappingValues = broadcastTree.value + .get(rr) + .map(_._2) + + (overlappingValues, u) + }) + } + } +} + +/** + * Implements an inner region join where the left side of the join is broadcast. + */ +case class InnerTreeRegionJoin[T, U]() extends RegionJoin[T, U, T, U] with TreeRegionJoin[T, U] { + + /** + * Performs an inner region join between two RDDs. + * + * @param baseRDD The 'left' side of the join + * @param joinedRDD The 'right' side of the join + * @param tManifest implicit type of baseRDD + * @param uManifest implicit type of joinedRDD + * @tparam T type of baseRDD + * @tparam U type of joinedRDD + * @return An RDD of pairs (x, y), where x is from baseRDD, y is from joinedRDD, and the region + * corresponding to x overlaps the region corresponding to y. + */ + def partitionAndJoin( + baseRDD: RDD[(ReferenceRegion, T)], + joinedRDD: RDD[(ReferenceRegion, U)])(implicit tManifest: ClassTag[T], + uManifest: ClassTag[U]): RDD[(T, U)] = { + runJoinAndGroupByRight(baseRDD, joinedRDD) + .flatMap(kv => { + val (leftIterable, right) = kv + leftIterable.map(left => (left, right)) + }) + } +} + +/** + * Implements a right outer region join where the left side of the join is + * broadcast. + */ +case class RightOuterTreeRegionJoin[T, U]() extends RegionJoin[T, U, Option[T], U] with TreeRegionJoin[T, U] { + + /** + * Performs a right outer region join between two RDDs. + * + * @param baseRDD The 'left' side of the join + * @param joinedRDD The 'right' side of the join + * @param tManifest implicit type of baseRDD + * @param uManifest implicit type of joinedRDD + * @tparam T type of baseRDD + * @tparam U type of joinedRDD + * @return An RDD of pairs (Option[x], y), where the optional x value is from + * baseRDD, y is from joinedRDD, and the region corresponding to x overlaps + * the region corresponding to y. If there are no keys in the baseRDD that + * overlap a given key (y) from the joinedRDD, x will be None. + */ + def partitionAndJoin( + baseRDD: RDD[(ReferenceRegion, T)], + joinedRDD: RDD[(ReferenceRegion, U)])(implicit tManifest: ClassTag[T], + uManifest: ClassTag[U]): RDD[(Option[T], U)] = { + runJoinAndGroupByRight(baseRDD, joinedRDD) + .flatMap(kv => { + val (leftIterable, right) = kv + + if (leftIterable.isEmpty) { + Iterable((None, right)) + } else { + leftIterable.map(left => (Some(left), right)) + } + }) + } +} + +/** + * Performs an inner region join, followed logically by grouping by the right + * value. This is implemented without any shuffling; the join naturally returns + * values on the left grouped by the right value. + */ +case class InnerTreeRegionJoinAndGroupByRight[T, U]() extends RegionJoin[T, U, Iterable[T], U] with TreeRegionJoin[T, U] { + + /** + * Performs an inner join between two RDDs, followed by a groupBy on the + * right object. 
+ * + * @param baseRDD The 'left' side of the join + * @param joinedRDD The 'right' side of the join + * @param tManifest implicit type of baseRDD + * @param uManifest implicit type of joinedRDD + * @tparam T type of baseRDD + * @tparam U type of joinedRDD + * @return An RDD of pairs (Iterable[x], y), where the Iterable[x] is from + * baseRDD, y is from joinedRDD, and all values in the Iterable[x] are + * aligned at regions that overlap the region corresponding to y. If the + * iterable is empty, the key-value pair is filtered out. + */ + def partitionAndJoin( + baseRDD: RDD[(ReferenceRegion, T)], + joinedRDD: RDD[(ReferenceRegion, U)])(implicit tManifest: ClassTag[T], + uManifest: ClassTag[U]): RDD[(Iterable[T], U)] = { + runJoinAndGroupByRight(baseRDD, joinedRDD) + .filter(_._1.nonEmpty) + } +} + +/** + * Performs a right outer region join, followed logically by grouping by the right + * value. This is implemented without any shuffling; the join naturally returns + * values on the left grouped by the right value. In this implementation, empty + * collections on the left side of the join are kept. + */ +case class RightOuterTreeRegionJoinAndGroupByRight[T, U]() extends RegionJoin[T, U, Iterable[T], U] with TreeRegionJoin[T, U] { + + /** + * Performs a right outer join between two RDDs, followed by a groupBy on the + * right object. + * + * @param baseRDD The 'left' side of the join + * @param joinedRDD The 'right' side of the join + * @param tManifest implicit type of baseRDD + * @param uManifest implicit type of joinedRDD + * @tparam T type of baseRDD + * @tparam U type of joinedRDD + * @return An RDD of pairs (Iterable[x], y), where the Iterable[x] is from + * baseRDD, y is from joinedRDD, and all values in the Iterable[x] are + * aligned at regions that overlap the region corresponding to y. If the + * iterable is empty, the key-value pair is NOT filtered out.
+ */ + def partitionAndJoin( + baseRDD: RDD[(ReferenceRegion, T)], + joinedRDD: RDD[(ReferenceRegion, U)])(implicit tManifest: ClassTag[T], + uManifest: ClassTag[U]): RDD[(Iterable[T], U)] = { + runJoinAndGroupByRight(baseRDD, joinedRDD) + } +} diff --git a/src/main/scala/org/bdgenomics/adam/rdd/VCFHeaderUtils.scala b/src/main/scala/org/bdgenomics/adam/rdd/VCFHeaderUtils.scala index a6ba0acf8a..204fc902f7 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/VCFHeaderUtils.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/VCFHeaderUtils.scala @@ -21,7 +21,6 @@ import htsjdk.variant.variantcontext.writer.{ Options, VariantContextWriterBuilder } -import htsjdk.variant.vcf.{ VCFHeader, VCFHeaderLine } import htsjdk.variant.vcf.VCFHeader import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{ FileSystem, Path } diff --git a/src/main/scala/org/bdgenomics/adam/rdd/contig/FlankReferenceFragments.scala b/src/main/scala/org/bdgenomics/adam/rdd/contig/FlankReferenceFragments.scala index df98a749af..10161d735d 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/contig/FlankReferenceFragments.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/contig/FlankReferenceFragments.scala @@ -17,7 +17,6 @@ */ package org.bdgenomics.adam.rdd.contig -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary } import org.bdgenomics.adam.rdd.ReferencePartitioner diff --git a/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDD.scala b/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDD.scala index d29557e6e7..6d4eec6132 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDD.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/contig/NucleotideContigFragmentRDD.scala @@ -18,7 +18,6 @@ package org.bdgenomics.adam.rdd.contig import com.google.common.base.Splitter -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.converters.FragmentConverter import org.bdgenomics.adam.models.{ diff --git a/src/main/scala/org/bdgenomics/adam/rdd/feature/CoverageRDD.scala b/src/main/scala/org/bdgenomics/adam/rdd/feature/CoverageRDD.scala index cdab4f80e7..6ac0e534d9 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/feature/CoverageRDD.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/feature/CoverageRDD.scala @@ -21,8 +21,7 @@ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.{ Coverage, ReferenceRegion, - SequenceDictionary, - SequenceRecord + SequenceDictionary } import org.bdgenomics.adam.rdd.GenomicRDD import scala.annotation.tailrec diff --git a/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureParser.scala b/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureParser.scala index 31c1c7f247..abd1e52297 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureParser.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureParser.scala @@ -18,13 +18,11 @@ package org.bdgenomics.adam.rdd.feature import htsjdk.samtools.ValidationStringency -import java.io.File import org.bdgenomics.adam.models.SequenceRecord -import org.bdgenomics.formats.avro.{ Dbxref, Feature, Strand } +import org.bdgenomics.formats.avro.Feature import org.bdgenomics.utils.misc.Logging import scala.collection.JavaConversions._ import scala.collection.mutable.ArrayBuffer -import scala.io.Source private[rdd] sealed trait FeatureParser extends Serializable with Logging { diff --git 
a/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureRDD.scala b/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureRDD.scala index dc256e7667..3a257ba848 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureRDD.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/feature/FeatureRDD.scala @@ -271,7 +271,7 @@ case class FeatureRDD(rdd: RDD[Feature], * method will always return a Seq of exactly one ReferenceRegion. */ protected def getReferenceRegions(elem: Feature): Seq[ReferenceRegion] = { - Seq(ReferenceRegion(elem)) + Seq(ReferenceRegion.unstranded(elem)) } /** diff --git a/src/main/scala/org/bdgenomics/adam/rdd/feature/Features.scala b/src/main/scala/org/bdgenomics/adam/rdd/feature/Features.scala index f44770828f..9b18848040 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/feature/Features.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/feature/Features.scala @@ -162,15 +162,15 @@ private[feature] object Features { ) // set list fields if non-empty - if (!aliases.isEmpty) f.setAliases(aliases) - if (!notes.isEmpty) f.setNotes(notes) - if (!parentIds.isEmpty) f.setParentIds(parentIds) - if (!dbxrefs.isEmpty) f.setDbxrefs(dbxrefs) - if (!ontologyTerms.isEmpty) f.setOntologyTerms(ontologyTerms) + if (aliases.nonEmpty) f.setAliases(aliases) + if (notes.nonEmpty) f.setNotes(notes) + if (parentIds.nonEmpty) f.setParentIds(parentIds) + if (dbxrefs.nonEmpty) f.setDbxrefs(dbxrefs) + if (ontologyTerms.nonEmpty) f.setOntologyTerms(ontologyTerms) // set remaining attributes if non-empty; // any duplicate keys are lost at this point, last one in wins - if (!remaining.isEmpty) f.setAttributes(remaining) + if (remaining.nonEmpty) f.setAttributes(remaining) f } diff --git a/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMBAMOutputFormat.scala b/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMBAMOutputFormat.scala index b4d3320ca6..b0380b24b3 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMBAMOutputFormat.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMBAMOutputFormat.scala @@ -17,7 +17,6 @@ */ package org.bdgenomics.adam.rdd.read -import htsjdk.samtools.SAMFileHeader import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{ OutputFormat, RecordWriter, TaskAttemptContext } import org.apache.spark.rdd.InstrumentedOutputFormat diff --git a/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMCRAMOutputFormat.scala b/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMCRAMOutputFormat.scala index 28ce20ab13..7ddda38588 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMCRAMOutputFormat.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/read/ADAMCRAMOutputFormat.scala @@ -17,7 +17,6 @@ */ package org.bdgenomics.adam.rdd.read -import htsjdk.samtools.SAMFileHeader import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{ OutputFormat, RecordWriter, TaskAttemptContext } import org.apache.spark.rdd.InstrumentedOutputFormat diff --git a/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDD.scala b/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDD.scala index f32921cd2a..0f5cd504c7 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDD.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDD.scala @@ -17,20 +17,13 @@ */ package org.bdgenomics.adam.rdd.read -import htsjdk.samtools._ -import htsjdk.samtools.util.{ - BinaryCodec, - BlockCompressedOutputStream, - BlockCompressedStreamConstants -} import htsjdk.samtools._ import htsjdk.samtools.cram.ref.ReferenceSource import 
htsjdk.samtools.util.{ BinaryCodec, BlockCompressedOutputStream } -import java.io.{ InputStream, OutputStream, StringWriter, Writer } +import java.io.{ OutputStream, StringWriter, Writer } import java.net.URI import java.nio.file.Paths -import org.apache.avro.Schema -import org.apache.hadoop.fs.{ FileSystem, Path } +import org.apache.hadoop.fs.Path import org.apache.hadoop.io.LongWritable import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.MetricsContext._ @@ -56,10 +49,8 @@ import org.bdgenomics.adam.rdd.feature.CoverageRDD import org.bdgenomics.adam.rdd.read.realignment.RealignIndels import org.bdgenomics.adam.rdd.read.recalibration.BaseQualityRecalibration import org.bdgenomics.adam.rdd.fragment.FragmentRDD -import org.bdgenomics.adam.rich.RichAlignmentRecord import org.bdgenomics.adam.util.ReferenceFile import org.bdgenomics.formats.avro._ -import org.bdgenomics.utils.misc.Logging import org.seqdoop.hadoop_bam._ import scala.collection.JavaConversions._ import scala.language.implicitConversions diff --git a/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala b/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala index e358f23ba7..4bf78a180a 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/read/MDTagging.scala @@ -17,10 +17,7 @@ */ package org.bdgenomics.adam.rdd.read -import org.bdgenomics.adam.rdd.ADAMContext._ import htsjdk.samtools.{ TextCigarCodec, ValidationStringency } -// NOTE(ryan): this is necessary for Spark <= 1.2.1. -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.{ MdTag, ReferenceRegion } import org.bdgenomics.adam.util.ReferenceFile @@ -74,7 +71,8 @@ private[read] case class MDTagging( contig <- Option(read.getContigName) if read.getReadMapped } yield { - maybeMDTagRead(read, referenceFileB.value.extract(ReferenceRegion(read))) + maybeMDTagRead(read, referenceFileB.value + .extract(ReferenceRegion.unstranded(read))) }).getOrElse({ numUnmappedReads += 1 read diff --git a/src/main/scala/org/bdgenomics/adam/rdd/read/ReferencePositionPair.scala b/src/main/scala/org/bdgenomics/adam/rdd/read/ReferencePositionPair.scala index bfc9e5ad7d..e26325f107 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/read/ReferencePositionPair.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/read/ReferencePositionPair.scala @@ -19,10 +19,8 @@ package org.bdgenomics.adam.rdd.read import com.esotericsoftware.kryo.{ Kryo, Serializer } import com.esotericsoftware.kryo.io.{ Input, Output } -import Ordering.Option import org.bdgenomics.utils.misc.Logging import org.bdgenomics.adam.instrumentation.Timers.CreateReferencePositionPair -import org.bdgenomics.adam.models.ReferenceRegion._ import org.bdgenomics.adam.models.{ ReferencePosition, ReferencePositionSerializer diff --git a/src/main/scala/org/bdgenomics/adam/rdd/read/SingleReadBucket.scala b/src/main/scala/org/bdgenomics/adam/rdd/read/SingleReadBucket.scala index 6ead0da7c2..343f52d5bb 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/read/SingleReadBucket.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/read/SingleReadBucket.scala @@ -21,7 +21,6 @@ import com.esotericsoftware.kryo.{ Kryo, Serializer } import com.esotericsoftware.kryo.io.{ Output, Input } import org.bdgenomics.utils.misc.Logging import org.apache.spark.rdd.RDD -import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.serialization.AvroSerializer import org.bdgenomics.formats.avro.{ AlignmentRecord, diff 
--git a/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala b/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala index 56edf5e5cd..183c798f1e 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTarget.scala @@ -101,7 +101,7 @@ private[realignment] object IndelRealignmentTarget { read: RichAlignmentRecord, maxIndelSize: Int): Seq[IndelRealignmentTarget] = CreateIndelRealignmentTargets.time { - val region = ReferenceRegion(read.record) + val region = ReferenceRegion.unstranded(read.record) val refId = read.record.getContigName var pos = List[ReferenceRegion]() var referencePos = read.record.getStart diff --git a/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala b/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala index 75c457608d..4000756833 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/read/realignment/RealignIndels.scala @@ -22,7 +22,6 @@ import org.bdgenomics.utils.misc.Logging import org.apache.spark.rdd.MetricsContext._ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.algorithms.consensus.{ Consensus, ConsensusGenerator, ConsensusGeneratorFromReads } -import org.bdgenomics.adam.models.ReferenceRegion._ import org.bdgenomics.adam.models.{ MdTag, ReferencePosition, ReferenceRegion } import org.bdgenomics.adam.rich.RichAlignmentRecord import org.bdgenomics.adam.rich.RichAlignmentRecord._ diff --git a/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibration.scala b/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibration.scala index ce1acc4a85..b0e5ddf90f 100644 --- a/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibration.scala +++ b/src/main/scala/org/bdgenomics/adam/rdd/read/recalibration/BaseQualityRecalibration.scala @@ -19,7 +19,6 @@ package org.bdgenomics.adam.rdd.read.recalibration import htsjdk.samtools.ValidationStringency import java.io._ -import org.apache.spark.SparkContext._ import org.bdgenomics.utils.misc.Logging import org.apache.spark.broadcast.Broadcast import org.apache.spark.rdd.RDD diff --git a/src/main/scala/org/bdgenomics/adam/rich/DecadentRead.scala b/src/main/scala/org/bdgenomics/adam/rich/DecadentRead.scala index b5ead6e373..2318429440 100644 --- a/src/main/scala/org/bdgenomics/adam/rich/DecadentRead.scala +++ b/src/main/scala/org/bdgenomics/adam/rich/DecadentRead.scala @@ -23,10 +23,8 @@ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.{ MdTag, ReferencePosition, - ReferenceRegion, QualityScore } -import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rich.RichAlignmentRecord._ import org.bdgenomics.formats.avro.AlignmentRecord diff --git a/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala b/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala index fa249a4f73..172b2251b9 100644 --- a/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala +++ b/src/main/scala/org/bdgenomics/adam/rich/RichAlignmentRecord.scala @@ -23,7 +23,6 @@ import htsjdk.samtools.{ CigarOperator, TextCigarCodec } -import java.util.regex.Pattern import org.bdgenomics.adam.models.{ Attribute, MdTag, @@ -38,11 +37,11 @@ import scala.math.max object RichAlignmentRecord { - @deprecated("Use explicit coversion 
wherever possible in new development.", + @deprecated("Use explicit conversion wherever possible in new development.", since = "0.21.0") implicit def recordToRichRecord(record: AlignmentRecord): RichAlignmentRecord = new RichAlignmentRecord(record) - @deprecated("Use explicit coversion wherever possible in new development.", + @deprecated("Use explicit conversion wherever possible in new development.", since = "0.21.0") implicit def richRecordToRecord(record: RichAlignmentRecord): AlignmentRecord = record.record } diff --git a/src/main/scala/org/bdgenomics/adam/rich/RichCigar.scala b/src/main/scala/org/bdgenomics/adam/rich/RichCigar.scala index d7bbbb5e35..04db18de2e 100644 --- a/src/main/scala/org/bdgenomics/adam/rich/RichCigar.scala +++ b/src/main/scala/org/bdgenomics/adam/rich/RichCigar.scala @@ -37,7 +37,7 @@ private[adam] case class RichCigar(cigar: Cigar) { def moveLeft(index: Int): RichCigar = { // var elements = List[CigarElement]() // deepclone instead of empty list initialization - var elements = cigar.getCigarElements.map(e => new CigarElement(e.getLength, e.getOperator)) + val elements = cigar.getCigarElements.map(e => new CigarElement(e.getLength, e.getOperator)) /** * Moves an element of a cigar left. diff --git a/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala b/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala index ba8e3246fc..b21fb91baf 100644 --- a/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala +++ b/src/main/scala/org/bdgenomics/adam/serialization/ADAMKryoRegistrator.scala @@ -113,6 +113,7 @@ class ADAMKryoRegistrator extends KryoRegistrator { kryo.register(Class.forName("org.apache.avro.Schema$EnumSchema")) kryo.register(Class.forName("org.apache.avro.Schema$Name")) kryo.register(Class.forName("org.apache.avro.Schema$LongSchema")) + kryo.register(Class.forName("org.apache.avro.generic.GenericData$Array")) // org.apache.hadoop.io kryo.register(classOf[org.apache.hadoop.io.Text]) @@ -144,6 +145,7 @@ class ADAMKryoRegistrator extends KryoRegistrator { // org.bdgenomics.adam.rdd kryo.register(classOf[org.bdgenomics.adam.rdd.GenomeBins]) + kryo.register(Class.forName("org.bdgenomics.adam.rdd.SortedIntervalPartitionJoinAndGroupByLeft$$anonfun$postProcessHits$1")) // org.bdgenomics.adam.rdd.read kryo.register(classOf[org.bdgenomics.adam.rdd.read.FlagStatMetrics]) @@ -220,6 +222,24 @@ class ADAMKryoRegistrator extends KryoRegistrator { kryo.register(classOf[org.bdgenomics.formats.avro.VariantCallingAnnotations], new AvroSerializer[org.bdgenomics.formats.avro.VariantCallingAnnotations]) + // org.bdgenomics.utils.intervalarray + // this is only exposed officially through the genomicrdd trait, thus, we'll only + // register it for said traits + kryo.register(classOf[org.bdgenomics.utils.intervalarray.IntervalArray[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.AlignmentRecord]], + new org.bdgenomics.utils.intervalarray.IntervalArraySerializer[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.AlignmentRecord](kryo)) + kryo.register(classOf[org.bdgenomics.utils.intervalarray.IntervalArray[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.Feature]], + new org.bdgenomics.utils.intervalarray.IntervalArraySerializer[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.Feature](kryo)) + kryo.register(classOf[org.bdgenomics.utils.intervalarray.IntervalArray[org.bdgenomics.adam.models.ReferenceRegion, 
org.bdgenomics.formats.avro.Fragment]], + new org.bdgenomics.utils.intervalarray.IntervalArraySerializer[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.Fragment](kryo)) + kryo.register(classOf[org.bdgenomics.utils.intervalarray.IntervalArray[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.Genotype]], + new org.bdgenomics.utils.intervalarray.IntervalArraySerializer[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.Genotype](kryo)) + kryo.register(classOf[org.bdgenomics.utils.intervalarray.IntervalArray[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.NucleotideContigFragment]], + new org.bdgenomics.utils.intervalarray.IntervalArraySerializer[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.NucleotideContigFragment](kryo)) + kryo.register(classOf[org.bdgenomics.utils.intervalarray.IntervalArray[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.Variant]], + new org.bdgenomics.utils.intervalarray.IntervalArraySerializer[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.formats.avro.Variant](kryo)) + kryo.register(classOf[org.bdgenomics.utils.intervalarray.IntervalArray[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.adam.models.VariantContext]], + new org.bdgenomics.utils.intervalarray.IntervalArraySerializer[org.bdgenomics.adam.models.ReferenceRegion, org.bdgenomics.adam.models.VariantContext](kryo)) + // org.codehaus.jackson.node kryo.register(classOf[org.codehaus.jackson.node.NullNode]) kryo.register(classOf[org.codehaus.jackson.node.BooleanNode]) @@ -258,15 +278,23 @@ class ADAMKryoRegistrator extends KryoRegistrator { kryo.register(classOf[scala.Array[String]]) kryo.register(Class.forName("scala.Tuple2$mcCC$sp")) + // scala.collection + kryo.register(Class.forName("scala.collection.Iterator$$anon$11")) + kryo.register(Class.forName("scala.collection.Iterator$$anonfun$toStream$1")) + // scala.collection.convert kryo.register(Class.forName("scala.collection.convert.Wrappers$")) // scala.collection.immutable kryo.register(classOf[scala.collection.immutable.::[_]]) kryo.register(classOf[scala.collection.immutable.Range]) + kryo.register(Class.forName("scala.collection.immutable.Stream$Cons")) + kryo.register(Class.forName("scala.collection.immutable.Stream$Empty$")) // scala.collection.mutable kryo.register(classOf[scala.collection.mutable.ArrayBuffer[_]]) + kryo.register(classOf[scala.collection.mutable.ListBuffer[_]]) + kryo.register(Class.forName("scala.collection.mutable.ListBuffer$$anon$1")) kryo.register(classOf[scala.collection.mutable.WrappedArray.ofInt]) kryo.register(classOf[scala.collection.mutable.WrappedArray.ofLong]) kryo.register(classOf[scala.collection.mutable.WrappedArray.ofByte]) diff --git a/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala b/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala index d62af65981..07fa86eb75 100644 --- a/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala +++ b/src/main/scala/org/bdgenomics/adam/util/ParquetFileTraversable.scala @@ -18,7 +18,7 @@ package org.bdgenomics.adam.util import org.apache.avro.generic.IndexedRecord -import org.apache.hadoop.fs.{ FileSystem, Path } +import org.apache.hadoop.fs.Path import org.apache.parquet.avro.AvroParquetReader import org.apache.spark.SparkContext import org.bdgenomics.utils.misc.HadoopUtil diff --git a/src/main/scala/org/bdgenomics/adam/util/ReferenceContigMap.scala 
b/src/main/scala/org/bdgenomics/adam/util/ReferenceContigMap.scala index c672ee766e..74221a70e7 100644 --- a/src/main/scala/org/bdgenomics/adam/util/ReferenceContigMap.scala +++ b/src/main/scala/org/bdgenomics/adam/util/ReferenceContigMap.scala @@ -17,7 +17,6 @@ */ package org.bdgenomics.adam.util -import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.{ ReferenceRegion, diff --git a/src/main/scala/org/bdgenomics/adam/util/ReferenceFile.scala b/src/main/scala/org/bdgenomics/adam/util/ReferenceFile.scala index aeebb531de..63b43ecf7d 100644 --- a/src/main/scala/org/bdgenomics/adam/util/ReferenceFile.scala +++ b/src/main/scala/org/bdgenomics/adam/util/ReferenceFile.scala @@ -15,7 +15,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.bdgenomics.adam.util import org.bdgenomics.adam.models.{ ReferenceRegion, SequenceDictionary } diff --git a/src/test/resources/small.1.bed b/src/test/resources/small.1.bed new file mode 100644 index 0000000000..b62e37fa40 --- /dev/null +++ b/src/test/resources/small.1.bed @@ -0,0 +1,4 @@ +1 143 26423 +1 14397230 26472788 +1 169801934 169801939 +1 240997788 240997796 \ No newline at end of file diff --git a/src/test/resources/small.1.narrowPeak b/src/test/resources/small.1.narrowPeak new file mode 100644 index 0000000000..b3338bc8d6 --- /dev/null +++ b/src/test/resources/small.1.narrowPeak @@ -0,0 +1,20 @@ +1 26472784 26472859 simread:1:26472783:false 0 + 0 -1 -1 -1 +1 240997788 240997863 simread:1:240997787:true 0 + 0 -1 -1 -1 +1 189606654 189606729 simread:1:189606653:true 0 + 0 -1 -1 -1 +1 207027739 207027814 simread:1:207027738:true 0 + 0 -1 -1 -1 +1 14397234 14397309 simread:1:14397233:false 0 + 0 -1 -1 -1 +1 240344443 240344518 simread:1:240344442:true 0 + 0 -1 -1 -1 +1 153978725 153978800 simread:1:153978724:false 0 + 0 -1 -1 -1 +1 237728410 237728485 simread:1:237728409:true 0 + 0 -1 -1 -1 +1 231911907 231911982 simread:1:231911906:false 0 + 0 -1 -1 -1 +1 50683372 50683447 simread:1:50683371:false 0 + 0 -1 -1 -1 +1 37577446 37577521 simread:1:37577445:false 0 + 0 -1 -1 -1 +1 195211966 195212041 simread:1:195211965:false 0 + 0 -1 -1 -1 +1 163841414 163841489 simread:1:163841413:false 0 + 0 -1 -1 -1 +1 101556379 101556454 simread:1:101556378:false 0 + 0 -1 -1 -1 +1 20101801 20101876 simread:1:20101800:true 0 + 0 -1 -1 -1 +1 186794284 186794359 simread:1:186794283:true 0 + 0 -1 -1 -1 +1 165341383 165341458 simread:1:165341382:true 0 + 0 -1 -1 -1 +1 5469107 5469182 simread:1:5469106:true 0 + 0 -1 -1 -1 +1 89554253 89554328 simread:1:89554252:false 0 + 0 -1 -1 -1 +1 169801934 169802009 simread:1:169801933:true 0 + 0 -1 -1 -1 diff --git a/src/test/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanSuite.scala b/src/test/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanSuite.scala index 1994e34260..7ff91afc2f 100644 --- a/src/test/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/algorithms/smithwaterman/SmithWatermanSuite.scala @@ -19,7 +19,6 @@ package org.bdgenomics.adam.algorithms.smithwaterman import org.scalatest.FunSuite import scala.math.abs -import scala.util.Random class SmithWatermanSuite extends FunSuite { diff --git a/src/test/scala/org/bdgenomics/adam/converters/AlignmentRecordConverterSuite.scala b/src/test/scala/org/bdgenomics/adam/converters/AlignmentRecordConverterSuite.scala index 4b1e7295d7..d209d52de9 100644 --- 
a/src/test/scala/org/bdgenomics/adam/converters/AlignmentRecordConverterSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/converters/AlignmentRecordConverterSuite.scala @@ -17,7 +17,7 @@ */ package org.bdgenomics.adam.converters -import htsjdk.samtools.{ SamReaderFactory, SAMRecord } +import htsjdk.samtools.SamReaderFactory import java.io.File import org.bdgenomics.adam.models.{ RecordGroupDictionary, @@ -28,7 +28,6 @@ import org.bdgenomics.adam.models.{ } import org.bdgenomics.formats.avro.{ AlignmentRecord, - Contig, Fragment } import org.scalatest.FunSuite diff --git a/src/test/scala/org/bdgenomics/adam/converters/SAMRecordConverterSuite.scala b/src/test/scala/org/bdgenomics/adam/converters/SAMRecordConverterSuite.scala index ed5ed6976f..0feebd47be 100644 --- a/src/test/scala/org/bdgenomics/adam/converters/SAMRecordConverterSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/converters/SAMRecordConverterSuite.scala @@ -20,8 +20,6 @@ package org.bdgenomics.adam.converters import htsjdk.samtools._ import java.io.File import org.scalatest.FunSuite -import org.bdgenomics.adam.models.Attribute -import org.bdgenomics.adam.rdd.ADAMContext._ import scala.collection.JavaConversions._ class SAMRecordConverterSuite extends FunSuite { diff --git a/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala b/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala index 3a28c41d67..c63029a05c 100644 --- a/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/converters/TranscriptEffectConverterSuite.scala @@ -28,7 +28,6 @@ import org.bdgenomics.formats.avro.{ } import org.mockito.Mockito import org.mockito.Mockito.when -import scala.collection.JavaConverters._ class TranscriptEffectConverterSuite extends ADAMFunSuite { final val EMPTY = "" diff --git a/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala b/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala index 3694029274..4232a96946 100644 --- a/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/converters/VariantContextConverterSuite.scala @@ -18,7 +18,6 @@ package org.bdgenomics.adam.converters import com.google.common.collect.ImmutableList -import htsjdk.samtools.SAMFileReader import htsjdk.variant.utils.SAMSequenceDictionaryExtractor import htsjdk.variant.variantcontext.{ Allele, @@ -33,7 +32,6 @@ import org.bdgenomics.adam.models.{ } import org.bdgenomics.adam.util.{ ADAMFunSuite, PhredUtils } import org.bdgenomics.formats.avro._ -import org.scalatest.FunSuite import scala.collection.JavaConversions._ class VariantContextConverterSuite extends ADAMFunSuite { diff --git a/src/test/scala/org/bdgenomics/adam/models/AlphabetSuite.scala b/src/test/scala/org/bdgenomics/adam/models/AlphabetSuite.scala index 65b6f24160..cdf6cbfdd8 100644 --- a/src/test/scala/org/bdgenomics/adam/models/AlphabetSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/models/AlphabetSuite.scala @@ -19,11 +19,6 @@ package org.bdgenomics.adam.models import org.bdgenomics.adam.util.ADAMFunSuite -/** - * Created by bryan on 4/18/15. 
- * - * Tests the alphabet class - */ class AlphabetSuite extends ADAMFunSuite { val testSymbols = Seq( diff --git a/src/test/scala/org/bdgenomics/adam/models/IndelTableSuite.scala b/src/test/scala/org/bdgenomics/adam/models/IndelTableSuite.scala index 6d87f6a696..e9d6b672d7 100644 --- a/src/test/scala/org/bdgenomics/adam/models/IndelTableSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/models/IndelTableSuite.scala @@ -19,7 +19,7 @@ package org.bdgenomics.adam.models import org.bdgenomics.adam.algorithms.consensus.Consensus import org.bdgenomics.adam.util.ADAMFunSuite -import org.bdgenomics.formats.avro.{ Contig, Variant } +import org.bdgenomics.formats.avro.Variant class IndelTableSuite extends ADAMFunSuite { diff --git a/src/test/scala/org/bdgenomics/adam/models/NonoverlappingRegionsSuite.scala b/src/test/scala/org/bdgenomics/adam/models/NonoverlappingRegionsSuite.scala index a256025a6d..3502e3dd5c 100644 --- a/src/test/scala/org/bdgenomics/adam/models/NonoverlappingRegionsSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/models/NonoverlappingRegionsSuite.scala @@ -110,9 +110,9 @@ class NonoverlappingRegionsSuite extends FunSuite { val record2 = AlignmentRecord.newBuilder(built).setStart(3L).setEnd(4L).build() val baseRecord = AlignmentRecord.newBuilder(built).setCigar("4M").setEnd(5L).build() - val baseMapping = new NonoverlappingRegions(Seq(ReferenceRegion(baseRecord))) - val regions1 = baseMapping.findOverlappingRegions(ReferenceRegion(record1)) - val regions2 = baseMapping.findOverlappingRegions(ReferenceRegion(record2)) + val baseMapping = new NonoverlappingRegions(Seq(ReferenceRegion.unstranded(baseRecord))) + val regions1 = baseMapping.findOverlappingRegions(ReferenceRegion.unstranded(record1)) + val regions2 = baseMapping.findOverlappingRegions(ReferenceRegion.unstranded(record2)) assert(regions1.size === 1) assert(regions2.size === 1) assert(regions1.head === regions2.head) diff --git a/src/test/scala/org/bdgenomics/adam/models/ProgramRecordSuite.scala b/src/test/scala/org/bdgenomics/adam/models/ProgramRecordSuite.scala index 7a593a6fee..6b2fd7e5ae 100644 --- a/src/test/scala/org/bdgenomics/adam/models/ProgramRecordSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/models/ProgramRecordSuite.scala @@ -18,7 +18,6 @@ package org.bdgenomics.adam.models import htsjdk.samtools.SAMProgramRecord -import org.bdgenomics.adam.rdd.ADAMContext._ import org.scalatest.FunSuite class ProgramRecordSuite extends FunSuite { diff --git a/src/test/scala/org/bdgenomics/adam/models/RecordGroupDictionarySuite.scala b/src/test/scala/org/bdgenomics/adam/models/RecordGroupDictionarySuite.scala index 6ac3be96c7..14e807eb4d 100644 --- a/src/test/scala/org/bdgenomics/adam/models/RecordGroupDictionarySuite.scala +++ b/src/test/scala/org/bdgenomics/adam/models/RecordGroupDictionarySuite.scala @@ -18,7 +18,6 @@ package org.bdgenomics.adam.models import htsjdk.samtools.SAMReadGroupRecord -import org.bdgenomics.formats.avro.AlignmentRecord import org.scalatest.FunSuite class RecordGroupDictionarySuite extends FunSuite { diff --git a/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala b/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala index c864cb7721..17eb5e6492 100644 --- a/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/models/ReferenceRegionSuite.scala @@ -17,8 +17,15 @@ */ package org.bdgenomics.adam.models -import org.scalatest._ +import htsjdk.variant.variantcontext.{ + Allele, + GenotypeBuilder, + 
VariantContextBuilder +} +import org.bdgenomics.adam.converters.VariantContextConverter import org.bdgenomics.formats.avro._ +import org.scalatest.FunSuite +import scala.collection.JavaConversions._ class ReferenceRegionSuite extends FunSuite { @@ -59,23 +66,37 @@ class ReferenceRegionSuite extends FunSuite { // contained assert(region("chr0", 10, 100).overlaps(region("chr0", 20, 50))) + assert(region("chr0", 10, 100).covers(region("chr0", 20, 50))) + + // different strands + assert(!ReferenceRegion("chr0", 10, 100, strand = Strand.FORWARD) + .overlaps(ReferenceRegion("chr0", 20, 50, strand = Strand.REVERSE))) + assert(ReferenceRegion("chr0", 10, 100, strand = Strand.FORWARD) + .covers(ReferenceRegion("chr0", 20, 50, strand = Strand.REVERSE))) // right side assert(region("chr0", 10, 100).overlaps(region("chr0", 50, 250))) + assert(region("chr0", 10, 100).covers(region("chr0", 50, 250))) // left side assert(region("chr0", 10, 100).overlaps(region("chr0", 5, 15))) + assert(region("chr0", 10, 100).covers(region("chr0", 5, 15))) // left edge assert(region("chr0", 10, 100).overlaps(region("chr0", 5, 11))) + assert(region("chr0", 10, 100).covers(region("chr0", 5, 11))) assert(!region("chr0", 10, 100).overlaps(region("chr0", 5, 10))) + assert(!region("chr0", 10, 100).covers(region("chr0", 5, 10))) // right edge assert(region("chr0", 10, 100).overlaps(region("chr0", 99, 200))) + assert(region("chr0", 10, 100).covers(region("chr0", 99, 200))) assert(!region("chr0", 10, 100).overlaps(region("chr0", 100, 200))) + assert(!region("chr0", 10, 100).covers(region("chr0", 100, 200))) // different sequences assert(!region("chr0", 10, 100).overlaps(region("chr1", 50, 200))) + assert(!region("chr0", 10, 100).covers(region("chr1", 50, 200))) } test("distance(: ReferenceRegion)") { @@ -126,7 +147,7 @@ class ReferenceRegionSuite extends FunSuite { val read = AlignmentRecord.newBuilder() .setReadMapped(false) .build() - ReferenceRegion(read) + ReferenceRegion.unstranded(read) } } @@ -135,10 +156,71 @@ class ReferenceRegionSuite extends FunSuite { val read = AlignmentRecord.newBuilder() .setReadMapped(true) .build() - ReferenceRegion(read) + ReferenceRegion.unstranded(read) + } + } + + test("create stranded region from unmapped read fails") { + intercept[IllegalArgumentException] { + val read = AlignmentRecord.newBuilder() + .setReadMapped(false) + .build() + ReferenceRegion.stranded(read) + } + } + + test("create stranded region from read with null alignment positions fails") { + intercept[IllegalArgumentException] { + val read = AlignmentRecord.newBuilder() + .setReadMapped(true) + .build() + ReferenceRegion.stranded(read) + } + } + + test("create stranded region from read with null alignment strand fails") { + intercept[IllegalArgumentException] { + val read = AlignmentRecord.newBuilder() + .setReadMapped(true) + .setStart(10L) + .setEnd(15L) + .setContigName("ctg") + .setReadNegativeStrand(null) + .build() + ReferenceRegion.stranded(read) } } + test("create stranded region from read on forward strand") { + val read = AlignmentRecord.newBuilder() + .setReadMapped(true) + .setStart(10L) + .setEnd(15L) + .setContigName("ctg") + .setReadNegativeStrand(false) + .build() + val rr = ReferenceRegion.stranded(read) + assert(rr.referenceName === "ctg") + assert(rr.start === 10L) + assert(rr.end === 15L) + assert(rr.strand === Strand.FORWARD) + } + + test("create stranded region from read on reverse strand") { + val read = AlignmentRecord.newBuilder() + .setReadMapped(true) + .setStart(10L) + .setEnd(15L) + 
.setContigName("ctg") + .setReadNegativeStrand(true) + .build() + val rr = ReferenceRegion.stranded(read) + assert(rr.referenceName === "ctg") + assert(rr.start === 10L) + assert(rr.end === 15L) + assert(rr.strand === Strand.REVERSE) + } + test("create region from mapped read contains read start and end") { val read = AlignmentRecord.newBuilder() .setReadMapped(true) @@ -149,9 +231,9 @@ class ReferenceRegionSuite extends FunSuite { .setContigName("chr1") .build() - assert(ReferenceRegion(read).contains(point("chr1", 1L))) - assert(ReferenceRegion(read).contains(point("chr1", 5L))) - assert(!ReferenceRegion(read).contains(point("chr1", 6L))) + assert(ReferenceRegion.unstranded(read).contains(point("chr1", 1L))) + assert(ReferenceRegion.unstranded(read).contains(point("chr1", 5L))) + assert(!ReferenceRegion.unstranded(read).contains(point("chr1", 6L))) } test("validate that adjacent regions can be merged") { @@ -205,7 +287,7 @@ class ReferenceRegionSuite extends FunSuite { .setMismatchingPositions("5") .build() - val r = ReferenceRegion(read) + val r = ReferenceRegion.unstranded(read) assert(r.referenceName === "chrM") assert(r.start === 5L) @@ -260,6 +342,9 @@ class ReferenceRegionSuite extends FunSuite { .build() val g = Genotype.newBuilder() .setVariant(v) + .setContigName("chr") + .setStart(1L) + .setEnd(3L) .build() val rrV = ReferenceRegion(v) val rrG = ReferenceRegion(g) @@ -294,6 +379,22 @@ class ReferenceRegionSuite extends FunSuite { assert(openEnded.overlaps(ReferenceRegion("myCtg", Long.MaxValue - 1L, Long.MaxValue))) } + test("can build an open ended reference region with strand") { + val openEnded = ReferenceRegion.toEnd("myCtg", 45L, strand = Strand.FORWARD) + + assert(!openEnded.overlaps(ReferenceRegion("myCtg", 44L, 45L))) + assert(!openEnded.overlaps(ReferenceRegion("myCtg", 44L, 46L))) + assert(!openEnded.overlaps(ReferenceRegion("myCtg", Long.MaxValue - 1L, Long.MaxValue))) + + assert(!openEnded.covers(ReferenceRegion("myCtg", 44L, 45L))) + assert(openEnded.covers(ReferenceRegion("myCtg", 44L, 46L))) + assert(openEnded.covers(ReferenceRegion("myCtg", Long.MaxValue - 1L, Long.MaxValue))) + + assert(!openEnded.overlaps(ReferenceRegion("myCtg", 44L, 45L, strand = Strand.FORWARD))) + assert(openEnded.overlaps(ReferenceRegion("myCtg", 44L, 46L, strand = Strand.FORWARD))) + assert(openEnded.overlaps(ReferenceRegion("myCtg", Long.MaxValue - 1L, Long.MaxValue, strand = Strand.FORWARD))) + } + test("can build a reference region with an open start position") { val openStart = ReferenceRegion.fromStart("myCtg", 45L) @@ -302,10 +403,114 @@ class ReferenceRegionSuite extends FunSuite { assert(!openStart.overlaps(ReferenceRegion("myCtg", 45L, 46L))) } + test("can build a reference region with an open start position with strand") { + val openStart = ReferenceRegion.fromStart("myCtg", 45L, strand = Strand.REVERSE) + + assert(!openStart.overlaps(ReferenceRegion("myCtg", 0L, 1L))) + assert(!openStart.overlaps(ReferenceRegion("myCtg", 44L, 46L))) + assert(!openStart.overlaps(ReferenceRegion("myCtg", 45L, 46L))) + + assert(openStart.covers(ReferenceRegion("myCtg", 0L, 1L))) + assert(openStart.covers(ReferenceRegion("myCtg", 44L, 46L))) + assert(!openStart.covers(ReferenceRegion("myCtg", 45L, 46L))) + + assert(openStart.overlaps(ReferenceRegion("myCtg", 0L, 1L, strand = Strand.REVERSE))) + assert(openStart.overlaps(ReferenceRegion("myCtg", 44L, 46L, strand = Strand.REVERSE))) + assert(!openStart.overlaps(ReferenceRegion("myCtg", 45L, 46L, strand = Strand.REVERSE))) + } + test("can build a 
reference region that covers the entirety of a contig") { val all = ReferenceRegion.all("myCtg") assert(all.overlaps(ReferenceRegion("myCtg", 0L, 1L))) assert(all.overlaps(ReferenceRegion("myCtg", Long.MaxValue - 1L, Long.MaxValue))) } + + test("can build a reference region that covers the entirety of a contig with strand") { + val all = ReferenceRegion.all("myCtg", strand = Strand.FORWARD) + + assert(!all.overlaps(ReferenceRegion("myCtg", 0L, 1L))) + assert(!all.overlaps(ReferenceRegion("myCtg", Long.MaxValue - 1L, Long.MaxValue))) + + assert(all.covers(ReferenceRegion("myCtg", 0L, 1L))) + assert(all.covers(ReferenceRegion("myCtg", Long.MaxValue - 1L, Long.MaxValue))) + + assert(all.overlaps(ReferenceRegion("myCtg", 0L, 1L, strand = Strand.FORWARD))) + assert(all.overlaps(ReferenceRegion("myCtg", Long.MaxValue - 1L, Long.MaxValue, strand = Strand.FORWARD))) + } + + test("convert a genotype and then get the reference region") { + val converter = new VariantContextConverter + val vcb = new VariantContextBuilder() + .alleles(List(Allele.create("A", true), Allele.create("T"))) + .start(1L) + .stop(1L) + .chr("1") + val vc = vcb.genotypes(GenotypeBuilder.create("NA12878", + vcb.getAlleles(), + Map.empty[String, java.lang.Object])).make() + val gts = converter.convert(vc).flatMap(_.genotypes) + assert(gts.size === 1) + val gt = gts.head + + val rr = ReferenceRegion(gt) + assert(rr.referenceName === "1") + assert(rr.start === 0L) + assert(rr.end === 1L) + } + + test("create region from feature with null alignment positions fails") { + intercept[IllegalArgumentException] { + val feature = Feature.newBuilder() + .build() + ReferenceRegion.unstranded(feature) + } + } + + test("create stranded region from feature with null alignment positions fails") { + intercept[IllegalArgumentException] { + val feature = Feature.newBuilder() + .build() + ReferenceRegion.stranded(feature) + } + } + + test("create stranded region from feature with null alignment strand fails") { + intercept[IllegalArgumentException] { + val feature = Feature.newBuilder() + .setStart(10L) + .setEnd(15L) + .setContigName("ctg") + .build() + ReferenceRegion.stranded(feature) + } + } + + test("create stranded region from feature on forward strand") { + val feature = Feature.newBuilder() + .setStart(10L) + .setEnd(15L) + .setContigName("ctg") + .setStrand(Strand.FORWARD) + .build() + val rr = ReferenceRegion.stranded(feature) + assert(rr.referenceName === "ctg") + assert(rr.start === 10L) + assert(rr.end === 15L) + assert(rr.strand === Strand.FORWARD) + } + + test("create stranded region from feature on reverse strand") { + val feature = Feature.newBuilder() + .setStart(10L) + .setEnd(15L) + .setContigName("ctg") + .setStrand(Strand.REVERSE) + .build() + val rr = ReferenceRegion.stranded(feature) + assert(rr.referenceName === "ctg") + assert(rr.start === 10L) + assert(rr.end === 15L) + assert(rr.strand === Strand.REVERSE) + } } diff --git a/src/test/scala/org/bdgenomics/adam/models/SequenceDictionarySuite.scala b/src/test/scala/org/bdgenomics/adam/models/SequenceDictionarySuite.scala index a07dc6fc79..e0a03d5399 100644 --- a/src/test/scala/org/bdgenomics/adam/models/SequenceDictionarySuite.scala +++ b/src/test/scala/org/bdgenomics/adam/models/SequenceDictionarySuite.scala @@ -19,7 +19,6 @@ package org.bdgenomics.adam.models import java.io.File import htsjdk.samtools.{ - SAMFileReader, SAMSequenceDictionary, SAMSequenceRecord } diff --git a/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala 
b/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala index c8c3ec193f..6e7be2b2e7 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/ADAMContextSuite.scala @@ -18,8 +18,6 @@ package org.bdgenomics.adam.rdd import java.io.{ File, FileNotFoundException } -import java.util.UUID -import htsjdk.samtools.DiskBasedBAMFileIndex import com.google.common.io.Files import org.apache.hadoop.fs.Path import org.apache.parquet.filter2.dsl.Dsl._ @@ -28,12 +26,10 @@ import org.apache.parquet.hadoop.metadata.CompressionCodecName import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models._ import org.bdgenomics.adam.rdd.ADAMContext._ -import org.bdgenomics.adam.rdd.read.AlignedReadRDD import org.bdgenomics.adam.util.PhredUtils._ import org.bdgenomics.adam.util.ADAMFunSuite import org.bdgenomics.formats.avro._ -import org.seqdoop.hadoop_bam.{ BAMInputFormat, CRAMInputFormat } -import scala.collection.JavaConversions._ +import org.seqdoop.hadoop_bam.CRAMInputFormat case class TestSaveArgs(var outputPath: String) extends ADAMSaveAnyArgs { var sortFastqOutput = false @@ -48,6 +44,10 @@ case class TestSaveArgs(var outputPath: String) extends ADAMSaveAnyArgs { class ADAMContextSuite extends ADAMFunSuite { + sparkTest("ctr is accessible") { + new ADAMContext(sc) + } + sparkTest("sc.loadParquet should not fail on unmapped reads") { val readsFilepath = testFile("unmapped.sam") diff --git a/src/test/scala/org/bdgenomics/adam/rdd/GenomicPositionPartitionerSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/GenomicPositionPartitionerSuite.scala index ef959184e7..7458943827 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/GenomicPositionPartitionerSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/GenomicPositionPartitionerSuite.scala @@ -17,9 +17,7 @@ */ package org.bdgenomics.adam.rdd -import org.apache.spark.SparkContext._ import org.apache.spark.RangePartitioner -import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.{ ReferencePosition, SequenceRecord, SequenceDictionary } import org.bdgenomics.adam.projections.Projection import org.bdgenomics.adam.rdd.ADAMContext._ diff --git a/src/test/scala/org/bdgenomics/adam/rdd/InnerShuffleRegionJoinSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/InnerShuffleRegionJoinSuite.scala index 88a9095700..7c700a7a1c 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/InnerShuffleRegionJoinSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/InnerShuffleRegionJoinSuite.scala @@ -51,8 +51,8 @@ class InnerShuffleRegionJoinSuite extends ADAMFunSuite { val record2 = AlignmentRecord.newBuilder(built).setStart(3L).setEnd(4L).build() val baseRecord = AlignmentRecord.newBuilder(built).setCigar("4M").setEnd(5L).build() - val baseRdd = sc.parallelize(Seq(baseRecord)).keyBy(ReferenceRegion(_)) - val recordsRdd = sc.parallelize(Seq(record1, record2)).keyBy(ReferenceRegion(_)) + val baseRdd = sc.parallelize(Seq(baseRecord)).keyBy(ReferenceRegion.unstranded(_)) + val recordsRdd = sc.parallelize(Seq(record1, record2)).keyBy(ReferenceRegion.unstranded(_)) assert( InnerShuffleRegionJoin[AlignmentRecord, AlignmentRecord](seqDict, partitionSize, sc) @@ -110,8 +110,8 @@ class InnerShuffleRegionJoinSuite extends ADAMFunSuite { val baseRecord1 = AlignmentRecord.newBuilder(builtRef1).setCigar("4M").setEnd(5L).build() val baseRecord2 = AlignmentRecord.newBuilder(builtRef2).setCigar("4M").setEnd(5L).build() - val baseRdd = sc.parallelize(Seq(baseRecord1, baseRecord2)).keyBy(ReferenceRegion(_)) - 
val recordsRdd = sc.parallelize(Seq(record1, record2, record3)).keyBy(ReferenceRegion(_)) + val baseRdd = sc.parallelize(Seq(baseRecord1, baseRecord2)).keyBy(ReferenceRegion.unstranded(_)) + val recordsRdd = sc.parallelize(Seq(record1, record2, record3)).keyBy(ReferenceRegion.unstranded(_)) assert( InnerShuffleRegionJoin[AlignmentRecord, AlignmentRecord](seqDict, partitionSize, sc) @@ -138,7 +138,7 @@ class InnerShuffleRegionJoinSuite extends ADAMFunSuite { object InnerShuffleRegionJoinSuite { def getReferenceRegion(record: AlignmentRecord): ReferenceRegion = - ReferenceRegion(record) + ReferenceRegion.unstranded(record) def merge(prev: Boolean, next: (AlignmentRecord, AlignmentRecord)): Boolean = prev && getReferenceRegion(next._1).overlaps(getReferenceRegion(next._2)) diff --git a/src/test/scala/org/bdgenomics/adam/rdd/InnerBroadcastRegionJoinSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/InnerTreeRegionJoinSuite.scala similarity index 77% rename from src/test/scala/org/bdgenomics/adam/rdd/InnerBroadcastRegionJoinSuite.scala rename to src/test/scala/org/bdgenomics/adam/rdd/InnerTreeRegionJoinSuite.scala index a5cf3fc9d2..a9439e430d 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/InnerBroadcastRegionJoinSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/InnerTreeRegionJoinSuite.scala @@ -22,7 +22,7 @@ import org.bdgenomics.adam.models.ReferenceRegion import org.bdgenomics.adam.util.ADAMFunSuite import org.bdgenomics.formats.avro.{ AlignmentRecord, Contig } -class InnerBroadcastRegionJoinSuite extends ADAMFunSuite { +class InnerTreeRegionJoinSuite extends ADAMFunSuite { sparkTest("Ensure same reference regions get passed together") { val contig = Contig.newBuilder @@ -41,24 +41,24 @@ class InnerBroadcastRegionJoinSuite extends ADAMFunSuite { val record1 = builder.build() val record2 = builder.build() - val rdd1 = sc.parallelize(Seq(record1)).keyBy(ReferenceRegion(_)) - val rdd2 = sc.parallelize(Seq(record2)).keyBy(ReferenceRegion(_)) + val rdd1 = sc.parallelize(Seq(record1)).keyBy(ReferenceRegion.unstranded(_)) + val rdd2 = sc.parallelize(Seq(record2)).keyBy(ReferenceRegion.unstranded(_)) - assert(InnerBroadcastRegionJoinSuite.getReferenceRegion(record1) === - InnerBroadcastRegionJoinSuite.getReferenceRegion(record2)) + assert(InnerTreeRegionJoinSuite.getReferenceRegion(record1) === + InnerTreeRegionJoinSuite.getReferenceRegion(record2)) - assert(InnerBroadcastRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( + assert(InnerTreeRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( rdd1, rdd2).aggregate(true)( - InnerBroadcastRegionJoinSuite.merge, - InnerBroadcastRegionJoinSuite.and)) + InnerTreeRegionJoinSuite.merge, + InnerTreeRegionJoinSuite.and)) - assert(InnerBroadcastRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( + assert(InnerTreeRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( rdd1, rdd2) .aggregate(0)( - InnerBroadcastRegionJoinSuite.count, - InnerBroadcastRegionJoinSuite.sum) === 1) + InnerTreeRegionJoinSuite.count, + InnerTreeRegionJoinSuite.sum) === 1) } sparkTest("Overlapping reference regions") { @@ -80,17 +80,17 @@ class InnerBroadcastRegionJoinSuite extends ADAMFunSuite { val record2 = AlignmentRecord.newBuilder(built).setStart(3L).setEnd(4L).build() val baseRecord = AlignmentRecord.newBuilder(built).setCigar("4M").setEnd(5L).build() - val baseRdd = sc.parallelize(Seq(baseRecord)).keyBy(ReferenceRegion(_)) - val recordsRdd = sc.parallelize(Seq(record1, record2)).keyBy(ReferenceRegion(_)) + val 
baseRdd = sc.parallelize(Seq(baseRecord)).keyBy(ReferenceRegion.unstranded(_)) + val recordsRdd = sc.parallelize(Seq(record1, record2)).keyBy(ReferenceRegion.unstranded(_)) - assert(InnerBroadcastRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( + assert(InnerTreeRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( baseRdd, recordsRdd) .aggregate(true)( - InnerBroadcastRegionJoinSuite.merge, - InnerBroadcastRegionJoinSuite.and)) + InnerTreeRegionJoinSuite.merge, + InnerTreeRegionJoinSuite.and)) - assert(InnerBroadcastRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( + assert(InnerTreeRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( baseRdd, recordsRdd).count() === 2) } @@ -129,25 +129,25 @@ class InnerBroadcastRegionJoinSuite extends ADAMFunSuite { val baseRecord1 = AlignmentRecord.newBuilder(builtRef1).setCigar("4M").setEnd(5L).build() val baseRecord2 = AlignmentRecord.newBuilder(builtRef2).setCigar("4M").setEnd(5L).build() - val baseRdd = sc.parallelize(Seq(baseRecord1, baseRecord2)).keyBy(ReferenceRegion(_)) - val recordsRdd = sc.parallelize(Seq(record1, record2, record3)).keyBy(ReferenceRegion(_)) + val baseRdd = sc.parallelize(Seq(baseRecord1, baseRecord2)).keyBy(ReferenceRegion.unstranded(_)) + val recordsRdd = sc.parallelize(Seq(record1, record2, record3)).keyBy(ReferenceRegion.unstranded(_)) - assert(InnerBroadcastRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( + assert(InnerTreeRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( baseRdd, recordsRdd) .aggregate(true)( - InnerBroadcastRegionJoinSuite.merge, - InnerBroadcastRegionJoinSuite.and)) + InnerTreeRegionJoinSuite.merge, + InnerTreeRegionJoinSuite.and)) - assert(InnerBroadcastRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( + assert(InnerTreeRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( baseRdd, recordsRdd).count() === 3) } } -object InnerBroadcastRegionJoinSuite { +object InnerTreeRegionJoinSuite { def getReferenceRegion(record: AlignmentRecord): ReferenceRegion = - ReferenceRegion(record) + ReferenceRegion.unstranded(record) def merge(prev: Boolean, next: (AlignmentRecord, AlignmentRecord)): Boolean = prev && getReferenceRegion(next._1).overlaps(getReferenceRegion(next._2)) diff --git a/src/test/scala/org/bdgenomics/adam/rdd/OuterRegionJoinSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/OuterRegionJoinSuite.scala index 5c5a826155..6791452253 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/OuterRegionJoinSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/OuterRegionJoinSuite.scala @@ -53,8 +53,8 @@ trait OuterRegionJoinSuite extends ADAMFunSuite { .setEnd(11L) .build() - val rdd1 = sc.parallelize(Seq(record1, record3)).keyBy(ReferenceRegion(_)) - val rdd2 = sc.parallelize(Seq(record2, record4)).keyBy(ReferenceRegion(_)) + val rdd1 = sc.parallelize(Seq(record1, record3)).keyBy(ReferenceRegion.unstranded(_)) + val rdd2 = sc.parallelize(Seq(record2, record4)).keyBy(ReferenceRegion.unstranded(_)) val jrdd = runJoin(rdd1, rdd2).cache() @@ -86,8 +86,8 @@ trait OuterRegionJoinSuite extends ADAMFunSuite { val baseRecord = AlignmentRecord.newBuilder(built).setCigar("4M").setEnd(5L).build() val record3 = AlignmentRecord.newBuilder(built).setStart(6L).setEnd(7L).build() - val baseRdd = sc.parallelize(Seq(baseRecord)).keyBy(ReferenceRegion(_)) - val recordsRdd = sc.parallelize(Seq(record1, record2, record3)).keyBy(ReferenceRegion(_)) + val baseRdd = 
sc.parallelize(Seq(baseRecord)).keyBy(ReferenceRegion.unstranded(_)) + val recordsRdd = sc.parallelize(Seq(record1, record2, record3)).keyBy(ReferenceRegion.unstranded(_)) val jrdd = runJoin(baseRdd, recordsRdd).cache diff --git a/src/test/scala/org/bdgenomics/adam/rdd/RightOuterBroadcastRegionJoinSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/RightOuterTreeRegionJoinSuite.scala similarity index 88% rename from src/test/scala/org/bdgenomics/adam/rdd/RightOuterBroadcastRegionJoinSuite.scala rename to src/test/scala/org/bdgenomics/adam/rdd/RightOuterTreeRegionJoinSuite.scala index 5a32005426..6cf4893387 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/RightOuterBroadcastRegionJoinSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/RightOuterTreeRegionJoinSuite.scala @@ -21,11 +21,11 @@ import org.apache.spark.rdd.RDD import org.bdgenomics.adam.models.ReferenceRegion import org.bdgenomics.formats.avro.AlignmentRecord -class RightOuterBroadcastRegionJoinSuite extends OuterRegionJoinSuite { +class RightOuterTreeRegionJoinSuite extends OuterRegionJoinSuite { def runJoin(leftRdd: RDD[(ReferenceRegion, AlignmentRecord)], rightRdd: RDD[(ReferenceRegion, AlignmentRecord)]): RDD[(Option[AlignmentRecord], AlignmentRecord)] = { - RightOuterBroadcastRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( + RightOuterTreeRegionJoin[AlignmentRecord, AlignmentRecord]().partitionAndJoin( leftRdd, rightRdd) } diff --git a/src/test/scala/org/bdgenomics/adam/rdd/TreeRegionJoinSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/TreeRegionJoinSuite.scala new file mode 100644 index 0000000000..b5ac8e1a9a --- /dev/null +++ b/src/test/scala/org/bdgenomics/adam/rdd/TreeRegionJoinSuite.scala @@ -0,0 +1,85 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.bdgenomics.adam.rdd + +import org.bdgenomics.adam.models.ReferenceRegion +import org.bdgenomics.adam.util.ADAMFunSuite +import org.bdgenomics.formats.avro.{ AlignmentRecord, Variant } +import scala.reflect.ClassTag + +private case class ConcreteTreeRegionJoin[T: ClassTag, U]() extends TreeRegionJoin[T, U] { +} + +class TreeRegionJoinSuite extends ADAMFunSuite { + + sparkTest("run a join between data on a single contig") { + + val rightRdd = sc.parallelize(Seq( + (ReferenceRegion("chr1", 10L, 20L), 0), + (ReferenceRegion("chr1", 15L, 25L), 1), + (ReferenceRegion("chr1", 30L, 50L), 2), + (ReferenceRegion("chr1", 60L, 70L), 3), + (ReferenceRegion("chr1", 90L, 100L), 4))) + .map(kv => { + val (k, v) = kv + // i have made many poor life decisions + (k, Variant.newBuilder + .setStart(v.toLong) + .build) + }) + + val leftRdd = sc.parallelize(Seq( + (ReferenceRegion("chr1", 12L, 22L), 0), + (ReferenceRegion("chr1", 20L, 35L), 1), + (ReferenceRegion("chr1", 40L, 55L), 2), + (ReferenceRegion("chr1", 75L, 85L), 3), + (ReferenceRegion("chr1", 95L, 105L), 4))) + .map(kv => { + val (k, v) = kv + // and this is but another one of them + (k, AlignmentRecord.newBuilder + .setStart(v.toLong) + .build) + }) + + val joinData = ConcreteTreeRegionJoin().runJoinAndGroupByRight(rightRdd, leftRdd) + .map(kv => { + val (k, v) = kv + (k.map(_.getStart.toInt), v.getStart.toInt) + }).collect + + assert(joinData.size === 5) + + val joinMap = joinData.filter(_._1.nonEmpty) + .map(_.swap) + .toMap + .mapValues(_.toSet) + + assert(joinMap.size === 4) + assert(joinMap(0).size === 2) + assert(joinMap(0)(0)) + assert(joinMap(0)(1)) + assert(joinMap(1).size === 2) + assert(joinMap(1)(1)) + assert(joinMap(1)(2)) + assert(joinMap(2).size === 1) + assert(joinMap(2)(2)) + assert(joinMap(4).size === 1) + assert(joinMap(4)(4)) + } +} diff --git a/src/test/scala/org/bdgenomics/adam/rdd/feature/FeatureRDDSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/feature/FeatureRDDSuite.scala index 0b995cbbdb..0868984a11 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/feature/FeatureRDDSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/feature/FeatureRDDSuite.scala @@ -538,5 +538,199 @@ class FeatureRDDSuite extends ADAMFunSuite with TypeCheckedTripleEquals { assert(coverage.rdd.count == 19) } + + sparkTest("use broadcast join to pull down features mapped to targets") { + val featuresPath = testFile("small.1.narrowPeak") + val targetsPath = testFile("small.1.bed") + + val features = sc.loadFeatures(featuresPath) + val targets = sc.loadFeatures(targetsPath) + + val jRdd = features.broadcastRegionJoin(targets) + + assert(jRdd.rdd.count === 5L) + } + + sparkTest("use right outer broadcast join to pull down features mapped to targets") { + val featuresPath = testFile("small.1.narrowPeak") + val targetsPath = testFile("small.1.bed") + + val features = sc.loadFeatures(featuresPath) + val targets = sc.loadFeatures(targetsPath) + + val jRdd = features.rightOuterBroadcastRegionJoin(targets) + + val c = jRdd.rdd.collect + assert(c.count(_._1.isEmpty) === 1) + assert(c.count(_._1.isDefined) === 5) + } + + def sd = { + sc.loadBam(testFile("small.1.sam")) + .sequences + } + + sparkTest("use shuffle join to pull down features mapped to targets") { + val featuresPath = testFile("small.1.narrowPeak") + val targetsPath = testFile("small.1.bed") + + val features = sc.loadFeatures(featuresPath) + .transform(_.repartition(1)) + .copy(sequences = sd) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val 
jRdd = features.shuffleRegionJoin(targets) + val jRdd0 = features.shuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + assert(jRdd.rdd.count === 5L) + assert(jRdd0.rdd.count === 5L) + } + + sparkTest("use right outer shuffle join to pull down features mapped to targets") { + val featuresPath = testFile("small.1.narrowPeak") + val targetsPath = testFile("small.1.bed") + + val features = sc.loadFeatures(featuresPath) + .transform(_.repartition(1)) + .copy(sequences = sd) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = features.rightOuterShuffleRegionJoin(targets) + val jRdd0 = features.rightOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._1.isEmpty) === 1) + assert(c0.count(_._1.isEmpty) === 1) + assert(c.count(_._1.isDefined) === 5) + assert(c0.count(_._1.isDefined) === 5) + } + + sparkTest("use left outer shuffle join to pull down features mapped to targets") { + val featuresPath = testFile("small.1.narrowPeak") + val targetsPath = testFile("small.1.bed") + + val features = sc.loadFeatures(featuresPath) + .transform(_.repartition(1)) + .copy(sequences = sd) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = features.leftOuterShuffleRegionJoin(targets) + val jRdd0 = features.leftOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._2.isEmpty) === 15) + assert(c0.count(_._2.isEmpty) === 15) + assert(c.count(_._2.isDefined) === 5) + assert(c0.count(_._2.isDefined) === 5) + } + + sparkTest("use full outer shuffle join to pull down features mapped to targets") { + val featuresPath = testFile("small.1.narrowPeak") + val targetsPath = testFile("small.1.bed") + + val features = sc.loadFeatures(featuresPath) + .transform(_.repartition(1)) + .copy(sequences = sd) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = features.fullOuterShuffleRegionJoin(targets) + val jRdd0 = features.fullOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(t => t._1.isEmpty && t._2.isEmpty) === 0) + assert(c0.count(t => t._1.isEmpty && t._2.isEmpty) === 0) + assert(c.count(t => t._1.isDefined && t._2.isEmpty) === 15) + assert(c0.count(t => t._1.isDefined && t._2.isEmpty) === 15) + assert(c.count(t => t._1.isEmpty && t._2.isDefined) === 1) + assert(c0.count(t => t._1.isEmpty && t._2.isDefined) === 1) + assert(c.count(t => t._1.isDefined && t._2.isDefined) === 5) + assert(c0.count(t => t._1.isDefined && t._2.isDefined) === 5) + } + + sparkTest("use shuffle join with group by to pull down features 
mapped to targets") { + val featuresPath = testFile("small.1.narrowPeak") + val targetsPath = testFile("small.1.bed") + + val features = sc.loadFeatures(featuresPath) + .transform(_.repartition(1)) + .copy(sequences = sd) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = features.shuffleRegionJoinAndGroupByLeft(targets) + val jRdd0 = features.shuffleRegionJoinAndGroupByLeft(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.size === 5) + assert(c0.size === 5) + assert(c.forall(_._2.size == 1)) + assert(c0.forall(_._2.size == 1)) + } + + sparkTest("use right outer shuffle join with group by to pull down features mapped to targets") { + val featuresPath = testFile("small.1.narrowPeak") + val targetsPath = testFile("small.1.bed") + + val features = sc.loadFeatures(featuresPath) + .transform(_.repartition(1)) + .copy(sequences = sd) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = features.rightOuterShuffleRegionJoinAndGroupByLeft(targets) + val jRdd0 = features.rightOuterShuffleRegionJoinAndGroupByLeft(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + + assert(c.count(_._1.isDefined) === 20) + assert(c0.count(_._1.isDefined) === 20) + assert(c.filter(_._1.isDefined).count(_._2.size == 1) === 5) + assert(c0.filter(_._1.isDefined).count(_._2.size == 1) === 5) + assert(c.filter(_._1.isDefined).count(_._2.isEmpty) === 15) + assert(c0.filter(_._1.isDefined).count(_._2.isEmpty) === 15) + assert(c.count(_._1.isEmpty) === 1) + assert(c0.count(_._1.isEmpty) === 1) + assert(c.filter(_._1.isEmpty).forall(_._2.size == 1)) + assert(c0.filter(_._1.isEmpty).forall(_._2.size == 1)) + } } diff --git a/src/test/scala/org/bdgenomics/adam/rdd/fragment/FragmentRDDSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/fragment/FragmentRDDSuite.scala index 28b37da642..34a3401455 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/fragment/FragmentRDDSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/fragment/FragmentRDDSuite.scala @@ -43,4 +43,186 @@ class FragmentRDDSuite extends ADAMFunSuite { val newRecords = pipedRdd.rdd.count assert(2 * records === newRecords) } + + sparkTest("use broadcast join to pull down fragments mapped to targets") { + val fragmentsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val fragments = sc.loadFragments(fragmentsPath) + val targets = sc.loadFeatures(targetsPath) + + val jRdd = fragments.broadcastRegionJoin(targets) + + assert(jRdd.rdd.count === 5) + } + + sparkTest("use right outer broadcast join to pull down fragments mapped to targets") { + val fragmentsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val fragments = sc.loadFragments(fragmentsPath) + val targets = sc.loadFeatures(targetsPath) + + val jRdd = fragments.rightOuterBroadcastRegionJoin(targets) + + val c = jRdd.rdd.collect + assert(c.count(_._1.isEmpty) === 1) + assert(c.count(_._1.isDefined) === 5) + } + + sparkTest("use shuffle join to pull down fragments mapped to targets") { + val 
fragmentsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val fragments = sc.loadFragments(fragmentsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = fragments.shuffleRegionJoin(targets) + val jRdd0 = fragments.shuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + assert(jRdd.rdd.count === 5) + assert(jRdd0.rdd.count === 5) + } + + sparkTest("use right outer shuffle join to pull down fragments mapped to targets") { + val fragmentsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val fragments = sc.loadFragments(fragmentsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = fragments.rightOuterShuffleRegionJoin(targets) + val jRdd0 = fragments.rightOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._1.isEmpty) === 1) + assert(c0.count(_._1.isEmpty) === 1) + assert(c.count(_._1.isDefined) === 5) + assert(c0.count(_._1.isDefined) === 5) + } + + sparkTest("use left outer shuffle join to pull down fragments mapped to targets") { + val fragmentsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val fragments = sc.loadFragments(fragmentsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = fragments.leftOuterShuffleRegionJoin(targets) + val jRdd0 = fragments.leftOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._2.isEmpty) === 15) + assert(c0.count(_._2.isEmpty) === 15) + assert(c.count(_._2.isDefined) === 5) + assert(c0.count(_._2.isDefined) === 5) + } + + sparkTest("use full outer shuffle join to pull down fragments mapped to targets") { + val fragmentsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val fragments = sc.loadFragments(fragmentsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = fragments.fullOuterShuffleRegionJoin(targets) + val jRdd0 = fragments.fullOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(t => t._1.isEmpty && t._2.isEmpty) === 0) + assert(c0.count(t => t._1.isEmpty && t._2.isEmpty) === 0) + assert(c.count(t => t._1.isDefined && t._2.isEmpty) === 15) + assert(c0.count(t => t._1.isDefined && t._2.isEmpty) === 15) + assert(c.count(t => t._1.isEmpty && t._2.isDefined) === 1) + assert(c0.count(t => t._1.isEmpty && t._2.isDefined) === 1) + assert(c.count(t 
=> t._1.isDefined && t._2.isDefined) === 5) + assert(c0.count(t => t._1.isDefined && t._2.isDefined) === 5) + } + + sparkTest("use shuffle join with group by to pull down fragments mapped to targets") { + val fragmentsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val fragments = sc.loadFragments(fragmentsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = fragments.shuffleRegionJoinAndGroupByLeft(targets) + val jRdd0 = fragments.shuffleRegionJoinAndGroupByLeft(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.size === 5) + assert(c0.size === 5) + assert(c.forall(_._2.size == 1)) + assert(c0.forall(_._2.size == 1)) + } + + sparkTest("use right outer shuffle join with group by to pull down fragments mapped to targets") { + val fragmentsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val fragments = sc.loadFragments(fragmentsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = fragments.rightOuterShuffleRegionJoinAndGroupByLeft(targets) + val jRdd0 = fragments.rightOuterShuffleRegionJoinAndGroupByLeft(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._1.isDefined) === 20) + assert(c0.count(_._1.isDefined) === 20) + assert(c.filter(_._1.isDefined).count(_._2.size == 1) === 5) + assert(c0.filter(_._1.isDefined).count(_._2.size == 1) === 5) + assert(c.filter(_._1.isDefined).count(_._2.isEmpty) === 15) + assert(c0.filter(_._1.isDefined).count(_._2.isEmpty) === 15) + assert(c.count(_._1.isEmpty) === 1) + assert(c0.count(_._1.isEmpty) === 1) + assert(c.filter(_._1.isEmpty).forall(_._2.size == 1)) + assert(c0.filter(_._1.isEmpty).forall(_._2.size == 1)) + } } diff --git a/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala index b5464ac819..81a97908f3 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/read/AlignmentRecordRDDSuite.scala @@ -708,4 +708,187 @@ class AlignmentRecordRDDSuite extends ADAMFunSuite { val tempBam = sc.loadBam(tempPath) assert(tempBam.rdd.count === ardd.rdd.count) } + + sparkTest("use broadcast join to pull down reads mapped to targets") { + val readsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val reads = sc.loadAlignments(readsPath) + val targets = sc.loadFeatures(targetsPath) + + val jRdd = reads.broadcastRegionJoin(targets) + + assert(jRdd.rdd.count === 5) + } + + sparkTest("use right outer broadcast join to pull down reads mapped to targets") { + val readsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val reads = sc.loadAlignments(readsPath) + val targets = sc.loadFeatures(targetsPath) + + val jRdd = reads.rightOuterBroadcastRegionJoin(targets) + + val c = jRdd.rdd.collect + assert(c.count(_._1.isEmpty) === 1) + 
assert(c.count(_._1.isDefined) === 5) + } + + sparkTest("use shuffle join to pull down reads mapped to targets") { + val readsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val reads = sc.loadAlignments(readsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = reads.shuffleRegionJoin(targets) + val jRdd0 = reads.shuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + assert(jRdd.rdd.count === 5) + assert(jRdd0.rdd.count === 5) + } + + sparkTest("use right outer shuffle join to pull down reads mapped to targets") { + val readsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val reads = sc.loadAlignments(readsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = reads.rightOuterShuffleRegionJoin(targets) + val jRdd0 = reads.rightOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._1.isEmpty) === 1) + assert(c0.count(_._1.isEmpty) === 1) + assert(c.count(_._1.isDefined) === 5) + assert(c0.count(_._1.isDefined) === 5) + } + + sparkTest("use left outer shuffle join to pull down reads mapped to targets") { + val readsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val reads = sc.loadAlignments(readsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = reads.leftOuterShuffleRegionJoin(targets) + val jRdd0 = reads.leftOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._2.isEmpty) === 15) + assert(c0.count(_._2.isEmpty) === 15) + assert(c.count(_._2.isDefined) === 5) + assert(c0.count(_._2.isDefined) === 5) + } + + sparkTest("use full outer shuffle join to pull down reads mapped to targets") { + val readsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val reads = sc.loadAlignments(readsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = reads.fullOuterShuffleRegionJoin(targets) + val jRdd0 = reads.fullOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(t => t._1.isEmpty && t._2.isEmpty) === 0) + assert(c0.count(t => t._1.isEmpty && t._2.isEmpty) === 0) + assert(c.count(t => t._1.isDefined && t._2.isEmpty) === 15) + assert(c0.count(t => t._1.isDefined && t._2.isEmpty) === 15) + assert(c.count(t => t._1.isEmpty && t._2.isDefined) === 1) + assert(c0.count(t => t._1.isEmpty && 
t._2.isDefined) === 1) + assert(c.count(t => t._1.isDefined && t._2.isDefined) === 5) + assert(c0.count(t => t._1.isDefined && t._2.isDefined) === 5) + } + + sparkTest("use shuffle join with group by to pull down reads mapped to targets") { + val readsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val reads = sc.loadAlignments(readsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = reads.shuffleRegionJoinAndGroupByLeft(targets) + val jRdd0 = reads.shuffleRegionJoinAndGroupByLeft(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.size === 5) + assert(c0.size === 5) + assert(c.forall(_._2.size == 1)) + assert(c0.forall(_._2.size == 1)) + } + + sparkTest("use right outer shuffle join with group by to pull down reads mapped to targets") { + val readsPath = testFile("small.1.sam") + val targetsPath = testFile("small.1.bed") + + val reads = sc.loadAlignments(readsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = reads.rightOuterShuffleRegionJoinAndGroupByLeft(targets) + val jRdd0 = reads.rightOuterShuffleRegionJoinAndGroupByLeft(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + + assert(c.count(_._1.isDefined) === 20) + assert(c0.count(_._1.isDefined) === 20) + assert(c.filter(_._1.isDefined).count(_._2.size == 1) === 5) + assert(c0.filter(_._1.isDefined).count(_._2.size == 1) === 5) + assert(c.filter(_._1.isDefined).count(_._2.isEmpty) === 15) + assert(c0.filter(_._1.isDefined).count(_._2.isEmpty) === 15) + assert(c.count(_._1.isEmpty) === 1) + assert(c0.count(_._1.isEmpty) === 1) + assert(c.filter(_._1.isEmpty).forall(_._2.size == 1)) + assert(c0.filter(_._1.isEmpty).forall(_._2.size == 1)) + } } diff --git a/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala index 7d5e09b4a4..4db692fdd1 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/read/MarkDuplicatesSuite.scala @@ -23,7 +23,6 @@ import org.bdgenomics.adam.models.{ RecordGroupDictionary, SequenceDictionary } -import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.util.ADAMFunSuite import org.bdgenomics.formats.avro.{ AlignmentRecord, Contig } diff --git a/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala index d085247744..cb95e99221 100644 --- a/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rdd/read/realignment/IndelRealignmentTargetSuite.scala @@ -22,7 +22,7 @@ import org.bdgenomics.adam.models.ReferenceRegion import org.bdgenomics.adam.rdd.ADAMContext._ import org.bdgenomics.adam.rich.RichAlignmentRecord import org.bdgenomics.adam.util.ADAMFunSuite -import 
org.bdgenomics.formats.avro.{ AlignmentRecord, Contig } +import org.bdgenomics.formats.avro.AlignmentRecord class IndelRealignmentTargetSuite extends ADAMFunSuite { diff --git a/src/test/scala/org/bdgenomics/adam/rdd/variant/GenotypeRDDSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/variant/GenotypeRDDSuite.scala new file mode 100644 index 0000000000..eddf8b0543 --- /dev/null +++ b/src/test/scala/org/bdgenomics/adam/rdd/variant/GenotypeRDDSuite.scala @@ -0,0 +1,208 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.bdgenomics.adam.rdd.variant + +import org.bdgenomics.adam.rdd.ADAMContext._ +import org.bdgenomics.adam.util.ADAMFunSuite + +class GenotypeRDDSuite extends ADAMFunSuite { + + // these tests will all fail until https://github.com/bigdatagenomics/adam/pull/1291 merges + ignore("use broadcast join to pull down genotypes mapped to targets") { + val genotypesPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val genotypes = sc.loadGenotypes(genotypesPath) + val targets = sc.loadFeatures(targetsPath) + + val jRdd = genotypes.broadcastRegionJoin(targets) + + assert(jRdd.rdd.count === 9L) + } + + ignore("use right outer broadcast join to pull down genotypes mapped to targets") { + val genotypesPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val genotypes = sc.loadGenotypes(genotypesPath) + val targets = sc.loadFeatures(targetsPath) + + val jRdd = genotypes.rightOuterBroadcastRegionJoin(targets) + + val c = jRdd.rdd.collect + assert(c.count(_._1.isEmpty) === 3) + assert(c.count(_._1.isDefined) === 9) + } + + ignore("use shuffle join to pull down genotypes mapped to targets") { + val genotypesPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val genotypes = sc.loadGenotypes(genotypesPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = genotypes.shuffleRegionJoin(targets) + val jRdd0 = genotypes.shuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + assert(jRdd.rdd.count === 3L) + assert(jRdd0.rdd.count === 3L) + } + + ignore("use right outer shuffle join to pull down genotypes mapped to targets") { + val genotypesPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val genotypes = sc.loadGenotypes(genotypesPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = genotypes.rightOuterShuffleRegionJoin(targets) + val jRdd0 = genotypes.rightOuterShuffleRegionJoin(targets, optPartitions = 
Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._1.isEmpty) === 3) + assert(c0.count(_._1.isEmpty) === 3) + assert(c.count(_._1.isDefined) === 9) + assert(c0.count(_._1.isDefined) === 9) + } + + ignore("use left outer shuffle join to pull down genotypes mapped to targets") { + val genotypesPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val genotypes = sc.loadGenotypes(genotypesPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = genotypes.leftOuterShuffleRegionJoin(targets) + val jRdd0 = genotypes.leftOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._2.isEmpty) === 9) + assert(c0.count(_._2.isEmpty) === 9) + assert(c.count(_._2.isDefined) === 9) + assert(c0.count(_._2.isDefined) === 9) + } + + ignore("use full outer shuffle join to pull down genotypes mapped to targets") { + val genotypesPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val genotypes = sc.loadGenotypes(genotypesPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = genotypes.fullOuterShuffleRegionJoin(targets) + val jRdd0 = genotypes.fullOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(t => t._1.isEmpty && t._2.isEmpty) === 0) + assert(c0.count(t => t._1.isEmpty && t._2.isEmpty) === 0) + assert(c.count(t => t._1.isDefined && t._2.isEmpty) === 9) + assert(c0.count(t => t._1.isDefined && t._2.isEmpty) === 9) + assert(c.count(t => t._1.isEmpty && t._2.isDefined) === 3) + assert(c0.count(t => t._1.isEmpty && t._2.isDefined) === 3) + assert(c.count(t => t._1.isDefined && t._2.isDefined) === 9) + assert(c0.count(t => t._1.isDefined && t._2.isDefined) === 9) + } + + ignore("use shuffle join with group by to pull down genotypes mapped to targets") { + val genotypesPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val genotypes = sc.loadGenotypes(genotypesPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = genotypes.shuffleRegionJoinAndGroupByLeft(targets) + val jRdd0 = genotypes.shuffleRegionJoinAndGroupByLeft(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.size === 9) + assert(c0.size === 9) + assert(c.forall(_._2.size == 1)) + assert(c0.forall(_._2.size == 1)) + } + + ignore("use right outer shuffle join with group by to pull down genotypes mapped to targets") { + val genotypesPath = testFile("small.vcf") + 
val targetsPath = testFile("small.1.bed") + + val genotypes = sc.loadGenotypes(genotypesPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = genotypes.rightOuterShuffleRegionJoinAndGroupByLeft(targets) + val jRdd0 = genotypes.rightOuterShuffleRegionJoinAndGroupByLeft(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + + assert(c.count(_._1.isDefined) === 18) + assert(c0.count(_._1.isDefined) === 18) + assert(c.filter(_._1.isDefined).count(_._2.size == 1) === 9) + assert(c0.filter(_._1.isDefined).count(_._2.size == 1) === 9) + assert(c.filter(_._1.isDefined).count(_._2.isEmpty) === 9) + assert(c0.filter(_._1.isDefined).count(_._2.isEmpty) === 9) + assert(c.count(_._1.isEmpty) === 3) + assert(c0.count(_._1.isEmpty) === 3) + assert(c.filter(_._1.isEmpty).forall(_._2.size == 1)) + assert(c0.filter(_._1.isEmpty).forall(_._2.size == 1)) + } +} diff --git a/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantRDDSuite.scala b/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantRDDSuite.scala new file mode 100644 index 0000000000..9a4ca21196 --- /dev/null +++ b/src/test/scala/org/bdgenomics/adam/rdd/variant/VariantRDDSuite.scala @@ -0,0 +1,207 @@ +/** + * Licensed to Big Data Genomics (BDG) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The BDG licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.bdgenomics.adam.rdd.variant + +import org.bdgenomics.adam.rdd.ADAMContext._ +import org.bdgenomics.adam.util.ADAMFunSuite + +class VariantRDDSuite extends ADAMFunSuite { + + sparkTest("use broadcast join to pull down variants mapped to targets") { + val variantsPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val variants = sc.loadVariants(variantsPath) + val targets = sc.loadFeatures(targetsPath) + + val jRdd = variants.broadcastRegionJoin(targets) + + assert(jRdd.rdd.count === 3L) + } + + sparkTest("use right outer broadcast join to pull down variants mapped to targets") { + val variantsPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val variants = sc.loadVariants(variantsPath) + val targets = sc.loadFeatures(targetsPath) + + val jRdd = variants.rightOuterBroadcastRegionJoin(targets) + + val c = jRdd.rdd.collect + assert(c.count(_._1.isEmpty) === 3) + assert(c.count(_._1.isDefined) === 3) + } + + sparkTest("use shuffle join to pull down variants mapped to targets") { + val variantsPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val variants = sc.loadVariants(variantsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = variants.shuffleRegionJoin(targets) + val jRdd0 = variants.shuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + assert(jRdd.rdd.count === 3L) + assert(jRdd0.rdd.count === 3L) + } + + sparkTest("use right outer shuffle join to pull down variants mapped to targets") { + val variantsPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val variants = sc.loadVariants(variantsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = variants.rightOuterShuffleRegionJoin(targets) + val jRdd0 = variants.rightOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._1.isEmpty) === 3) + assert(c0.count(_._1.isEmpty) === 3) + assert(c.count(_._1.isDefined) === 3) + assert(c0.count(_._1.isDefined) === 3) + } + + sparkTest("use left outer shuffle join to pull down variants mapped to targets") { + val variantsPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val variants = sc.loadVariants(variantsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = variants.leftOuterShuffleRegionJoin(targets) + val jRdd0 = variants.leftOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(_._2.isEmpty) === 3) + assert(c0.count(_._2.isEmpty) === 3) + assert(c.count(_._2.isDefined) === 3) + assert(c0.count(_._2.isDefined) === 3) + } + + sparkTest("use full outer shuffle join to pull down variants mapped to 
targets") { + val variantsPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val variants = sc.loadVariants(variantsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = variants.fullOuterShuffleRegionJoin(targets) + val jRdd0 = variants.fullOuterShuffleRegionJoin(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.count(t => t._1.isEmpty && t._2.isEmpty) === 0) + assert(c0.count(t => t._1.isEmpty && t._2.isEmpty) === 0) + assert(c.count(t => t._1.isDefined && t._2.isEmpty) === 3) + assert(c0.count(t => t._1.isDefined && t._2.isEmpty) === 3) + assert(c.count(t => t._1.isEmpty && t._2.isDefined) === 3) + assert(c0.count(t => t._1.isEmpty && t._2.isDefined) === 3) + assert(c.count(t => t._1.isDefined && t._2.isDefined) === 3) + assert(c0.count(t => t._1.isDefined && t._2.isDefined) === 3) + } + + sparkTest("use shuffle join with group by to pull down variants mapped to targets") { + val variantsPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val variants = sc.loadVariants(variantsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = variants.shuffleRegionJoinAndGroupByLeft(targets) + val jRdd0 = variants.shuffleRegionJoinAndGroupByLeft(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + assert(c.size === 3) + assert(c0.size === 3) + assert(c.forall(_._2.size == 1)) + assert(c0.forall(_._2.size == 1)) + } + + sparkTest("use right outer shuffle join with group by to pull down variants mapped to targets") { + val variantsPath = testFile("small.vcf") + val targetsPath = testFile("small.1.bed") + + val variants = sc.loadVariants(variantsPath) + .transform(_.repartition(1)) + val targets = sc.loadFeatures(targetsPath) + .transform(_.repartition(1)) + + val jRdd = variants.rightOuterShuffleRegionJoinAndGroupByLeft(targets) + val jRdd0 = variants.rightOuterShuffleRegionJoinAndGroupByLeft(targets, optPartitions = Some(4)) + + // we can't guarantee that we get exactly the number of partitions requested, + // we get close though + assert(jRdd.rdd.partitions.length === 1) + assert(jRdd0.rdd.partitions.length === 5) + + val c = jRdd.rdd.collect + val c0 = jRdd0.rdd.collect + + assert(c.count(_._1.isDefined) === 6) + assert(c0.count(_._1.isDefined) === 6) + assert(c.filter(_._1.isDefined).count(_._2.size == 1) === 3) + assert(c0.filter(_._1.isDefined).count(_._2.size == 1) === 3) + assert(c.filter(_._1.isDefined).count(_._2.isEmpty) === 3) + assert(c0.filter(_._1.isDefined).count(_._2.isEmpty) === 3) + assert(c.count(_._1.isEmpty) === 3) + assert(c0.count(_._1.isEmpty) === 3) + assert(c.filter(_._1.isEmpty).forall(_._2.size == 1)) + assert(c0.filter(_._1.isEmpty).forall(_._2.size == 1)) + } +} diff --git a/src/test/scala/org/bdgenomics/adam/rich/RichAlignmentRecordSuite.scala b/src/test/scala/org/bdgenomics/adam/rich/RichAlignmentRecordSuite.scala index 09765e8355..d9e5ab4174 100644 --- 
a/src/test/scala/org/bdgenomics/adam/rich/RichAlignmentRecordSuite.scala +++ b/src/test/scala/org/bdgenomics/adam/rich/RichAlignmentRecordSuite.scala @@ -21,7 +21,6 @@ import org.bdgenomics.adam.models.{ ReferencePosition, TagType, Attribute } import org.bdgenomics.adam.rich.RichAlignmentRecord._ import org.bdgenomics.formats.avro.{ AlignmentRecord, Contig } import org.scalatest.FunSuite -import org.scalatest.exceptions.TestFailedException class RichAlignmentRecordSuite extends FunSuite {