Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 18 additions & 34 deletions core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import scala.collection.JavaConverters._
import scala.language.implicitConversions
import scala.reflect.ClassTag

import com.google.common.base.Optional
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.io.compress.CompressionCodec
import org.apache.hadoop.mapred.{JobConf, OutputFormat}
Expand Down Expand Up @@ -472,9 +471,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* partition the output RDD.
*/
def leftOuterJoin[W](other: JavaPairRDD[K, W], partitioner: Partitioner)
: JavaPairRDD[K, (V, Optional[W])] = {
val joinResult = rdd.leftOuterJoin(other, partitioner)
fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))})
: JavaPairRDD[K, (V, Option[W])] = {
fromRDD(rdd.leftOuterJoin(other, partitioner))
}

/**
Expand All @@ -484,9 +482,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* partition the output RDD.
*/
def rightOuterJoin[W](other: JavaPairRDD[K, W], partitioner: Partitioner)
: JavaPairRDD[K, (Optional[V], W)] = {
val joinResult = rdd.rightOuterJoin(other, partitioner)
fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)})
: JavaPairRDD[K, (Option[V], W)] = {
fromRDD(rdd.rightOuterJoin(other, partitioner))
}

/**
Expand All @@ -498,11 +495,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* in `this` have key k. Uses the given Partitioner to partition the output RDD.
*/
def fullOuterJoin[W](other: JavaPairRDD[K, W], partitioner: Partitioner)
: JavaPairRDD[K, (Optional[V], Optional[W])] = {
val joinResult = rdd.fullOuterJoin(other, partitioner)
fromRDD(joinResult.mapValues{ case (v, w) =>
(JavaUtils.optionToOptional(v), JavaUtils.optionToOptional(w))
})
: JavaPairRDD[K, (Option[V], Option[W])] = {
fromRDD(rdd.fullOuterJoin(other, partitioner))
}

/**
Expand Down Expand Up @@ -559,9 +553,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* pair (k, (v, None)) if no elements in `other` have key k. Hash-partitions the output
* using the existing partitioner/parallelism level.
*/
def leftOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (V, Optional[W])] = {
val joinResult = rdd.leftOuterJoin(other)
fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))})
def leftOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (V, Option[W])] = {
fromRDD(rdd.leftOuterJoin(other))
}

/**
Expand All @@ -571,9 +564,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* into `numPartitions` partitions.
*/
def leftOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int)
: JavaPairRDD[K, (V, Optional[W])] = {
val joinResult = rdd.leftOuterJoin(other, numPartitions)
fromRDD(joinResult.mapValues{case (v, w) => (v, JavaUtils.optionToOptional(w))})
: JavaPairRDD[K, (V, Option[W])] = {
fromRDD(rdd.leftOuterJoin(other, numPartitions))
}

/**
Expand All @@ -582,9 +574,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* pair (k, (None, w)) if no elements in `this` have key k. Hash-partitions the resulting
* RDD using the existing partitioner/parallelism level.
*/
def rightOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (Optional[V], W)] = {
val joinResult = rdd.rightOuterJoin(other)
fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)})
def rightOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (Option[V], W)] = {
fromRDD(rdd.rightOuterJoin(other))
}

/**
Expand All @@ -594,9 +585,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* RDD into the given number of partitions.
*/
def rightOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int)
: JavaPairRDD[K, (Optional[V], W)] = {
val joinResult = rdd.rightOuterJoin(other, numPartitions)
fromRDD(joinResult.mapValues{case (v, w) => (JavaUtils.optionToOptional(v), w)})
: JavaPairRDD[K, (Option[V], W)] = {
fromRDD(rdd.rightOuterJoin(other, numPartitions))
}

/**
Expand All @@ -608,11 +598,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* in `this` have key k. Hash-partitions the resulting RDD using the existing partitioner/
* parallelism level.
*/
def fullOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (Optional[V], Optional[W])] = {
val joinResult = rdd.fullOuterJoin(other)
fromRDD(joinResult.mapValues{ case (v, w) =>
(JavaUtils.optionToOptional(v), JavaUtils.optionToOptional(w))
})
def fullOuterJoin[W](other: JavaPairRDD[K, W]): JavaPairRDD[K, (Option[V], Option[W])] = {
fromRDD(rdd.fullOuterJoin(other))
}

/**
Expand All @@ -624,11 +611,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* in `this` have key k. Hash-partitions the resulting RDD into the given number of partitions.
*/
def fullOuterJoin[W](other: JavaPairRDD[K, W], numPartitions: Int)
: JavaPairRDD[K, (Optional[V], Optional[W])] = {
val joinResult = rdd.fullOuterJoin(other, numPartitions)
fromRDD(joinResult.mapValues{ case (v, w) =>
(JavaUtils.optionToOptional(v), JavaUtils.optionToOptional(w))
})
: JavaPairRDD[K, (Option[V], Option[W])] = {
fromRDD(rdd.fullOuterJoin(other, numPartitions))
}

/**
Expand Down
2 changes: 0 additions & 2 deletions core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@

package org.apache.spark.api.java

import java.util.Comparator

import scala.language.implicitConversions
import scala.reflect.ClassTag

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ import java.util.{Comparator, List => JList, Iterator => JIterator}
import scala.collection.JavaConverters._
import scala.reflect.ClassTag

import com.google.common.base.Optional
import org.apache.hadoop.io.compress.CompressionCodec

import org.apache.spark._
Expand Down Expand Up @@ -68,7 +67,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
def getNumPartitions: Int = rdd.getNumPartitions

/** The partitioner of this RDD. */
def partitioner: Optional[Partitioner] = JavaUtils.optionToOptional(rdd.partitioner)
def partitioner: Option[Partitioner] = rdd.partitioner

/** The [[org.apache.spark.SparkContext]] that this RDD was created on. */
def context: SparkContext = rdd.context
Expand Down Expand Up @@ -550,9 +549,7 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
/**
* Gets the name of the file to which this RDD was checkpointed
*/
def getCheckpointFile(): Optional[String] = {
JavaUtils.optionToOptional(rdd.getCheckpointFile)
}
def getCheckpointFile(): Option[String] = rdd.getCheckpointFile

/** A description of this RDD and its recursive dependencies for debugging. */
def toDebugString(): String = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import scala.collection.JavaConverters._
import scala.language.implicitConversions
import scala.reflect.ClassTag

import com.google.common.base.Optional
import org.apache.hadoop.conf.Configuration
import org.apache.spark.input.PortableDataStream
import org.apache.hadoop.mapred.{InputFormat, JobConf}
Expand Down Expand Up @@ -650,7 +649,7 @@ class JavaSparkContext(val sc: SparkContext)
* or the spark.home Java property, or the SPARK_HOME environment variable
* (in that order of preference). If none of these is set, return None.
*/
def getSparkHome(): Optional[String] = JavaUtils.optionToOptional(sc.getSparkHome())
def getSparkHome(): Option[String] = sc.getSparkHome()

/**
* Add a file to be downloaded with this Spark job on every node.
Expand Down Expand Up @@ -707,7 +706,7 @@ class JavaSparkContext(val sc: SparkContext)
sc.setCheckpointDir(dir)
}

def getCheckpointDir: Optional[String] = JavaUtils.optionToOptional(sc.getCheckpointDir)
def getCheckpointDir: Option[String] = sc.getCheckpointDir

protected def checkpointFile[T](path: String): JavaRDD[T] = {
implicit val ctag: ClassTag[T] = fakeClassTag
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,11 @@
package org.apache.spark.api.java

import java.util.Map.Entry

import com.google.common.base.Optional

import java.{util => ju}

import scala.collection.mutable

private[spark] object JavaUtils {
/**
 * Converts a Scala `Option` into the corresponding Guava `Optional`:
 * `Some(value)` maps to `Optional.of(value)` and `None` maps to `Optional.absent()`.
 */
def optionToOptional[T](option: Option[T]): Optional[T] =
  option.fold(Optional.absent[T]())(value => Optional.of(value))

// Workaround for SPARK-3926 / SI-8911
def mapAsSerializableJavaMap[A, B](underlying: collection.Map[A, B]): SerializableMapWrapper[A, B]
Expand Down
42 changes: 8 additions & 34 deletions core/src/test/java/org/apache/spark/JavaAPISuite.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.*;
import java.util.concurrent.*;

import scala.Option;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple4;
Expand All @@ -35,7 +36,6 @@
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.base.Throwables;
import com.google.common.base.Optional;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
import org.apache.hadoop.io.IntWritable;
Expand Down Expand Up @@ -221,7 +221,7 @@ public int getPartition(Object key) {

JavaPairRDD<Integer, Integer> repartitioned =
rdd.repartitionAndSortWithinPartitions(partitioner);
Assert.assertTrue(repartitioned.partitioner().isPresent());
Assert.assertTrue(repartitioned.partitioner().isDefined());
Assert.assertEquals(repartitioned.partitioner().get(), partitioner);
List<List<Tuple2<Integer, Integer>>> partitions = repartitioned.glom().collect();
Assert.assertEquals(partitions.get(0),
Expand Down Expand Up @@ -491,15 +491,15 @@ public void leftOuterJoin() {
new Tuple2<>(2, 'z'),
new Tuple2<>(4, 'w')
));
List<Tuple2<Integer,Tuple2<Integer,Optional<Character>>>> joined =
List<Tuple2<Integer, Tuple2<Integer, Option<Character>>>> joined =
rdd1.leftOuterJoin(rdd2).collect();
Assert.assertEquals(5, joined.size());
Tuple2<Integer,Tuple2<Integer,Optional<Character>>> firstUnmatched =
Tuple2<Integer,Tuple2<Integer, Option<Character>>> firstUnmatched =
rdd1.leftOuterJoin(rdd2).filter(
new Function<Tuple2<Integer, Tuple2<Integer, Optional<Character>>>, Boolean>() {
new Function<Tuple2<Integer, Tuple2<Integer, Option<Character>>>, Boolean>() {
@Override
public Boolean call(Tuple2<Integer, Tuple2<Integer, Optional<Character>>> tup) {
return !tup._2()._2().isPresent();
public Boolean call(Tuple2<Integer, Tuple2<Integer, Option<Character>>> tup) {
return !tup._2()._2().isDefined();
}
}).first();
Assert.assertEquals(3, firstUnmatched._1().intValue());
Expand Down Expand Up @@ -1456,7 +1456,7 @@ public void checkpointAndRestore() {
rdd.count(); // Forces the DAG to cause a checkpoint
Assert.assertTrue(rdd.isCheckpointed());

Assert.assertTrue(rdd.getCheckpointFile().isPresent());
Assert.assertTrue(rdd.getCheckpointFile().isDefined());
JavaRDD<Integer> recovered = sc.checkpointFile(rdd.getCheckpointFile().get());
Assert.assertEquals(Arrays.asList(1, 2, 3, 4, 5), recovered.collect());
}
Expand Down Expand Up @@ -1792,32 +1792,6 @@ public void testAsyncActionErrorWrapping() throws Exception {
Assert.assertTrue(future.isDone());
}


/**
 * Regression test for SPARK-3647: maps an RDD to Guava {@code Optional} values and collects
 * the result. It has to run against the maven-built assembly, because that is the only
 * artifact in which the Guava classes have been relocated.
 */
@Test
public void testGuavaOptional() {
  // The context from setUp() is replaced by a local-cluster one so that the assembly is used.
  sc.stop();
  JavaSparkContext clusterContext =
    new JavaSparkContext("local-cluster[1,1,1024]", "JavaAPISuite");
  try {
    // Wraps each (possibly null) element in an Optional.
    Function<Integer, Optional<Integer>> wrapNullable =
      new Function<Integer, Optional<Integer>>() {
        @Override
        public Optional<Integer> call(Integer value) {
          return Optional.fromNullable(value);
        }
      };
    JavaRDD<Integer> numbers = clusterContext.parallelize(Arrays.asList(1, 2, null), 3);
    JavaRDD<Optional<Integer>> wrapped = numbers.map(wrapNullable);
    wrapped.collect();
  } finally {
    clusterContext.stop();
  }
}

static class Class1 {}
static class Class2 {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@
import java.io.Serializable;
import java.util.*;

import scala.Option;
import scala.Tuple2;

import com.google.common.collect.Iterables;
import com.google.common.base.Optional;
import com.google.common.io.Files;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
Expand Down Expand Up @@ -110,11 +110,11 @@ public void leftOuterJoin() {
new Tuple2<>(2, 'z'),
new Tuple2<>(4, 'w')
));
List<Tuple2<Integer, Tuple2<Integer, Optional<Character>>>> joined =
List<Tuple2<Integer, Tuple2<Integer, Option<Character>>>> joined =
rdd1.leftOuterJoin(rdd2).collect();
Assert.assertEquals(5, joined.size());
Tuple2<Integer, Tuple2<Integer, Optional<Character>>> firstUnmatched =
rdd1.leftOuterJoin(rdd2).filter(tup -> !tup._2()._2().isPresent()).first();
Tuple2<Integer, Tuple2<Integer, Option<Character>>> firstUnmatched =
rdd1.leftOuterJoin(rdd2).filter(tup -> !tup._2()._2().isDefined()).first();
Assert.assertEquals(3, firstUnmatched._1().intValue());
}

Expand Down