Skip to content

Commit 9b015e9

Browse files
committed
Added a note, shuffle is required for intersection.
1 parent 1fea813 commit 9b015e9

File tree

4 files changed

+10
-4
lines changed

4 files changed

+10
-4
lines changed

core/src/main/scala/org/apache/spark/api/java/JavaDoubleRDD.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -143,6 +143,8 @@ class JavaDoubleRDD(val srdd: RDD[scala.Double]) extends JavaRDDLike[JDouble, Ja
143143
/**
144144
* Return the intersection of this RDD and another one. The output will not contain any duplicate
145145
* elements, even if the input RDDs did.
146+
*
147+
* Note that this method performs a shuffle internally.
146148
*/
147149
def intersection(other: JavaDoubleRDD): JavaDoubleRDD = fromRDD(srdd.intersection(other.srdd))
148150

core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -129,6 +129,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
129129
/**
130130
* Return the intersection of this RDD and another one. The output will not contain any duplicate
131131
* elements, even if the input RDDs did.
132+
*
133+
* Note that this method performs a shuffle internally.
132134
*/
133135
def intersection(other: JavaPairRDD[K, V]): JavaPairRDD[K, V] =
134136
new JavaPairRDD[K, V](rdd.intersection(other.rdd))

core/src/main/scala/org/apache/spark/api/java/JavaRDD.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -110,6 +110,8 @@ class JavaRDD[T](val rdd: RDD[T])(implicit val classTag: ClassTag[T])
110110
/**
111111
* Return the intersection of this RDD and another one. The output will not contain any duplicate
112112
* elements, even if the input RDDs did.
113+
*
114+
* Note that this method performs a shuffle internally.
113115
*/
114116
def intersection(other: JavaRDD[T]): JavaRDD[T] = wrapRDD(rdd.intersection(other.rdd))
115117

python/pyspark/rdd.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -321,10 +321,10 @@ def union(self, other):
321321

322322
def intersection(self, other):
323323
"""
324-
Return the intersection of this RDD and another one.
325-
326-
Note: The output will not contain any duplicate elements, even if the
327-
input RDDs did.
324+
Return the intersection of this RDD and another one. The output will not
325+
contain any duplicate elements, even if the input RDDs did.
326+
327+
Note that this method performs a shuffle internally.
328328
329329
>>> rdd1 = sc.parallelize([1, 10, 2, 3, 4, 5])
330330
>>> rdd2 = sc.parallelize([1, 6, 2, 3, 7, 8])

0 commit comments

Comments
 (0)