Commit 010c460

AtlasPilotPuppy authored and pwendell committed
[SPARK-2061] Made splits deprecated in JavaRDDLike
The JIRA for the issue can be found at: https://issues.apache.org/jira/browse/SPARK-2061

Most of Spark has moved over to consistently using `partitions` instead of `splits`. We should do likewise and add a `partitions` method to JavaRDDLike and have `splits` just call that. We should also go through all cases where other APIs (e.g. Python) call `splits` and change those to use the newer API.

Author: Anant <anant.asty@gmail.com>

Closes #1062 from anantasty/SPARK-2061 and squashes the following commits:

b83ce6b [Anant] Fixed syntax issue
21f9210 [Anant] Fixed version number in deprecation string
9315b76 [Anant] made related changes to use partitions in python api
8c62dd1 [Anant] Made splits deprecated in JavaRDDLike
1 parent a678642 · commit 010c460
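The heart of the change is Scala's `@deprecated` annotation. As a minimal sketch of the deprecate-and-forward pattern the message describes (illustrative only, not the Spark source; the trait name and element type here are hypothetical):

```scala
// Hypothetical trait showing the pattern: the old name keeps
// compiling, but every call site now gets a compiler deprecation
// warning steering it toward the new name.
trait PartitionedLike {
  /** New name, matching the terminology used across the codebase. */
  def partitions: Seq[Int] = Seq(0, 1, 2)

  /** Old name: simply forwards to partitions. */
  @deprecated("Use partitions() instead.", "1.1.0")
  def splits: Seq[Int] = partitions
}
```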

4 files changed (+8, -5 lines)


core/src/main/scala/org/apache/spark/api/java/JavaRDDLike.scala (4 additions, 1 deletion)

```diff
@@ -43,8 +43,11 @@ trait JavaRDDLike[T, This <: JavaRDDLike[T, This]] extends Serializable {
 
   def rdd: RDD[T]
 
-  /** Set of partitions in this RDD. */
+  @deprecated("Use partitions() instead.", "1.1.0")
   def splits: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq)
+
+  /** Set of partitions in this RDD. */
+  def partitions: JList[Partition] = new java.util.ArrayList(rdd.partitions.toSeq)
 
   /** The [[org.apache.spark.SparkContext]] that this RDD was created on. */
   def context: SparkContext = rdd.context
```
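From the caller's side, old code keeps compiling while new code picks up the replacement. A hedged sketch of the migration, assuming Spark 1.1.0-era artifacts on the classpath (the object name and app name are made up):

```scala
import java.util.Arrays

import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext

// Hypothetical demo, not part of this commit.
object PartitionsMigrationDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setMaster("local[2]").setAppName("partitions-demo")
    val jsc  = new JavaSparkContext(conf)
    val rdd  = jsc.parallelize(Arrays.asList(1, 2, 3, 4), 2)

    // New API, preferred from Spark 1.1.0 onward.
    println(rdd.partitions.size())  // prints 2

    // Old API: returns the same list, but the compiler now
    // emits a deprecation warning at this call site.
    // println(rdd.splits.size())

    jsc.stop()
  }
}
```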

core/src/test/java/org/apache/spark/JavaAPISuite.java (1 addition, 1 deletion)

```diff
@@ -741,7 +741,7 @@ public void persist() {
   public void iterator() {
     JavaRDD<Integer> rdd = sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 2);
     TaskContext context = new TaskContext(0, 0, 0, false, new TaskMetrics());
-    Assert.assertEquals(1, rdd.iterator(rdd.splits().get(0), context).next().intValue());
+    Assert.assertEquals(1, rdd.iterator(rdd.partitions().get(0), context).next().intValue());
   }
 
   @Test
```

python/pyspark/context.py (1 addition, 1 deletion)

```diff
@@ -704,7 +704,7 @@ def runJob(self, rdd, partitionFunc, partitions = None, allowLocal = False):
         [0, 1, 16, 25]
         """
         if partitions == None:
-            partitions = range(rdd._jrdd.splits().size())
+            partitions = range(rdd._jrdd.partitions().size())
         javaPartitions = ListConverter().convert(partitions, self._gateway._gateway_client)
 
         # Implementation note: This is implemented as a mapPartitions followed
```

python/pyspark/rdd.py (2 additions, 2 deletions)

```diff
@@ -321,7 +321,7 @@ def getNumPartitions(self):
         >>> rdd.getNumPartitions()
         2
         """
-        return self._jrdd.splits().size()
+        return self._jrdd.partitions().size()
 
     def filter(self, f):
         """
@@ -922,7 +922,7 @@ def take(self, num):
         [91, 92, 93]
         """
         items = []
-        totalParts = self._jrdd.splits().size()
+        totalParts = self._jrdd.partitions().size()
         partsScanned = 0
 
         while len(items) < num and partsScanned < totalParts:
```
