Skip to content

Commit 68cb3e2

Browse files
committed
add getNumPartitions and test
1 parent 3fdce81 commit 68cb3e2

File tree

2 files changed

+13
-1
lines changed

2 files changed

+13
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2420,6 +2420,13 @@ class Dataset[T] private[sql](
24202420
RepartitionByExpression(partitionExprs.map(_.expr), logicalPlan, numPartitions = None)
24212421
}
24222422

2423+
/**
2424+
* Returns the number of partitions of this Dataset.
2425+
* @group basic
2426+
* @since 2.2.0
2427+
*/
2428+
def getNumPartitions: Int = rdd.getNumPartitions()
2429+
24232430
/**
24242431
* Returns a new Dataset that has exactly `numPartitions` partitions.
24252432
* Similar to coalesce defined on an `RDD`, this operation results in a narrow dependency, e.g.

sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,9 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
269269
checkAnswer(
270270
testData.select('key).repartition(10).select('key),
271271
testData.select('key).collect().toSeq)
272+
checkAnswer(
273+
testData.select('key).repartition(10).select('key).getNumPartitions(),
274+
10)
272275
}
273276

274277
test("coalesce") {
@@ -1301,12 +1304,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
13011304
test("distributeBy and localSort") {
13021305
val original = testData.repartition(1)
13031306
assert(original.rdd.partitions.length == 1)
1307+
assert(original.getNumPartitions() == 1)
13041308
val df = original.repartition(5, $"key")
13051309
assert(df.rdd.partitions.length == 5)
1310+
assert(df.getNumPartitions() == 5)
13061311
checkAnswer(original.select(), df.select())
13071312

13081313
val df2 = original.repartition(10, $"key")
1309-
assert(df2.rdd.partitions.length == 10)
1314+
assert(df2.getNumPartitions() == 10)
13101315
checkAnswer(original.select(), df2.select())
13111316

13121317
// Group by the column we are distributed by. This should generate a plan with no exchange

0 commit comments

Comments (0)