@@ -725,7 +725,7 @@ test_that("objectFile() works with row serialization", {
725725 objectPath <- tempfile(pattern = " spark-test" , fileext = " .tmp" )
726726 df <- read.json(jsonPath )
727727 dfRDD <- toRDD(df )
728- saveAsObjectFile(coalesce (dfRDD , 1L ), objectPath )
728+ saveAsObjectFile(coalesceRDD (dfRDD , 1L ), objectPath )
729729 objectIn <- objectFile(sc , objectPath )
730730
731731 expect_is(objectIn , " RDD" )
@@ -1236,7 +1236,7 @@ test_that("column functions", {
12361236 c16 <- is.nan(c ) + isnan(c ) + isNaN(c )
12371237 c17 <- cov(c , c1 ) + cov(" c" , " c1" ) + covar_samp(c , c1 ) + covar_samp(" c" , " c1" )
12381238 c18 <- covar_pop(c , c1 ) + covar_pop(" c" , " c1" )
1239- c19 <- spark_partition_id()
1239+ c19 <- spark_partition_id() + coalesce( c ) + coalesce( c1 , c2 , c3 )
12401240 c20 <- to_timestamp(c ) + to_timestamp(c , " yyyy" ) + to_date(c , " yyyy" )
12411241
12421242 # Test if base::is.nan() is exposed
@@ -2491,15 +2491,18 @@ test_that("repartition by columns on DataFrame", {
24912491 (" Please, specify the number of partitions and/or a column\\ (s\\ )" , retError ), TRUE )
24922492
24932493 # repartition by column and number of partitions
2494- actual <- repartition(df , 3L , col = df $ " a" )
2494+ actual <- repartition(df , 3 , col = df $ " a" )
24952495
2496- # since we cannot access the number of partitions from dataframe, checking
2497- # that at least the dimensions are identical
2496+ # Checking that at least the dimensions are identical
24982497 expect_identical(dim(df ), dim(actual ))
2498+ expect_equal(getNumPartitions(actual ), 3L )
24992499
25002500 # repartition by number of partitions
25012501 actual <- repartition(df , 13L )
25022502 expect_identical(dim(df ), dim(actual ))
2503+ expect_equal(getNumPartitions(actual ), 13L )
2504+
2505+ expect_equal(getNumPartitions(coalesce(actual , 1L )), 1L )
25032506
25042507 # a test case with a column and dapply
25052508 schema <- structType(structField(" a" , " integer" ), structField(" avg" , " double" ))
@@ -2515,6 +2518,25 @@ test_that("repartition by columns on DataFrame", {
25152518 expect_equal(nrow(df1 ), 2 )
25162519})
25172520
2521+ test_that(" coalesce, repartition, numPartitions" , { # Checks the partition counts reported by getNumPartitions() after coalesce() and repartition() on a DataFrame
2522+ df <- as.DataFrame(cars , numPartitions = 5 ) # start from a DataFrame requested with 5 partitions
2523+ expect_equal(getNumPartitions(df ), 5 )
2524+ expect_equal(getNumPartitions(coalesce(df , 3 )), 3 ) # coalescing to fewer partitions is expected to take effect
2525+ expect_equal(getNumPartitions(coalesce(df , 6 )), 5 ) # asking for more than the current 5 is expected to leave the count at 5
2526+
2527+ df1 <- coalesce(df , 3 )
2528+ expect_equal(getNumPartitions(df1 ), 3 )
2529+ expect_equal(getNumPartitions(coalesce(df1 , 6 )), 5 ) # expected 5 rather than df1's 3 - presumably chained coalesce calls are resolved against df's 5 partitions; TODO confirm
2530+ expect_equal(getNumPartitions(coalesce(df1 , 4 )), 4 ) # likewise expected to exceed df1's reported 3
2531+ expect_equal(getNumPartitions(coalesce(df1 , 2 )), 2 )
2532+
2533+ df2 <- repartition(df1 , 10 )
2534+ expect_equal(getNumPartitions(df2 ), 10 ) # repartition() is expected to be able to raise the count
2535+ expect_equal(getNumPartitions(coalesce(df2 , 13 )), 5 ) # NOTE(review): expects 5 although df2 reports 10 - confirm this matches the targeted Spark version
2536+ expect_equal(getNumPartitions(coalesce(df2 , 7 )), 5 ) # NOTE(review): 7 is below df2's 10 yet 5 is expected - verify against engine behaviour
2537+ expect_equal(getNumPartitions(coalesce(df2 , 3 )), 3 )
2538+ })
2539+
25182540test_that(" gapply() and gapplyCollect() on a DataFrame" , {
25192541 df <- createDataFrame (
25202542 list (list (1L , 1 , " 1" , 0.1 ), list (1L , 2 , " 1" , 0.2 ), list (3L , 3 , " 3" , 0.3 )),
0 commit comments