From 843721b38e0a2385253053d475a855161dbc451c Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 28 Sep 2017 14:36:34 -0700
Subject: [PATCH 1/4] [SPARK-22160][SQL] Allow changing sample points per
 partition in range shuffle exchange

(cherry picked from commit 8e51ae52b6d54ed46a3441bbb83a8e93ba214410)
Signed-off-by: Reynold Xin
---
 .../main/scala/org/apache/spark/Partitioner.scala  | 13 +++++++++++--
 .../org/apache/spark/sql/internal/SQLConf.scala    | 10 ++++++++++
 .../execution/exchange/ShuffleExchangeExec.scala   |  7 ++++++-
 3 files changed, 27 insertions(+), 3 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala
index 1484f29525a4..fa397f09f651 100644
--- a/core/src/main/scala/org/apache/spark/Partitioner.scala
+++ b/core/src/main/scala/org/apache/spark/Partitioner.scala
@@ -108,9 +108,17 @@ class HashPartitioner(partitions: Int) extends Partitioner {
 class RangePartitioner[K : Ordering : ClassTag, V](
     partitions: Int,
     rdd: RDD[_ <: Product2[K, V]],
-    private var ascending: Boolean = true)
+    private var ascending: Boolean = true,
+    val samplePointsPerPartitionHint: Int = 20)
   extends Partitioner {
 
+  // A constructor declared to maintain backward compatibility for Java callers after the
+  // 4th constructor parameter samplePointsPerPartitionHint was added. See SPARK-22160.
+  // This ensures that, from a bytecode point of view, there is still a 3-arg ctor.
+  def this(partitions: Int, rdd: RDD[_ <: Product2[K, V]], ascending: Boolean) = {
+    this(partitions, rdd, ascending, samplePointsPerPartitionHint = 20)
+  }
+
   // We allow partitions = 0, which happens when sorting an empty RDD under the default settings.
   require(partitions >= 0, s"Number of partitions cannot be negative but found $partitions.")
 
@@ -122,7 +130,8 @@ class RangePartitioner[K : Ordering : ClassTag, V](
       Array.empty
     } else {
       // This is the sample size we need to have roughly balanced output partitions, capped at 1M.
-      val sampleSize = math.min(20.0 * partitions, 1e6)
+      // Cast to double to avoid overflowing ints or longs.
+      val sampleSize = math.min(samplePointsPerPartitionHint.toDouble * partitions, 1e6)
       // Assume the input partitions are roughly balanced and over-sample a little bit.
       val sampleSizePerPartition = math.ceil(3.0 * sampleSize / rdd.partitions.length).toInt
       val (numItems, sketched) = RangePartitioner.sketch(rdd.map(_._1), sampleSizePerPartition)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index d00c67248753..36be71e40843 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -907,6 +907,14 @@ object SQLConf {
     .booleanConf
     .createWithDefault(false)
 
+  val RANGE_EXCHANGE_SAMPLE_SIZE_PER_PARTITION =
+    buildConf("spark.sql.execution.rangeExchange.sampleSizePerPartition")
+      .internal()
+      .doc("Number of points to sample per partition in order to determine the range boundaries" +
+        " for range partitioning, typically used in global sorting (without limit).")
+      .intConf
+      .createWithDefault(100)
+
   val ARROW_EXECUTION_ENABLE =
     buildConf("spark.sql.execution.arrow.enable")
       .internal()
@@ -1199,6 +1207,8 @@ class SQLConf extends Serializable with Logging {
 
   def supportQuotedRegexColumnName: Boolean = getConf(SUPPORT_QUOTED_REGEX_COLUMN_NAME)
 
+  def rangeExchangeSampleSizePerPartition: Int = getConf(RANGE_EXCHANGE_SAMPLE_SIZE_PER_PARTITION)
+
   def arrowEnable: Boolean = getConf(ARROW_EXECUTION_ENABLE)
 
   def arrowMaxRecordsPerBatch: Int = getConf(ARROW_EXECUTION_MAX_RECORDS_PER_BATCH)
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala
index 11c4aa9b4acf..5a1e217082bc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/ShuffleExchangeExec.scala
@@ -30,6 +30,7 @@ import org.apache.spark.sql.catalyst.expressions.codegen.LazilyGeneratedOrdering
 import org.apache.spark.sql.catalyst.plans.physical._
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.metric.SQLMetrics
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.util.MutablePair
 
 /**
@@ -218,7 +219,11 @@ object ShuffleExchangeExec {
           iter.map(row => mutablePair.update(row.copy(), null))
         }
       implicit val ordering = new LazilyGeneratedOrdering(sortingExpressions, outputAttributes)
-      new RangePartitioner(numPartitions, rddForSampling, ascending = true)
+      new RangePartitioner(
+        numPartitions,
+        rddForSampling,
+        ascending = true,
+        samplePointsPerPartitionHint = SQLConf.get.rangeExchangeSampleSizePerPartition)
     case SinglePartition =>
       new Partitioner {
         override def numPartitions: Int = 1
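To make the new knob concrete, here is a standalone sketch of the sample-size arithmetic from the Partitioner.scala hunk above. Only the constants (the default hint of 20, the 1e6 cap, and the 3.0 over-sampling factor) come from the patch; the object and parameter names are invented for illustration:

    // Sketch of RangePartitioner's sample-size math with the configurable hint.
    object SampleSizeMath {
      def sampleSizePerPartition(
          samplePointsPerPartitionHint: Int,
          outputPartitions: Int,
          inputPartitions: Int): Int = {
        // Total sample size, capped at 1M; double arithmetic avoids overflow.
        val sampleSize = math.min(samplePointsPerPartitionHint.toDouble * outputPartitions, 1e6)
        // Over-sample 3x per input partition, assuming roughly balanced input.
        math.ceil(3.0 * sampleSize / inputPartitions).toInt
      }

      def main(args: Array[String]): Unit = {
        println(sampleSizePerPartition(20, 200, 1000))  // 12 points with the old fixed hint
        println(sampleSizePerPartition(100, 200, 1000)) // 60 points with the new SQL default
      }
    }

The 3x over-sampling is a single-pass hedge against moderately imbalanced input partitions; partitions that turn out to hold far more rows than assumed are re-sampled afterwards.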
From b46c92bf73b486ebc494b44be3c392f4bcd0a7c9 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 28 Sep 2017 15:51:04 -0700
Subject: [PATCH 2/4] Add a test

---
 .../spark/sql/ConfigBehaviorSuite.scala            | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala
new file mode 100644
index 000000000000..7156520a2171
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import org.apache.commons.math3.stat.inference.ChiSquareTest
+
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSQLContext
+
+
+class ConfigBehaviorSuite extends QueryTest with SharedSQLContext {
+
+  import testImplicits._
+
+  test("SPARK-22160 spark.sql.execution.rangeExchange.sampleSizePerPartition") {
+    // In this test, we run a sort and compute the histogram of partition sizes post shuffle.
+    // With a high sample count, the partition sizes should be more evenly distributed and
+    // have a low chi-sq test value.
+
+    val numPartitions = 4
+
+    def computeChiSquareTest(): Double = {
+      val n = 10000
+      // Trigger a sort
+      val data = spark.range(0, n, 1, 1).sort('id)
+        .selectExpr("SPARK_PARTITION_ID() pid", "id").as[(Int, Long)].collect()
+
+      // Compute histogram for the number of records per partition post sort
+      val dist = data.groupBy(_._1).map(_._2.length.toLong).toArray
+      assert(dist.length == numPartitions)
+
+      new ChiSquareTest().chiSquare(
+        Array.fill(numPartitions) { n.toDouble / numPartitions },
+        dist)
+    }
+
+    withSQLConf(SQLConf.SHUFFLE_PARTITIONS.key -> numPartitions.toString) {
+      // The default chi-sq value should be low
+      assert(computeChiSquareTest() < 100)
+
+      withSQLConf(SQLConf.RANGE_EXCHANGE_SAMPLE_SIZE_PER_PARTITION.key -> "1") {
+        // If we only sample one point per partition, the range boundaries will be pretty bad
+        // and the chi-sq value will be very high.
+        assert(computeChiSquareTest() > 1000)
+      }
+    }
+  }
+
+}
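The thresholds the test asserts against are easier to read when the chi-squared statistic is computed standalone. In this sketch the two histograms are invented numbers (one near-uniform, one skewed); the ChiSquareTest class and its chiSquare(expected, observed) call are the same commons-math3 API the suite imports:

    import org.apache.commons.math3.stat.inference.ChiSquareTest

    object ChiSqDemo {
      def main(args: Array[String]): Unit = {
        val n = 10000
        val numPartitions = 4
        // With perfect range boundaries, every partition gets n / numPartitions rows.
        val expected = Array.fill(numPartitions)(n.toDouble / numPartitions)

        val balanced = Array(2510L, 2492L, 2503L, 2495L) // good boundaries
        val skewed   = Array(5200L, 1400L, 2100L, 1300L) // poor boundaries

        val chiSq = new ChiSquareTest()
        println(chiSq.chiSquare(expected, balanced)) // ~0.08, well under the < 100 bound
        println(chiSq.chiSquare(expected, skewed))   // 4040.0, well over the > 1000 bound
      }
    }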
From df27868d4548c903af62866485aa03788f442013 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 28 Sep 2017 15:55:41 -0700
Subject: [PATCH 3/4] Add a precondition check

---
 core/src/main/scala/org/apache/spark/Partitioner.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/core/src/main/scala/org/apache/spark/Partitioner.scala b/core/src/main/scala/org/apache/spark/Partitioner.scala
index fa397f09f651..debbd8d7c26c 100644
--- a/core/src/main/scala/org/apache/spark/Partitioner.scala
+++ b/core/src/main/scala/org/apache/spark/Partitioner.scala
@@ -121,6 +121,8 @@ class RangePartitioner[K : Ordering : ClassTag, V](
 
   // We allow partitions = 0, which happens when sorting an empty RDD under the default settings.
   require(partitions >= 0, s"Number of partitions cannot be negative but found $partitions.")
+  require(samplePointsPerPartitionHint > 0,
+    s"Sample points per partition must be greater than 0 but found $samplePointsPerPartitionHint")
 
   private var ordering = implicitly[Ordering[K]]
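The new require makes an invalid hint fail at construction time instead of surfacing later as a zero sample size. A minimal sketch of the same precondition pattern in plain Scala; Hinted is a stand-in class for illustration, not Spark code:

    object RequireDemo {
      class Hinted(val samplePointsPerPartitionHint: Int) {
        // Mirrors the precondition added in the patch above.
        require(samplePointsPerPartitionHint > 0,
          s"Sample points per partition must be greater than 0 but found $samplePointsPerPartitionHint")
      }

      def main(args: Array[String]): Unit = {
        new Hinted(20) // fine
        try {
          new Hinted(0) // violates the precondition
        } catch {
          case e: IllegalArgumentException =>
            // requirement failed: Sample points per partition must be greater
            // than 0 but found 0
            println(e.getMessage)
        }
      }
    }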
From 99c5bc6226b395aeee79167180bfd92a349e45d3 Mon Sep 17 00:00:00 2001
From: Reynold Xin
Date: Thu, 28 Sep 2017 17:12:20 -0700
Subject: [PATCH 4/4] Add a comment about the test being deterministic

---
 .../test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala
index 7156520a2171..2c1e5db5fd9b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/ConfigBehaviorSuite.scala
@@ -31,6 +31,8 @@ class ConfigBehaviorSuite extends QueryTest with SharedSQLContext {
     // In this test, we run a sort and compute the histogram of partition sizes post shuffle.
     // With a high sample count, the partition sizes should be more evenly distributed and
     // have a low chi-sq test value.
+    // Also, the whole code path for range partitioning as implemented should be deterministic
+    // (it uses the partition id as the seed), so this test shouldn't be flaky.
 
     val numPartitions = 4
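On the determinism the comment refers to: RangePartitioner.sketch seeds each partition's reservoir sample as a pure function of the RDD id and the partition index (byteswap32(idx ^ (rdd.id << 16)) in Partitioner.scala, per this reading of the surrounding code). A standalone sketch of that scheme; the rddId constant is illustrative, whereas the real value comes from rdd.id:

    import scala.util.Random
    import scala.util.hashing.byteswap32

    object DeterministicSeedDemo {
      // The seed depends only on (rddId, partitionIndex), so re-running the
      // sampling step reproduces the same range boundaries.
      def seedFor(rddId: Int, partitionIndex: Int): Int =
        byteswap32(partitionIndex ^ (rddId << 16))

      def main(args: Array[String]): Unit = {
        val rddId = 42
        assert(new Random(seedFor(rddId, 0)).nextInt() ==
          new Random(seedFor(rddId, 0)).nextInt())     // stable across runs
        assert(seedFor(rddId, 0) != seedFor(rddId, 1)) // distinct per partition
        println("deterministic seeding holds")
      }
    }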