Skip to content

Commit 734b144

Browse files
aray authored and srowen committed
[SPARK-21330][SQL] Bad partitioning does not allow to read a JDBC table with extreme values on the partition column
## What changes were proposed in this pull request?

An overflow of the difference of bounds on the partitioning column leads to no data being read. This patch checks for this overflow.

## How was this patch tested?

New unit test.

Author: Andrew Ray <ray.andrew@gmail.com>

Closes #18800 from aray/SPARK-21330.

(cherry picked from commit 25826c7)

Signed-off-by: Sean Owen <sowen@cloudera.com>
1 parent d93e45b commit 734b144

File tree

2 files changed

+17
-1
lines changed

2 files changed

+17
-1
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRelation.scala

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -64,7 +64,8 @@ private[sql] object JDBCRelation extends Logging {
6464
s"bound. Lower bound: $lowerBound; Upper bound: $upperBound")
6565

6666
val numPartitions =
67-
if ((upperBound - lowerBound) >= partitioning.numPartitions) {
67+
if ((upperBound - lowerBound) >= partitioning.numPartitions || /* check for overflow */
68+
(upperBound - lowerBound) < 0) {
6869
partitioning.numPartitions
6970
} else {
7071
logWarning("The number of partitions is reduced because the specified number of " +

sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,15 @@ class JDBCSuite extends SparkFunSuite
9595
| partitionColumn 'THEID', lowerBound '1', upperBound '4', numPartitions '3')
9696
""".stripMargin.replaceAll("\n", " "))
9797

98+
sql(
99+
s"""
100+
|CREATE OR REPLACE TEMPORARY VIEW partsoverflow
101+
|USING org.apache.spark.sql.jdbc
102+
|OPTIONS (url '$url', dbtable 'TEST.PEOPLE', user 'testUser', password 'testPass',
103+
| partitionColumn 'THEID', lowerBound '-9223372036854775808',
104+
| upperBound '9223372036854775807', numPartitions '3')
105+
""".stripMargin.replaceAll("\n", " "))
106+
98107
conn.prepareStatement("create table test.inttypes (a INT, b BOOLEAN, c TINYINT, "
99108
+ "d SMALLINT, e BIGINT)").executeUpdate()
100109
conn.prepareStatement("insert into test.inttypes values (1, false, 3, 4, 1234567890123)"
@@ -366,6 +375,12 @@ class JDBCSuite extends SparkFunSuite
366375
assert(ids(2) === 3)
367376
}
368377

378+
test("overflow of partition bound difference does not give negative stride") {
379+
val df = sql("SELECT * FROM partsoverflow")
380+
checkNumPartitions(df, expectedNumPartitions = 3)
381+
assert(df.collect().length == 3)
382+
}
383+
369384
test("Register JDBC query with renamed fields") {
370385
// Regression test for bug SPARK-7345
371386
sql(

0 commit comments

Comments
 (0)