20 changes: 20 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/limit.sql
@@ -0,0 +1,20 @@

-- limit on various data types
select * from testdata limit 2;
select * from arraydata limit 2;
select * from mapdata limit 2;

-- foldable non-literal in limit
select * from testdata limit 2 + 1;

select * from testdata limit CAST(1 AS int);

-- limit must be non-negative
select * from testdata limit -1;

-- limit must be foldable
select * from testdata limit key > 3;

-- limit must be integer
select * from testdata limit true;
select * from testdata limit 'a';
@@ -3,10 +3,13 @@
-- parse as ints
select 1, -1;

-- parse as longs
-- parse as longs (Int.MaxValue + 1, and Int.MinValue - 1)
select 2147483648, -2147483649;

-- parse as decimals
-- parse long min and max value
select 9223372036854775807, -9223372036854775808;

-- parse as decimals (Long.MaxValue + 1, and Long.MinValue - 1)
select 9223372036854775808, -9223372036854775809;

-- various floating point (decimal) formats
@@ -1,4 +1,4 @@
-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 1


@@ -1,4 +1,4 @@
-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 4


83 changes: 83 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/limit.sql.out
@@ -0,0 +1,83 @@
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 9


-- !query 0
select * from testdata limit 2
-- !query 0 schema
struct<key:int,value:string>
-- !query 0 output
1 1
2 2


-- !query 1
select * from arraydata limit 2
-- !query 1 schema
struct<arraycol:array<int>,nestedarraycol:array<array<int>>>
-- !query 1 output
[1,2,3] [[1,2,3]]
[2,3,4] [[2,3,4]]


-- !query 2
select * from mapdata limit 2
-- !query 2 schema
struct<mapcol:map<int,string>>
-- !query 2 output
{1:"a1",2:"b1",3:"c1",4:"d1",5:"e1"}
{1:"a2",2:"b2",3:"c2",4:"d2"}


-- !query 3
select * from testdata limit 2 + 1
-- !query 3 schema
struct<key:int,value:string>
-- !query 3 output
1 1
2 2
3 3


-- !query 4
select * from testdata limit CAST(1 AS int)
-- !query 4 schema
struct<key:int,value:string>
-- !query 4 output
1 1


-- !query 5
select * from testdata limit -1
-- !query 5 schema
struct<>
-- !query 5 output
org.apache.spark.sql.AnalysisException
The limit expression must be equal to or greater than 0, but got -1;


-- !query 6
select * from testdata limit key > 3
-- !query 6 schema
struct<>
-- !query 6 output
org.apache.spark.sql.AnalysisException
The limit expression must evaluate to a constant value, but got (testdata.`key` > 3);


-- !query 7
select * from testdata limit true
-- !query 7 schema
struct<>
-- !query 7 output
org.apache.spark.sql.AnalysisException
The limit expression must be integer type, but got boolean;


-- !query 8
select * from testdata limit 'a'
-- !query 8 schema
struct<>
-- !query 8 output
org.apache.spark.sql.AnalysisException
The limit expression must be integer type, but got string;
@@ -1,4 +1,4 @@
-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 6


@@ -1,5 +1,5 @@
-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite
-- Number of queries: 4
-- Automatically generated by SQLQueryTestSuite
-- Number of queries: 5


-- !query 0
@@ -19,16 +19,24 @@ struct<2147483648:bigint,(-2147483649):bigint>


-- !query 2
select 9223372036854775808, -9223372036854775809
select 9223372036854775807, -9223372036854775808
Review thread on this line:

Contributor: can you also add the boundary conditions for int as well?

Contributor (author): Can I add it in a separate pull request? I want to add all literal parsing here, but don't want to distract this pull request.

Contributor: sgtm

-- !query 2 schema
struct<9223372036854775808:decimal(19,0),(-9223372036854775809):decimal(19,0)>
struct<9223372036854775807:bigint,(-9223372036854775808):decimal(19,0)>
Review thread on this line:

Contributor (author): "-9223372036854775808" is a valid long value (Long.MinValue) but Spark treats it as a decimal(19, 0) because "9223372036854775808" is out of range. Is this expected? cc @cloud-fan

Contributor (author): Also cc @sarutak who wrote the original test case.

Contributor: maybe a parser bug? cc @hvanhovell

Contributor: I'd call this a bug. I tried in Spark 1.6 and it was returning double (which was worse). Here's postgres:

rxin=# select pg_typeof(-9223372036854775808);
 pg_typeof
-----------
 bigint
(1 row)

rxin=# select pg_typeof(-9223372036854775807);
 pg_typeof
-----------
 bigint
(1 row)

rxin=# select pg_typeof(-9223372036854775806);
 pg_typeof
-----------
 bigint
(1 row)

Contributor: @petermaxlee can you file a jira ticket for this bug?

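A minimal standalone Scala sketch of the suspected mechanism — an assumption about the parsing order, not Spark's actual parser code, and the object name is invented for illustration: if the unsigned digits are typed first and unary minus is applied afterwards, the positive form of Long.MinValue already overflows Long and falls back to decimal.

import scala.util.Try

// Hypothetical illustration of type-then-negate literal parsing; not Spark code.
object NegativeLiteralSketch extends App {
  val unsigned = "9223372036854775808" // abs(Long.MinValue) == Long.MaxValue + 1

  // Step 1: type the unsigned digits alone. They overflow Long...
  assert(Try(unsigned.toLong).isFailure)
  // ...so a parser working this way falls back to an exact decimal, decimal(19,0).
  val asDecimal = BigDecimal(unsigned)

  // Step 2: apply unary minus to the already-typed value; the type stays decimal(19,0).
  val negated = -asDecimal

  // Parsing the signed token as a whole would have produced a valid Long:
  assert("-9223372036854775808".toLong == Long.MinValue)

  println(s"type-then-negate: $negated (BigDecimal); whole token: ${Long.MinValue} (Long)")
}

Under that assumption, the fix is for the parser to consider the sign together with the digits (as the postgres results above suggest it does) before choosing the literal's type.
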
-- !query 2 output
9223372036854775808 -9223372036854775809
9223372036854775807 -9223372036854775808


-- !query 3
select 0.3, -0.8, .5, -.18, 0.1111
select 9223372036854775808, -9223372036854775809
-- !query 3 schema
struct<0.3:decimal(1,1),(-0.8):decimal(1,1),0.5:decimal(1,1),(-0.18):decimal(2,2),0.1111:decimal(4,4)>
struct<9223372036854775808:decimal(19,0),(-9223372036854775809):decimal(19,0)>
-- !query 3 output
9223372036854775808 -9223372036854775809


-- !query 4
select 0.3, -0.8, .5, -.18, 0.1111
-- !query 4 schema
struct<0.3:decimal(1,1),(-0.8):decimal(1,1),0.5:decimal(1,1),(-0.18):decimal(2,2),0.1111:decimal(4,4)>
-- !query 4 output
0.3 -0.8 0.5 -0.18 0.1111
50 changes: 0 additions & 50 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -650,51 +650,12 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
sortTest()
}

test("limit") {
checkAnswer(
sql("SELECT * FROM testData LIMIT 9 + 1"),
testData.take(10).toSeq)

checkAnswer(
sql("SELECT * FROM arrayData LIMIT CAST(1 AS Integer)"),
arrayData.collect().take(1).map(Row.fromTuple).toSeq)

checkAnswer(
sql("SELECT * FROM mapData LIMIT 1"),
mapData.collect().take(1).map(Row.fromTuple).toSeq)
}

test("non-foldable expressions in LIMIT") {
val e = intercept[AnalysisException] {
sql("SELECT * FROM testData LIMIT key > 3")
}.getMessage
assert(e.contains("The limit expression must evaluate to a constant value, " +
"but got (testdata.`key` > 3)"))
}

test("Expressions in limit clause are not integer") {
var e = intercept[AnalysisException] {
sql("SELECT * FROM testData LIMIT true")
}.getMessage
assert(e.contains("The limit expression must be integer type, but got boolean"))

e = intercept[AnalysisException] {
sql("SELECT * FROM testData LIMIT 'a'")
}.getMessage
assert(e.contains("The limit expression must be integer type, but got string"))
}

test("negative in LIMIT or TABLESAMPLE") {
val expected = "The limit expression must be equal to or greater than 0, but got -1"
var e = intercept[AnalysisException] {
sql("SELECT * FROM testData TABLESAMPLE (-1 rows)")
}.getMessage
assert(e.contains(expected))

e = intercept[AnalysisException] {
sql("SELECT * FROM testData LIMIT -1")
}.getMessage
assert(e.contains(expected))
}

test("CTE feature") {
@@ -1337,17 +1298,6 @@
}
}

test("Test to check we can use Long.MinValue") {
checkAnswer(
sql(s"SELECT ${Long.MinValue} FROM testData ORDER BY key LIMIT 1"), Row(Long.MinValue)
)

checkAnswer(
sql(s"SELECT key FROM testData WHERE key > ${Long.MinValue}"),
(1 to 100).map(Row(_)).toSeq
)
}

test("Test to check we can apply sign to expression") {

checkAnswer(
@@ -20,6 +20,8 @@ package org.apache.spark.sql
import java.io.File
import java.util.{Locale, TimeZone}

import scala.util.control.NonFatal

import org.apache.spark.sql.catalyst.planning.PhysicalOperation
import org.apache.spark.sql.catalyst.plans.logical._
import org.apache.spark.sql.catalyst.rules.RuleExecutor
@@ -132,6 +134,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
// Create a local SparkSession to have stronger isolation between different test cases.
// This does not isolate catalog changes.
val localSparkSession = spark.newSession()
loadTestData(localSparkSession)

// Run the SQL queries preparing them for comparison.
val outputs: Seq[QueryOutput] = queries.map { sql =>
@@ -146,7 +149,7 @@
if (regenerateGoldenFiles) {
// Again, we are explicitly not using multi-line string due to stripMargin removing "|".
val goldenOutput = {
s"-- Automatically generated by ${getClass.getName}\n" +
s"-- Automatically generated by ${getClass.getSimpleName}\n" +
s"-- Number of queries: ${outputs.size}\n\n\n" +
outputs.zipWithIndex.map{case (qr, i) => qr.toString(i)}.mkString("\n\n\n") + "\n"
}
@@ -192,12 +195,19 @@
case _ => plan.children.iterator.exists(isSorted)
}

val df = session.sql(sql)
val schema = df.schema
val answer = df.queryExecution.hiveResultString()
try {
val df = session.sql(sql)
val schema = df.schema
val answer = df.queryExecution.hiveResultString()

// If the output is not pre-sorted, sort it.
if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted)

// If the output is not pre-sorted, sort it.
if (isSorted(df.queryExecution.analyzed)) (schema, answer) else (schema, answer.sorted)
} catch {
case NonFatal(e) =>
// If there is an exception, put the exception class followed by the message.
(StructType(Seq.empty), Seq(e.getClass.getName, e.getMessage))
}
}

private def listTestCases(): Seq[TestCase] = {
@@ -213,6 +223,25 @@
files ++ dirs.flatMap(listFilesRecursively)
}

/** Load built-in test tables into the SparkSession. */
private def loadTestData(session: SparkSession): Unit = {
import session.implicits._

(1 to 100).map(i => (i, i.toString)).toDF("key", "value").createOrReplaceTempView("testdata")

((Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: (Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil)
.toDF("arraycol", "nestedarraycol")
.createOrReplaceTempView("arraydata")

(Tuple1(Map(1 -> "a1", 2 -> "b1", 3 -> "c1", 4 -> "d1", 5 -> "e1")) ::
Tuple1(Map(1 -> "a2", 2 -> "b2", 3 -> "c2", 4 -> "d2")) ::
Tuple1(Map(1 -> "a3", 2 -> "b3", 3 -> "c3")) ::
Tuple1(Map(1 -> "a4", 2 -> "b4")) ::
Tuple1(Map(1 -> "a5")) :: Nil)
.toDF("mapcol")
.createOrReplaceTempView("mapdata")
}

private val originalTimeZone = TimeZone.getDefault
private val originalLocale = Locale.getDefault
