
Commit 98ddbe6

Authored by byF (Zdenek Farana), committed by marmbrus
[SPARK-3173][SQL] Timestamp support in the parser
If you have a table with a TIMESTAMP column, that column can't be used properly in a WHERE clause: it is not evaluated correctly. [More](https://issues.apache.org/jira/browse/SPARK-3173)

Motivation: http://www.aproint.com/aggregation-with-spark-sql/

- [x] Modify SqlParser so it supports casting to TIMESTAMP (workaround for item 2)
- [x] Convert a string literal into a Timestamp when the column is a Timestamp.

Author: Zdenek Farana <zdenek.farana@gmail.com>
Author: Zdenek Farana <zdenek.farana@aproint.com>

Closes #2084 from byF/SPARK-3173 and squashes the following commits:

442b59d [Zdenek Farana] Fixed test merge conflict
2dbf4f6 [Zdenek Farana] Merge remote-tracking branch 'origin/SPARK-3173' into SPARK-3173
65b6215 [Zdenek Farana] Fixed timezone sensitivity in the test
47b27b4 [Zdenek Farana] Now works in the case of "StringLiteral=TimestampColumn"
96a661b [Zdenek Farana] Code style change
491dfcf [Zdenek Farana] Added test cases for SPARK-3173
4446b1e [Zdenek Farana] A string literal is cast into a Timestamp when the column is a Timestamp.
59af397 [Zdenek Farana] Added a new TIMESTAMP keyword; CAST to TIMESTAMP can now be used in SQL expressions.
1 parent 634d04b commit 98ddbe6
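
To make the intent concrete, here is a minimal usage sketch. It assumes a Spark 1.1-era `SQLContext` named `sqlContext` and an already-registered `timestamps` table with a TIMESTAMP column `time`; neither is defined in this commit. After this change, both the explicit CAST form and the bare string-literal form of the predicate work.

```scala
// Hedged usage sketch: `sqlContext` and the `timestamps` table are assumptions,
// not part of this commit.

// Explicit cast, enabled by the new TIMESTAMP keyword in SqlParser.
val explicitCast = sqlContext.sql(
  "SELECT time FROM timestamps WHERE time = CAST('1970-01-01 00:00:00.001' AS TIMESTAMP)")

// Bare string literal, coerced to Timestamp by the new HiveTypeCoercion rules.
val implicitCast = sqlContext.sql(
  "SELECT time FROM timestamps WHERE time = '1970-01-01 00:00:00.001'")

explicitCast.collect().foreach(println)
implicitCast.collect().foreach(println)
```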

File tree

3 files changed: +54 -2 lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala

Lines changed: 2 additions & 1 deletion

```diff
@@ -114,6 +114,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
   protected val STRING = Keyword("STRING")
   protected val SUM = Keyword("SUM")
   protected val TABLE = Keyword("TABLE")
+  protected val TIMESTAMP = Keyword("TIMESTAMP")
   protected val TRUE = Keyword("TRUE")
   protected val UNCACHE = Keyword("UNCACHE")
   protected val UNION = Keyword("UNION")
@@ -359,7 +360,7 @@ class SqlParser extends StandardTokenParsers with PackratParsers {
     literal

   protected lazy val dataType: Parser[DataType] =
-    STRING ^^^ StringType
+    STRING ^^^ StringType | TIMESTAMP ^^^ TimestampType
 }

 class SqlLexical(val keywords: Seq[String]) extends StdLexical {
```
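
For readers unfamiliar with Scala's parser combinators, here is a small standalone sketch (plain scala-parser-combinators, not Spark code) of the pattern the `dataType` rule uses: `^^^` replaces a successful parse with a constant value, and `|` tries alternatives in order.

```scala
// Standalone parser-combinator sketch, not Spark code: mirrors the shape of the new
// rule `STRING ^^^ StringType | TIMESTAMP ^^^ TimestampType`, but over plain string
// literals instead of SqlParser's Keyword parsers.
import scala.util.parsing.combinator.JavaTokenParsers

object DataTypeParserSketch extends JavaTokenParsers {
  sealed trait DataType
  case object StringType    extends DataType
  case object TimestampType extends DataType

  // `^^^` maps a matched keyword to a constant result; `|` chains alternatives.
  def dataType: Parser[DataType] =
    "STRING" ^^^ StringType | "TIMESTAMP" ^^^ TimestampType

  def main(args: Array[String]): Unit = {
    println(parseAll(dataType, "TIMESTAMP")) // successful parse yielding TimestampType
    println(parseAll(dataType, "STRING"))    // successful parse yielding StringType
  }
}
```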

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala

Lines changed: 10 additions & 0 deletions

```diff
@@ -218,11 +218,21 @@ trait HiveTypeCoercion {
       case a: BinaryArithmetic if a.right.dataType == StringType =>
         a.makeCopy(Array(a.left, Cast(a.right, DoubleType)))

+      case p: BinaryPredicate if p.left.dataType == StringType
+          && p.right.dataType == TimestampType =>
+        p.makeCopy(Array(Cast(p.left, TimestampType), p.right))
+      case p: BinaryPredicate if p.left.dataType == TimestampType
+          && p.right.dataType == StringType =>
+        p.makeCopy(Array(p.left, Cast(p.right, TimestampType)))
+
       case p: BinaryPredicate if p.left.dataType == StringType && p.right.dataType != StringType =>
         p.makeCopy(Array(Cast(p.left, DoubleType), p.right))
       case p: BinaryPredicate if p.left.dataType != StringType && p.right.dataType == StringType =>
         p.makeCopy(Array(p.left, Cast(p.right, DoubleType)))

+      case i @ In(a,b) if a.dataType == TimestampType && b.forall(_.dataType == StringType) =>
+        i.makeCopy(Array(a,b.map(Cast(_,TimestampType))))
+
       case Sum(e) if e.dataType == StringType =>
         Sum(Cast(e, DoubleType))
       case Average(e) if e.dataType == StringType =>
```
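
The new rules sit before the generic string-vs-anything rules, so a String compared against a Timestamp is cast to TimestampType rather than falling through to the older cast-to-Double behaviour. Below is a standalone sketch of that idea with simplified stand-ins; these are not the real Catalyst classes, just an illustration of the rewrite.

```scala
// Simplified sketch of the coercion idea, not the actual Catalyst rule.
object CoercionSketch {
  sealed trait DataType
  case object StringType    extends DataType
  case object TimestampType extends DataType
  case object BooleanType   extends DataType

  sealed trait Expr { def dataType: DataType }
  case class Column(name: String, dataType: DataType)   extends Expr
  case class Literal(value: String, dataType: DataType) extends Expr
  case class Cast(child: Expr, dataType: DataType)      extends Expr
  case class Equals(left: Expr, right: Expr) extends Expr { val dataType = BooleanType }

  // When an equality mixes a Timestamp expression with a String one,
  // wrap the String side in a Cast to TimestampType.
  def coerce(e: Expr): Expr = e match {
    case Equals(l, r) if l.dataType == TimestampType && r.dataType == StringType =>
      Equals(l, Cast(r, TimestampType))
    case Equals(l, r) if l.dataType == StringType && r.dataType == TimestampType =>
      Equals(Cast(l, TimestampType), r)
    case other => other
  }

  def main(args: Array[String]): Unit = {
    // time = '1970-01-01 00:00:00.001'  becomes  time = CAST('...' AS TIMESTAMP)
    val rewritten = coerce(
      Equals(Column("time", TimestampType), Literal("1970-01-01 00:00:00.001", StringType)))
    println(rewritten)
  }
}
```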

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 42 additions & 1 deletion

```diff
@@ -19,15 +19,28 @@ package org.apache.spark.sql

 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.test._
+import org.scalatest.BeforeAndAfterAll
+import java.util.TimeZone

 /* Implicits */
 import TestSQLContext._
 import TestData._

-class SQLQuerySuite extends QueryTest {
+class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
   // Make sure the tables are loaded.
   TestData

+  var origZone: TimeZone = _
+  override protected def beforeAll() {
+    origZone = TimeZone.getDefault
+    TimeZone.setDefault(TimeZone.getTimeZone("UTC"))
+  }
+
+  override protected def afterAll() {
+    TimeZone.setDefault(origZone)
+  }
+
   test("SPARK-2041 column name equals tablename") {
     checkAnswer(
       sql("SELECT tableName FROM tableName"),
@@ -63,6 +76,34 @@ class SQLQuerySuite extends QueryTest {
       "st")
   }

+  test("SPARK-3173 Timestamp support in the parser") {
+    checkAnswer(sql(
+      "SELECT time FROM timestamps WHERE time=CAST('1970-01-01 00:00:00.001' AS TIMESTAMP)"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.001"))))
+
+    checkAnswer(sql(
+      "SELECT time FROM timestamps WHERE time='1970-01-01 00:00:00.001'"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.001"))))
+
+    checkAnswer(sql(
+      "SELECT time FROM timestamps WHERE '1970-01-01 00:00:00.001'=time"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.001"))))
+
+    checkAnswer(sql(
+      """SELECT time FROM timestamps WHERE time<'1970-01-01 00:00:00.003'
+          AND time>'1970-01-01 00:00:00.001'"""),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.002"))))
+
+    checkAnswer(sql(
+      "SELECT time FROM timestamps WHERE time IN ('1970-01-01 00:00:00.001','1970-01-01 00:00:00.002')"),
+      Seq(Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.001")),
+        Seq(java.sql.Timestamp.valueOf("1970-01-01 00:00:00.002"))))
+
+    checkAnswer(sql(
+      "SELECT time FROM timestamps WHERE time='123'"),
+      Nil)
+  }
+
   test("index into array") {
     checkAnswer(
       sql("SELECT data, data[0], data[0] + data[1], data[0 + 1] FROM arrayData"),
```
