From 2fea01c0e5c7a3de7b25d5a29d76abba1dc3c269 Mon Sep 17 00:00:00 2001 From: ulysses Date: Sun, 26 Jul 2020 21:05:42 +0800 Subject: [PATCH 1/7] init --- .../org/apache/spark/sql/types/NullType.scala | 2 ++ .../sql/hive/execution/HiveDDLSuite.scala | 20 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala index 14097a5280d5..f74cbd02181c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala @@ -32,6 +32,8 @@ class NullType private() extends DataType { override def defaultSize: Int = 1 private[spark] override def asNullable: NullType = this + + override def sql: String = "VOID" } /** diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index fbd1fc1ea98d..1b835922a0e6 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2854,4 +2854,24 @@ class HiveDDLSuite assert(sql("SELECT * FROM t2 WHERE c = 'A'").collect().isEmpty) } } + + test("SPARK-32445: Make NullType.sql as VOID to support hive") { + withView("v1") { + sql("create view v1 as select null as c") + val df = sql("select * from v1") + assert(df.schema.fields.head.dataType == NullType) + checkAnswer( + df, + Row("NULL") + ) + + sql("alter view v1 as select null as c1, 1 as c2") + val df2 = sql("select * from v1") + assert(df2.schema.fields.head.dataType == NullType) + checkAnswer( + df2, + Row("NULL", 1) + ) + } + } } From 9b121770fb147bf1c23e89441be8986ab8948ef0 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 27 Jul 2020 08:00:59 +0800 Subject: [PATCH 2/7] fix toHiveColumn --- .../scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index 3f70387a3b05..e9ae824ffad9 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -984,7 +984,7 @@ private[hive] object HiveClientImpl extends Logging { val typeString = if (c.metadata.contains(HIVE_TYPE_STRING)) { c.metadata.getString(HIVE_TYPE_STRING) } else { - c.dataType.catalogString + c.dataType.sql } new FieldSchema(c.name, typeString, c.getComment().orNull) } From 9deabe4452347dae0d2851a5988ca6eac088ec35 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 27 Jul 2020 20:21:30 +0800 Subject: [PATCH 3/7] change to void --- python/pyspark/sql/types.py | 3 +++ .../sql/catalyst/parser/AstBuilder.scala | 1 + .../org/apache/spark/sql/types/NullType.scala | 2 +- .../sql-functions/sql-expression-schema.md | 2 +- .../sql-tests/results/ansi/literals.sql.out | 2 +- .../sql-tests/results/inline-table.sql.out | 2 +- .../sql-tests/results/literals.sql.out | 2 +- .../sql-tests/results/misc-functions.sql.out | 2 +- .../results/postgreSQL/select.sql.out | 4 ++-- .../sql-compatibility-functions.sql.out | 6 ++--- .../results/udf/udf-inline-table.sql.out | 2 +- .../spark/sql/FileBasedDataSourceSuite.scala | 2 +- .../sql/hive/execution/HiveDDLSuite.scala | 22 +++++++++---------- .../sql/hive/orc/HiveOrcSourceSuite.scala | 2 +- 14 files changed, 29 insertions(+), 25 deletions(-) diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index cc08482c735b..b3af9430d50a 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -110,6 +110,9 @@ class NullType(DataType): __metaclass__ = DataTypeSingleton + def simpleString(self): + return 'void' + class AtomicType(DataType): """An internal type used to represent everything that is not diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d6ae89f49c57..83b4a27f8c43 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2221,6 +2221,7 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging case ("decimal" | "dec" | "numeric", precision :: scale :: Nil) => DecimalType(precision.getText.toInt, scale.getText.toInt) case ("void", Nil) => NullType + case ("null", Nil) => NullType case ("interval", Nil) => CalendarIntervalType case (dt, params) => val dtStr = if (params.nonEmpty) s"$dt(${params.mkString(",")})" else dt diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala index f74cbd02181c..78d1f476fdac 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/NullType.scala @@ -33,7 +33,7 @@ class NullType private() extends DataType { private[spark] override def asNullable: NullType = this - override def sql: String = "VOID" + override def simpleString: String = "void" } /** diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index edf2ede9e5a4..21bcb2728254 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -34,7 +34,7 @@ | org.apache.spark.sql.catalyst.expressions.Ascii | ascii | SELECT ascii('222') | struct | | org.apache.spark.sql.catalyst.expressions.Asin | asin | SELECT asin(0) | struct | | org.apache.spark.sql.catalyst.expressions.Asinh | asinh | SELECT asinh(0) | struct | -| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | +| org.apache.spark.sql.catalyst.expressions.AssertTrue | assert_true | SELECT assert_true(0 < 1) | struct | | org.apache.spark.sql.catalyst.expressions.Atan | atan | SELECT atan(0) | struct | | org.apache.spark.sql.catalyst.expressions.Atan2 | atan2 | SELECT atan2(0, 0) | struct | | org.apache.spark.sql.catalyst.expressions.Atanh | atanh | SELECT atanh(0) | struct | diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out index ea74bb7175e9..1cffa9c4b2ca 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/literals.sql.out @@ -5,7 +5,7 @@ -- !query select null, Null, nUll -- !query schema -struct +struct -- !query output NULL NULL NULL diff --git a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out index 9943b93c431d..12dcf331434e 100644 --- a/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/inline-table.sql.out @@ -49,7 +49,7 @@ two 2 -- !query select * from values ("one", null), ("two", null) as data(a, b) -- !query schema -struct +struct -- !query output one NULL two NULL diff --git a/sql/core/src/test/resources/sql-tests/results/literals.sql.out b/sql/core/src/test/resources/sql-tests/results/literals.sql.out index ea74bb7175e9..1cffa9c4b2ca 100644 --- a/sql/core/src/test/resources/sql-tests/results/literals.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/literals.sql.out @@ -5,7 +5,7 @@ -- !query select null, Null, nUll -- !query schema -struct +struct -- !query output NULL NULL NULL diff --git a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out index bd8ffb82ee12..4bacdf032e27 100644 --- a/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/misc-functions.sql.out @@ -7,7 +7,7 @@ select typeof(null) -- !query schema struct -- !query output -null +void -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out index 1e59036b979b..d3674d6bc123 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/select.sql.out @@ -308,7 +308,7 @@ struct<1:int> -- !query select foo.* from (select null) as foo -- !query schema -struct +struct -- !query output NULL @@ -316,7 +316,7 @@ NULL -- !query select foo.* from (select 'xyzzy',1,null) as foo -- !query schema -struct +struct -- !query output xyzzy 1 NULL diff --git a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out index 26a44a85841e..4f8e283a311c 100644 --- a/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/sql-compatibility-functions.sql.out @@ -5,7 +5,7 @@ -- !query SELECT ifnull(null, 'x'), ifnull('y', 'x'), ifnull(null, null) -- !query schema -struct +struct -- !query output x y NULL @@ -21,7 +21,7 @@ NULL x -- !query SELECT nvl(null, 'x'), nvl('y', 'x'), nvl(null, null) -- !query schema -struct +struct -- !query output x y NULL @@ -29,7 +29,7 @@ x y NULL -- !query SELECT nvl2(null, 'x', 'y'), nvl2('n', 'x', 'y'), nvl2(null, null, null) -- !query schema -struct +struct -- !query output y x NULL diff --git a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out index d78d347bc980..08764a2483a4 100644 --- a/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-inline-table.sql.out @@ -49,7 +49,7 @@ two 2 -- !query select udf(a), b from values ("one", null), ("two", null) as data(a, b) -- !query schema -struct +struct -- !query output one NULL two NULL diff --git a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala index e9bff64d72fc..b706fd0a267d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/FileBasedDataSourceSuite.scala @@ -405,7 +405,7 @@ class FileBasedDataSourceSuite extends QueryTest "" } def errorMessage(format: String): String = { - s"$format data source does not support null data type." + s"$format data source does not support ${NullType.simpleString} data type." } withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> useV1List) { withTempDir { dir => diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 1b835922a0e6..7859e866ee0d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2331,17 +2331,17 @@ class HiveDDLSuite val e1 = intercept[AnalysisException] { spark.sql("CREATE TABLE t1 USING PARQUET AS SELECT null as null_col") }.getMessage - assert(e1.contains("Cannot create tables with null type")) + assert(e1.contains(s"Cannot create tables with ${NullType.simpleString} type")) val e2 = intercept[AnalysisException] { spark.sql("CREATE TABLE t2 AS SELECT null as null_col") }.getMessage - assert(e2.contains("Cannot create tables with null type")) + assert(e2.contains(s"Cannot create tables with ${NullType.simpleString} type")) val e3 = intercept[AnalysisException] { spark.sql("CREATE TABLE t3 STORED AS PARQUET AS SELECT null as null_col") }.getMessage - assert(e3.contains("Cannot create tables with null type")) + assert(e3.contains(s"Cannot create tables with ${NullType.simpleString} type")) } // Forbid Replace table AS SELECT with null type @@ -2350,7 +2350,7 @@ class HiveDDLSuite val e = intercept[AnalysisException] { spark.sql(s"CREATE OR REPLACE TABLE t USING $v2Source AS SELECT null as null_col") }.getMessage - assert(e.contains("Cannot create tables with null type")) + assert(e.contains(s"Cannot create tables with ${NullType.simpleString} type")) } // Forbid creating table with VOID type in Spark @@ -2358,19 +2358,19 @@ class HiveDDLSuite val e1 = intercept[AnalysisException] { spark.sql(s"CREATE TABLE t1 (v VOID) USING PARQUET") }.getMessage - assert(e1.contains("Cannot create tables with null type")) + assert(e1.contains(s"Cannot create tables with ${NullType.simpleString} type")) val e2 = intercept[AnalysisException] { spark.sql(s"CREATE TABLE t2 (v VOID) USING hive") }.getMessage - assert(e2.contains("Cannot create tables with null type")) + assert(e2.contains(s"Cannot create tables with ${NullType.simpleString} type")) val e3 = intercept[AnalysisException] { spark.sql(s"CREATE TABLE t3 (v VOID)") }.getMessage - assert(e3.contains("Cannot create tables with null type")) + assert(e3.contains(s"Cannot create tables with ${NullType.simpleString} type")) val e4 = intercept[AnalysisException] { spark.sql(s"CREATE TABLE t4 (v VOID) STORED AS PARQUET") }.getMessage - assert(e4.contains("Cannot create tables with null type")) + assert(e4.contains(s"Cannot create tables with ${NullType.simpleString} type")) } // Forbid Replace table with VOID type @@ -2379,7 +2379,7 @@ class HiveDDLSuite val e = intercept[AnalysisException] { spark.sql(s"CREATE OR REPLACE TABLE t (v VOID) USING $v2Source") }.getMessage - assert(e.contains("Cannot create tables with null type")) + assert(e.contains(s"Cannot create tables with ${NullType.simpleString} type")) } // Make sure spark.catalog.createTable with null type will fail @@ -2416,7 +2416,7 @@ class HiveDDLSuite schema = schema, options = Map("fileFormat" -> "parquet")) }.getMessage - assert(e.contains("Cannot create tables with null type")) + assert(e.contains(s"Cannot create tables with ${NullType.simpleString} type")) } } @@ -2429,7 +2429,7 @@ class HiveDDLSuite schema = schema, options = Map.empty[String, String]) }.getMessage - assert(e.contains("Cannot create tables with null type")) + assert(e.contains(s"Cannot create tables with ${NullType.simpleString} type")) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala index 91fd8a47339f..bfb6e4129482 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcSourceSuite.scala @@ -121,7 +121,7 @@ class HiveOrcSourceSuite extends OrcSuite with TestHiveSingleton { msg = intercept[AnalysisException] { sql("select null").write.mode("overwrite").orc(orcDir) }.getMessage - assert(msg.contains("ORC data source does not support null data type.")) + assert(msg.contains(s"ORC data source does not support ${NullType.simpleString} data type.")) msg = intercept[AnalysisException] { spark.udf.register("testType", () => new IntervalData()) From 9e56ff3a4424083cc0a90a8707e6c3a159affd35 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 27 Jul 2020 20:23:37 +0800 Subject: [PATCH 4/7] revert sql --- .../scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala index e9ae824ffad9..3f70387a3b05 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala @@ -984,7 +984,7 @@ private[hive] object HiveClientImpl extends Logging { val typeString = if (c.metadata.contains(HIVE_TYPE_STRING)) { c.metadata.getString(HIVE_TYPE_STRING) } else { - c.dataType.sql + c.dataType.catalogString } new FieldSchema(c.name, typeString, c.getComment().orNull) } From 8a5b2e9a076a7fe0a97cf0771c0c65b909a01656 Mon Sep 17 00:00:00 2001 From: ulysses Date: Mon, 27 Jul 2020 20:46:39 +0800 Subject: [PATCH 5/7] remove parse null --- .../scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 83b4a27f8c43..d6ae89f49c57 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2221,7 +2221,6 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging case ("decimal" | "dec" | "numeric", precision :: scale :: Nil) => DecimalType(precision.getText.toInt, scale.getText.toInt) case ("void", Nil) => NullType - case ("null", Nil) => NullType case ("interval", Nil) => CalendarIntervalType case (dt, params) => val dtStr = if (params.nonEmpty) s"$dt(${params.mkString(",")})" else dt From 1c84fc8d833563ac94b5c80d2ec2da328ae8b80f Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 28 Jul 2020 08:10:26 +0800 Subject: [PATCH 6/7] fix ut --- .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 7859e866ee0d..d24a77f8b003 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2855,14 +2855,14 @@ class HiveDDLSuite } } - test("SPARK-32445: Make NullType.sql as VOID to support hive") { + test("SPARK-32445: Make NullType.simpleString as void to support hive") { withView("v1") { sql("create view v1 as select null as c") val df = sql("select * from v1") assert(df.schema.fields.head.dataType == NullType) checkAnswer( df, - Row("NULL") + Row("null") ) sql("alter view v1 as select null as c1, 1 as c2") @@ -2870,7 +2870,7 @@ class HiveDDLSuite assert(df2.schema.fields.head.dataType == NullType) checkAnswer( df2, - Row("NULL", 1) + Row("null", 1) ) } } From 65137da7801d7bd68f1040f6dc0f22665057394b Mon Sep 17 00:00:00 2001 From: ulysses Date: Tue, 28 Jul 2020 13:10:36 +0800 Subject: [PATCH 7/7] fix null --- .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index d24a77f8b003..421690a68668 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -2862,7 +2862,7 @@ class HiveDDLSuite assert(df.schema.fields.head.dataType == NullType) checkAnswer( df, - Row("null") + Row(null) ) sql("alter view v1 as select null as c1, 1 as c2") @@ -2870,7 +2870,7 @@ class HiveDDLSuite assert(df2.schema.fields.head.dataType == NullType) checkAnswer( df2, - Row("null", 1) + Row(null, 1) ) } }