From a1b68cd179eee8bba99cc70cf2f2c2a7f1650188 Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Tue, 17 Mar 2020 21:21:20 +0800
Subject: [PATCH 1/2] size(null) should return null under ansi mode

---
 .../org/apache/spark/sql/internal/SQLConf.scala    |  5 ++++-
 .../expressions/CollectionExpressionsSuite.scala   |  6 ++++++
 .../apache/spark/sql/DataFrameFunctionsSuite.scala | 12 ++++++++++++
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index b6282715533d..9bca9adee3e9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -3014,7 +3014,10 @@ class SQLConf extends Serializable with Logging {
 
   def csvColumnPruning: Boolean = getConf(SQLConf.CSV_PARSER_COLUMN_PRUNING)
 
-  def legacySizeOfNull: Boolean = getConf(SQLConf.LEGACY_SIZE_OF_NULL)
+  def legacySizeOfNull: Boolean = {
+    // size(null) should return null under ansi mode.
+    getConf(SQLConf.LEGACY_SIZE_OF_NULL) && !getConf(ANSI_ENABLED)
+  }
 
   def isReplEagerEvalEnabled: Boolean = getConf(SQLConf.REPL_EAGER_EVAL_ENABLED)
 
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
index 3cfc66f5cdb0..173f24881531 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
@@ -74,6 +74,12 @@ class CollectionExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper
     withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
       testSize(sizeOfNull = null)
     }
+    // size(null) should return null under ansi mode.
+    withSQLConf(
+      SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
+      SQLConf.ANSI_ENABLED.key -> "true") {
+      testSize(sizeOfNull = null)
+    }
   }
 
   test("MapKeys/MapValues") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index a613c33b6c87..c41eb98c13ea 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -490,6 +490,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
     withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
       testSizeOfArray(sizeOfNull = null)
     }
+    // size(null) should return null under ansi mode.
+    withSQLConf(
+      SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
+      SQLConf.ANSI_ENABLED.key -> "true") {
+      testSizeOfArray(sizeOfNull = null)
+    }
   }
 
   test("dataframe arrays_zip function") {
@@ -569,6 +575,12 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
     withSQLConf(SQLConf.LEGACY_SIZE_OF_NULL.key -> "false") {
       testSizeOfMap(sizeOfNull = null)
     }
+    // size(null) should return null under ansi mode.
+    withSQLConf(
+      SQLConf.LEGACY_SIZE_OF_NULL.key -> "true",
+      SQLConf.ANSI_ENABLED.key -> "true") {
+      testSizeOfMap(sizeOfNull = null)
+    }
   }
 
   test("map_keys/map_values function") {

From 8459d63ddc5f14ef7f91a5d9ebf07a2cdb2090ba Mon Sep 17 00:00:00 2001
From: Wenchen Fan
Date: Wed, 18 Mar 2020 00:54:26 +0800
Subject: [PATCH 2/2] update conf doc

---
 .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 9bca9adee3e9..1cd2fcf28e44 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -2154,8 +2154,8 @@ object SQLConf {
 
   val LEGACY_SIZE_OF_NULL = buildConf("spark.sql.legacy.sizeOfNull")
     .internal()
-    .doc("If it is set to true, size of null returns -1. This behavior was inherited from Hive. " +
-      "The size function returns null for null input if the flag is disabled.")
+    .doc(s"If it is set to false, or ${ANSI_ENABLED.key} is true, then size of null returns " +
+      "null. Otherwise, it returns -1, which was inherited from Hive.")
     .version("2.4.0")
     .booleanConf
     .createWithDefault(true)