From c66f6719c415ccff520530e97a21e9d73356e339 Mon Sep 17 00:00:00 2001 From: Chenhao Li Date: Tue, 26 Mar 2024 09:50:22 -0700 Subject: [PATCH 1/3] initial --- .../sql/catalyst/expressions/OrderUtils.scala | 3 ++- .../sql/catalyst/expressions/ExprUtils.scala | 5 +++-- .../org/apache/spark/sql/VariantSuite.scala | 18 ++++++++++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala index 9319b104024a0..385e0f00695a3 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/catalyst/expressions/OrderUtils.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.sql.catalyst.expressions -import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, NullType, StructType, UserDefinedType} +import org.apache.spark.sql.types.{ArrayType, AtomicType, DataType, NullType, StructType, UserDefinedType, VariantType} object OrderUtils { /** @@ -24,6 +24,7 @@ object OrderUtils { */ def isOrderable(dataType: DataType): Boolean = dataType match { case NullType => true + case VariantType => false case dt: AtomicType => true case struct: StructType => struct.fields.forall(f => isOrderable(f.dataType)) case array: ArrayType => isOrderable(array.elementType) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala index 2bbe730d4cfb8..1ce9cb48c41d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala @@ -28,7 +28,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.Aggregate 
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, CharVarcharUtils} import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryErrorsBase, QueryExecutionErrors} -import org.apache.spark.sql.types.{DataType, MapType, StringType, StructType} +import org.apache.spark.sql.types.{DataType, MapType, StringType, StructType, VariantType} import org.apache.spark.unsafe.types.UTF8String object ExprUtils extends QueryErrorsBase { @@ -194,7 +194,8 @@ object ExprUtils extends QueryErrorsBase { } // Check if the data type of expr is orderable. - if (expr.dataType.existsRecursively(_.isInstanceOf[MapType])) { + if (expr.dataType.existsRecursively( + t => t.isInstanceOf[MapType] || t.isInstanceOf[VariantType])) { expr.failAnalysis( errorClass = "GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE", messageParameters = Map( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala index 3991b44d0bbb7..e7108371a3672 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala @@ -192,4 +192,22 @@ class VariantSuite extends QueryTest with SharedSparkSession { } } } + + test("group/order/join variant are disabled") { + var ex = intercept[AnalysisException] { + spark.sql("select parse_json('') group by 1") + } + assert(ex.getErrorClass == "GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE") + + ex = intercept[AnalysisException] { + spark.sql("select parse_json('') order by 1") + } + assert(ex.getErrorClass == "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE") + + ex = intercept[AnalysisException] { + spark.sql("with t as (select parse_json('') as v) " + + "select t1.v from t as t1 join t as t2 on t1.v = t2.v") + } + assert(ex.getErrorClass == "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE") + } } From d35571c57cfb8b0bea6cd4378c13a8f17d587143 Mon Sep 17 00:00:00 2001 From: Chenhao Li Date: Tue, 26 Mar 2024 11:37:59 -0700 Subject: [PATCH 
2/3] add tests --- .../scala/org/apache/spark/sql/VariantSuite.scala | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala index e7108371a3672..26ba678489600 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/VariantSuite.scala @@ -204,6 +204,17 @@ class VariantSuite extends QueryTest with SharedSparkSession { } assert(ex.getErrorClass == "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE") + ex = intercept[AnalysisException] { + spark.sql("select parse_json('') sort by 1") + } + assert(ex.getErrorClass == "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE") + + ex = intercept[AnalysisException] { + spark.sql("with t as (select 1 as a, parse_json('') as v) " + + "select rank() over (partition by a order by v) from t") + } + assert(ex.getErrorClass == "DATATYPE_MISMATCH.INVALID_ORDERING_TYPE") + ex = intercept[AnalysisException] { spark.sql("with t as (select parse_json('') as v) " + "select t1.v from t as t1 join t as t2 on t1.v = t2.v") From 23250486ed2a6bec9447272ba034852aeaf8720d Mon Sep 17 00:00:00 2001 From: Chenhao Li Date: Wed, 27 Mar 2024 16:38:38 -0700 Subject: [PATCH 3/3] add GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE back --- common/utils/src/main/resources/error/error-classes.json | 6 ++++++ docs/sql-error-conditions.md | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 185e86853dfd4..50f37d9413e84 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -1390,6 +1390,12 @@ ], "sqlState" : "42805" }, + "GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE" : { + "message" : [ + "The expression <sqlExpr> cannot be used as a grouping expression because its data type <dataType> is not an orderable
data type." + ], + "sqlState" : "42822" + }, "HLL_INVALID_INPUT_SKETCH_BUFFER" : { "message" : [ "Invalid call to <function>; only valid HLL sketch buffers are supported as inputs (such as those produced by the `hll_sketch_agg` function)." diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index 838ca2fa33c92..e00b1e2ca26b4 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -846,6 +846,12 @@ GROUP BY `<index>` refers to an expression `<aggExpr>` that contains an aggregat GROUP BY position `<index>` is not in select list (valid range is [1, `<size>`]). +### GROUP_EXPRESSION_TYPE_IS_NOT_ORDERABLE + +[SQLSTATE: 42822](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation) + +The expression `<sqlExpr>` cannot be used as a grouping expression because its data type `<dataType>` is not an orderable data type. + ### HLL_INVALID_INPUT_SKETCH_BUFFER [SQLSTATE: 22546](sql-error-conditions-sqlstates.html#class-22-data-exception)