From 40ffdefc2e11f9605e9bc84f3f8f3b57cb57f0d4 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Wed, 13 Nov 2024 09:15:13 +0100 Subject: [PATCH 01/79] [SPARK-50250][SQL] Assign appropriate error condition for `_LEGACY_ERROR_TEMP_2075`: `UNSUPPORTED_FEATURE.WRITE_FOR_BINARY_SOURCE` ### What changes were proposed in this pull request? This PR proposes to Integrate `_LEGACY_ERROR_TEMP_2075 ` into `UNSUPPORTED_FEATURE.WRITE_FOR_BINARY_SOURCE ` ### Why are the changes needed? To improve the error message by assigning proper error condition and SQLSTATE ### Does this PR introduce _any_ user-facing change? No, only user-facing error message improved ### How was this patch tested? Updated the existing tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #48780 from itholic/LEGACY_2075. Lead-authored-by: Haejoon Lee Co-authored-by: Haejoon Lee Signed-off-by: Max Gekk --- .../src/main/resources/error/error-conditions.json | 10 +++++----- .../apache/spark/sql/errors/QueryExecutionErrors.scala | 2 +- .../datasources/binaryfile/BinaryFileFormatSuite.scala | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index cc31678bc1ec..b3c92a9f2b9d 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -5382,6 +5382,11 @@ "message" : [ "Update column nullability for MySQL and MS SQL Server." ] + }, + "WRITE_FOR_BINARY_SOURCE" : { + "message" : [ + "Write for the binary file data source." + ] } }, "sqlState" : "0A000" @@ -7083,11 +7088,6 @@ "user-specified schema." ] }, - "_LEGACY_ERROR_TEMP_2075" : { - "message" : [ - "Write is not supported for binary file data source." - ] - }, "_LEGACY_ERROR_TEMP_2076" : { "message" : [ "The length of is , which exceeds the max length allowed: ." 
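[Editorial illustration, not part of the patch] After this change, a write attempt with the binary file data source surfaces the new condition (sub-message "Write for the binary file data source.", SQLSTATE 0A000) instead of `_LEGACY_ERROR_TEMP_2075`. A minimal sketch, assuming a running SparkSession named `spark` and an arbitrary output path:

    // Sketch only: the binary file source stays read-only; the write below now fails with
    // UNSUPPORTED_FEATURE.WRITE_FOR_BINARY_SOURCE rather than the legacy error code.
    try {
      spark.range(1).write.format("binaryFile").save("/tmp/binary_write_demo")
    } catch {
      case e: org.apache.spark.SparkUnsupportedOperationException =>
        assert(e.getMessage.contains("WRITE_FOR_BINARY_SOURCE"))
    }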
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 6cf930f18dc2..0aa21a4d79c7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -914,7 +914,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE } def writeUnsupportedForBinaryFileDataSourceError(): SparkUnsupportedOperationException = { - new SparkUnsupportedOperationException("_LEGACY_ERROR_TEMP_2075") + new SparkUnsupportedOperationException("UNSUPPORTED_FEATURE.WRITE_FOR_BINARY_SOURCE") } def fileLengthExceedsMaxLengthError(status: FileStatus, maxLength: Int): Throwable = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala index 387a2baa256b..62f2f2cb10a8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/binaryfile/BinaryFileFormatSuite.scala @@ -168,7 +168,7 @@ class BinaryFileFormatSuite extends QueryTest with SharedSparkSession { .format(BINARY_FILE) .save(s"$tmpDir/test_save") }, - condition = "_LEGACY_ERROR_TEMP_2075", + condition = "UNSUPPORTED_FEATURE.WRITE_FOR_BINARY_SOURCE", parameters = Map.empty) } } From ede05fa500feb02be23f89f37e8b29265ddfc5cc Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Wed, 13 Nov 2024 09:18:06 +0100 Subject: [PATCH 02/79] [SPARK-50248][SQL] Assign appropriate error condition for `_LEGACY_ERROR_TEMP_2058`: `INVALID_PARTITION_VALUE` ### What changes were proposed in this pull request? This PR proposes to Integrate `_LEGACY_ERROR_TEMP_2058 ` into `INVALID_PARTITION_VALUE ` ### Why are the changes needed? To improve the error message by assigning proper error condition and SQLSTATE ### Does this PR introduce _any_ user-facing change? No, only user-facing error message improved ### How was this patch tested? Updated the existing tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #48778 from itholic/LEGACY_2058. Authored-by: Haejoon Lee Signed-off-by: Max Gekk --- .../src/main/resources/error/error-conditions.json | 11 ++++++----- .../spark/sql/errors/QueryExecutionErrors.scala | 8 ++++---- .../sql/execution/datasources/FileIndexSuite.scala | 4 ++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index b3c92a9f2b9d..553d085f8862 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -2950,6 +2950,12 @@ }, "sqlState" : "42601" }, + "INVALID_PARTITION_VALUE" : { + "message" : [ + "Failed to cast value to data type for partition column . Ensure the value matches the expected data type for this partition column." + ], + "sqlState" : "42846" + }, "INVALID_PROPERTY_KEY" : { "message" : [ " is an invalid property key, please use quotes, e.g. SET =." @@ -7026,11 +7032,6 @@ "Unable to clear partition directory prior to writing to it." ] }, - "_LEGACY_ERROR_TEMP_2058" : { - "message" : [ - "Failed to cast value `` to `` for partition column ``." 
- ] - }, "_LEGACY_ERROR_TEMP_2059" : { "message" : [ "End of stream." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 0aa21a4d79c7..0e3f37d8d6fb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -795,11 +795,11 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE def failedToCastValueToDataTypeForPartitionColumnError( value: String, dataType: DataType, columnName: String): SparkRuntimeException = { new SparkRuntimeException( - errorClass = "_LEGACY_ERROR_TEMP_2058", + errorClass = "INVALID_PARTITION_VALUE", messageParameters = Map( - "value" -> value, - "dataType" -> dataType.toString(), - "columnName" -> columnName)) + "value" -> toSQLValue(value), + "dataType" -> toSQLType(dataType), + "columnName" -> toSQLId(columnName))) } def endOfStreamError(): Throwable = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala index e9f78f9f598e..33b4cc1d2e7f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/FileIndexSuite.scala @@ -137,8 +137,8 @@ class FileIndexSuite extends SharedSparkSession { exception = intercept[SparkRuntimeException] { fileIndex.partitionSpec() }, - condition = "_LEGACY_ERROR_TEMP_2058", - parameters = Map("value" -> "foo", "dataType" -> "IntegerType", "columnName" -> "a") + condition = "INVALID_PARTITION_VALUE", + parameters = Map("value" -> "'foo'", "dataType" -> "\"INT\"", "columnName" -> "`a`") ) } From 6fb1d438191262d2a127bc72cbbb1127fcac7587 Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Wed, 13 Nov 2024 09:20:39 +0100 Subject: [PATCH 03/79] [SPARK-50246][SQL] Assign appropriate error condition for `_LEGACY_ERROR_TEMP_2167`: `INVALID_JSON_RECORD_TYPE` ### What changes were proposed in this pull request? This PR proposes to Integrate `_LEGACY_ERROR_TEMP_2167 ` into `INVALID_JSON_RECORD_TYPE ` ### Why are the changes needed? To improve the error message by assigning proper error condition and SQLSTATE ### Does this PR introduce _any_ user-facing change? No, only user-facing error message improved ### How was this patch tested? Updated the existing tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #48775 from itholic/LEGACY_2167. Authored-by: Haejoon Lee Signed-off-by: Max Gekk --- .../src/main/resources/error/error-conditions.json | 11 ++++++----- .../spark/sql/errors/QueryExecutionErrors.scala | 4 ++-- .../sql/execution/datasources/json/JsonSuite.scala | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 553d085f8862..e51b35c0accc 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -2645,6 +2645,12 @@ ], "sqlState" : "2203G" }, + "INVALID_JSON_RECORD_TYPE" : { + "message" : [ + "Detected an invalid type of a JSON record while inferring a common schema in the mode . Expected a STRUCT type, but found ." 
+ ], + "sqlState" : "22023" + }, "INVALID_JSON_ROOT_FIELD" : { "message" : [ "Cannot convert JSON root field to target Spark type." @@ -7354,11 +7360,6 @@ "Malformed JSON." ] }, - "_LEGACY_ERROR_TEMP_2167" : { - "message" : [ - "Malformed records are detected in schema inference. Parse Mode: . Reasons: Failed to infer a common schema. Struct types are expected, but `` was found." - ] - }, "_LEGACY_ERROR_TEMP_2168" : { "message" : [ "Decorrelate inner query through is not supported." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 0e3f37d8d6fb..09836995925e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1437,10 +1437,10 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE def malformedRecordsDetectedInSchemaInferenceError(dataType: DataType): Throwable = { new SparkException( - errorClass = "_LEGACY_ERROR_TEMP_2167", + errorClass = "INVALID_JSON_RECORD_TYPE", messageParameters = Map( "failFastMode" -> FailFastMode.name, - "dataType" -> dataType.catalogString), + "invalidType" -> toSQLType(dataType)), cause = null) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 06183596a54a..dfbc8e5279aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -2078,8 +2078,8 @@ abstract class JsonSuite .option("mode", "FAILFAST") .json(path) }, - condition = "_LEGACY_ERROR_TEMP_2167", - parameters = Map("failFastMode" -> "FAILFAST", "dataType" -> "string|bigint")) + condition = "INVALID_JSON_RECORD_TYPE", + parameters = Map("failFastMode" -> "FAILFAST", "invalidType" -> "\"STRING\"|\"BIGINT\"")) val ex = intercept[SparkException] { spark.read From 898bff21c9921ba40c10ed19034baade5e0ac543 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladan=20Vasi=C4=87?= Date: Wed, 13 Nov 2024 09:30:30 +0100 Subject: [PATCH 04/79] [SPARK-50245][SQL][TESTS] Extended CollationSuite and added tests where SortMergeJoin is forced MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? I propose extending existing tests in `CollationSuite` and add cases where `SortMergeJoin` is forced and tested for correctness and use of `CollationKey`. ### Why are the changes needed? These changes are needed to properly test behavior of join with collated data when different configs are enabled. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? The change is a test itself. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48774 from vladanvasi-db/vladanvasi-db/collation-suite-test-extension. 
Authored-by: Vladan Vasić Signed-off-by: Max Gekk --- .../org/apache/spark/sql/CollationSuite.scala | 362 +++++++++--------- 1 file changed, 187 insertions(+), 175 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index 9a47491b0cca..9716d342bb6b 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.connector.catalog.{Identifier, InMemoryTable} import org.apache.spark.sql.connector.catalog.CatalogV2Implicits.CatalogHelper import org.apache.spark.sql.connector.catalog.CatalogV2Util.withDefaultOwnership import org.apache.spark.sql.errors.DataTypeErrors.toSQLType +import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec} import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec @@ -43,6 +44,29 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { private val collationNonPreservingSources = Seq("orc", "csv", "json", "text") private val allFileBasedDataSources = collationPreservingSources ++ collationNonPreservingSources + @inline + private def isSortMergeForced: Boolean = { + SQLConf.get.getConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD) == -1 + } + + private def checkRightTypeOfJoinUsed(queryPlan: SparkPlan): Unit = { + assert( + collectFirst(queryPlan) { + case _: SortMergeJoinExec => assert(isSortMergeForced) + case _: HashJoin => assert(!isSortMergeForced) + }.nonEmpty + ) + } + + private def checkCollationKeyInQueryPlan(queryPlan: SparkPlan, collationName: String): Unit = { + // Only if collation doesn't support binary equality, collation key should be injected. 
+ if (!CollationFactory.fetchCollation(collationName).supportsBinaryEquality) { + assert(queryPlan.toString().contains("collationkey")) + } else { + assert(!queryPlan.toString().contains("collationkey")) + } + } + test("collate returns proper type") { Seq( "utf8_binary", @@ -1419,7 +1443,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { for (codeGen <- Seq("NO_CODEGEN", "CODEGEN_ONLY")) { val collationSetup = if (collation.isEmpty) "" else " COLLATE " + collation val supportsBinaryEquality = collation.isEmpty || collation == "UNICODE" || - CollationFactory.fetchCollation(collation).isUtf8BinaryType + CollationFactory.fetchCollation(collation).supportsBinaryEquality test(s"Group by on map containing$collationSetup strings ($codeGen)") { val tableName = "t" @@ -1589,7 +1613,7 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } } - test("hash join should be used for collated strings") { + test("hash join should be used for collated strings if sort merge join is not forced") { val t1 = "T_1" val t2 = "T_2" @@ -1602,47 +1626,48 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { HashJoinTestCase("UNICODE_CI_RTRIM", "aa", "AA ", Seq(Row("aa", 1, "AA ", 2), Row("aa", 1, "aa", 2))) ) - - testCases.foreach(t => { + for { + t <- testCases + broadcastJoinThreshold <- Seq(-1, SQLConf.get.getConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD)) + } { withTable(t1, t2) { - sql(s"CREATE TABLE $t1 (x STRING COLLATE ${t.collation}, i int) USING PARQUET") - sql(s"INSERT INTO $t1 VALUES ('${t.data1}', 1)") + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> broadcastJoinThreshold.toString) { + sql(s"CREATE TABLE $t1 (x STRING COLLATE ${t.collation}, i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES ('${t.data1}', 1)") - sql(s"CREATE TABLE $t2 (y STRING COLLATE ${t.collation}, j int) USING PARQUET") - sql(s"INSERT INTO $t2 VALUES ('${t.data2}', 2), ('${t.data1}', 2)") + sql(s"CREATE TABLE $t2 (y STRING COLLATE ${t.collation}, j int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES ('${t.data2}', 2), ('${t.data1}', 2)") - val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") - checkAnswer(df, t.result) + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, t.result) - val queryPlan = df.queryExecution.executedPlan + val queryPlan = df.queryExecution.executedPlan - // confirm that hash join is used instead of sort merge join - assert( - collectFirst(queryPlan) { - case _: HashJoin => () - }.nonEmpty - ) - assert( - collectFirst(queryPlan) { - case _: SortMergeJoinExec => () - }.isEmpty - ) + // confirm that right kind of join is used. + checkRightTypeOfJoinUsed(queryPlan) - // Only if collation doesn't support binary equality, collation key should be injected. - if (!CollationFactory.fetchCollation(t.collation).isUtf8BinaryType) { - assert(collectFirst(queryPlan) { - case b: HashJoin => b.leftKeys.head - }.head.isInstanceOf[CollationKey]) - } else { - assert(!collectFirst(queryPlan) { - case b: HashJoin => b.leftKeys.head - }.head.isInstanceOf[CollationKey]) + if (isSortMergeForced) { + // Confirm proper injection of collation key. + checkCollationKeyInQueryPlan(queryPlan, t.collation) + } + else { + // Only if collation doesn't support binary equality, collation key should be injected. 
+ if (!CollationFactory.fetchCollation(t.collation).supportsBinaryEquality) { + assert(collectFirst(queryPlan) { + case b: HashJoin => b.leftKeys.head + }.head.isInstanceOf[CollationKey]) + } else { + assert(!collectFirst(queryPlan) { + case b: HashJoin => b.leftKeys.head + }.head.isInstanceOf[CollationKey]) + } + } } } - }) + } } - test("hash join should be used for arrays of collated strings") { + test("hash join should be used for arrays of collated strings if sort merge join is not forced") { val t1 = "T_1" val t2 = "T_2" @@ -1660,47 +1685,50 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { Seq(Row(Seq("aa"), 1, Seq("AA "), 2), Row(Seq("aa"), 1, Seq("aa"), 2))) ) - testCases.foreach(t => { + for { + t <- testCases + broadcastJoinThreshold <- Seq(-1, SQLConf.get.getConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD)) + } { withTable(t1, t2) { - sql(s"CREATE TABLE $t1 (x ARRAY, i int) USING PARQUET") - sql(s"INSERT INTO $t1 VALUES (array('${t.data1}'), 1)") + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> broadcastJoinThreshold.toString) { + sql(s"CREATE TABLE $t1 (x ARRAY, i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES (array('${t.data1}'), 1)") - sql(s"CREATE TABLE $t2 (y ARRAY, j int) USING PARQUET") - sql(s"INSERT INTO $t2 VALUES (array('${t.data2}'), 2), (array('${t.data1}'), 2)") + sql(s"CREATE TABLE $t2 (y ARRAY, j int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES (array('${t.data2}'), 2), (array('${t.data1}'), 2)") - val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") - checkAnswer(df, t.result) + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, t.result) - val queryPlan = df.queryExecution.executedPlan + val queryPlan = df.queryExecution.executedPlan - // confirm that hash join is used instead of sort merge join - assert( - collectFirst(queryPlan) { - case _: HashJoin => () - }.nonEmpty - ) - assert( - collectFirst(queryPlan) { - case _: ShuffledJoin => () - }.isEmpty - ) + // confirm that right kind of join is used. + checkRightTypeOfJoinUsed(queryPlan) - // Only if collation doesn't support binary equality, collation key should be injected. - if (!CollationFactory.fetchCollation(t.collation).isUtf8BinaryType) { - assert(collectFirst(queryPlan) { - case b: BroadcastHashJoinExec => b.leftKeys.head - }.head.asInstanceOf[ArrayTransform].function.asInstanceOf[LambdaFunction]. - function.isInstanceOf[CollationKey]) - } else { - assert(!collectFirst(queryPlan) { - case b: BroadcastHashJoinExec => b.leftKeys.head - }.head.isInstanceOf[ArrayTransform]) + if (isSortMergeForced) { + // Confirm proper injection of collation key. + checkCollationKeyInQueryPlan(queryPlan, t.collation) + } + else { + // Only if collation doesn't support binary equality, collation key should be injected. + if (!CollationFactory.fetchCollation(t.collation).supportsBinaryEquality) { + assert(collectFirst(queryPlan) { + case b: BroadcastHashJoinExec => b.leftKeys.head + }.head.asInstanceOf[ArrayTransform].function.asInstanceOf[LambdaFunction]. 
+ function.isInstanceOf[CollationKey]) + } else { + assert(!collectFirst(queryPlan) { + case b: BroadcastHashJoinExec => b.leftKeys.head + }.head.isInstanceOf[ArrayTransform]) + } + } } } - }) + } } - test("hash join should be used for arrays of arrays of collated strings") { + test("hash join should be used for arrays of arrays of collated strings " + + "if sort merge join is not forced") { val t1 = "T_1" val t2 = "T_2" @@ -1718,51 +1746,53 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { Seq(Row(Seq(Seq("aa")), 1, Seq(Seq("AA ")), 2), Row(Seq(Seq("aa")), 1, Seq(Seq("aa")), 2))) ) - testCases.foreach(t => { + for { + t <- testCases + broadcastJoinThreshold <- Seq(-1, SQLConf.get.getConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD)) + } { withTable(t1, t2) { - sql(s"CREATE TABLE $t1 (x ARRAY>, i int) USING " + - s"PARQUET") - sql(s"INSERT INTO $t1 VALUES (array(array('${t.data1}')), 1)") + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> broadcastJoinThreshold.toString) { + sql(s"CREATE TABLE $t1 (x ARRAY>, i int) USING " + + s"PARQUET") + sql(s"INSERT INTO $t1 VALUES (array(array('${t.data1}')), 1)") - sql(s"CREATE TABLE $t2 (y ARRAY>, j int) USING " + - s"PARQUET") - sql(s"INSERT INTO $t2 VALUES (array(array('${t.data2}')), 2)," + - s" (array(array('${t.data1}')), 2)") + sql(s"CREATE TABLE $t2 (y ARRAY>, j int) USING " + + s"PARQUET") + sql(s"INSERT INTO $t2 VALUES (array(array('${t.data2}')), 2)," + + s" (array(array('${t.data1}')), 2)") - val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") - checkAnswer(df, t.result) + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, t.result) - val queryPlan = df.queryExecution.executedPlan + val queryPlan = df.queryExecution.executedPlan - // confirm that hash join is used instead of sort merge join - assert( - collectFirst(queryPlan) { - case _: HashJoin => () - }.nonEmpty - ) - assert( - collectFirst(queryPlan) { - case _: ShuffledJoin => () - }.isEmpty - ) + // confirm that right kind of join is used. + checkRightTypeOfJoinUsed(queryPlan) - // Only if collation doesn't support binary equality, collation key should be injected. - if (!CollationFactory.fetchCollation(t.collation).isUtf8BinaryType) { - assert(collectFirst(queryPlan) { - case b: BroadcastHashJoinExec => b.leftKeys.head - }.head.asInstanceOf[ArrayTransform].function. - asInstanceOf[LambdaFunction].function.asInstanceOf[ArrayTransform].function. - asInstanceOf[LambdaFunction].function.isInstanceOf[CollationKey]) - } else { - assert(!collectFirst(queryPlan) { - case b: BroadcastHashJoinExec => b.leftKeys.head - }.head.isInstanceOf[ArrayTransform]) + if (isSortMergeForced) { + // Confirm proper injection of collation key. + checkCollationKeyInQueryPlan(queryPlan, t.collation) + } + else { + // Only if collation doesn't support binary equality, collation key should be injected. + if (!CollationFactory.fetchCollation(t.collation).supportsBinaryEquality) { + assert(collectFirst(queryPlan) { + case b: BroadcastHashJoinExec => b.leftKeys.head + }.head.asInstanceOf[ArrayTransform].function. + asInstanceOf[LambdaFunction].function.asInstanceOf[ArrayTransform].function. 
+ asInstanceOf[LambdaFunction].function.isInstanceOf[CollationKey]) + } else { + assert(!collectFirst(queryPlan) { + case b: BroadcastHashJoinExec => b.leftKeys.head + }.head.isInstanceOf[ArrayTransform]) + } + } } } - }) + } } - test("hash join should respect collation for struct of strings") { + test("hash and sort merge join should respect collation for struct of strings") { val t1 = "T_1" val t2 = "T_2" @@ -1779,43 +1809,36 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { HashJoinTestCase("UNICODE_CI_RTRIM", "aa", "AA ", Seq(Row(Row("aa"), 1, Row("AA "), 2), Row(Row("aa"), 1, Row("aa"), 2))) ) - testCases.foreach(t => { + for { + t <- testCases + broadcastJoinThreshold <- Seq(-1, SQLConf.get.getConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD)) + } { withTable(t1, t2) { - sql(s"CREATE TABLE $t1 (x STRUCT, i int) USING PARQUET") - sql(s"INSERT INTO $t1 VALUES (named_struct('f', '${t.data1}'), 1)") + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> broadcastJoinThreshold.toString) { + sql(s"CREATE TABLE $t1 (x STRUCT, i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES (named_struct('f', '${t.data1}'), 1)") - sql(s"CREATE TABLE $t2 (y STRUCT, j int) USING PARQUET") - sql(s"INSERT INTO $t2 VALUES (named_struct('f', '${t.data2}'), 2)," + - s" (named_struct('f', '${t.data1}'), 2)") + sql(s"CREATE TABLE $t2 (y STRUCT, j int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES (named_struct('f', '${t.data2}'), 2)," + + s" (named_struct('f', '${t.data1}'), 2)") - val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") - checkAnswer(df, t.result) + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, t.result) - val queryPlan = df.queryExecution.executedPlan + val queryPlan = df.queryExecution.executedPlan - // Confirm that hash join is used instead of sort merge join. - assert( - collectFirst(queryPlan) { - case _: HashJoin => () - }.nonEmpty - ) - assert( - collectFirst(queryPlan) { - case _: ShuffledJoin => () - }.isEmpty - ) + // confirm that right kind of join is used. + checkRightTypeOfJoinUsed(queryPlan) - // Only if collation doesn't support binary equality, collation key should be injected. - if (!CollationFactory.fetchCollation(t.collation).isUtf8BinaryType) { - assert(queryPlan.toString().contains("collationkey")) - } else { - assert(!queryPlan.toString().contains("collationkey")) + // Confirm proper injection of collation key. 
+ checkCollationKeyInQueryPlan(queryPlan, t.collation) } } - }) + } } - test("hash join should respect collation for struct of array of struct of strings") { + test("hash and sort merge join should respect collation " + + "for struct of array of struct of strings") { val t1 = "T_1" val t2 = "T_2" @@ -1835,43 +1858,36 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { Seq(Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("AA "))), 2), Row(Row(Seq(Row("aa"))), 1, Row(Seq(Row("aa"))), 2))) ) - testCases.foreach(t => { + + for { + t <- testCases + broadcastJoinThreshold <- Seq(-1, SQLConf.get.getConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD)) + } { withTable(t1, t2) { - sql(s"CREATE TABLE $t1 (x STRUCT>>, " + - s"i int) USING PARQUET") - sql(s"INSERT INTO $t1 VALUES (named_struct('f', array(named_struct('f', '${t.data1}'))), 1)" - ) + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> broadcastJoinThreshold.toString) { + sql(s"CREATE TABLE $t1 (x STRUCT>>, " + + s"i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES (named_struct('f', array(named_struct('f', " + + s"'${t.data1}'))), 1)") - sql(s"CREATE TABLE $t2 (y STRUCT>>, " + - s"j int) USING PARQUET") - sql(s"INSERT INTO $t2 VALUES (named_struct('f', array(named_struct('f', '${t.data2}'))), 2)" - + s", (named_struct('f', array(named_struct('f', '${t.data1}'))), 2)") + sql(s"CREATE TABLE $t2 (y STRUCT>>, " + + s"j int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES (named_struct('f', array(named_struct('f', " + + s"'${t.data2}'))), 2), (named_struct('f', array(named_struct('f', '${t.data1}'))), 2)") - val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") - checkAnswer(df, t.result) + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.y") + checkAnswer(df, t.result) - val queryPlan = df.queryExecution.executedPlan + val queryPlan = df.queryExecution.executedPlan - // confirm that hash join is used instead of sort merge join - assert( - collectFirst(queryPlan) { - case _: HashJoin => () - }.nonEmpty - ) - assert( - collectFirst(queryPlan) { - case _: ShuffledJoin => () - }.isEmpty - ) + // confirm that right kind of join is used. + checkRightTypeOfJoinUsed(queryPlan) - // Only if collation doesn't support binary equality, collation key should be injected. - if (!CollationFactory.fetchCollation(t.collation).isUtf8BinaryType) { - assert(queryPlan.toString().contains("collationkey")) - } else { - assert(!queryPlan.toString().contains("collationkey")) + // Confirm proper injection of collation key. 
+ checkCollationKeyInQueryPlan(queryPlan, t.collation) } } - }) + } } test("rewrite with collationkey should be a non-excludable rule") { @@ -1931,31 +1947,27 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { "'a', 'a', 1", "'A', 'A ', 1", Row("a", "a", 1, "A", "A ", 1)) ) - testCases.foreach(t => { + for { + t <- testCases + broadcastJoinThreshold <- Seq(-1, SQLConf.get.getConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD)) + } { withTable(t1, t2) { - sql(s"CREATE TABLE $t1 (x ${t.type1}, y ${t.type2}, i int) USING PARQUET") - sql(s"INSERT INTO $t1 VALUES (${t.data1})") - sql(s"CREATE TABLE $t2 (x ${t.type1}, y ${t.type2}, i int) USING PARQUET") - sql(s"INSERT INTO $t2 VALUES (${t.data2})") + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> broadcastJoinThreshold.toString) { + sql(s"CREATE TABLE $t1 (x ${t.type1}, y ${t.type2}, i int) USING PARQUET") + sql(s"INSERT INTO $t1 VALUES (${t.data1})") + sql(s"CREATE TABLE $t2 (x ${t.type1}, y ${t.type2}, i int) USING PARQUET") + sql(s"INSERT INTO $t2 VALUES (${t.data2})") - val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.x AND $t1.y = $t2.y") - checkAnswer(df, t.result) + val df = sql(s"SELECT * FROM $t1 JOIN $t2 ON $t1.x = $t2.x AND $t1.y = $t2.y") + checkAnswer(df, t.result) - val queryPlan = df.queryExecution.executedPlan + val queryPlan = df.queryExecution.executedPlan - // confirm that hash join is used instead of sort merge join - assert( - collectFirst(queryPlan) { - case _: HashJoin => () - }.nonEmpty - ) - assert( - collectFirst(queryPlan) { - case _: SortMergeJoinExec => () - }.isEmpty - ) + // confirm that right kind of join is used. + checkRightTypeOfJoinUsed(queryPlan) + } } - }) + } } test("hll sketch aggregate should respect collation") { From bd94419c988ba115c6c05df18f60e17c066dfe78 Mon Sep 17 00:00:00 2001 From: Ruzel Ibragimov Date: Wed, 13 Nov 2024 16:30:16 +0100 Subject: [PATCH 05/79] [SPARK-50226][SQL] Correct MakeDTInterval and MakeYMInterval to catch Java exceptions ### What changes were proposed in this pull request? `MakeDTInterval` and `MakeYMInterval` do not catch Java exceptions in nullSafeEval like it does `MakeInterval`. So we making behavior similar. ### Why are the changes needed? To show to users readable nice error message. ### Does this PR introduce _any_ user-facing change? Improved error message ### How was this patch tested? There already were few tests to check behavior, I just changed expected error type. ### Was this patch authored or co-authored using generative AI tooling? Yes, Copilot used. Closes #48773 from gotocoding-DB/SPARK-50226-overflow-error. 
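[Editorial illustration, not part of the patch] With this change an overflowing interval constructor reports a structured error condition with query context instead of a bare `java.lang.ArithmeticException`. A minimal sketch, assuming a running SparkSession named `spark`:

    // Sketch only: make_ym_interval overflow now surfaces
    // INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION (SQLSTATE 22015).
    try {
      spark.sql("SELECT make_ym_interval(178956970, 8)").collect()
    } catch {
      case e: org.apache.spark.SparkArithmeticException =>
        assert(e.getMessage.contains("INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION"))
    }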
Authored-by: Ruzel Ibragimov Signed-off-by: Max Gekk --- .../resources/error/error-conditions.json | 14 +++- .../expressions/intervalExpressions.scala | 43 +++++++--- .../sql/catalyst/util/IntervalMathUtils.scala | 9 ++- .../sql/catalyst/util/IntervalUtils.scala | 18 +++-- .../sql/errors/QueryExecutionErrors.scala | 23 +++--- .../IntervalExpressionsSuite.scala | 6 +- .../sql-tests/results/ansi/interval.sql.out | 80 ++++++++++++------- .../sql-tests/results/interval.sql.out | 80 ++++++++++++------- .../spark/sql/DataFrameAggregateSuite.scala | 47 +++++++---- 9 files changed, 210 insertions(+), 110 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index e51b35c0accc..5e1c3f46fd11 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -2012,8 +2012,20 @@ }, "INTERVAL_ARITHMETIC_OVERFLOW" : { "message" : [ - "." + "Integer overflow while operating with intervals." ], + "subClass" : { + "WITHOUT_SUGGESTION" : { + "message" : [ + "Try devising appropriate values for the interval parameters." + ] + }, + "WITH_SUGGESTION" : { + "message" : [ + "Use to tolerate overflow and return NULL instead." + ] + } + }, "sqlState" : "22015" }, "INTERVAL_DIVIDED_BY_ZERO" : { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala index 1ce7dfd39acc..a7b67f55d8cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/intervalExpressions.scala @@ -481,7 +481,7 @@ case class MakeDTInterval( hours: Expression, mins: Expression, secs: Expression) - extends QuaternaryExpression with ImplicitCastInputTypes { + extends QuaternaryExpression with ImplicitCastInputTypes with SupportQueryContext { override def nullIntolerant: Boolean = true def this( @@ -514,13 +514,15 @@ case class MakeDTInterval( day.asInstanceOf[Int], hour.asInstanceOf[Int], min.asInstanceOf[Int], - sec.asInstanceOf[Decimal]) + sec.asInstanceOf[Decimal], + origin.context) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { defineCodeGen(ctx, ev, (day, hour, min, sec) => { + val errorContext = getContextOrNullCode(ctx) val iu = IntervalUtils.getClass.getName.stripSuffix("$") - s"$iu.makeDayTimeInterval($day, $hour, $min, $sec)" + s"$iu.makeDayTimeInterval($day, $hour, $min, $sec, $errorContext)" }) } @@ -532,6 +534,8 @@ case class MakeDTInterval( mins: Expression, secs: Expression): MakeDTInterval = copy(days, hours, mins, secs) + + override def initQueryContext(): Option[QueryContext] = Some(origin.context) } @ExpressionDescription( @@ -556,7 +560,7 @@ case class MakeDTInterval( group = "datetime_funcs") // scalastyle:on line.size.limit case class MakeYMInterval(years: Expression, months: Expression) - extends BinaryExpression with ImplicitCastInputTypes with Serializable { + extends BinaryExpression with ImplicitCastInputTypes with Serializable with SupportQueryContext { override def nullIntolerant: Boolean = true def this(years: Expression) = this(years, Literal(0)) @@ -568,17 +572,28 @@ case class MakeYMInterval(years: Expression, months: Expression) override def dataType: DataType = YearMonthIntervalType() override def nullSafeEval(year: Any, month: Any): Any = { - 
Math.toIntExact(Math.addExact(month.asInstanceOf[Number].longValue(), - Math.multiplyExact(year.asInstanceOf[Number].longValue(), MONTHS_PER_YEAR))) + try { + Math.toIntExact( + Math.addExact(month.asInstanceOf[Int], + Math.multiplyExact(year.asInstanceOf[Int], MONTHS_PER_YEAR))) + } catch { + case _: ArithmeticException => + throw QueryExecutionErrors.withoutSuggestionIntervalArithmeticOverflowError(origin.context) + } } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { - defineCodeGen(ctx, ev, (years, months) => { + nullSafeCodeGen(ctx, ev, (years, months) => { val math = classOf[Math].getName.stripSuffix("$") + val errorContext = getContextOrNullCode(ctx) + // scalastyle:off line.size.limit s""" - |$math.toIntExact(java.lang.Math.addExact($months, - | $math.multiplyExact($years, $MONTHS_PER_YEAR))) - |""".stripMargin + |try { + | ${ev.value} = $math.toIntExact($math.addExact($months, $math.multiplyExact($years, $MONTHS_PER_YEAR))); + |} catch (java.lang.ArithmeticException e) { + | throw QueryExecutionErrors.withoutSuggestionIntervalArithmeticOverflowError($errorContext); + |}""".stripMargin + // scalastyle:on line.size.limit }) } @@ -587,6 +602,10 @@ case class MakeYMInterval(years: Expression, months: Expression) override protected def withNewChildrenInternal( newLeft: Expression, newRight: Expression): Expression = copy(years = newLeft, months = newRight) + + override def initQueryContext(): Option[QueryContext] = { + Some(origin.context) + } } // Multiply an year-month interval by a numeric @@ -699,8 +718,8 @@ trait IntervalDivide { context: QueryContext): Unit = { if (value == minValue && num.dataType.isInstanceOf[IntegralType]) { if (numValue.asInstanceOf[Number].longValue() == -1) { - throw QueryExecutionErrors.intervalArithmeticOverflowError( - "Interval value overflows after being divided by -1", "try_divide", context) + throw QueryExecutionErrors.withSuggestionIntervalArithmeticOverflowError( + "try_divide", context) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalMathUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalMathUtils.scala index c935c6057376..756f2598f13f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalMathUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalMathUtils.scala @@ -35,12 +35,15 @@ object IntervalMathUtils { def negateExact(a: Long): Long = withOverflow(Math.negateExact(a)) - private def withOverflow[A](f: => A, hint: String = ""): A = { + private def withOverflow[A](f: => A, suggestedFunc: String = ""): A = { try { f } catch { - case e: ArithmeticException => - throw QueryExecutionErrors.intervalArithmeticOverflowError(e.getMessage, hint, null) + case _: ArithmeticException if suggestedFunc.isEmpty => + throw QueryExecutionErrors.withoutSuggestionIntervalArithmeticOverflowError(context = null) + case _: ArithmeticException => + throw QueryExecutionErrors.withSuggestionIntervalArithmeticOverflowError( + suggestedFunc, context = null) } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala index 90c802b7e28d..39a07990dea3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/IntervalUtils.scala @@ -22,7 +22,7 @@ import java.util.concurrent.TimeUnit 
import scala.util.control.NonFatal -import org.apache.spark.{SparkIllegalArgumentException, SparkThrowable} +import org.apache.spark.{QueryContext, SparkIllegalArgumentException, SparkThrowable} import org.apache.spark.sql.catalyst.expressions.Literal import org.apache.spark.sql.catalyst.parser.CatalystSqlParser import org.apache.spark.sql.catalyst.util.DateTimeConstants._ @@ -782,13 +782,19 @@ object IntervalUtils extends SparkIntervalUtils { days: Int, hours: Int, mins: Int, - secs: Decimal): Long = { + secs: Decimal, + context: QueryContext): Long = { assert(secs.scale == 6, "Seconds fractional must have 6 digits for microseconds") var micros = secs.toUnscaledLong - micros = Math.addExact(micros, Math.multiplyExact(days, MICROS_PER_DAY)) - micros = Math.addExact(micros, Math.multiplyExact(hours, MICROS_PER_HOUR)) - micros = Math.addExact(micros, Math.multiplyExact(mins, MICROS_PER_MINUTE)) - micros + try { + micros = Math.addExact(micros, Math.multiplyExact(days, MICROS_PER_DAY)) + micros = Math.addExact(micros, Math.multiplyExact(hours, MICROS_PER_HOUR)) + micros = Math.addExact(micros, Math.multiplyExact(mins, MICROS_PER_MINUTE)) + micros + } catch { + case _: ArithmeticException => + throw QueryExecutionErrors.withoutSuggestionIntervalArithmeticOverflowError(context) + } } def intToYearMonthInterval(v: Int, startField: Byte, endField: Byte): Int = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 09836995925e..fb39d3c5d7c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -636,18 +636,21 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE summary = "") } - def intervalArithmeticOverflowError( - message: String, - hint: String = "", + def withSuggestionIntervalArithmeticOverflowError( + suggestedFunc: String, context: QueryContext): ArithmeticException = { - val alternative = if (hint.nonEmpty) { - s" Use '$hint' to tolerate overflow and return NULL instead." 
- } else "" new SparkArithmeticException( - errorClass = "INTERVAL_ARITHMETIC_OVERFLOW", - messageParameters = Map( - "message" -> message, - "alternative" -> alternative), + errorClass = "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", + messageParameters = Map("functionName" -> toSQLId(suggestedFunc)), + context = getQueryContext(context), + summary = getSummary(context)) + } + + def withoutSuggestionIntervalArithmeticOverflowError( + context: QueryContext): SparkArithmeticException = { + new SparkArithmeticException( + errorClass = "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION", + messageParameters = Map(), context = getQueryContext(context), summary = getSummary(context)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala index 78bc77b9dc2a..8fb72ad53062 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/IntervalExpressionsSuite.scala @@ -316,7 +316,8 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val secFrac = DateTimeTestUtils.secFrac(seconds, millis, micros) val durationExpr = MakeDTInterval(Literal(days), Literal(hours), Literal(minutes), Literal(Decimal(secFrac, Decimal.MAX_LONG_DIGITS, 6))) - checkExceptionInExpression[ArithmeticException](durationExpr, EmptyRow, "") + checkExceptionInExpression[ArithmeticException]( + durationExpr, "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION") } check(millis = -123) @@ -528,7 +529,8 @@ class IntervalExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { Seq(MakeYMInterval(Literal(178956970), Literal(8)), MakeYMInterval(Literal(-178956970), Literal(-9))) .foreach { ym => - checkExceptionInExpression[ArithmeticException](ym, "integer overflow") + checkExceptionInExpression[ArithmeticException]( + ym, "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION") } def checkImplicitEvaluation(expr: Expression, value: Any): Unit = { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 766bfba7696f..4e012df792de 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -936,8 +936,18 @@ select make_dt_interval(2147483647) -- !query schema struct<> -- !query output -java.lang.ArithmeticException -long overflow +org.apache.spark.SparkArithmeticException +{ + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION", + "sqlState" : "22015", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 35, + "fragment" : "make_dt_interval(2147483647)" + } ] +} -- !query @@ -977,8 +987,18 @@ select make_ym_interval(178956970, 8) -- !query schema struct<> -- !query output -java.lang.ArithmeticException -integer overflow +org.apache.spark.SparkArithmeticException +{ + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION", + "sqlState" : "22015", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 37, + "fragment" : "make_ym_interval(178956970, 8)" + } ] +} -- !query @@ -994,8 +1014,18 @@ select make_ym_interval(-178956970, -9) -- !query schema struct<> -- !query output -java.lang.ArithmeticException -integer 
overflow +org.apache.spark.SparkArithmeticException +{ + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION", + "sqlState" : "22015", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 39, + "fragment" : "make_ym_interval(-178956970, -9)" + } ] +} -- !query @@ -2493,12 +2523,8 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", - "sqlState" : "22015", - "messageParameters" : { - "alternative" : "", - "message" : "integer overflow" - } + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION", + "sqlState" : "22015" } @@ -2509,11 +2535,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_subtract' to tolerate overflow and return NULL instead.", - "message" : "integer overflow" + "functionName" : "`try_subtract`" } } @@ -2525,11 +2550,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_add' to tolerate overflow and return NULL instead.", - "message" : "integer overflow" + "functionName" : "`try_add`" } } @@ -2838,11 +2862,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_divide' to tolerate overflow and return NULL instead.", - "message" : "Interval value overflows after being divided by -1" + "functionName" : "`try_divide`" }, "queryContext" : [ { "objectType" : "", @@ -2861,11 +2884,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_divide' to tolerate overflow and return NULL instead.", - "message" : "Interval value overflows after being divided by -1" + "functionName" : "`try_divide`" }, "queryContext" : [ { "objectType" : "", @@ -2918,11 +2940,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_divide' to tolerate overflow and return NULL instead.", - "message" : "Interval value overflows after being divided by -1" + "functionName" : "`try_divide`" }, "queryContext" : [ { "objectType" : "", @@ -2941,11 +2962,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_divide' to tolerate overflow and return NULL instead.", - "message" : "Interval value overflows after being divided by -1" + "functionName" : "`try_divide`" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index 7eed2d42da04..a8a0423bdb3e 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -823,8 +823,18 @@ select make_dt_interval(2147483647) -- !query schema struct<> -- !query output -java.lang.ArithmeticException -long overflow +org.apache.spark.SparkArithmeticException +{ + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION", + "sqlState" : "22015", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 35, + "fragment" : "make_dt_interval(2147483647)" + } ] +} -- !query @@ -864,8 +874,18 @@ select make_ym_interval(178956970, 8) -- !query schema struct<> -- !query output -java.lang.ArithmeticException -integer overflow +org.apache.spark.SparkArithmeticException +{ + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION", + "sqlState" : "22015", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 37, + "fragment" : "make_ym_interval(178956970, 8)" + } ] +} -- !query @@ -881,8 +901,18 @@ select make_ym_interval(-178956970, -9) -- !query schema struct<> -- !query output -java.lang.ArithmeticException -integer overflow +org.apache.spark.SparkArithmeticException +{ + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION", + "sqlState" : "22015", + "queryContext" : [ { + "objectType" : "", + "objectName" : "", + "startIndex" : 8, + "stopIndex" : 39, + "fragment" : "make_ym_interval(-178956970, -9)" + } ] +} -- !query @@ -2316,12 +2346,8 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", - "sqlState" : "22015", - "messageParameters" : { - "alternative" : "", - "message" : "integer overflow" - } + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITHOUT_SUGGESTION", + "sqlState" : "22015" } @@ -2332,11 +2358,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_subtract' to tolerate overflow and return NULL instead.", - "message" : "integer overflow" + "functionName" : "`try_subtract`" } } @@ -2348,11 +2373,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_add' to tolerate overflow and return NULL instead.", - "message" : "integer overflow" + "functionName" : "`try_add`" } } @@ -2661,11 +2685,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_divide' to tolerate overflow and return NULL instead.", - "message" : "Interval value overflows after being divided by -1" + "functionName" : "`try_divide`" }, "queryContext" : [ { "objectType" : "", @@ -2684,11 +2707,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 
'try_divide' to tolerate overflow and return NULL instead.", - "message" : "Interval value overflows after being divided by -1" + "functionName" : "`try_divide`" }, "queryContext" : [ { "objectType" : "", @@ -2741,11 +2763,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_divide' to tolerate overflow and return NULL instead.", - "message" : "Interval value overflows after being divided by -1" + "functionName" : "`try_divide`" }, "queryContext" : [ { "objectType" : "", @@ -2764,11 +2785,10 @@ struct<> -- !query output org.apache.spark.SparkArithmeticException { - "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW", + "errorClass" : "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", "sqlState" : "22015", "messageParameters" : { - "alternative" : " Use 'try_divide' to tolerate overflow and return NULL instead.", - "message" : "Interval value overflows after being divided by -1" + "functionName" : "`try_divide`" }, "queryContext" : [ { "objectType" : "", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala index 7ebcb280def6..6348e5f31539 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala @@ -26,6 +26,7 @@ import org.scalatest.matchers.must.Matchers.the import org.apache.spark.{SparkArithmeticException, SparkRuntimeException} import org.apache.spark.sql.catalyst.plans.logical.Expand import org.apache.spark.sql.catalyst.util.AUTO_GENERATED_ALIAS +import org.apache.spark.sql.errors.DataTypeErrors.toSQLId import org.apache.spark.sql.execution.WholeStageCodegenExec import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} @@ -1485,15 +1486,22 @@ class DataFrameAggregateSuite extends QueryTest val df2 = Seq((Period.ofMonths(Int.MaxValue), Duration.ofDays(106751991)), (Period.ofMonths(10), Duration.ofDays(10))) .toDF("year-month", "day") - val error = intercept[SparkArithmeticException] { - checkAnswer(df2.select(sum($"year-month")), Nil) - } - assert(error.getMessage contains "[INTERVAL_ARITHMETIC_OVERFLOW] integer overflow") - val error2 = intercept[SparkArithmeticException] { - checkAnswer(df2.select(sum($"day")), Nil) - } - assert(error2.getMessage contains "[INTERVAL_ARITHMETIC_OVERFLOW] long overflow") + checkError( + exception = intercept[SparkArithmeticException] { + checkAnswer(df2.select(sum($"year-month")), Nil) + }, + condition = "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", + parameters = Map("functionName" -> toSQLId("try_add")) + ) + + checkError( + exception = intercept[SparkArithmeticException] { + checkAnswer(df2.select(sum($"day")), Nil) + }, + condition = "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", + parameters = Map("functionName" -> toSQLId("try_add")) + ) } test("SPARK-34837: Support ANSI SQL intervals by the aggregate function `avg`") { @@ -1620,15 +1628,22 @@ class DataFrameAggregateSuite extends QueryTest val df2 = Seq((Period.ofMonths(Int.MaxValue), Duration.ofDays(106751991)), (Period.ofMonths(10), Duration.ofDays(10))) .toDF("year-month", "day") - val error = intercept[SparkArithmeticException] { - 
checkAnswer(df2.select(avg($"year-month")), Nil) - } - assert(error.getMessage contains "[INTERVAL_ARITHMETIC_OVERFLOW] integer overflow") - val error2 = intercept[SparkArithmeticException] { - checkAnswer(df2.select(avg($"day")), Nil) - } - assert(error2.getMessage contains "[INTERVAL_ARITHMETIC_OVERFLOW] long overflow") + checkError( + exception = intercept[SparkArithmeticException] { + checkAnswer(df2.select(avg($"year-month")), Nil) + }, + condition = "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", + parameters = Map("functionName" -> toSQLId("try_add")) + ) + + checkError( + exception = intercept[SparkArithmeticException] { + checkAnswer(df2.select(avg($"day")), Nil) + }, + condition = "INTERVAL_ARITHMETIC_OVERFLOW.WITH_SUGGESTION", + parameters = Map("functionName" -> toSQLId("try_add")) + ) val df3 = intervalData.filter($"class" > 4) val avgDF3 = df3.select(avg($"year-month"), avg($"day")) From bc9b2597ea2d99620918f809a3db8739968e42a3 Mon Sep 17 00:00:00 2001 From: panbingkun Date: Wed, 13 Nov 2024 16:36:02 +0100 Subject: [PATCH 06/79] [SPARK-50066][SQL] Codegen Support for `SchemaOfXml` (by Invoke & RuntimeReplaceable) ### What changes were proposed in this pull request? The pr aims to add `Codegen` Support for `schema_of_xml`. ### Why are the changes needed? - improve codegen coverage. - simplified code. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA & Existed UT (eg: XmlFunctionsSuite#`*schema_of_xml*`) ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48594 from panbingkun/SPARK-50066. Authored-by: panbingkun Signed-off-by: Max Gekk --- .../xml/XmlExpressionEvalUtils.scala | 42 +++++++++++++++++++ .../catalyst/expressions/xmlExpressions.scala | 34 +++++++-------- 2 files changed, 58 insertions(+), 18 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XmlExpressionEvalUtils.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XmlExpressionEvalUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XmlExpressionEvalUtils.scala new file mode 100644 index 000000000000..dff88475327a --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xml/XmlExpressionEvalUtils.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.catalyst.expressions.xml + +import org.apache.spark.sql.catalyst.xml.XmlInferSchema +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.types.{ArrayType, DataType, StructType} +import org.apache.spark.unsafe.types.UTF8String + +object XmlExpressionEvalUtils { + + def schemaOfXml(xmlInferSchema: XmlInferSchema, xml: UTF8String): UTF8String = { + val dataType = xmlInferSchema.infer(xml.toString).get match { + case st: StructType => + xmlInferSchema.canonicalizeType(st).getOrElse(StructType(Nil)) + case at: ArrayType if at.elementType.isInstanceOf[StructType] => + xmlInferSchema + .canonicalizeType(at.elementType) + .map(ArrayType(_, containsNull = at.containsNull)) + .getOrElse(ArrayType(StructType(Nil), containsNull = at.containsNull)) + case other: DataType => + xmlInferSchema.canonicalizeType(other).getOrElse(SQLConf.get.defaultStringType) + } + + UTF8String.fromString(dataType.sql) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala index 196c0793e619..6f004cbce426 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/xmlExpressions.scala @@ -21,7 +21,9 @@ import java.io.CharArrayWriter import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.analysis.TypeCheckResult.{DataTypeMismatch, TypeCheckSuccess} -import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, CodegenFallback, ExprCode} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode} +import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke +import org.apache.spark.sql.catalyst.expressions.xml.XmlExpressionEvalUtils import org.apache.spark.sql.catalyst.util.{DropMalformedMode, FailFastMode, FailureSafeParser, PermissiveMode} import org.apache.spark.sql.catalyst.util.TypeUtils._ import org.apache.spark.sql.catalyst.xml.{StaxXmlGenerator, StaxXmlParser, ValidatorUtil, XmlInferSchema, XmlOptions} @@ -149,7 +151,9 @@ case class XmlToStructs( case class SchemaOfXml( child: Expression, options: Map[String, String]) - extends UnaryExpression with CodegenFallback with QueryErrorsBase { + extends UnaryExpression + with RuntimeReplaceable + with QueryErrorsBase { def this(child: Expression) = this(child, Map.empty[String, String]) @@ -192,26 +196,20 @@ case class SchemaOfXml( } } - override def eval(v: InternalRow): Any = { - val dataType = xmlInferSchema.infer(xml.toString).get match { - case st: StructType => - xmlInferSchema.canonicalizeType(st).getOrElse(StructType(Nil)) - case at: ArrayType if at.elementType.isInstanceOf[StructType] => - xmlInferSchema - .canonicalizeType(at.elementType) - .map(ArrayType(_, containsNull = at.containsNull)) - .getOrElse(ArrayType(StructType(Nil), containsNull = at.containsNull)) - case other: DataType => - xmlInferSchema.canonicalizeType(other).getOrElse(SQLConf.get.defaultStringType) - } - - UTF8String.fromString(dataType.sql) - } - override def prettyName: String = "schema_of_xml" override protected def withNewChildInternal(newChild: Expression): SchemaOfXml = copy(child = newChild) + + @transient private lazy val xmlInferSchemaObjectType = ObjectType(classOf[XmlInferSchema]) + + override def replacement: Expression = 
StaticInvoke( + XmlExpressionEvalUtils.getClass, + dataType, + "schemaOfXml", + Seq(Literal(xmlInferSchema, xmlInferSchemaObjectType), child), + Seq(xmlInferSchemaObjectType, child.dataType) + ) } /** From 558fc89f8ccf631cf12e9838d57c6aaa77696c03 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 13 Nov 2024 16:40:16 +0100 Subject: [PATCH 07/79] [SPARK-49611][SQL][FOLLOW-UP] Make collations TVF consistent and return null on no result for country and language ### What changes were proposed in this pull request? It was noticed that we return null for country and language for collations TVF when collation is UTF8_*, but when information is missing in ICU we return empty string. ### Why are the changes needed? Making behaviour consistent. ### Does this PR introduce _any_ user-facing change? No, this is all in Spark 4.0, so addition of this TVF was not released yet. ### How was this patch tested? Existing test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48835 from mihailom-db/fix-collations-table. Authored-by: Mihailo Milosevic Signed-off-by: Max Gekk --- .../sql/catalyst/util/CollationFactory.java | 6 ++++-- .../org/apache/spark/sql/CollationSuite.scala | 16 ++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java index ad5e5ae845f8..4064f830e92d 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java @@ -1023,12 +1023,14 @@ protected Collation buildCollation() { @Override protected CollationMeta buildCollationMeta() { + String language = ICULocaleMap.get(locale).getDisplayLanguage(); + String country = ICULocaleMap.get(locale).getDisplayCountry(); return new CollationMeta( CATALOG, SCHEMA, normalizedCollationName(), - ICULocaleMap.get(locale).getDisplayLanguage(), - ICULocaleMap.get(locale).getDisplayCountry(), + language.isEmpty() ? null : language, + country.isEmpty() ? 
null : country, VersionInfo.ICU_VERSION.toString(), COLLATION_PAD_ATTRIBUTE, accentSensitivity == AccentSensitivity.AS, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index 9716d342bb6b..f5cb30809ae5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -2037,21 +2037,21 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", null), Row("SYSTEM", "BUILTIN", "UTF8_LCASE", null, null, "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", null), - Row("SYSTEM", "BUILTIN", "UNICODE", "", "", + Row("SYSTEM", "BUILTIN", "UNICODE", null, null, "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", icvVersion), - Row("SYSTEM", "BUILTIN", "UNICODE_AI", "", "", + Row("SYSTEM", "BUILTIN", "UNICODE_AI", null, null, "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", icvVersion), - Row("SYSTEM", "BUILTIN", "UNICODE_CI", "", "", + Row("SYSTEM", "BUILTIN", "UNICODE_CI", null, null, "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", icvVersion), - Row("SYSTEM", "BUILTIN", "UNICODE_CI_AI", "", "", + Row("SYSTEM", "BUILTIN", "UNICODE_CI_AI", null, null, "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", icvVersion), - Row("SYSTEM", "BUILTIN", "af", "Afrikaans", "", + Row("SYSTEM", "BUILTIN", "af", "Afrikaans", null, "ACCENT_SENSITIVE", "CASE_SENSITIVE", "NO_PAD", icvVersion), - Row("SYSTEM", "BUILTIN", "af_AI", "Afrikaans", "", + Row("SYSTEM", "BUILTIN", "af_AI", "Afrikaans", null, "ACCENT_INSENSITIVE", "CASE_SENSITIVE", "NO_PAD", icvVersion), - Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", "", + Row("SYSTEM", "BUILTIN", "af_CI", "Afrikaans", null, "ACCENT_SENSITIVE", "CASE_INSENSITIVE", "NO_PAD", icvVersion), - Row("SYSTEM", "BUILTIN", "af_CI_AI", "Afrikaans", "", + Row("SYSTEM", "BUILTIN", "af_CI_AI", "Afrikaans", null, "ACCENT_INSENSITIVE", "CASE_INSENSITIVE", "NO_PAD", icvVersion))) checkAnswer(sql("SELECT * FROM collations() WHERE NAME LIKE '%UTF8_BINARY%'"), From 7b1b450bb65b49f8a5c9e2d9ebd1e01a2e4e3880 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vladan=20Vasi=C4=87?= Date: Wed, 13 Nov 2024 16:43:59 +0100 Subject: [PATCH 08/79] Revert [SPARK-50215][SQL] Refactored StringType pattern matching in jdbc code stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What changes were proposed in this pull request? I propose reverting the PR for changing pattern matching of `StringType` in the jdbc code stack, since it may lead to collated column being mapped to uncollated column in some dialects. For the time being, this is not the correct behavior. ### Why are the changes needed? These changes are needed in order to preserve proper behavior in the dialects regarding datatype mapping. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? No testing was needed. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48833 from vladanvasi-db/vladanvasi-db/jdbc-refactor-revert. 
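
As a rough, self-contained illustration of the pattern-matching difference this revert restores (the `MyStringType` and `JdbcType` names below are made-up stand-ins, not Spark's actual Catalyst or JDBC classes): a type pattern such as `case _: StringType` accepts every collated instance and maps it to the same uncollated database type, while matching the default object lets collated columns fall through so a dialect never silently drops the collation.

```scala
// Minimal sketch only; MyStringType and JdbcType are hypothetical stand-ins,
// not Spark's real Catalyst/JDBC types.
object StringTypeMatchSketch {

  // A string type that may carry a collation; the companion object models the
  // default (uncollated) type, mirroring what a `case StringType` match hits.
  class MyStringType(val collation: String) {
    override def toString: String = s"MyStringType($collation)"
  }
  object MyStringType extends MyStringType("UTF8_BINARY")

  final case class JdbcType(databaseTypeDefinition: String)

  // Matching the object (by equality) only hits the default type, so collated
  // instances fall through and the caller must handle them explicitly.
  def strictMapping(dt: MyStringType): Option[JdbcType] = dt match {
    case MyStringType => Some(JdbcType("TEXT"))
    case _            => None
  }

  // The type pattern matches *any* instance, collated or not, which is how a
  // collated column could silently end up mapped to an uncollated column type.
  def looseMapping(dt: MyStringType): Option[JdbcType] = dt match {
    case _: MyStringType => Some(JdbcType("TEXT"))
  }

  def main(args: Array[String]): Unit = {
    val collated = new MyStringType("UNICODE_CI")
    println(strictMapping(MyStringType)) // Some(JdbcType(TEXT))
    println(strictMapping(collated))     // None: collation is not silently dropped
    println(looseMapping(collated))      // Some(JdbcType(TEXT)): collation is lost
  }
}
```

Under this simplified model, keeping the object match is the conservative choice until the dialects can map collated types explicitly.
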
Authored-by: Vladan Vasić Signed-off-by: Max Gekk --- .../src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala | 2 +- .../scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala | 2 +- .../src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala | 2 +- .../src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala | 2 +- .../scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala | 2 +- .../src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala | 2 +- .../main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala | 2 +- .../main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala | 2 +- .../main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala index 3256803f6039..2f54f1f62fde 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala @@ -101,7 +101,7 @@ private case class DB2Dialect() extends JdbcDialect with SQLConfHelper with NoLe } override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { - case _: StringType => Option(JdbcType("CLOB", java.sql.Types.CLOB)) + case StringType => Option(JdbcType("CLOB", java.sql.Types.CLOB)) case BooleanType if conf.legacyDB2BooleanMappingEnabled => Option(JdbcType("CHAR(1)", java.sql.Types.CHAR)) case BooleanType => Option(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala index 3b855b376967..af77f8575dd8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DatabricksDialect.scala @@ -44,7 +44,7 @@ private case class DatabricksDialect() extends JdbcDialect with NoLegacyJDBCErro override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { case BooleanType => Some(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN)) case DoubleType => Some(JdbcType("DOUBLE", java.sql.Types.DOUBLE)) - case _: StringType => Some(JdbcType("STRING", java.sql.Types.VARCHAR)) + case StringType => Some(JdbcType("STRING", java.sql.Types.VARCHAR)) case BinaryType => Some(JdbcType("BINARY", java.sql.Types.BINARY)) case _ => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala index f78e155d485d..7b65a01b5e70 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DerbyDialect.scala @@ -44,7 +44,7 @@ private case class DerbyDialect() extends JdbcDialect with NoLegacyJDBCError { } override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { - case _: StringType => Option(JdbcType("CLOB", java.sql.Types.CLOB)) + case StringType => Option(JdbcType("CLOB", java.sql.Types.CLOB)) case ByteType => Option(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) case ShortType => Option(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) case BooleanType => Option(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala index 5e5ba797ca60..798ecb5b36ff 100644 --- 
a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala @@ -73,7 +73,7 @@ private[sql] case class H2Dialect() extends JdbcDialect with NoLegacyJDBCError { } override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { - case _: StringType => Option(JdbcType("CLOB", Types.CLOB)) + case StringType => Option(JdbcType("CLOB", Types.CLOB)) case BooleanType => Some(JdbcType("BOOLEAN", Types.BOOLEAN)) case ShortType | ByteType => Some(JdbcType("SMALLINT", Types.SMALLINT)) case t: DecimalType => Some( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala index a29f3d9550d1..7d476d43e5c7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala @@ -135,7 +135,7 @@ private case class MsSqlServerDialect() extends JdbcDialect with NoLegacyJDBCErr override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { case TimestampType => Some(JdbcType("DATETIME", java.sql.Types.TIMESTAMP)) case TimestampNTZType => Some(JdbcType("DATETIME", java.sql.Types.TIMESTAMP)) - case _: StringType => Some(JdbcType("NVARCHAR(MAX)", java.sql.Types.NVARCHAR)) + case StringType => Some(JdbcType("NVARCHAR(MAX)", java.sql.Types.NVARCHAR)) case BooleanType => Some(JdbcType("BIT", java.sql.Types.BIT)) case BinaryType => Some(JdbcType("VARBINARY(MAX)", java.sql.Types.VARBINARY)) case ShortType if !SQLConf.get.legacyMsSqlServerNumericMappingEnabled => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala index c4f2793707e5..dd0118d87599 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MySQLDialect.scala @@ -256,7 +256,7 @@ private case class MySQLDialect() extends JdbcDialect with SQLConfHelper with No // See SPARK-35446: MySQL treats REAL as a synonym to DOUBLE by default // We override getJDBCType so that FloatType is mapped to FLOAT instead case FloatType => Option(JdbcType("FLOAT", java.sql.Types.FLOAT)) - case _: StringType => Option(JdbcType("LONGTEXT", java.sql.Types.LONGVARCHAR)) + case StringType => Option(JdbcType("LONGTEXT", java.sql.Types.LONGVARCHAR)) case ByteType => Option(JdbcType("TINYINT", java.sql.Types.TINYINT)) case ShortType => Option(JdbcType("SMALLINT", java.sql.Types.SMALLINT)) // scalastyle:off line.size.limit diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala index 9c8a6bf5e145..a73a34c64635 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/OracleDialect.scala @@ -121,7 +121,7 @@ private case class OracleDialect() extends JdbcDialect with SQLConfHelper with N case DoubleType => Some(JdbcType("NUMBER(19, 4)", java.sql.Types.DOUBLE)) case ByteType => Some(JdbcType("NUMBER(3)", java.sql.Types.SMALLINT)) case ShortType => Some(JdbcType("NUMBER(5)", java.sql.Types.SMALLINT)) - case _: StringType => Some(JdbcType("VARCHAR2(255)", java.sql.Types.VARCHAR)) + case StringType => Some(JdbcType("VARCHAR2(255)", java.sql.Types.VARCHAR)) case VarcharType(n) => Some(JdbcType(s"VARCHAR2($n)", java.sql.Types.VARCHAR)) case 
TimestampType if !conf.legacyOracleTimestampMappingEnabled => Some(JdbcType("TIMESTAMP WITH LOCAL TIME ZONE", TIMESTAMP_LTZ)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index 1265550b3f19..8341063e0989 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -145,7 +145,7 @@ private case class PostgresDialect() } override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { - case _: StringType => Some(JdbcType("TEXT", Types.VARCHAR)) + case StringType => Some(JdbcType("TEXT", Types.VARCHAR)) case BinaryType => Some(JdbcType("BYTEA", Types.BINARY)) case BooleanType => Some(JdbcType("BOOLEAN", Types.BOOLEAN)) case FloatType => Some(JdbcType("FLOAT4", Types.FLOAT)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala index c7d8e899d71b..322b259485f5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/TeradataDialect.scala @@ -40,7 +40,7 @@ private case class TeradataDialect() extends JdbcDialect with NoLegacyJDBCError supportedFunctions.contains(funcName) override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { - case _: StringType => Some(JdbcType("VARCHAR(255)", java.sql.Types.VARCHAR)) + case StringType => Some(JdbcType("VARCHAR(255)", java.sql.Types.VARCHAR)) case BooleanType => Option(JdbcType("CHAR(1)", java.sql.Types.CHAR)) case ByteType => Option(JdbcType("BYTEINT", java.sql.Types.TINYINT)) case _ => None From 87ad4b4a2cfbb1b1c5d5374d3fea848b1e0dac8b Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Wed, 13 Nov 2024 09:22:08 -0800 Subject: [PATCH 09/79] [SPARK-50139][INFRA][SS][PYTHON] Introduce scripts to re-generate and checking StateMessage_pb2.py and StateMessage_pb2.pyi ### What changes were proposed in this pull request? This pr includes the following changes: 1. Refactor the `dev/connect-gen-protos.sh` script to support the generation of `.py` files from `.proto` files for both the `connect` and `streaming` modules simultaneously. Rename the script to `dev/gen-protos.sh`. Additionally, to maintain compatibility with previous development practices, this pull request (PR) introduces `dev/connect-gen-protos.sh` and `dev/streaming-gen-protos.sh` as wrappers around `dev/gen-protos.sh`. After this PR, you can use: ``` dev/gen-protos.sh connect dev/gen-protos.sh streaming ``` or ``` dev/connect-gen-protos.sh dev/streaming-gen-protos.sh ``` to regenerate the corresponding `.py` files for the respective modules. 2. Refactor the `dev/connect-check-protos.py` script to check the generated results for both the `connect` and `streaming` modules simultaneously, and rename it to `dev/check-protos.py`. Additionally, update the invocation of the check script in `build_and_test.yml`. ### Why are the changes needed? Provid tools for re-generate and checking `StateMessage_pb2.py` and `StateMessage_pb2.pyi`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GitHub Actions ### Was this patch authored or co-authored using generative AI tooling? No Closes #48815 from LuciferYang/streaming-gen-protos. 
Lead-authored-by: yangjie01 Co-authored-by: YangJie Signed-off-by: Dongjoon Hyun --- .github/workflows/build_and_test.yml | 6 +- ...onnect-check-protos.py => check-protos.py} | 23 +- dev/connect-gen-protos.sh | 78 +- dev/gen-protos.sh | 127 ++ dev/streaming-gen-protos.sh | 27 + dev/tox.ini | 1 + .../sql/streaming/proto/StateMessage_pb2.py | 173 +- .../sql/streaming/proto/StateMessage_pb2.pyi | 1552 ++++++++++++----- sql/core/src/main/buf.gen.yaml | 24 + sql/core/src/main/buf.work.yaml | 19 + 10 files changed, 1423 insertions(+), 607 deletions(-) rename dev/{connect-check-protos.py => check-protos.py} (73%) create mode 100755 dev/gen-protos.sh create mode 100755 dev/streaming-gen-protos.sh create mode 100644 sql/core/src/main/buf.gen.yaml create mode 100644 sql/core/src/main/buf.work.yaml diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index fc0959c5a415..4a3707404bcc 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -671,8 +671,12 @@ jobs: run: | python3.11 -m pip install 'black==23.9.1' 'protobuf==5.28.3' 'mypy==1.8.0' 'mypy-protobuf==3.3.0' python3.11 -m pip list - - name: Python CodeGen check + - name: Python CodeGen check for branch-3.5 + if: inputs.branch == 'branch-3.5' run: ./dev/connect-check-protos.py + - name: Python CodeGen check + if: inputs.branch != 'branch-3.5' + run: ./dev/check-protos.py # Static analysis lint: diff --git a/dev/connect-check-protos.py b/dev/check-protos.py similarity index 73% rename from dev/connect-check-protos.py rename to dev/check-protos.py index 9ba56bae6b19..bfca8b27be21 100755 --- a/dev/connect-check-protos.py +++ b/dev/check-protos.py @@ -18,7 +18,7 @@ # # Utility for checking whether generated codes in PySpark are out of sync. -# usage: ./dev/connect-check-protos.py +# usage: ./dev/check-protos.py import os import sys @@ -43,12 +43,12 @@ def run_cmd(cmd): return subprocess.check_output(cmd.split(" ")).decode("utf-8") -def check_connect_protos(): - print("Start checking the generated codes in pyspark-connect.") - with tempfile.TemporaryDirectory(prefix="check_connect_protos") as tmp: - run_cmd(f"{SPARK_HOME}/dev/connect-gen-protos.sh {tmp}") +def check_protos(module_name, cmp_path, proto_path): + print(f"Start checking the generated codes in pyspark-${module_name}.") + with tempfile.TemporaryDirectory(prefix=f"check_${module_name}__protos") as tmp: + run_cmd(f"{SPARK_HOME}/dev/gen-protos.sh {module_name} {tmp}") result = filecmp.dircmp( - f"{SPARK_HOME}/python/pyspark/sql/connect/proto/", + f"{SPARK_HOME}/{cmp_path}", tmp, ignore=["__init__.py", "__pycache__"], ) @@ -71,14 +71,17 @@ def check_connect_protos(): success = False if success: - print("Finish checking the generated codes in pyspark-connect: SUCCESS") + print(f"Finish checking the generated codes in pyspark-${module_name}: SUCCESS") else: fail( "Generated files for pyspark-connect are out of sync! " - "If you have touched files under sql/connect/common/src/main/protobuf/, " - "please run ./dev/connect-gen-protos.sh. " + f"If you have touched files under ${proto_path}, " + f"please run ./dev/${module_name}-gen-protos.sh. " "If you haven't touched any file above, please rebase your PR against main branch." 
) -check_connect_protos() +check_protos( + "connect", "python/pyspark/sql/connect/proto/", "sql/connect/common/src/main/protobuf/" +) +check_protos("streaming", "python/pyspark/sql/streaming/proto/", "sql/core/src/main/protobuf/") diff --git a/dev/connect-gen-protos.sh b/dev/connect-gen-protos.sh index 2805908890ee..8ed323cc4259 100755 --- a/dev/connect-gen-protos.sh +++ b/dev/connect-gen-protos.sh @@ -24,80 +24,4 @@ if [[ $# -gt 1 ]]; then exit -1 fi - -SPARK_HOME="$(cd "`dirname $0`"/..; pwd)" -cd "$SPARK_HOME" - - -OUTPUT_PATH=${SPARK_HOME}/python/pyspark/sql/connect/proto/ -if [[ $# -eq 1 ]]; then - rm -Rf $1 - mkdir -p $1 - OUTPUT_PATH=$1 -fi - -pushd sql/connect/common/src/main - -LICENSE=$(cat <<'EOF' -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -EOF) -echo "$LICENSE" > /tmp/tmp_licence - - -# Delete the old generated protobuf files. -rm -Rf gen - -# Now, regenerate the new files -buf generate --debug -vvv - -# We need to edit the generate python files to account for the actual package location and not -# the one generated by proto. -for f in `find gen/proto/python -name "*.py*"`; do - # First fix the imports. - if [[ $f == *_pb2.py || $f == *_pb2_grpc.py ]]; then - sed -e 's/from spark.connect import/from pyspark.sql.connect.proto import/g' $f > $f.tmp - mv $f.tmp $f - # Now fix the module name in the serialized descriptor. - sed -e "s/DESCRIPTOR, 'spark.connect/DESCRIPTOR, 'pyspark.sql.connect.proto/g" $f > $f.tmp - mv $f.tmp $f - elif [[ $f == *.pyi ]]; then - sed -e 's/import spark.connect./import pyspark.sql.connect.proto./g' -e 's/spark.connect./pyspark.sql.connect.proto./g' -e '/ *@typing_extensions\.final/d' $f > $f.tmp - mv $f.tmp $f - fi - - # Prepend the Apache licence header to the files. - cp $f $f.bak - cat /tmp/tmp_licence $f.bak > $f - - LC=$(wc -l < $f) - echo $LC - if [[ $f == *_grpc.py && $LC -eq 20 ]]; then - rm $f - fi - rm $f.bak -done - -black --config $SPARK_HOME/dev/pyproject.toml gen/proto/python - -# Last step copy the result files to the destination module. -for f in `find gen/proto/python -name "*.py*"`; do - cp $f $OUTPUT_PATH -done - -# Clean up everything. -rm -Rf gen +./dev/gen-protos.sh connect "$@" diff --git a/dev/gen-protos.sh b/dev/gen-protos.sh new file mode 100755 index 000000000000..d169964feb85 --- /dev/null +++ b/dev/gen-protos.sh @@ -0,0 +1,127 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -ex + +SPARK_HOME="$(cd "`dirname $0`"/..; pwd)" +cd "$SPARK_HOME" + +OUTPUT_PATH="" +MODULE="" +SOURCE_MODULE="" +TARGET_MODULE="" + +function usage() { + echo "Illegal number of parameters." + echo "Usage:./dev/gen-protos.sh [connect|streaming] [output_path]" + exit -1 +} + +if [[ $# -lt 1 || $# -gt 2 ]]; then + usage +fi + +if [[ $1 == "connect" ]]; then + MODULE="connect" + OUTPUT_PATH=${SPARK_HOME}/python/pyspark/sql/connect/proto/ + SOURCE_MODULE="spark.connect" + TARGET_MODULE="pyspark.sql.connect.proto" +elif [[ $1 == "streaming" ]]; then + MODULE="streaming" + OUTPUT_PATH=${SPARK_HOME}/python/pyspark/sql/streaming/proto/ + SOURCE_MODULE="org.apache.spark.sql.execution.streaming" + TARGET_MODULE="pyspark.sql.streaming.proto" +else + usage +fi + +if [[ $# -eq 2 ]]; then + rm -Rf $2 + mkdir -p $2 + OUTPUT_PATH=$2 +fi + +if [[ $MODULE == "connect" ]]; then + pushd sql/connect/common/src/main +elif [[ $MODULE == "streaming" ]]; then + pushd sql/core/src/main +fi + +LICENSE=$(cat <<'EOF' +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +EOF) +echo "$LICENSE" > /tmp/tmp_licence + +# Delete the old generated protobuf files. +rm -Rf gen + +# Now, regenerate the new files +buf generate --debug -vvv + +# We need to edit the generate python files to account for the actual package location and not +# the one generated by proto. +for f in `find gen/proto/python -name "*.py*"`; do + # First fix the imports. + if [[ $f == *_pb2.py || $f == *_pb2_grpc.py ]]; then + sed -e "s/from ${SOURCE_MODULE} import/from ${TARGET_MODULE} import/g" $f > $f.tmp + mv $f.tmp $f + # Now fix the module name in the serialized descriptor. + sed -e "s/DESCRIPTOR, '${SOURCE_MODULE}/DESCRIPTOR, '${TARGET_MODULE}/g" $f > $f.tmp + mv $f.tmp $f + elif [[ $f == *.pyi ]]; then + sed -e "s/import ${SOURCE_MODULE}./import ${TARGET_MODULE}./g" -e "s/${SOURCE_MODULE}./${TARGET_MODULE}./g" -e '/ *@typing_extensions\.final/d' $f > $f.tmp + mv $f.tmp $f + fi + + # Prepend the Apache licence header to the files. + cp $f $f.bak + cat /tmp/tmp_licence $f.bak > $f + + LC=$(wc -l < $f) + echo $LC + if [[ $f == *_grpc.py && $LC -eq 20 ]]; then + rm $f + fi + rm $f.bak +done + +black --config $SPARK_HOME/dev/pyproject.toml gen/proto/python + +# Last step copy the result files to the destination module. 
+for f in `find gen/proto/python -name "*.py*"`; do + cp $f $OUTPUT_PATH +done + +# Clean up everything. +rm -Rf gen diff --git a/dev/streaming-gen-protos.sh b/dev/streaming-gen-protos.sh new file mode 100755 index 000000000000..3d80bda4fb94 --- /dev/null +++ b/dev/streaming-gen-protos.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +set -ex + +if [[ $# -gt 1 ]]; then + echo "Illegal number of parameters." + echo "Usage: ./dev/streaming-gen-protos.sh [path]" + exit -1 +fi + +./dev/gen-protos.sh streaming "$@" diff --git a/dev/tox.ini b/dev/tox.ini index 47b1b4a9d783..05a6b16a03bd 100644 --- a/dev/tox.ini +++ b/dev/tox.ini @@ -59,5 +59,6 @@ exclude = *python/pyspark/worker.pyi, *python/pyspark/java_gateway.pyi, *python/pyspark/sql/connect/proto/*, + *python/pyspark/sql/streaming/proto/*, */venv/* max-line-length = 100 diff --git a/python/pyspark/sql/streaming/proto/StateMessage_pb2.py b/python/pyspark/sql/streaming/proto/StateMessage_pb2.py index 46bed10c4558..0a54690513a3 100644 --- a/python/pyspark/sql/streaming/proto/StateMessage_pb2.py +++ b/python/pyspark/sql/streaming/proto/StateMessage_pb2.py @@ -17,8 +17,8 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# NO CHECKED-IN PROTOBUF GENCODE -# source: StateMessage.proto -# Protobuf Python Version: 5.27.3 +# source: org/apache/spark/sql/execution/streaming/StateMessage.proto +# Protobuf Python Version: 5.28.3 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -27,7 +27,12 @@ from google.protobuf.internal import builder as _builder _runtime_version.ValidateProtobufRuntimeVersion( - _runtime_version.Domain.PUBLIC, 5, 27, 3, "", "StateMessage.proto" + _runtime_version.Domain.PUBLIC, + 5, + 28, + 3, + "", + "org/apache/spark/sql/execution/streaming/StateMessage.proto", ) # @@protoc_insertion_point(imports) @@ -35,90 +40,92 @@ DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x12StateMessage.proto\x12.org.apache.spark.sql.execution.streaming.state"\xbf\x03\n\x0cStateRequest\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x66\n\x15statefulProcessorCall\x18\x02 \x01(\x0b\x32\x45.org.apache.spark.sql.execution.streaming.state.StatefulProcessorCallH\x00\x12\x64\n\x14stateVariableRequest\x18\x03 \x01(\x0b\x32\x44.org.apache.spark.sql.execution.streaming.state.StateVariableRequestH\x00\x12p\n\x1aimplicitGroupingKeyRequest\x18\x04 \x01(\x0b\x32J.org.apache.spark.sql.execution.streaming.state.ImplicitGroupingKeyRequestH\x00\x12T\n\x0ctimerRequest\x18\x05 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.TimerRequestH\x00\x42\x08\n\x06method"H\n\rStateResponse\x12\x12\n\nstatusCode\x18\x01 \x01(\x05\x12\x14\n\x0c\x65rrorMessage\x18\x02 \x01(\t\x12\r\n\x05value\x18\x03 \x01(\x0c"W\n\x1cStateResponseWithLongTypeVal\x12\x12\n\nstatusCode\x18\x01 \x01(\x05\x12\x14\n\x0c\x65rrorMessage\x18\x02 \x01(\t\x12\r\n\x05value\x18\x03 \x01(\x03"\xc6\x04\n\x15StatefulProcessorCall\x12X\n\x0esetHandleState\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.SetHandleStateH\x00\x12Y\n\rgetValueState\x18\x02 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00\x12X\n\x0cgetListState\x18\x03 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00\x12W\n\x0bgetMapState\x18\x04 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00\x12_\n\x0etimerStateCall\x18\x05 \x01(\x0b\x32\x45.org.apache.spark.sql.execution.streaming.state.TimerStateCallCommandH\x00\x12Z\n\x0e\x64\x65leteIfExists\x18\x06 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00\x42\x08\n\x06method"\xa8\x02\n\x14StateVariableRequest\x12X\n\x0evalueStateCall\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.ValueStateCallH\x00\x12V\n\rlistStateCall\x18\x02 \x01(\x0b\x32=.org.apache.spark.sql.execution.streaming.state.ListStateCallH\x00\x12T\n\x0cmapStateCall\x18\x03 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.MapStateCallH\x00\x42\x08\n\x06method"\xe0\x01\n\x1aImplicitGroupingKeyRequest\x12X\n\x0esetImplicitKey\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.SetImplicitKeyH\x00\x12^\n\x11removeImplicitKey\x18\x02 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.RemoveImplicitKeyH\x00\x42\x08\n\x06method"\xda\x01\n\x0cTimerRequest\x12^\n\x11timerValueRequest\x18\x01 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.TimerValueRequestH\x00\x12`\n\x12\x65xpiryTimerRequest\x18\x02 
\x01(\x0b\x32\x42.org.apache.spark.sql.execution.streaming.state.ExpiryTimerRequestH\x00\x42\x08\n\x06method"\xd4\x01\n\x11TimerValueRequest\x12_\n\x12getProcessingTimer\x18\x01 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.GetProcessingTimeH\x00\x12T\n\x0cgetWatermark\x18\x02 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.GetWatermarkH\x00\x42\x08\n\x06method"/\n\x12\x45xpiryTimerRequest\x12\x19\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03"\x13\n\x11GetProcessingTime"\x0e\n\x0cGetWatermark"\x9a\x01\n\x10StateCallCommand\x12\x11\n\tstateName\x18\x01 \x01(\t\x12\x0e\n\x06schema\x18\x02 \x01(\t\x12\x1b\n\x13mapStateValueSchema\x18\x03 \x01(\t\x12\x46\n\x03ttl\x18\x04 \x01(\x0b\x32\x39.org.apache.spark.sql.execution.streaming.state.TTLConfig"\x8f\x02\n\x15TimerStateCallCommand\x12Q\n\x08register\x18\x01 \x01(\x0b\x32=.org.apache.spark.sql.execution.streaming.state.RegisterTimerH\x00\x12M\n\x06\x64\x65lete\x18\x02 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.DeleteTimerH\x00\x12J\n\x04list\x18\x03 \x01(\x0b\x32:.org.apache.spark.sql.execution.streaming.state.ListTimersH\x00\x42\x08\n\x06method"\xe1\x02\n\x0eValueStateCall\x12\x11\n\tstateName\x18\x01 \x01(\t\x12H\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00\x12\x42\n\x03get\x18\x03 \x01(\x0b\x32\x33.org.apache.spark.sql.execution.streaming.state.GetH\x00\x12\\\n\x10valueStateUpdate\x18\x04 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.ValueStateUpdateH\x00\x12\x46\n\x05\x63lear\x18\x05 \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00\x42\x08\n\x06method"\x90\x04\n\rListStateCall\x12\x11\n\tstateName\x18\x01 \x01(\t\x12H\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00\x12T\n\x0clistStateGet\x18\x03 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.ListStateGetH\x00\x12T\n\x0clistStatePut\x18\x04 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.ListStatePutH\x00\x12R\n\x0b\x61ppendValue\x18\x05 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.AppendValueH\x00\x12P\n\nappendList\x18\x06 \x01(\x0b\x32:.org.apache.spark.sql.execution.streaming.state.AppendListH\x00\x12\x46\n\x05\x63lear\x18\x07 \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00\x42\x08\n\x06method"\xe1\x05\n\x0cMapStateCall\x12\x11\n\tstateName\x18\x01 \x01(\t\x12H\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00\x12L\n\x08getValue\x18\x03 \x01(\x0b\x32\x38.org.apache.spark.sql.execution.streaming.state.GetValueH\x00\x12R\n\x0b\x63ontainsKey\x18\x04 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.ContainsKeyH\x00\x12R\n\x0bupdateValue\x18\x05 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.UpdateValueH\x00\x12L\n\x08iterator\x18\x06 \x01(\x0b\x32\x38.org.apache.spark.sql.execution.streaming.state.IteratorH\x00\x12\x44\n\x04keys\x18\x07 \x01(\x0b\x32\x34.org.apache.spark.sql.execution.streaming.state.KeysH\x00\x12H\n\x06values\x18\x08 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ValuesH\x00\x12N\n\tremoveKey\x18\t \x01(\x0b\x32\x39.org.apache.spark.sql.execution.streaming.state.RemoveKeyH\x00\x12\x46\n\x05\x63lear\x18\n \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00\x42\x08\n\x06method"\x1d\n\x0eSetImplicitKey\x12\x0b\n\x03key\x18\x01 
\x01(\x0c"\x13\n\x11RemoveImplicitKey"\x08\n\x06\x45xists"\x05\n\x03Get"*\n\rRegisterTimer\x12\x19\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03"(\n\x0b\x44\x65leteTimer\x12\x19\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03" \n\nListTimers\x12\x12\n\niteratorId\x18\x01 \x01(\t"!\n\x10ValueStateUpdate\x12\r\n\x05value\x18\x01 \x01(\x0c"\x07\n\x05\x43lear""\n\x0cListStateGet\x12\x12\n\niteratorId\x18\x01 \x01(\t"\x0e\n\x0cListStatePut"\x1c\n\x0b\x41ppendValue\x12\r\n\x05value\x18\x01 \x01(\x0c"\x0c\n\nAppendList"\x1b\n\x08GetValue\x12\x0f\n\x07userKey\x18\x01 \x01(\x0c"\x1e\n\x0b\x43ontainsKey\x12\x0f\n\x07userKey\x18\x01 \x01(\x0c"-\n\x0bUpdateValue\x12\x0f\n\x07userKey\x18\x01 \x01(\x0c\x12\r\n\x05value\x18\x02 \x01(\x0c"\x1e\n\x08Iterator\x12\x12\n\niteratorId\x18\x01 \x01(\t"\x1a\n\x04Keys\x12\x12\n\niteratorId\x18\x01 \x01(\t"\x1c\n\x06Values\x12\x12\n\niteratorId\x18\x01 \x01(\t"\x1c\n\tRemoveKey\x12\x0f\n\x07userKey\x18\x01 \x01(\x0c"\\\n\x0eSetHandleState\x12J\n\x05state\x18\x01 \x01(\x0e\x32;.org.apache.spark.sql.execution.streaming.state.HandleState"\x1f\n\tTTLConfig\x12\x12\n\ndurationMs\x18\x01 \x01(\x05*`\n\x0bHandleState\x12\x0b\n\x07\x43REATED\x10\x00\x12\x0f\n\x0bINITIALIZED\x10\x01\x12\x12\n\x0e\x44\x41TA_PROCESSED\x10\x02\x12\x13\n\x0fTIMER_PROCESSED\x10\x03\x12\n\n\x06\x43LOSED\x10\x04\x62\x06proto3' # noqa: E501 + b'\n;org/apache/spark/sql/execution/streaming/StateMessage.proto\x12.org.apache.spark.sql.execution.streaming.state"\xa0\x04\n\x0cStateRequest\x12\x18\n\x07version\x18\x01 \x01(\x05R\x07version\x12}\n\x15statefulProcessorCall\x18\x02 \x01(\x0b\x32\x45.org.apache.spark.sql.execution.streaming.state.StatefulProcessorCallH\x00R\x15statefulProcessorCall\x12z\n\x14stateVariableRequest\x18\x03 \x01(\x0b\x32\x44.org.apache.spark.sql.execution.streaming.state.StateVariableRequestH\x00R\x14stateVariableRequest\x12\x8c\x01\n\x1aimplicitGroupingKeyRequest\x18\x04 \x01(\x0b\x32J.org.apache.spark.sql.execution.streaming.state.ImplicitGroupingKeyRequestH\x00R\x1aimplicitGroupingKeyRequest\x12\x62\n\x0ctimerRequest\x18\x05 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.TimerRequestH\x00R\x0ctimerRequestB\x08\n\x06method"i\n\rStateResponse\x12\x1e\n\nstatusCode\x18\x01 \x01(\x05R\nstatusCode\x12"\n\x0c\x65rrorMessage\x18\x02 \x01(\tR\x0c\x65rrorMessage\x12\x14\n\x05value\x18\x03 \x01(\x0cR\x05value"x\n\x1cStateResponseWithLongTypeVal\x12\x1e\n\nstatusCode\x18\x01 \x01(\x05R\nstatusCode\x12"\n\x0c\x65rrorMessage\x18\x02 \x01(\tR\x0c\x65rrorMessage\x12\x14\n\x05value\x18\x03 \x01(\x03R\x05value"\xa0\x05\n\x15StatefulProcessorCall\x12h\n\x0esetHandleState\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.SetHandleStateH\x00R\x0esetHandleState\x12h\n\rgetValueState\x18\x02 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\rgetValueState\x12\x66\n\x0cgetListState\x18\x03 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\x0cgetListState\x12\x64\n\x0bgetMapState\x18\x04 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\x0bgetMapState\x12o\n\x0etimerStateCall\x18\x05 \x01(\x0b\x32\x45.org.apache.spark.sql.execution.streaming.state.TimerStateCallCommandH\x00R\x0etimerStateCall\x12j\n\x0e\x64\x65leteIfExists\x18\x06 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.StateCallCommandH\x00R\x0e\x64\x65leteIfExistsB\x08\n\x06method"\xd5\x02\n\x14StateVariableRequest\x12h\n\x0evalueStateCall\x18\x01 
\x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.ValueStateCallH\x00R\x0evalueStateCall\x12\x65\n\rlistStateCall\x18\x02 \x01(\x0b\x32=.org.apache.spark.sql.execution.streaming.state.ListStateCallH\x00R\rlistStateCall\x12\x62\n\x0cmapStateCall\x18\x03 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.MapStateCallH\x00R\x0cmapStateCallB\x08\n\x06method"\x83\x02\n\x1aImplicitGroupingKeyRequest\x12h\n\x0esetImplicitKey\x18\x01 \x01(\x0b\x32>.org.apache.spark.sql.execution.streaming.state.SetImplicitKeyH\x00R\x0esetImplicitKey\x12q\n\x11removeImplicitKey\x18\x02 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.RemoveImplicitKeyH\x00R\x11removeImplicitKeyB\x08\n\x06method"\x81\x02\n\x0cTimerRequest\x12q\n\x11timerValueRequest\x18\x01 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.TimerValueRequestH\x00R\x11timerValueRequest\x12t\n\x12\x65xpiryTimerRequest\x18\x02 \x01(\x0b\x32\x42.org.apache.spark.sql.execution.streaming.state.ExpiryTimerRequestH\x00R\x12\x65xpiryTimerRequestB\x08\n\x06method"\xf6\x01\n\x11TimerValueRequest\x12s\n\x12getProcessingTimer\x18\x01 \x01(\x0b\x32\x41.org.apache.spark.sql.execution.streaming.state.GetProcessingTimeH\x00R\x12getProcessingTimer\x12\x62\n\x0cgetWatermark\x18\x02 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.GetWatermarkH\x00R\x0cgetWatermarkB\x08\n\x06method"B\n\x12\x45xpiryTimerRequest\x12,\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03R\x11\x65xpiryTimestampMs"\x13\n\x11GetProcessingTime"\x0e\n\x0cGetWatermark"\xc7\x01\n\x10StateCallCommand\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12\x16\n\x06schema\x18\x02 \x01(\tR\x06schema\x12\x30\n\x13mapStateValueSchema\x18\x03 \x01(\tR\x13mapStateValueSchema\x12K\n\x03ttl\x18\x04 \x01(\x0b\x32\x39.org.apache.spark.sql.execution.streaming.state.TTLConfigR\x03ttl"\xa7\x02\n\x15TimerStateCallCommand\x12[\n\x08register\x18\x01 \x01(\x0b\x32=.org.apache.spark.sql.execution.streaming.state.RegisterTimerH\x00R\x08register\x12U\n\x06\x64\x65lete\x18\x02 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.DeleteTimerH\x00R\x06\x64\x65lete\x12P\n\x04list\x18\x03 \x01(\x0b\x32:.org.apache.spark.sql.execution.streaming.state.ListTimersH\x00R\x04listB\x08\n\x06method"\x92\x03\n\x0eValueStateCall\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12P\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00R\x06\x65xists\x12G\n\x03get\x18\x03 \x01(\x0b\x32\x33.org.apache.spark.sql.execution.streaming.state.GetH\x00R\x03get\x12n\n\x10valueStateUpdate\x18\x04 \x01(\x0b\x32@.org.apache.spark.sql.execution.streaming.state.ValueStateUpdateH\x00R\x10valueStateUpdate\x12M\n\x05\x63lear\x18\x05 \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00R\x05\x63learB\x08\n\x06method"\xdf\x04\n\rListStateCall\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12P\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00R\x06\x65xists\x12\x62\n\x0clistStateGet\x18\x03 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.ListStateGetH\x00R\x0clistStateGet\x12\x62\n\x0clistStatePut\x18\x04 \x01(\x0b\x32<.org.apache.spark.sql.execution.streaming.state.ListStatePutH\x00R\x0clistStatePut\x12_\n\x0b\x61ppendValue\x18\x05 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.AppendValueH\x00R\x0b\x61ppendValue\x12\\\n\nappendList\x18\x06 
\x01(\x0b\x32:.org.apache.spark.sql.execution.streaming.state.AppendListH\x00R\nappendList\x12M\n\x05\x63lear\x18\x07 \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00R\x05\x63learB\x08\n\x06method"\xc2\x06\n\x0cMapStateCall\x12\x1c\n\tstateName\x18\x01 \x01(\tR\tstateName\x12P\n\x06\x65xists\x18\x02 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ExistsH\x00R\x06\x65xists\x12V\n\x08getValue\x18\x03 \x01(\x0b\x32\x38.org.apache.spark.sql.execution.streaming.state.GetValueH\x00R\x08getValue\x12_\n\x0b\x63ontainsKey\x18\x04 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.ContainsKeyH\x00R\x0b\x63ontainsKey\x12_\n\x0bupdateValue\x18\x05 \x01(\x0b\x32;.org.apache.spark.sql.execution.streaming.state.UpdateValueH\x00R\x0bupdateValue\x12V\n\x08iterator\x18\x06 \x01(\x0b\x32\x38.org.apache.spark.sql.execution.streaming.state.IteratorH\x00R\x08iterator\x12J\n\x04keys\x18\x07 \x01(\x0b\x32\x34.org.apache.spark.sql.execution.streaming.state.KeysH\x00R\x04keys\x12P\n\x06values\x18\x08 \x01(\x0b\x32\x36.org.apache.spark.sql.execution.streaming.state.ValuesH\x00R\x06values\x12Y\n\tremoveKey\x18\t \x01(\x0b\x32\x39.org.apache.spark.sql.execution.streaming.state.RemoveKeyH\x00R\tremoveKey\x12M\n\x05\x63lear\x18\n \x01(\x0b\x32\x35.org.apache.spark.sql.execution.streaming.state.ClearH\x00R\x05\x63learB\x08\n\x06method""\n\x0eSetImplicitKey\x12\x10\n\x03key\x18\x01 \x01(\x0cR\x03key"\x13\n\x11RemoveImplicitKey"\x08\n\x06\x45xists"\x05\n\x03Get"=\n\rRegisterTimer\x12,\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03R\x11\x65xpiryTimestampMs";\n\x0b\x44\x65leteTimer\x12,\n\x11\x65xpiryTimestampMs\x18\x01 \x01(\x03R\x11\x65xpiryTimestampMs",\n\nListTimers\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"(\n\x10ValueStateUpdate\x12\x14\n\x05value\x18\x01 \x01(\x0cR\x05value"\x07\n\x05\x43lear".\n\x0cListStateGet\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"\x0e\n\x0cListStatePut"#\n\x0b\x41ppendValue\x12\x14\n\x05value\x18\x01 \x01(\x0cR\x05value"\x0c\n\nAppendList"$\n\x08GetValue\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey"\'\n\x0b\x43ontainsKey\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey"=\n\x0bUpdateValue\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey\x12\x14\n\x05value\x18\x02 \x01(\x0cR\x05value"*\n\x08Iterator\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"&\n\x04Keys\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"(\n\x06Values\x12\x1e\n\niteratorId\x18\x01 \x01(\tR\niteratorId"%\n\tRemoveKey\x12\x18\n\x07userKey\x18\x01 \x01(\x0cR\x07userKey"c\n\x0eSetHandleState\x12Q\n\x05state\x18\x01 \x01(\x0e\x32;.org.apache.spark.sql.execution.streaming.state.HandleStateR\x05state"+\n\tTTLConfig\x12\x1e\n\ndurationMs\x18\x01 \x01(\x05R\ndurationMs*`\n\x0bHandleState\x12\x0b\n\x07\x43REATED\x10\x00\x12\x0f\n\x0bINITIALIZED\x10\x01\x12\x12\n\x0e\x44\x41TA_PROCESSED\x10\x02\x12\x13\n\x0fTIMER_PROCESSED\x10\x03\x12\n\n\x06\x43LOSED\x10\x04\x62\x06proto3' ) _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "StateMessage_pb2", _globals) +_builder.BuildTopDescriptorsAndMessages( + DESCRIPTOR, "pyspark.sql.streaming.proto.StateMessage_pb2", _globals +) if not _descriptor._USE_C_DESCRIPTORS: DESCRIPTOR._loaded_options = None - _globals["_HANDLESTATE"]._serialized_start = 5058 - _globals["_HANDLESTATE"]._serialized_end = 5154 - _globals["_STATEREQUEST"]._serialized_start = 71 - _globals["_STATEREQUEST"]._serialized_end = 518 - 
_globals["_STATERESPONSE"]._serialized_start = 520 - _globals["_STATERESPONSE"]._serialized_end = 592 - _globals["_STATERESPONSEWITHLONGTYPEVAL"]._serialized_start = 594 - _globals["_STATERESPONSEWITHLONGTYPEVAL"]._serialized_end = 681 - _globals["_STATEFULPROCESSORCALL"]._serialized_start = 684 - _globals["_STATEFULPROCESSORCALL"]._serialized_end = 1266 - _globals["_STATEVARIABLEREQUEST"]._serialized_start = 1269 - _globals["_STATEVARIABLEREQUEST"]._serialized_end = 1565 - _globals["_IMPLICITGROUPINGKEYREQUEST"]._serialized_start = 1568 - _globals["_IMPLICITGROUPINGKEYREQUEST"]._serialized_end = 1792 - _globals["_TIMERREQUEST"]._serialized_start = 1795 - _globals["_TIMERREQUEST"]._serialized_end = 2013 - _globals["_TIMERVALUEREQUEST"]._serialized_start = 2016 - _globals["_TIMERVALUEREQUEST"]._serialized_end = 2228 - _globals["_EXPIRYTIMERREQUEST"]._serialized_start = 2230 - _globals["_EXPIRYTIMERREQUEST"]._serialized_end = 2277 - _globals["_GETPROCESSINGTIME"]._serialized_start = 2279 - _globals["_GETPROCESSINGTIME"]._serialized_end = 2298 - _globals["_GETWATERMARK"]._serialized_start = 2300 - _globals["_GETWATERMARK"]._serialized_end = 2314 - _globals["_STATECALLCOMMAND"]._serialized_start = 2317 - _globals["_STATECALLCOMMAND"]._serialized_end = 2471 - _globals["_TIMERSTATECALLCOMMAND"]._serialized_start = 2474 - _globals["_TIMERSTATECALLCOMMAND"]._serialized_end = 2745 - _globals["_VALUESTATECALL"]._serialized_start = 2748 - _globals["_VALUESTATECALL"]._serialized_end = 3101 - _globals["_LISTSTATECALL"]._serialized_start = 3104 - _globals["_LISTSTATECALL"]._serialized_end = 3632 - _globals["_MAPSTATECALL"]._serialized_start = 3635 - _globals["_MAPSTATECALL"]._serialized_end = 4372 - _globals["_SETIMPLICITKEY"]._serialized_start = 4374 - _globals["_SETIMPLICITKEY"]._serialized_end = 4403 - _globals["_REMOVEIMPLICITKEY"]._serialized_start = 4405 - _globals["_REMOVEIMPLICITKEY"]._serialized_end = 4424 - _globals["_EXISTS"]._serialized_start = 4426 - _globals["_EXISTS"]._serialized_end = 4434 - _globals["_GET"]._serialized_start = 4436 - _globals["_GET"]._serialized_end = 4441 - _globals["_REGISTERTIMER"]._serialized_start = 4443 - _globals["_REGISTERTIMER"]._serialized_end = 4485 - _globals["_DELETETIMER"]._serialized_start = 4487 - _globals["_DELETETIMER"]._serialized_end = 4527 - _globals["_LISTTIMERS"]._serialized_start = 4529 - _globals["_LISTTIMERS"]._serialized_end = 4561 - _globals["_VALUESTATEUPDATE"]._serialized_start = 4563 - _globals["_VALUESTATEUPDATE"]._serialized_end = 4596 - _globals["_CLEAR"]._serialized_start = 4598 - _globals["_CLEAR"]._serialized_end = 4605 - _globals["_LISTSTATEGET"]._serialized_start = 4607 - _globals["_LISTSTATEGET"]._serialized_end = 4641 - _globals["_LISTSTATEPUT"]._serialized_start = 4643 - _globals["_LISTSTATEPUT"]._serialized_end = 4657 - _globals["_APPENDVALUE"]._serialized_start = 4659 - _globals["_APPENDVALUE"]._serialized_end = 4687 - _globals["_APPENDLIST"]._serialized_start = 4689 - _globals["_APPENDLIST"]._serialized_end = 4701 - _globals["_GETVALUE"]._serialized_start = 4703 - _globals["_GETVALUE"]._serialized_end = 4730 - _globals["_CONTAINSKEY"]._serialized_start = 4732 - _globals["_CONTAINSKEY"]._serialized_end = 4762 - _globals["_UPDATEVALUE"]._serialized_start = 4764 - _globals["_UPDATEVALUE"]._serialized_end = 4809 - _globals["_ITERATOR"]._serialized_start = 4811 - _globals["_ITERATOR"]._serialized_end = 4841 - _globals["_KEYS"]._serialized_start = 4843 - _globals["_KEYS"]._serialized_end = 4869 - 
_globals["_VALUES"]._serialized_start = 4871 - _globals["_VALUES"]._serialized_end = 4899 - _globals["_REMOVEKEY"]._serialized_start = 4901 - _globals["_REMOVEKEY"]._serialized_end = 4929 - _globals["_SETHANDLESTATE"]._serialized_start = 4931 - _globals["_SETHANDLESTATE"]._serialized_end = 5023 - _globals["_TTLCONFIG"]._serialized_start = 5025 - _globals["_TTLCONFIG"]._serialized_end = 5056 + _globals["_HANDLESTATE"]._serialized_start = 5997 + _globals["_HANDLESTATE"]._serialized_end = 6093 + _globals["_STATEREQUEST"]._serialized_start = 112 + _globals["_STATEREQUEST"]._serialized_end = 656 + _globals["_STATERESPONSE"]._serialized_start = 658 + _globals["_STATERESPONSE"]._serialized_end = 763 + _globals["_STATERESPONSEWITHLONGTYPEVAL"]._serialized_start = 765 + _globals["_STATERESPONSEWITHLONGTYPEVAL"]._serialized_end = 885 + _globals["_STATEFULPROCESSORCALL"]._serialized_start = 888 + _globals["_STATEFULPROCESSORCALL"]._serialized_end = 1560 + _globals["_STATEVARIABLEREQUEST"]._serialized_start = 1563 + _globals["_STATEVARIABLEREQUEST"]._serialized_end = 1904 + _globals["_IMPLICITGROUPINGKEYREQUEST"]._serialized_start = 1907 + _globals["_IMPLICITGROUPINGKEYREQUEST"]._serialized_end = 2166 + _globals["_TIMERREQUEST"]._serialized_start = 2169 + _globals["_TIMERREQUEST"]._serialized_end = 2426 + _globals["_TIMERVALUEREQUEST"]._serialized_start = 2429 + _globals["_TIMERVALUEREQUEST"]._serialized_end = 2675 + _globals["_EXPIRYTIMERREQUEST"]._serialized_start = 2677 + _globals["_EXPIRYTIMERREQUEST"]._serialized_end = 2743 + _globals["_GETPROCESSINGTIME"]._serialized_start = 2745 + _globals["_GETPROCESSINGTIME"]._serialized_end = 2764 + _globals["_GETWATERMARK"]._serialized_start = 2766 + _globals["_GETWATERMARK"]._serialized_end = 2780 + _globals["_STATECALLCOMMAND"]._serialized_start = 2783 + _globals["_STATECALLCOMMAND"]._serialized_end = 2982 + _globals["_TIMERSTATECALLCOMMAND"]._serialized_start = 2985 + _globals["_TIMERSTATECALLCOMMAND"]._serialized_end = 3280 + _globals["_VALUESTATECALL"]._serialized_start = 3283 + _globals["_VALUESTATECALL"]._serialized_end = 3685 + _globals["_LISTSTATECALL"]._serialized_start = 3688 + _globals["_LISTSTATECALL"]._serialized_end = 4295 + _globals["_MAPSTATECALL"]._serialized_start = 4298 + _globals["_MAPSTATECALL"]._serialized_end = 5132 + _globals["_SETIMPLICITKEY"]._serialized_start = 5134 + _globals["_SETIMPLICITKEY"]._serialized_end = 5168 + _globals["_REMOVEIMPLICITKEY"]._serialized_start = 5170 + _globals["_REMOVEIMPLICITKEY"]._serialized_end = 5189 + _globals["_EXISTS"]._serialized_start = 5191 + _globals["_EXISTS"]._serialized_end = 5199 + _globals["_GET"]._serialized_start = 5201 + _globals["_GET"]._serialized_end = 5206 + _globals["_REGISTERTIMER"]._serialized_start = 5208 + _globals["_REGISTERTIMER"]._serialized_end = 5269 + _globals["_DELETETIMER"]._serialized_start = 5271 + _globals["_DELETETIMER"]._serialized_end = 5330 + _globals["_LISTTIMERS"]._serialized_start = 5332 + _globals["_LISTTIMERS"]._serialized_end = 5376 + _globals["_VALUESTATEUPDATE"]._serialized_start = 5378 + _globals["_VALUESTATEUPDATE"]._serialized_end = 5418 + _globals["_CLEAR"]._serialized_start = 5420 + _globals["_CLEAR"]._serialized_end = 5427 + _globals["_LISTSTATEGET"]._serialized_start = 5429 + _globals["_LISTSTATEGET"]._serialized_end = 5475 + _globals["_LISTSTATEPUT"]._serialized_start = 5477 + _globals["_LISTSTATEPUT"]._serialized_end = 5491 + _globals["_APPENDVALUE"]._serialized_start = 5493 + _globals["_APPENDVALUE"]._serialized_end = 5528 + 
_globals["_APPENDLIST"]._serialized_start = 5530 + _globals["_APPENDLIST"]._serialized_end = 5542 + _globals["_GETVALUE"]._serialized_start = 5544 + _globals["_GETVALUE"]._serialized_end = 5580 + _globals["_CONTAINSKEY"]._serialized_start = 5582 + _globals["_CONTAINSKEY"]._serialized_end = 5621 + _globals["_UPDATEVALUE"]._serialized_start = 5623 + _globals["_UPDATEVALUE"]._serialized_end = 5684 + _globals["_ITERATOR"]._serialized_start = 5686 + _globals["_ITERATOR"]._serialized_end = 5728 + _globals["_KEYS"]._serialized_start = 5730 + _globals["_KEYS"]._serialized_end = 5768 + _globals["_VALUES"]._serialized_start = 5770 + _globals["_VALUES"]._serialized_end = 5810 + _globals["_REMOVEKEY"]._serialized_start = 5812 + _globals["_REMOVEKEY"]._serialized_end = 5849 + _globals["_SETHANDLESTATE"]._serialized_start = 5851 + _globals["_SETHANDLESTATE"]._serialized_end = 5950 + _globals["_TTLCONFIG"]._serialized_start = 5952 + _globals["_TTLCONFIG"]._serialized_end = 5995 # @@protoc_insertion_point(module_scope) diff --git a/python/pyspark/sql/streaming/proto/StateMessage_pb2.pyi b/python/pyspark/sql/streaming/proto/StateMessage_pb2.pyi index bc5138f52281..52f66928294c 100644 --- a/python/pyspark/sql/streaming/proto/StateMessage_pb2.pyi +++ b/python/pyspark/sql/streaming/proto/StateMessage_pb2.pyi @@ -14,439 +14,1119 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from google.protobuf.internal import enum_type_wrapper as _enum_type_wrapper -from google.protobuf import descriptor as _descriptor -from google.protobuf import message as _message -from typing import ( - ClassVar as _ClassVar, - Mapping as _Mapping, - Optional as _Optional, - Union as _Union, -) - -DESCRIPTOR: _descriptor.FileDescriptor - -class HandleState(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = () - CREATED: _ClassVar[HandleState] - INITIALIZED: _ClassVar[HandleState] - DATA_PROCESSED: _ClassVar[HandleState] - TIMER_PROCESSED: _ClassVar[HandleState] - CLOSED: _ClassVar[HandleState] - -CREATED: HandleState -INITIALIZED: HandleState -DATA_PROCESSED: HandleState -TIMER_PROCESSED: HandleState -CLOSED: HandleState - -class StateRequest(_message.Message): - __slots__ = ( - "version", - "statefulProcessorCall", - "stateVariableRequest", - "implicitGroupingKeyRequest", - "timerRequest", - ) - VERSION_FIELD_NUMBER: _ClassVar[int] - STATEFULPROCESSORCALL_FIELD_NUMBER: _ClassVar[int] - STATEVARIABLEREQUEST_FIELD_NUMBER: _ClassVar[int] - IMPLICITGROUPINGKEYREQUEST_FIELD_NUMBER: _ClassVar[int] - TIMERREQUEST_FIELD_NUMBER: _ClassVar[int] - version: int - statefulProcessorCall: StatefulProcessorCall - stateVariableRequest: StateVariableRequest - implicitGroupingKeyRequest: ImplicitGroupingKeyRequest - timerRequest: TimerRequest - def __init__( - self, - version: _Optional[int] = ..., - statefulProcessorCall: _Optional[_Union[StatefulProcessorCall, _Mapping]] = ..., - stateVariableRequest: _Optional[_Union[StateVariableRequest, _Mapping]] = ..., - implicitGroupingKeyRequest: _Optional[_Union[ImplicitGroupingKeyRequest, _Mapping]] = ..., - timerRequest: _Optional[_Union[TimerRequest, _Mapping]] = ..., - ) -> None: ... 
- -class StateResponse(_message.Message): - __slots__ = ("statusCode", "errorMessage", "value") - STATUSCODE_FIELD_NUMBER: _ClassVar[int] - ERRORMESSAGE_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - statusCode: int - errorMessage: str - value: bytes - def __init__( - self, - statusCode: _Optional[int] = ..., - errorMessage: _Optional[str] = ..., - value: _Optional[bytes] = ..., - ) -> None: ... - -class StateResponseWithLongTypeVal(_message.Message): - __slots__ = ("statusCode", "errorMessage", "value") - STATUSCODE_FIELD_NUMBER: _ClassVar[int] - ERRORMESSAGE_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - statusCode: int - errorMessage: str - value: int - def __init__( - self, - statusCode: _Optional[int] = ..., - errorMessage: _Optional[str] = ..., - value: _Optional[int] = ..., - ) -> None: ... - -class StatefulProcessorCall(_message.Message): - __slots__ = ( - "setHandleState", - "getValueState", - "getListState", - "getMapState", - "timerStateCall", - "deleteIfExists", - ) - SETHANDLESTATE_FIELD_NUMBER: _ClassVar[int] - GETVALUESTATE_FIELD_NUMBER: _ClassVar[int] - GETLISTSTATE_FIELD_NUMBER: _ClassVar[int] - GETMAPSTATE_FIELD_NUMBER: _ClassVar[int] - TIMERSTATECALL_FIELD_NUMBER: _ClassVar[int] - DELETEIFEXISTS_FIELD_NUMBER: _ClassVar[int] - setHandleState: SetHandleState - getValueState: StateCallCommand - getListState: StateCallCommand - getMapState: StateCallCommand - timerStateCall: TimerStateCallCommand - deleteIfExists: StateCallCommand - def __init__( - self, - setHandleState: _Optional[_Union[SetHandleState, _Mapping]] = ..., - getValueState: _Optional[_Union[StateCallCommand, _Mapping]] = ..., - getListState: _Optional[_Union[StateCallCommand, _Mapping]] = ..., - getMapState: _Optional[_Union[StateCallCommand, _Mapping]] = ..., - timerStateCall: _Optional[_Union[TimerStateCallCommand, _Mapping]] = ..., - deleteIfExists: _Optional[_Union[StateCallCommand, _Mapping]] = ..., - ) -> None: ... - -class StateVariableRequest(_message.Message): - __slots__ = ("valueStateCall", "listStateCall", "mapStateCall") - VALUESTATECALL_FIELD_NUMBER: _ClassVar[int] - LISTSTATECALL_FIELD_NUMBER: _ClassVar[int] - MAPSTATECALL_FIELD_NUMBER: _ClassVar[int] - valueStateCall: ValueStateCall - listStateCall: ListStateCall - mapStateCall: MapStateCall - def __init__( - self, - valueStateCall: _Optional[_Union[ValueStateCall, _Mapping]] = ..., - listStateCall: _Optional[_Union[ListStateCall, _Mapping]] = ..., - mapStateCall: _Optional[_Union[MapStateCall, _Mapping]] = ..., - ) -> None: ... - -class ImplicitGroupingKeyRequest(_message.Message): - __slots__ = ("setImplicitKey", "removeImplicitKey") - SETIMPLICITKEY_FIELD_NUMBER: _ClassVar[int] - REMOVEIMPLICITKEY_FIELD_NUMBER: _ClassVar[int] - setImplicitKey: SetImplicitKey - removeImplicitKey: RemoveImplicitKey - def __init__( - self, - setImplicitKey: _Optional[_Union[SetImplicitKey, _Mapping]] = ..., - removeImplicitKey: _Optional[_Union[RemoveImplicitKey, _Mapping]] = ..., - ) -> None: ... - -class TimerRequest(_message.Message): - __slots__ = ("timerValueRequest", "expiryTimerRequest") - TIMERVALUEREQUEST_FIELD_NUMBER: _ClassVar[int] - EXPIRYTIMERREQUEST_FIELD_NUMBER: _ClassVar[int] - timerValueRequest: TimerValueRequest - expiryTimerRequest: ExpiryTimerRequest - def __init__( - self, - timerValueRequest: _Optional[_Union[TimerValueRequest, _Mapping]] = ..., - expiryTimerRequest: _Optional[_Union[ExpiryTimerRequest, _Mapping]] = ..., - ) -> None: ... 
- -class TimerValueRequest(_message.Message): - __slots__ = ("getProcessingTimer", "getWatermark") - GETPROCESSINGTIMER_FIELD_NUMBER: _ClassVar[int] - GETWATERMARK_FIELD_NUMBER: _ClassVar[int] - getProcessingTimer: GetProcessingTime - getWatermark: GetWatermark - def __init__( - self, - getProcessingTimer: _Optional[_Union[GetProcessingTime, _Mapping]] = ..., - getWatermark: _Optional[_Union[GetWatermark, _Mapping]] = ..., - ) -> None: ... - -class ExpiryTimerRequest(_message.Message): - __slots__ = ("expiryTimestampMs",) - EXPIRYTIMESTAMPMS_FIELD_NUMBER: _ClassVar[int] - expiryTimestampMs: int - def __init__(self, expiryTimestampMs: _Optional[int] = ...) -> None: ... - -class GetProcessingTime(_message.Message): - __slots__ = () - def __init__(self) -> None: ... - -class GetWatermark(_message.Message): - __slots__ = () - def __init__(self) -> None: ... - -class StateCallCommand(_message.Message): - __slots__ = ("stateName", "schema", "mapStateValueSchema", "ttl") - STATENAME_FIELD_NUMBER: _ClassVar[int] - SCHEMA_FIELD_NUMBER: _ClassVar[int] - MAPSTATEVALUESCHEMA_FIELD_NUMBER: _ClassVar[int] - TTL_FIELD_NUMBER: _ClassVar[int] - stateName: str - schema: str - mapStateValueSchema: str - ttl: TTLConfig - def __init__( - self, - stateName: _Optional[str] = ..., - schema: _Optional[str] = ..., - mapStateValueSchema: _Optional[str] = ..., - ttl: _Optional[_Union[TTLConfig, _Mapping]] = ..., - ) -> None: ... - -class TimerStateCallCommand(_message.Message): - __slots__ = ("register", "delete", "list") - REGISTER_FIELD_NUMBER: _ClassVar[int] - DELETE_FIELD_NUMBER: _ClassVar[int] - LIST_FIELD_NUMBER: _ClassVar[int] - register: RegisterTimer - delete: DeleteTimer - list: ListTimers - def __init__( - self, - register: _Optional[_Union[RegisterTimer, _Mapping]] = ..., - delete: _Optional[_Union[DeleteTimer, _Mapping]] = ..., - list: _Optional[_Union[ListTimers, _Mapping]] = ..., - ) -> None: ... - -class ValueStateCall(_message.Message): - __slots__ = ("stateName", "exists", "get", "valueStateUpdate", "clear") - STATENAME_FIELD_NUMBER: _ClassVar[int] - EXISTS_FIELD_NUMBER: _ClassVar[int] - GET_FIELD_NUMBER: _ClassVar[int] - VALUESTATEUPDATE_FIELD_NUMBER: _ClassVar[int] - CLEAR_FIELD_NUMBER: _ClassVar[int] - stateName: str - exists: Exists - get: Get - valueStateUpdate: ValueStateUpdate - clear: Clear - def __init__( - self, - stateName: _Optional[str] = ..., - exists: _Optional[_Union[Exists, _Mapping]] = ..., - get: _Optional[_Union[Get, _Mapping]] = ..., - valueStateUpdate: _Optional[_Union[ValueStateUpdate, _Mapping]] = ..., - clear: _Optional[_Union[Clear, _Mapping]] = ..., - ) -> None: ... 
- -class ListStateCall(_message.Message): - __slots__ = ( - "stateName", - "exists", - "listStateGet", - "listStatePut", - "appendValue", - "appendList", - "clear", - ) - STATENAME_FIELD_NUMBER: _ClassVar[int] - EXISTS_FIELD_NUMBER: _ClassVar[int] - LISTSTATEGET_FIELD_NUMBER: _ClassVar[int] - LISTSTATEPUT_FIELD_NUMBER: _ClassVar[int] - APPENDVALUE_FIELD_NUMBER: _ClassVar[int] - APPENDLIST_FIELD_NUMBER: _ClassVar[int] - CLEAR_FIELD_NUMBER: _ClassVar[int] - stateName: str - exists: Exists - listStateGet: ListStateGet - listStatePut: ListStatePut - appendValue: AppendValue - appendList: AppendList - clear: Clear - def __init__( - self, - stateName: _Optional[str] = ..., - exists: _Optional[_Union[Exists, _Mapping]] = ..., - listStateGet: _Optional[_Union[ListStateGet, _Mapping]] = ..., - listStatePut: _Optional[_Union[ListStatePut, _Mapping]] = ..., - appendValue: _Optional[_Union[AppendValue, _Mapping]] = ..., - appendList: _Optional[_Union[AppendList, _Mapping]] = ..., - clear: _Optional[_Union[Clear, _Mapping]] = ..., - ) -> None: ... - -class MapStateCall(_message.Message): - __slots__ = ( - "stateName", - "exists", - "getValue", - "containsKey", - "updateValue", - "iterator", - "keys", - "values", - "removeKey", - "clear", - ) - STATENAME_FIELD_NUMBER: _ClassVar[int] - EXISTS_FIELD_NUMBER: _ClassVar[int] - GETVALUE_FIELD_NUMBER: _ClassVar[int] - CONTAINSKEY_FIELD_NUMBER: _ClassVar[int] - UPDATEVALUE_FIELD_NUMBER: _ClassVar[int] - ITERATOR_FIELD_NUMBER: _ClassVar[int] - KEYS_FIELD_NUMBER: _ClassVar[int] - VALUES_FIELD_NUMBER: _ClassVar[int] - REMOVEKEY_FIELD_NUMBER: _ClassVar[int] - CLEAR_FIELD_NUMBER: _ClassVar[int] - stateName: str - exists: Exists - getValue: GetValue - containsKey: ContainsKey - updateValue: UpdateValue - iterator: Iterator - keys: Keys - values: Values - removeKey: RemoveKey - clear: Clear - def __init__( - self, - stateName: _Optional[str] = ..., - exists: _Optional[_Union[Exists, _Mapping]] = ..., - getValue: _Optional[_Union[GetValue, _Mapping]] = ..., - containsKey: _Optional[_Union[ContainsKey, _Mapping]] = ..., - updateValue: _Optional[_Union[UpdateValue, _Mapping]] = ..., - iterator: _Optional[_Union[Iterator, _Mapping]] = ..., - keys: _Optional[_Union[Keys, _Mapping]] = ..., - values: _Optional[_Union[Values, _Mapping]] = ..., - removeKey: _Optional[_Union[RemoveKey, _Mapping]] = ..., - clear: _Optional[_Union[Clear, _Mapping]] = ..., - ) -> None: ... - -class SetImplicitKey(_message.Message): - __slots__ = ("key",) - KEY_FIELD_NUMBER: _ClassVar[int] - key: bytes - def __init__(self, key: _Optional[bytes] = ...) -> None: ... - -class RemoveImplicitKey(_message.Message): - __slots__ = () - def __init__(self) -> None: ... - -class Exists(_message.Message): - __slots__ = () - def __init__(self) -> None: ... - -class Get(_message.Message): - __slots__ = () - def __init__(self) -> None: ... - -class RegisterTimer(_message.Message): - __slots__ = ("expiryTimestampMs",) - EXPIRYTIMESTAMPMS_FIELD_NUMBER: _ClassVar[int] - expiryTimestampMs: int - def __init__(self, expiryTimestampMs: _Optional[int] = ...) -> None: ... - -class DeleteTimer(_message.Message): - __slots__ = ("expiryTimestampMs",) - EXPIRYTIMESTAMPMS_FIELD_NUMBER: _ClassVar[int] - expiryTimestampMs: int - def __init__(self, expiryTimestampMs: _Optional[int] = ...) -> None: ... - -class ListTimers(_message.Message): - __slots__ = ("iteratorId",) - ITERATORID_FIELD_NUMBER: _ClassVar[int] - iteratorId: str - def __init__(self, iteratorId: _Optional[str] = ...) -> None: ... 
- -class ValueStateUpdate(_message.Message): - __slots__ = ("value",) - VALUE_FIELD_NUMBER: _ClassVar[int] - value: bytes - def __init__(self, value: _Optional[bytes] = ...) -> None: ... - -class Clear(_message.Message): - __slots__ = () - def __init__(self) -> None: ... - -class ListStateGet(_message.Message): - __slots__ = ("iteratorId",) - ITERATORID_FIELD_NUMBER: _ClassVar[int] - iteratorId: str - def __init__(self, iteratorId: _Optional[str] = ...) -> None: ... - -class ListStatePut(_message.Message): - __slots__ = () - def __init__(self) -> None: ... - -class AppendValue(_message.Message): - __slots__ = ("value",) - VALUE_FIELD_NUMBER: _ClassVar[int] - value: bytes - def __init__(self, value: _Optional[bytes] = ...) -> None: ... - -class AppendList(_message.Message): - __slots__ = () - def __init__(self) -> None: ... - -class GetValue(_message.Message): - __slots__ = ("userKey",) - USERKEY_FIELD_NUMBER: _ClassVar[int] - userKey: bytes - def __init__(self, userKey: _Optional[bytes] = ...) -> None: ... - -class ContainsKey(_message.Message): - __slots__ = ("userKey",) - USERKEY_FIELD_NUMBER: _ClassVar[int] - userKey: bytes - def __init__(self, userKey: _Optional[bytes] = ...) -> None: ... - -class UpdateValue(_message.Message): - __slots__ = ("userKey", "value") - USERKEY_FIELD_NUMBER: _ClassVar[int] - VALUE_FIELD_NUMBER: _ClassVar[int] - userKey: bytes - value: bytes - def __init__(self, userKey: _Optional[bytes] = ..., value: _Optional[bytes] = ...) -> None: ... - -class Iterator(_message.Message): - __slots__ = ("iteratorId",) - ITERATORID_FIELD_NUMBER: _ClassVar[int] - iteratorId: str - def __init__(self, iteratorId: _Optional[str] = ...) -> None: ... - -class Keys(_message.Message): - __slots__ = ("iteratorId",) - ITERATORID_FIELD_NUMBER: _ClassVar[int] - iteratorId: str - def __init__(self, iteratorId: _Optional[str] = ...) -> None: ... - -class Values(_message.Message): - __slots__ = ("iteratorId",) - ITERATORID_FIELD_NUMBER: _ClassVar[int] - iteratorId: str - def __init__(self, iteratorId: _Optional[str] = ...) -> None: ... - -class RemoveKey(_message.Message): - __slots__ = ("userKey",) - USERKEY_FIELD_NUMBER: _ClassVar[int] - userKey: bytes - def __init__(self, userKey: _Optional[bytes] = ...) -> None: ... - -class SetHandleState(_message.Message): - __slots__ = ("state",) - STATE_FIELD_NUMBER: _ClassVar[int] - state: HandleState - def __init__(self, state: _Optional[_Union[HandleState, str]] = ...) -> None: ... - -class TTLConfig(_message.Message): - __slots__ = ("durationMs",) - DURATIONMS_FIELD_NUMBER: _ClassVar[int] - durationMs: int - def __init__(self, durationMs: _Optional[int] = ...) -> None: ... +""" +@generated by mypy-protobuf. Do not edit manually! +isort:skip_file + +Licensed to the Apache Software Foundation (ASF) under one or more +contributor license agreements. See the NOTICE file distributed with +this work for additional information regarding copyright ownership. +The ASF licenses this file to You under the Apache License, Version 2.0 +(the "License"); you may not use this file except in compliance with +the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import builtins +import google.protobuf.descriptor +import google.protobuf.internal.enum_type_wrapper +import google.protobuf.message +import sys +import typing + +if sys.version_info >= (3, 10): + import typing as typing_extensions +else: + import typing_extensions + +DESCRIPTOR: google.protobuf.descriptor.FileDescriptor + +class _HandleState: + ValueType = typing.NewType("ValueType", builtins.int) + V: typing_extensions.TypeAlias = ValueType + +class _HandleStateEnumTypeWrapper( + google.protobuf.internal.enum_type_wrapper._EnumTypeWrapper[_HandleState.ValueType], + builtins.type, +): # noqa: F821 + DESCRIPTOR: google.protobuf.descriptor.EnumDescriptor + CREATED: _HandleState.ValueType # 0 + INITIALIZED: _HandleState.ValueType # 1 + DATA_PROCESSED: _HandleState.ValueType # 2 + TIMER_PROCESSED: _HandleState.ValueType # 3 + CLOSED: _HandleState.ValueType # 4 + +class HandleState(_HandleState, metaclass=_HandleStateEnumTypeWrapper): ... + +CREATED: HandleState.ValueType # 0 +INITIALIZED: HandleState.ValueType # 1 +DATA_PROCESSED: HandleState.ValueType # 2 +TIMER_PROCESSED: HandleState.ValueType # 3 +CLOSED: HandleState.ValueType # 4 +global___HandleState = HandleState + +class StateRequest(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + VERSION_FIELD_NUMBER: builtins.int + STATEFULPROCESSORCALL_FIELD_NUMBER: builtins.int + STATEVARIABLEREQUEST_FIELD_NUMBER: builtins.int + IMPLICITGROUPINGKEYREQUEST_FIELD_NUMBER: builtins.int + TIMERREQUEST_FIELD_NUMBER: builtins.int + version: builtins.int + @property + def statefulProcessorCall(self) -> global___StatefulProcessorCall: ... + @property + def stateVariableRequest(self) -> global___StateVariableRequest: ... + @property + def implicitGroupingKeyRequest(self) -> global___ImplicitGroupingKeyRequest: ... + @property + def timerRequest(self) -> global___TimerRequest: ... + def __init__( + self, + *, + version: builtins.int = ..., + statefulProcessorCall: global___StatefulProcessorCall | None = ..., + stateVariableRequest: global___StateVariableRequest | None = ..., + implicitGroupingKeyRequest: global___ImplicitGroupingKeyRequest | None = ..., + timerRequest: global___TimerRequest | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "implicitGroupingKeyRequest", + b"implicitGroupingKeyRequest", + "method", + b"method", + "stateVariableRequest", + b"stateVariableRequest", + "statefulProcessorCall", + b"statefulProcessorCall", + "timerRequest", + b"timerRequest", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "implicitGroupingKeyRequest", + b"implicitGroupingKeyRequest", + "method", + b"method", + "stateVariableRequest", + b"stateVariableRequest", + "statefulProcessorCall", + b"statefulProcessorCall", + "timerRequest", + b"timerRequest", + "version", + b"version", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> ( + typing_extensions.Literal[ + "statefulProcessorCall", + "stateVariableRequest", + "implicitGroupingKeyRequest", + "timerRequest", + ] + | None + ): ... 
+ +global___StateRequest = StateRequest + +class StateResponse(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + STATUSCODE_FIELD_NUMBER: builtins.int + ERRORMESSAGE_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + statusCode: builtins.int + errorMessage: builtins.str + value: builtins.bytes + def __init__( + self, + *, + statusCode: builtins.int = ..., + errorMessage: builtins.str = ..., + value: builtins.bytes = ..., + ) -> None: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "errorMessage", b"errorMessage", "statusCode", b"statusCode", "value", b"value" + ], + ) -> None: ... + +global___StateResponse = StateResponse + +class StateResponseWithLongTypeVal(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + STATUSCODE_FIELD_NUMBER: builtins.int + ERRORMESSAGE_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + statusCode: builtins.int + errorMessage: builtins.str + value: builtins.int + def __init__( + self, + *, + statusCode: builtins.int = ..., + errorMessage: builtins.str = ..., + value: builtins.int = ..., + ) -> None: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "errorMessage", b"errorMessage", "statusCode", b"statusCode", "value", b"value" + ], + ) -> None: ... + +global___StateResponseWithLongTypeVal = StateResponseWithLongTypeVal + +class StatefulProcessorCall(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + SETHANDLESTATE_FIELD_NUMBER: builtins.int + GETVALUESTATE_FIELD_NUMBER: builtins.int + GETLISTSTATE_FIELD_NUMBER: builtins.int + GETMAPSTATE_FIELD_NUMBER: builtins.int + TIMERSTATECALL_FIELD_NUMBER: builtins.int + DELETEIFEXISTS_FIELD_NUMBER: builtins.int + @property + def setHandleState(self) -> global___SetHandleState: ... + @property + def getValueState(self) -> global___StateCallCommand: ... + @property + def getListState(self) -> global___StateCallCommand: ... + @property + def getMapState(self) -> global___StateCallCommand: ... + @property + def timerStateCall(self) -> global___TimerStateCallCommand: ... + @property + def deleteIfExists(self) -> global___StateCallCommand: ... + def __init__( + self, + *, + setHandleState: global___SetHandleState | None = ..., + getValueState: global___StateCallCommand | None = ..., + getListState: global___StateCallCommand | None = ..., + getMapState: global___StateCallCommand | None = ..., + timerStateCall: global___TimerStateCallCommand | None = ..., + deleteIfExists: global___StateCallCommand | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "deleteIfExists", + b"deleteIfExists", + "getListState", + b"getListState", + "getMapState", + b"getMapState", + "getValueState", + b"getValueState", + "method", + b"method", + "setHandleState", + b"setHandleState", + "timerStateCall", + b"timerStateCall", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "deleteIfExists", + b"deleteIfExists", + "getListState", + b"getListState", + "getMapState", + b"getMapState", + "getValueState", + b"getValueState", + "method", + b"method", + "setHandleState", + b"setHandleState", + "timerStateCall", + b"timerStateCall", + ], + ) -> None: ... 
+ def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> ( + typing_extensions.Literal[ + "setHandleState", + "getValueState", + "getListState", + "getMapState", + "timerStateCall", + "deleteIfExists", + ] + | None + ): ... + +global___StatefulProcessorCall = StatefulProcessorCall + +class StateVariableRequest(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + VALUESTATECALL_FIELD_NUMBER: builtins.int + LISTSTATECALL_FIELD_NUMBER: builtins.int + MAPSTATECALL_FIELD_NUMBER: builtins.int + @property + def valueStateCall(self) -> global___ValueStateCall: ... + @property + def listStateCall(self) -> global___ListStateCall: ... + @property + def mapStateCall(self) -> global___MapStateCall: ... + def __init__( + self, + *, + valueStateCall: global___ValueStateCall | None = ..., + listStateCall: global___ListStateCall | None = ..., + mapStateCall: global___MapStateCall | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "listStateCall", + b"listStateCall", + "mapStateCall", + b"mapStateCall", + "method", + b"method", + "valueStateCall", + b"valueStateCall", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "listStateCall", + b"listStateCall", + "mapStateCall", + b"mapStateCall", + "method", + b"method", + "valueStateCall", + b"valueStateCall", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> typing_extensions.Literal["valueStateCall", "listStateCall", "mapStateCall"] | None: ... + +global___StateVariableRequest = StateVariableRequest + +class ImplicitGroupingKeyRequest(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + SETIMPLICITKEY_FIELD_NUMBER: builtins.int + REMOVEIMPLICITKEY_FIELD_NUMBER: builtins.int + @property + def setImplicitKey(self) -> global___SetImplicitKey: ... + @property + def removeImplicitKey(self) -> global___RemoveImplicitKey: ... + def __init__( + self, + *, + setImplicitKey: global___SetImplicitKey | None = ..., + removeImplicitKey: global___RemoveImplicitKey | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "method", + b"method", + "removeImplicitKey", + b"removeImplicitKey", + "setImplicitKey", + b"setImplicitKey", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "method", + b"method", + "removeImplicitKey", + b"removeImplicitKey", + "setImplicitKey", + b"setImplicitKey", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> typing_extensions.Literal["setImplicitKey", "removeImplicitKey"] | None: ... + +global___ImplicitGroupingKeyRequest = ImplicitGroupingKeyRequest + +class TimerRequest(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + TIMERVALUEREQUEST_FIELD_NUMBER: builtins.int + EXPIRYTIMERREQUEST_FIELD_NUMBER: builtins.int + @property + def timerValueRequest(self) -> global___TimerValueRequest: ... + @property + def expiryTimerRequest(self) -> global___ExpiryTimerRequest: ... + def __init__( + self, + *, + timerValueRequest: global___TimerValueRequest | None = ..., + expiryTimerRequest: global___ExpiryTimerRequest | None = ..., + ) -> None: ... 
+ def HasField( + self, + field_name: typing_extensions.Literal[ + "expiryTimerRequest", + b"expiryTimerRequest", + "method", + b"method", + "timerValueRequest", + b"timerValueRequest", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "expiryTimerRequest", + b"expiryTimerRequest", + "method", + b"method", + "timerValueRequest", + b"timerValueRequest", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> typing_extensions.Literal["timerValueRequest", "expiryTimerRequest"] | None: ... + +global___TimerRequest = TimerRequest + +class TimerValueRequest(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + GETPROCESSINGTIMER_FIELD_NUMBER: builtins.int + GETWATERMARK_FIELD_NUMBER: builtins.int + @property + def getProcessingTimer(self) -> global___GetProcessingTime: ... + @property + def getWatermark(self) -> global___GetWatermark: ... + def __init__( + self, + *, + getProcessingTimer: global___GetProcessingTime | None = ..., + getWatermark: global___GetWatermark | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "getProcessingTimer", + b"getProcessingTimer", + "getWatermark", + b"getWatermark", + "method", + b"method", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "getProcessingTimer", + b"getProcessingTimer", + "getWatermark", + b"getWatermark", + "method", + b"method", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> typing_extensions.Literal["getProcessingTimer", "getWatermark"] | None: ... + +global___TimerValueRequest = TimerValueRequest + +class ExpiryTimerRequest(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + EXPIRYTIMESTAMPMS_FIELD_NUMBER: builtins.int + expiryTimestampMs: builtins.int + def __init__( + self, + *, + expiryTimestampMs: builtins.int = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["expiryTimestampMs", b"expiryTimestampMs"] + ) -> None: ... + +global___ExpiryTimerRequest = ExpiryTimerRequest + +class GetProcessingTime(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___GetProcessingTime = GetProcessingTime + +class GetWatermark(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___GetWatermark = GetWatermark + +class StateCallCommand(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + STATENAME_FIELD_NUMBER: builtins.int + SCHEMA_FIELD_NUMBER: builtins.int + MAPSTATEVALUESCHEMA_FIELD_NUMBER: builtins.int + TTL_FIELD_NUMBER: builtins.int + stateName: builtins.str + schema: builtins.str + mapStateValueSchema: builtins.str + @property + def ttl(self) -> global___TTLConfig: ... + def __init__( + self, + *, + stateName: builtins.str = ..., + schema: builtins.str = ..., + mapStateValueSchema: builtins.str = ..., + ttl: global___TTLConfig | None = ..., + ) -> None: ... + def HasField(self, field_name: typing_extensions.Literal["ttl", b"ttl"]) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "mapStateValueSchema", + b"mapStateValueSchema", + "schema", + b"schema", + "stateName", + b"stateName", + "ttl", + b"ttl", + ], + ) -> None: ... 
+ +global___StateCallCommand = StateCallCommand + +class TimerStateCallCommand(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + REGISTER_FIELD_NUMBER: builtins.int + DELETE_FIELD_NUMBER: builtins.int + LIST_FIELD_NUMBER: builtins.int + @property + def register(self) -> global___RegisterTimer: ... + @property + def delete(self) -> global___DeleteTimer: ... + @property + def list(self) -> global___ListTimers: ... + def __init__( + self, + *, + register: global___RegisterTimer | None = ..., + delete: global___DeleteTimer | None = ..., + list: global___ListTimers | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "delete", b"delete", "list", b"list", "method", b"method", "register", b"register" + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "delete", b"delete", "list", b"list", "method", b"method", "register", b"register" + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> typing_extensions.Literal["register", "delete", "list"] | None: ... + +global___TimerStateCallCommand = TimerStateCallCommand + +class ValueStateCall(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + STATENAME_FIELD_NUMBER: builtins.int + EXISTS_FIELD_NUMBER: builtins.int + GET_FIELD_NUMBER: builtins.int + VALUESTATEUPDATE_FIELD_NUMBER: builtins.int + CLEAR_FIELD_NUMBER: builtins.int + stateName: builtins.str + @property + def exists(self) -> global___Exists: ... + @property + def get(self) -> global___Get: ... + @property + def valueStateUpdate(self) -> global___ValueStateUpdate: ... + @property + def clear(self) -> global___Clear: ... + def __init__( + self, + *, + stateName: builtins.str = ..., + exists: global___Exists | None = ..., + get: global___Get | None = ..., + valueStateUpdate: global___ValueStateUpdate | None = ..., + clear: global___Clear | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "clear", + b"clear", + "exists", + b"exists", + "get", + b"get", + "method", + b"method", + "valueStateUpdate", + b"valueStateUpdate", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "clear", + b"clear", + "exists", + b"exists", + "get", + b"get", + "method", + b"method", + "stateName", + b"stateName", + "valueStateUpdate", + b"valueStateUpdate", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> typing_extensions.Literal["exists", "get", "valueStateUpdate", "clear"] | None: ... + +global___ValueStateCall = ValueStateCall + +class ListStateCall(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + STATENAME_FIELD_NUMBER: builtins.int + EXISTS_FIELD_NUMBER: builtins.int + LISTSTATEGET_FIELD_NUMBER: builtins.int + LISTSTATEPUT_FIELD_NUMBER: builtins.int + APPENDVALUE_FIELD_NUMBER: builtins.int + APPENDLIST_FIELD_NUMBER: builtins.int + CLEAR_FIELD_NUMBER: builtins.int + stateName: builtins.str + @property + def exists(self) -> global___Exists: ... + @property + def listStateGet(self) -> global___ListStateGet: ... + @property + def listStatePut(self) -> global___ListStatePut: ... + @property + def appendValue(self) -> global___AppendValue: ... + @property + def appendList(self) -> global___AppendList: ... + @property + def clear(self) -> global___Clear: ... 
+ def __init__( + self, + *, + stateName: builtins.str = ..., + exists: global___Exists | None = ..., + listStateGet: global___ListStateGet | None = ..., + listStatePut: global___ListStatePut | None = ..., + appendValue: global___AppendValue | None = ..., + appendList: global___AppendList | None = ..., + clear: global___Clear | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "appendList", + b"appendList", + "appendValue", + b"appendValue", + "clear", + b"clear", + "exists", + b"exists", + "listStateGet", + b"listStateGet", + "listStatePut", + b"listStatePut", + "method", + b"method", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "appendList", + b"appendList", + "appendValue", + b"appendValue", + "clear", + b"clear", + "exists", + b"exists", + "listStateGet", + b"listStateGet", + "listStatePut", + b"listStatePut", + "method", + b"method", + "stateName", + b"stateName", + ], + ) -> None: ... + def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> ( + typing_extensions.Literal[ + "exists", "listStateGet", "listStatePut", "appendValue", "appendList", "clear" + ] + | None + ): ... + +global___ListStateCall = ListStateCall + +class MapStateCall(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + STATENAME_FIELD_NUMBER: builtins.int + EXISTS_FIELD_NUMBER: builtins.int + GETVALUE_FIELD_NUMBER: builtins.int + CONTAINSKEY_FIELD_NUMBER: builtins.int + UPDATEVALUE_FIELD_NUMBER: builtins.int + ITERATOR_FIELD_NUMBER: builtins.int + KEYS_FIELD_NUMBER: builtins.int + VALUES_FIELD_NUMBER: builtins.int + REMOVEKEY_FIELD_NUMBER: builtins.int + CLEAR_FIELD_NUMBER: builtins.int + stateName: builtins.str + @property + def exists(self) -> global___Exists: ... + @property + def getValue(self) -> global___GetValue: ... + @property + def containsKey(self) -> global___ContainsKey: ... + @property + def updateValue(self) -> global___UpdateValue: ... + @property + def iterator(self) -> global___Iterator: ... + @property + def keys(self) -> global___Keys: ... + @property + def values(self) -> global___Values: ... + @property + def removeKey(self) -> global___RemoveKey: ... + @property + def clear(self) -> global___Clear: ... + def __init__( + self, + *, + stateName: builtins.str = ..., + exists: global___Exists | None = ..., + getValue: global___GetValue | None = ..., + containsKey: global___ContainsKey | None = ..., + updateValue: global___UpdateValue | None = ..., + iterator: global___Iterator | None = ..., + keys: global___Keys | None = ..., + values: global___Values | None = ..., + removeKey: global___RemoveKey | None = ..., + clear: global___Clear | None = ..., + ) -> None: ... + def HasField( + self, + field_name: typing_extensions.Literal[ + "clear", + b"clear", + "containsKey", + b"containsKey", + "exists", + b"exists", + "getValue", + b"getValue", + "iterator", + b"iterator", + "keys", + b"keys", + "method", + b"method", + "removeKey", + b"removeKey", + "updateValue", + b"updateValue", + "values", + b"values", + ], + ) -> builtins.bool: ... + def ClearField( + self, + field_name: typing_extensions.Literal[ + "clear", + b"clear", + "containsKey", + b"containsKey", + "exists", + b"exists", + "getValue", + b"getValue", + "iterator", + b"iterator", + "keys", + b"keys", + "method", + b"method", + "removeKey", + b"removeKey", + "stateName", + b"stateName", + "updateValue", + b"updateValue", + "values", + b"values", + ], + ) -> None: ... 
+ def WhichOneof( + self, oneof_group: typing_extensions.Literal["method", b"method"] + ) -> ( + typing_extensions.Literal[ + "exists", + "getValue", + "containsKey", + "updateValue", + "iterator", + "keys", + "values", + "removeKey", + "clear", + ] + | None + ): ... + +global___MapStateCall = MapStateCall + +class SetImplicitKey(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + KEY_FIELD_NUMBER: builtins.int + key: builtins.bytes + def __init__( + self, + *, + key: builtins.bytes = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["key", b"key"]) -> None: ... + +global___SetImplicitKey = SetImplicitKey + +class RemoveImplicitKey(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___RemoveImplicitKey = RemoveImplicitKey + +class Exists(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___Exists = Exists + +class Get(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___Get = Get + +class RegisterTimer(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + EXPIRYTIMESTAMPMS_FIELD_NUMBER: builtins.int + expiryTimestampMs: builtins.int + def __init__( + self, + *, + expiryTimestampMs: builtins.int = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["expiryTimestampMs", b"expiryTimestampMs"] + ) -> None: ... + +global___RegisterTimer = RegisterTimer + +class DeleteTimer(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + EXPIRYTIMESTAMPMS_FIELD_NUMBER: builtins.int + expiryTimestampMs: builtins.int + def __init__( + self, + *, + expiryTimestampMs: builtins.int = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["expiryTimestampMs", b"expiryTimestampMs"] + ) -> None: ... + +global___DeleteTimer = DeleteTimer + +class ListTimers(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + ITERATORID_FIELD_NUMBER: builtins.int + iteratorId: builtins.str + def __init__( + self, + *, + iteratorId: builtins.str = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["iteratorId", b"iteratorId"] + ) -> None: ... + +global___ListTimers = ListTimers + +class ValueStateUpdate(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + VALUE_FIELD_NUMBER: builtins.int + value: builtins.bytes + def __init__( + self, + *, + value: builtins.bytes = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["value", b"value"]) -> None: ... + +global___ValueStateUpdate = ValueStateUpdate + +class Clear(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___Clear = Clear + +class ListStateGet(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + ITERATORID_FIELD_NUMBER: builtins.int + iteratorId: builtins.str + def __init__( + self, + *, + iteratorId: builtins.str = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["iteratorId", b"iteratorId"] + ) -> None: ... 
+ +global___ListStateGet = ListStateGet + +class ListStatePut(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___ListStatePut = ListStatePut + +class AppendValue(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + VALUE_FIELD_NUMBER: builtins.int + value: builtins.bytes + def __init__( + self, + *, + value: builtins.bytes = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["value", b"value"]) -> None: ... + +global___AppendValue = AppendValue + +class AppendList(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + def __init__( + self, + ) -> None: ... + +global___AppendList = AppendList + +class GetValue(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + USERKEY_FIELD_NUMBER: builtins.int + userKey: builtins.bytes + def __init__( + self, + *, + userKey: builtins.bytes = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["userKey", b"userKey"]) -> None: ... + +global___GetValue = GetValue + +class ContainsKey(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + USERKEY_FIELD_NUMBER: builtins.int + userKey: builtins.bytes + def __init__( + self, + *, + userKey: builtins.bytes = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["userKey", b"userKey"]) -> None: ... + +global___ContainsKey = ContainsKey + +class UpdateValue(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + USERKEY_FIELD_NUMBER: builtins.int + VALUE_FIELD_NUMBER: builtins.int + userKey: builtins.bytes + value: builtins.bytes + def __init__( + self, + *, + userKey: builtins.bytes = ..., + value: builtins.bytes = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["userKey", b"userKey", "value", b"value"] + ) -> None: ... + +global___UpdateValue = UpdateValue + +class Iterator(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + ITERATORID_FIELD_NUMBER: builtins.int + iteratorId: builtins.str + def __init__( + self, + *, + iteratorId: builtins.str = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["iteratorId", b"iteratorId"] + ) -> None: ... + +global___Iterator = Iterator + +class Keys(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + ITERATORID_FIELD_NUMBER: builtins.int + iteratorId: builtins.str + def __init__( + self, + *, + iteratorId: builtins.str = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["iteratorId", b"iteratorId"] + ) -> None: ... + +global___Keys = Keys + +class Values(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + ITERATORID_FIELD_NUMBER: builtins.int + iteratorId: builtins.str + def __init__( + self, + *, + iteratorId: builtins.str = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["iteratorId", b"iteratorId"] + ) -> None: ... + +global___Values = Values + +class RemoveKey(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + USERKEY_FIELD_NUMBER: builtins.int + userKey: builtins.bytes + def __init__( + self, + *, + userKey: builtins.bytes = ..., + ) -> None: ... 
+ def ClearField(self, field_name: typing_extensions.Literal["userKey", b"userKey"]) -> None: ... + +global___RemoveKey = RemoveKey + +class SetHandleState(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + STATE_FIELD_NUMBER: builtins.int + state: global___HandleState.ValueType + def __init__( + self, + *, + state: global___HandleState.ValueType = ..., + ) -> None: ... + def ClearField(self, field_name: typing_extensions.Literal["state", b"state"]) -> None: ... + +global___SetHandleState = SetHandleState + +class TTLConfig(google.protobuf.message.Message): + DESCRIPTOR: google.protobuf.descriptor.Descriptor + + DURATIONMS_FIELD_NUMBER: builtins.int + durationMs: builtins.int + def __init__( + self, + *, + durationMs: builtins.int = ..., + ) -> None: ... + def ClearField( + self, field_name: typing_extensions.Literal["durationMs", b"durationMs"] + ) -> None: ... + +global___TTLConfig = TTLConfig diff --git a/sql/core/src/main/buf.gen.yaml b/sql/core/src/main/buf.gen.yaml new file mode 100644 index 000000000000..94da50c2c41c --- /dev/null +++ b/sql/core/src/main/buf.gen.yaml @@ -0,0 +1,24 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +version: v1 +plugins: + # Building the Python build and building the mypy interfaces. + - plugin: buf.build/protocolbuffers/python:v28.3 + out: gen/proto/python + - name: mypy + out: gen/proto/python + diff --git a/sql/core/src/main/buf.work.yaml b/sql/core/src/main/buf.work.yaml new file mode 100644 index 000000000000..a02dead420cd --- /dev/null +++ b/sql/core/src/main/buf.work.yaml @@ -0,0 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +version: v1 +directories: + - protobuf From 05508cf7cb9da3042fa4b17645102a6406278695 Mon Sep 17 00:00:00 2001 From: Mihailo Milosevic Date: Wed, 13 Nov 2024 20:07:52 +0100 Subject: [PATCH 10/79] [SPARK-42838][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2000 ### What changes were proposed in this pull request? Introducing two new error classes instead of _LEGACY_ERROR_TEMP_2000. 
Classes introduced: - DATETIME_FIELD_OUT_OF_BOUNDS - INVALID_INTERVAL_WITH_MICROSECONDS_ADDITION ### Why are the changes needed? We want to assign names for all existing error classes. ### Does this PR introduce _any_ user-facing change? Yes, error message changed. ### How was this patch tested? Existing tests cover error raising. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48332 from mihailom-db/invalid_date_argument_value. Authored-by: Mihailo Milosevic Signed-off-by: Max Gekk --- .../main/resources/error/error-conditions.json | 17 ++++++++++++----- .../expressions/datetimeExpressions.scala | 8 ++++---- .../spark/sql/catalyst/util/DateTimeUtils.scala | 3 +-- .../spark/sql/errors/QueryExecutionErrors.scala | 14 ++++++-------- .../expressions/DateExpressionsSuite.scala | 6 ++---- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 6 ++---- .../sql-tests/results/ansi/date.sql.out | 10 ++++++---- .../sql-tests/results/ansi/timestamp.sql.out | 15 +++++++++------ .../sql-tests/results/postgreSQL/date.sql.out | 15 +++++++++------ .../results/timestampNTZ/timestamp-ansi.sql.out | 15 +++++++++------ 10 files changed, 60 insertions(+), 49 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 5e1c3f46fd11..eb772f053a88 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -1101,6 +1101,12 @@ ], "sqlState" : "42K03" }, + "DATETIME_FIELD_OUT_OF_BOUNDS" : { + "message" : [ + "<rangeMessage>. If necessary set <ansiConfig> to \"false\" to bypass this error." + ], + "sqlState" : "22023" + }, "DATETIME_OVERFLOW" : { "message" : [ "Datetime operation overflow: <operation>." ], @@ -2609,6 +2615,12 @@ }, "sqlState" : "22006" }, + "INVALID_INTERVAL_WITH_MICROSECONDS_ADDITION" : { + "message" : [ + "Cannot add an interval to a date because its microseconds part is not 0. If necessary set <ansiConfig> to \"false\" to bypass this error." + ], + "sqlState" : "22006" + }, "INVALID_INVERSE_DISTRIBUTION_FUNCTION" : { "message" : [ "Invalid inverse distribution function <funcName>." @@ -6905,11 +6917,6 @@ "Sinks cannot request distribution and ordering in continuous execution mode." ] }, - "_LEGACY_ERROR_TEMP_2000" : { - "message" : [ - "<message>. If necessary set <ansiConfig> to false to bypass this error." ] }, "_LEGACY_ERROR_TEMP_2003" : { "message" : [ "Unsuccessful try to zip maps with unique keys due to exceeding the array size limit <size>."
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index f2ba3ed95b85..fba3927a0bc9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -2507,14 +2507,14 @@ case class MakeDate( localDateToDays(ld) } catch { case e: java.time.DateTimeException => - if (failOnError) throw QueryExecutionErrors.ansiDateTimeError(e) else null + if (failOnError) throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRange(e) else null } } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val dtu = DateTimeUtils.getClass.getName.stripSuffix("$") val failOnErrorBranch = if (failOnError) { - "throw QueryExecutionErrors.ansiDateTimeError(e);" + "throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRange(e);" } else { s"${ev.isNull} = true;" } @@ -2839,7 +2839,7 @@ case class MakeTimestamp( } catch { case e: SparkDateTimeException if failOnError => throw e case e: DateTimeException if failOnError => - throw QueryExecutionErrors.ansiDateTimeError(e) + throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRange(e) case _: DateTimeException => null } } @@ -2870,7 +2870,7 @@ case class MakeTimestamp( val zid = ctx.addReferenceObj("zoneId", zoneId, classOf[ZoneId].getName) val d = Decimal.getClass.getName.stripSuffix("$") val failOnErrorBranch = if (failOnError) { - "throw QueryExecutionErrors.ansiDateTimeError(e);" + "throw QueryExecutionErrors.ansiDateTimeArgumentOutOfRange(e);" } else { s"${ev.isNull} = true;" } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index e27ce29fc231..c9ca3ed864c1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -304,8 +304,7 @@ object DateTimeUtils extends SparkDateTimeUtils { start: Int, interval: CalendarInterval): Int = { if (interval.microseconds != 0) { - throw QueryExecutionErrors.ansiIllegalArgumentError( - "Cannot add hours, minutes or seconds, milliseconds, microseconds to a date") + throw QueryExecutionErrors.invalidIntervalWithMicrosecondsAdditionError() } val ld = daysToLocalDate(start).plusMonths(interval.months).plusDays(interval.days) localDateToDays(ld) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index fb39d3c5d7c6..ba48000f2aec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -277,22 +277,20 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE summary = "") } - def ansiDateTimeError(e: Exception): SparkDateTimeException = { + def ansiDateTimeArgumentOutOfRange(e: Exception): SparkDateTimeException = { new SparkDateTimeException( - errorClass = "_LEGACY_ERROR_TEMP_2000", + errorClass = "DATETIME_FIELD_OUT_OF_BOUNDS", messageParameters = Map( - "message" -> e.getMessage, + "rangeMessage" -> e.getMessage, "ansiConfig" -> toSQLConf(SQLConf.ANSI_ENABLED.key)), context = Array.empty, 
summary = "") } - def ansiIllegalArgumentError(message: String): SparkIllegalArgumentException = { + def invalidIntervalWithMicrosecondsAdditionError(): SparkIllegalArgumentException = { new SparkIllegalArgumentException( - errorClass = "_LEGACY_ERROR_TEMP_2000", - messageParameters = Map( - "message" -> message, - "ansiConfig" -> toSQLConf(SQLConf.ANSI_ENABLED.key))) + errorClass = "INVALID_INTERVAL_WITH_MICROSECONDS_ADDITION", + messageParameters = Map("ansiConfig" -> toSQLConf(SQLConf.ANSI_ENABLED.key))) } def overflowInSumOfDecimalError( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala index 05d68504a727..5cd974838fa2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala @@ -436,10 +436,8 @@ class DateExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { withSQLConf((SQLConf.ANSI_ENABLED.key, "true")) { checkErrorInExpression[SparkIllegalArgumentException]( DateAddInterval(Literal(d), Literal(new CalendarInterval(1, 1, 25 * MICROS_PER_HOUR))), - "_LEGACY_ERROR_TEMP_2000", - Map("message" -> - "Cannot add hours, minutes or seconds, milliseconds, microseconds to a date", - "ansiConfig" -> "\"spark.sql.ansi.enabled\"")) + "INVALID_INTERVAL_WITH_MICROSECONDS_ADDITION", + Map("ansiConfig" -> "\"spark.sql.ansi.enabled\"")) } withSQLConf((SQLConf.ANSI_ENABLED.key, "false")) { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala index 96aaf13052b0..790c834d83e9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/DateTimeUtilsSuite.scala @@ -542,10 +542,8 @@ class DateTimeUtilsSuite extends SparkFunSuite with Matchers with SQLHelper { checkError( exception = intercept[SparkIllegalArgumentException]( dateAddInterval(input, new CalendarInterval(36, 47, 1))), - condition = "_LEGACY_ERROR_TEMP_2000", - parameters = Map( - "message" -> "Cannot add hours, minutes or seconds, milliseconds, microseconds to a date", - "ansiConfig" -> "\"spark.sql.ansi.enabled\"")) + condition = "INVALID_INTERVAL_WITH_MICROSECONDS_ADDITION", + parameters = Map("ansiConfig" -> "\"spark.sql.ansi.enabled\"")) } test("timestamp add interval") { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 67cd23faf255..aa283d324961 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -53,10 +53,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : "DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid value for MonthOfYear (valid values 1 - 12): 13" + "rangeMessage" : "Invalid value for MonthOfYear (valid values 1 - 12): 13" } } @@ -68,10 +69,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : 
"DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid value for DayOfMonth (valid values 1 - 28/31): 33" + "rangeMessage" : "Invalid value for DayOfMonth (valid values 1 - 28/31): 33" } } diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out index d75380b16cc8..e3cf1a154922 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/timestamp.sql.out @@ -154,10 +154,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : "DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid value for SecondOfMinute (valid values 0 - 59): 61" + "rangeMessage" : "Invalid value for SecondOfMinute (valid values 0 - 59): 61" } } @@ -185,10 +186,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : "DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid value for SecondOfMinute (valid values 0 - 59): 99" + "rangeMessage" : "Invalid value for SecondOfMinute (valid values 0 - 59): 99" } } @@ -200,10 +202,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : "DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid value for SecondOfMinute (valid values 0 - 59): 999" + "rangeMessage" : "Invalid value for SecondOfMinute (valid values 0 - 59): 999" } } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out index 8caf8c54b9f3..d9f4301dd0e8 100755 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/date.sql.out @@ -687,10 +687,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : "DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid date 'FEBRUARY 30'" + "rangeMessage" : "Invalid date 'FEBRUARY 30'" } } @@ -702,10 +703,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : "DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid value for MonthOfYear (valid values 1 - 12): 13" + "rangeMessage" : "Invalid value for MonthOfYear (valid values 1 - 12): 13" } } @@ -717,10 +719,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : "DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid value for DayOfMonth (valid values 1 - 28/31): -1" + "rangeMessage" : "Invalid value for DayOfMonth (valid values 1 - 28/31): -1" } } diff --git 
a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out index 79996d838c1e..681306ba9f40 100644 --- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out @@ -154,10 +154,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : "DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid value for SecondOfMinute (valid values 0 - 59): 61" + "rangeMessage" : "Invalid value for SecondOfMinute (valid values 0 - 59): 61" } } @@ -185,10 +186,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : "DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid value for SecondOfMinute (valid values 0 - 59): 99" + "rangeMessage" : "Invalid value for SecondOfMinute (valid values 0 - 59): 99" } } @@ -200,10 +202,11 @@ struct<> -- !query output org.apache.spark.SparkDateTimeException { - "errorClass" : "_LEGACY_ERROR_TEMP_2000", + "errorClass" : "DATETIME_FIELD_OUT_OF_BOUNDS", + "sqlState" : "22023", "messageParameters" : { "ansiConfig" : "\"spark.sql.ansi.enabled\"", - "message" : "Invalid value for SecondOfMinute (valid values 0 - 59): 999" + "rangeMessage" : "Invalid value for SecondOfMinute (valid values 0 - 59): 999" } } From 5cc60f46708844c812ee0f21bee4f4b4b70c6d92 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 13 Nov 2024 14:57:53 -0800 Subject: [PATCH 11/79] [SPARK-50300][BUILD] Use mirror host instead of `archive.apache.org` ### What changes were proposed in this pull request? This PR aims to use `mirror host` instead of `archive.apache.org`. ### Why are the changes needed? Currently, Apache Spark CI is flaky due to the checksum download failure like the following. It took over 9 minutes and failed eventually. - https://github.com/apache/spark/actions/runs/11818847971/job/32927380452 - https://github.com/apache/spark/actions/runs/11818847971/job/32927382179 ``` exec: curl --retry 3 --silent --show-error -L https://www.apache.org/dyn/closer.lua/maven/maven-3/3.9.9/binaries/apache-maven-3.9.9-bin.tar.gz?action=download exec: curl --retry 3 --silent --show-error -L https://archive.apache.org/dist/maven/maven-3/3.9.9/binaries/apache-maven-3.9.9-bin.tar.gz.sha512 curl: (28) Failed to connect to archive.apache.org port 443 after 135199 ms: Connection timed out curl: (28) Failed to connect to archive.apache.org port 443 after 134166 ms: Connection timed out curl: (28) Failed to connect to archive.apache.org port 443 after 135213 ms: Connection timed out curl: (28) Failed to connect to archive.apache.org port 443 after 135260 ms: Connection timed out Verifying checksum from /home/runner/work/spark/spark/build/apache-maven-3.9.9-bin.tar.gz.sha512 shasum: /home/runner/work/spark/spark/build/apache-maven-3.9.9-bin.tar.gz.sha512: no properly formatted SHA checksum lines found Bad checksum from https://archive.apache.org/dist/maven/maven-3/3.9.9/binaries/apache-maven-3.9.9-bin.tar.gz.sha512 Error: Process completed with exit code 2. 
``` **BEFORE** ``` $ build/mvn clean exec: curl --retry 3 --silent --show-error -L https://www.apache.org/dyn/closer.lua/maven/maven-3/3.9.9/binaries/apache-maven-3.9.9-bin.tar.gz?action=download exec: curl --retry 3 --silent --show-error -L https://archive.apache.org/dist/maven/maven-3/3.9.9/binaries/apache-maven-3.9.9-bin.tar.gz.sha512 ``` **AFTER** ``` $ build/mvn clean exec: curl --retry 3 --silent --show-error -L https://www.apache.org/dyn/closer.lua/maven/maven-3/3.9.9/binaries/apache-maven-3.9.9-bin.tar.gz?action=download exec: curl --retry 3 --silent --show-error -L https://www.apache.org/dyn/closer.lua/maven/maven-3/3.9.9/binaries/apache-maven-3.9.9-bin.tar.gz.sha512?action=download ``` ### Does this PR introduce _any_ user-facing change? No, this is a dev-only change. ### How was this patch tested? Pass the CIs. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48836 from dongjoon-hyun/SPARK-50300. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun --- build/mvn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/mvn b/build/mvn index 060209ac1ac4..fef589fc0347 100755 --- a/build/mvn +++ b/build/mvn @@ -56,7 +56,7 @@ install_app() { local binary="${_DIR}/$6" local remote_tarball="${mirror_host}/${url_path}${url_query}" local local_checksum="${local_tarball}.${checksum_suffix}" - local remote_checksum="https://archive.apache.org/dist/${url_path}.${checksum_suffix}" + local remote_checksum="${mirror_host}/${url_path}.${checksum_suffix}${url_query}" local curl_opts="--retry 3 --silent --show-error -L" local wget_opts="--no-verbose" From 33378a6f86e001b20236c7ccd1cebf0acbb54f3e Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 14 Nov 2024 09:01:19 +0900 Subject: [PATCH 12/79] [SPARK-50304][INFRA] Remove `(any|empty).proto` from RAT exclusion ### What changes were proposed in this pull request? This PR aims to remove `(any|empty).proto` from RAT exclusion. ### Why are the changes needed? `(any|empty).proto` files were never a part of Apache Spark repository. Those files were only used in the initial `Connect` PR and removed before merging. - #37710 - Added: https://github.com/apache/spark/pull/37710/commits/45c7bc55498f38081818424d231ec12576a0dc54 - Excluded from RAT check: https://github.com/apache/spark/pull/37710/commits/cf6b19a991c9bf8c0f208bb2de39dd7121b146a2 - Removed: https://github.com/apache/spark/pull/37710/commits/497198051af069f9afa70c9435dd5d7a099f11f1 ### Does this PR introduce _any_ user-facing change? No. This is a dev-only change. ### How was this patch tested? Pass the CIs or manual check. ``` $ ./dev/check-license Ignored 0 lines in your exclusion files as comments or empty lines. RAT checks passed. ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48837 from dongjoon-hyun/SPARK-50304. 
Authored-by: Dongjoon Hyun Signed-off-by: Hyukjin Kwon --- dev/.rat-excludes | 3 --- 1 file changed, 3 deletions(-) diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 6806c24c7d9f..d8c919629395 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -126,9 +126,6 @@ exported_table/* node_modules spark-events-broken/* SqlBaseLexer.tokens -# Spark Connect related files with custom licence -any.proto -empty.proto .*\.explain .*\.proto.bin LimitedInputStream.java From 891f694207ea83dcfd2ec53e72ca6f0daa093924 Mon Sep 17 00:00:00 2001 From: Hyukjin Kwon Date: Thu, 14 Nov 2024 10:57:04 +0900 Subject: [PATCH 13/79] [SPARK-50306][PYTHON][CONNECT] Support Python 3.13 in Spark Connect ### What changes were proposed in this pull request? This PR proposes to note Python 3.13 in `pyspark-connect` package as its supported version. ### Why are the changes needed? To officially support Python 3.13 ### Does this PR introduce _any_ user-facing change? Yes, in `pyspark-connect` package, Python 3.13 will be explicitly noted as a supported Python version. ### How was this patch tested? CI passed at https://github.com/apache/spark/actions/runs/11824865909 ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48839 from HyukjinKwon/SPARK-50306. Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- python/packaging/connect/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/packaging/connect/setup.py b/python/packaging/connect/setup.py index 6ae16e9a9ad3..de76d51d0cfd 100755 --- a/python/packaging/connect/setup.py +++ b/python/packaging/connect/setup.py @@ -212,6 +212,7 @@ "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Typing :: Typed", From 2fd47026371488b9409750cba6b697cc61ea7371 Mon Sep 17 00:00:00 2001 From: Milan Dankovic Date: Thu, 14 Nov 2024 09:59:01 +0800 Subject: [PATCH 14/79] [SPARK-49913][SQL] Add check for unique label names in nested labeled scopes ### What changes were proposed in this pull request? We are introducing checks for unique label names. New rules for label names: - Labels can't have the same name as some of the labels in scope surrounding them - Labels can have the same name as other labels in the same scope **Valid** code: ``` BEGIN lbl: BEGIN SELECT 1; END; lbl: BEGIN SELECT 2; END; BEGIN lbl: WHILE 1=1 DO LEAVE lbl; END WHILE; END; END ``` **Invalid** code: ``` BEGIN lbl: BEGIN lbl: BEGIN SELECT 1; END; END; END ``` #### Design explanation: Even though there are _Listeners_ with `enterRule` and `exitRule` methods to check labels before and remove them from `seenLabels` after visiting node, we favor this approach because minimal changes were needed and code is more compact to avoid dependency issues. Additionally, generating label text would need to be done in 2 places and we wanted to avoid duplicated logic: - `enterRule` - `visitRule` ### Why are the changes needed? It will be needed in future when we release Local Scoped Variables for SQL Scripting so users can target variables from outer scopes if they are shadowed. ### How was this patch tested? New unit tests in 'SqlScriptingParserSuite.scala'. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #48795 from miland-db/milan-dankovic_data/unique_labels_scripting. 
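The scoping rule above (a label may be reused across sibling scopes but must not shadow a label of an enclosing scope) can be sketched independently of the parser. The snippet below is only an illustration, not the code added by this patch; the actual check runs inside the parser's visit methods, the `withLabel` helper and `seenLabels` set are hypothetical names, and case-insensitive comparison is an assumption.
```
// Minimal standalone sketch of the rule described in the commit message; not the
// parser code itself. Names below (LabelScopeSketch, withLabel, seenLabels) are
// hypothetical, and lowercasing assumes labels compare case-insensitively.
import scala.collection.mutable

object LabelScopeSketch {
  // Labels of all enclosing scopes currently being visited.
  private val seenLabels = mutable.Set.empty[String]

  // Enter a labeled scope: reject a label that shadows an enclosing one, visit the
  // body, then drop the label so a sibling scope at the same nesting level can reuse it.
  def withLabel[T](label: String)(body: => T): T = {
    val normalized = label.toLowerCase
    if (!seenLabels.add(normalized)) {
      throw new IllegalStateException(s"Label '$label' already exists in an enclosing scope")
    }
    try body
    finally seenLabels -= normalized
  }
}

// withLabel("lbl") { withLabel("lbl") { () } }      // fails: shadows the enclosing label
// withLabel("lbl") { () }; withLabel("lbl") { () }  // fine: sibling scopes may reuse a name
```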
Authored-by: Milan Dankovic Signed-off-by: Wenchen Fan --- .../resources/error/error-conditions.json | 6 + .../sql/catalyst/parser/AstBuilder.scala | 151 ++++++----- .../sql/catalyst/parser/ParserUtils.scala | 84 +++++- .../spark/sql/errors/SqlScriptingErrors.scala | 8 + .../parser/SqlScriptingParserSuite.scala | 242 ++++++++++++++++++ 5 files changed, 430 insertions(+), 61 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index eb772f053a88..63c54a71b904 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3411,6 +3411,12 @@ ], "sqlState" : "42K0L" }, + "LABEL_ALREADY_EXISTS" : { + "message" : [ + "The label