From 4607af16659208c09af08cd9859b98e8a8ab350d Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Wed, 6 Nov 2024 09:05:42 -0800 Subject: [PATCH 1/2] [SPARK-50246][SQL] Assign appropriate error condition for _LEGACY_ERROR_TEMP_2167: MALFORMED_RECORD_IN_SCHEMA_INFERENCE --- .../src/main/resources/error/error-conditions.json | 11 ++++++----- .../spark/sql/errors/QueryExecutionErrors.scala | 4 ++-- .../sql/execution/datasources/json/JsonSuite.scala | 4 ++-- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 18603b61d8ae..8c9772d21bcc 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -3439,6 +3439,12 @@ }, "sqlState" : "22023" }, + "MALFORMED_RECORD_IN_SCHEMA_INFERENCE" : { + "message" : [ + "Malformed records are detected in schema inference. Parse Mode: <failFastMode>. Reasons: Failed to infer a common schema. Struct types are expected, but <dataType> was found." + ], + "sqlState" : "22023" + }, "MALFORMED_VARIANT" : { "message" : [ "Variant binary is malformed. Please check the data source is valid." @@ -7284,11 +7290,6 @@ "Malformed JSON." ] }, - "_LEGACY_ERROR_TEMP_2167" : { - "message" : [ - "Malformed records are detected in schema inference. Parse Mode: <failFastMode>. Reasons: Failed to infer a common schema. Struct types are expected, but `<dataType>` was found." - ] - }, "_LEGACY_ERROR_TEMP_2168" : { "message" : [ "Decorrelate inner query through <plan> is not supported." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 2cc223ba69fa..e64d8f5cd4de 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1437,10 +1437,10 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE def malformedRecordsDetectedInSchemaInferenceError(dataType: DataType): Throwable = { new SparkException( - errorClass = "_LEGACY_ERROR_TEMP_2167", + errorClass = "MALFORMED_RECORD_IN_SCHEMA_INFERENCE", messageParameters = Map( "failFastMode" -> FailFastMode.name, - "dataType" -> dataType.catalogString), + "dataType" -> toSQLType(dataType)), cause = null) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 06183596a54a..a72c68c0e46c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -2078,8 +2078,8 @@ abstract class JsonSuite .option("mode", "FAILFAST") .json(path) }, - condition = "_LEGACY_ERROR_TEMP_2167", - parameters = Map("failFastMode" -> "FAILFAST", "dataType" -> "string|bigint")) + condition = "MALFORMED_RECORD_IN_SCHEMA_INFERENCE", + parameters = Map("failFastMode" -> "FAILFAST", "dataType" -> "\"STRING\"|\"BIGINT\"")) val ex = intercept[SparkException] { spark.read From 0b15a47428ebc2acaa616e7b9833543fef2a2d1a Mon Sep 17 00:00:00 2001 From: Haejoon Lee Date: Wed, 13 Nov 2024 09:47:13 +0900 Subject: [PATCH 2/2] adjusted comment --- .../src/main/resources/error/error-conditions.json | 12 ++++++------ .../spark/sql/errors/QueryExecutionErrors.scala | 4 ++-- 
.../sql/execution/datasources/json/JsonSuite.scala | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index 8c9772d21bcc..98b04ccaf0f5 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -2655,6 +2655,12 @@ ], "sqlState" : "2203G" }, + "INVALID_JSON_RECORD_TYPE" : { + "message" : [ + "Detected an invalid type of a JSON record while inferring a common schema in the mode <failFastMode>. Expected a STRUCT type, but found <invalidType>." + ], + "sqlState" : "22023" + }, "INVALID_JSON_ROOT_FIELD" : { "message" : [ "Cannot convert JSON root field to target Spark type." @@ -3439,12 +3445,6 @@ }, "sqlState" : "22023" }, - "MALFORMED_RECORD_IN_SCHEMA_INFERENCE" : { - "message" : [ - "Malformed records are detected in schema inference. Parse Mode: <failFastMode>. Reasons: Failed to infer a common schema. Struct types are expected, but <dataType> was found." - ], - "sqlState" : "22023" - }, "MALFORMED_VARIANT" : { "message" : [ "Variant binary is malformed. Please check the data source is valid." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index e64d8f5cd4de..a9beb11daa91 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1437,10 +1437,10 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE def malformedRecordsDetectedInSchemaInferenceError(dataType: DataType): Throwable = { new SparkException( - errorClass = "MALFORMED_RECORD_IN_SCHEMA_INFERENCE", + errorClass = "INVALID_JSON_RECORD_TYPE", messageParameters = Map( "failFastMode" -> FailFastMode.name, - "dataType" -> toSQLType(dataType)), + "invalidType" -> toSQLType(dataType)), cause = null) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index a72c68c0e46c..dfbc8e5279aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -2078,8 +2078,8 @@ abstract class JsonSuite .option("mode", "FAILFAST") .json(path) }, - condition = "MALFORMED_RECORD_IN_SCHEMA_INFERENCE", - parameters = Map("failFastMode" -> "FAILFAST", "dataType" -> "\"STRING\"|\"BIGINT\"")) + condition = "INVALID_JSON_RECORD_TYPE", + parameters = Map("failFastMode" -> "FAILFAST", "invalidType" -> "\"STRING\"|\"BIGINT\"")) val ex = intercept[SparkException] { spark.read