From 07d0b51199d7decaa7040bfc00365a1dcc3275f8 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 25 Sep 2024 10:41:55 +0800 Subject: [PATCH 1/3] init --- .../spark/sql/errors/QueryExecutionErrors.scala | 2 +- .../results/ansi/string-functions.sql.out | 16 ++++++++-------- .../sql-tests/results/string-functions.sql.out | 16 ++++++++-------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 4a23e9766fc5..36bb995c040b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2766,7 +2766,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE "functionName" -> toSQLId(functionName), "parameter" -> toSQLId("charset"), "charset" -> charset, - "charsets" -> CharsetProvider.VALID_CHARSETS.mkString(", "))) + "charsets" -> CharsetProvider.VALID_CHARSETS.toSeq.sorted.mkString(", "))) } def malformedCharacterCoding(functionName: String, charset: String): RuntimeException = { diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out index cf1bce3c0e50..706673606625 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/string-functions.sql.out @@ -842,7 +842,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "WINDOWS-1252", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`encode`", "parameter" : "`charset`" } @@ -860,7 +860,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "WINDOWS-1252", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`encode`", "parameter" : "`charset`" } @@ -878,7 +878,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "Windows-xxx", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`encode`", "parameter" : "`charset`" } @@ -896,7 +896,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "Windows-xxx", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`encode`", "parameter" : "`charset`" } @@ -1140,7 +1140,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "Windows-xxx", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`decode`", "parameter" : "`charset`" } @@ -1158,7 +1158,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "Windows-xxx", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`decode`", "parameter" : "`charset`" } @@ -1208,7 +1208,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "WINDOWS-1252", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`decode`", "parameter" : "`charset`" } @@ -1226,7 +1226,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "WINDOWS-1252", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`decode`", "parameter" : "`charset`" } diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 14d7b31f8c63..3f9f24f817f2 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -778,7 +778,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "WINDOWS-1252", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`encode`", "parameter" : "`charset`" } @@ -796,7 +796,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "WINDOWS-1252", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`encode`", "parameter" : "`charset`" } @@ -814,7 +814,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "Windows-xxx", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`encode`", "parameter" : "`charset`" } @@ -832,7 +832,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "Windows-xxx", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`encode`", "parameter" : "`charset`" } @@ -1076,7 +1076,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "Windows-xxx", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`decode`", "parameter" : "`charset`" } @@ -1094,7 +1094,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "Windows-xxx", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`decode`", "parameter" : "`charset`" } @@ -1144,7 +1144,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "WINDOWS-1252", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`decode`", "parameter" : "`charset`" } @@ -1162,7 +1162,7 @@ org.apache.spark.SparkIllegalArgumentException "sqlState" : "22023", "messageParameters" : { "charset" : "WINDOWS-1252", - "charsets" : "utf-8, utf-16be, iso-8859-1, utf-16le, utf-16, utf-32, us-ascii", + "charsets" : "iso-8859-1, us-ascii, utf-16, utf-16be, utf-16le, utf-32, utf-8", "functionName" : "`decode`", "parameter" : "`charset`" } From 348dc519b9b4e277e82be45c53980ec899f903b8 Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 25 Sep 2024 11:23:59 +0800 Subject: [PATCH 2/3] fix --- .../apache/spark/sql/execution/datasources/csv/CSVSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index e2d1d9b05c3c..68831fbb3948 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -271,7 +271,7 @@ abstract class CSVSuite "charset" -> "1-9588-osi", "functionName" -> toSQLId("CSVOptions"), "parameter" -> toSQLId("charset"), - "charsets" -> CharsetProvider.VALID_CHARSETS.mkString(", ")) + "charsets" -> CharsetProvider.VALID_CHARSETS.toSeq.sorted.mkString(", ")) ) } @@ -655,7 +655,7 @@ abstract class CSVSuite "charset" -> "1-9588-osi", "functionName" -> toSQLId("CSVOptions"), "parameter" -> toSQLId("charset"), - "charsets" -> CharsetProvider.VALID_CHARSETS.mkString(", ")) + "charsets" -> CharsetProvider.VALID_CHARSETS.toSeq.sorted.mkString(", ")) ) } From 9347c6f228fde8aa0c59c123a6541a5ef221f42b Mon Sep 17 00:00:00 2001 From: Ruifeng Zheng Date: Wed, 25 Sep 2024 11:35:55 +0800 Subject: [PATCH 3/3] simplify --- .../org/apache/spark/sql/catalyst/util/CharsetProvider.scala | 2 +- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 2 +- .../apache/spark/sql/execution/datasources/csv/CSVSuite.scala | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharsetProvider.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharsetProvider.scala index 0e7fca24e137..d85673f2ce81 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharsetProvider.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/CharsetProvider.scala @@ -24,7 +24,7 @@ private[sql] object CharsetProvider { final lazy val VALID_CHARSETS = - Set("us-ascii", "iso-8859-1", "utf-8", "utf-16be", "utf-16le", "utf-16", "utf-32") + Array("us-ascii", "iso-8859-1", "utf-8", "utf-16be", "utf-16le", "utf-16", "utf-32").sorted def forName( charset: String, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 36bb995c040b..4a23e9766fc5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2766,7 +2766,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE "functionName" -> toSQLId(functionName), "parameter" -> toSQLId("charset"), "charset" -> charset, - "charsets" -> CharsetProvider.VALID_CHARSETS.toSeq.sorted.mkString(", "))) + "charsets" -> CharsetProvider.VALID_CHARSETS.mkString(", "))) } def malformedCharacterCoding(functionName: String, charset: String): RuntimeException = { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 68831fbb3948..e2d1d9b05c3c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -271,7 +271,7 @@ abstract class CSVSuite "charset" -> "1-9588-osi", "functionName" -> toSQLId("CSVOptions"), "parameter" -> toSQLId("charset"), - "charsets" -> CharsetProvider.VALID_CHARSETS.toSeq.sorted.mkString(", ")) + "charsets" -> CharsetProvider.VALID_CHARSETS.mkString(", ")) ) } @@ -655,7 +655,7 @@ abstract class CSVSuite "charset" -> "1-9588-osi", "functionName" -> toSQLId("CSVOptions"), "parameter" -> toSQLId("charset"), - "charsets" -> CharsetProvider.VALID_CHARSETS.toSeq.sorted.mkString(", ")) + "charsets" -> CharsetProvider.VALID_CHARSETS.mkString(", ")) ) }