
Commit 4231d58

[SPARK-51307][SQL][3.5] locationUri in CatalogStorageFormat shall be decoded for display
### What changes were proposed in this pull request?

This PR uses CatalogUtils.URIToString instead of URI.toString to decode the location URI.

### Why are the changes needed?

For partition specs such as test1=X'16', test3=timestamp'2018-11-17 13:33:33', the stored path encodes the special characters, e.g. `test1=%16/test3=2018-11-17 13%3A33%3A33`. When that path string is then resolved to a URI object, the fragment becomes `test1=%2516/test3=2018-11-17 13%253A33%253A33`, so we need to decode `%25` -> `%` before displaying it to users.

### Does this PR introduce _any_ user-facing change?

Yes, DESC TABLE will no longer show 2x-encoded paths.

### How was this patch tested?

New tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #50164 from yaooqinn/SPARK-51307-35.

Authored-by: Kent Yao <yao@apache.org>
Signed-off-by: Kent Yao <yao@apache.org>
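
As a minimal sketch of the double-encoding described above (not part of the commit; the warehouse path is invented), Hadoop's Path shows both the escape-on-resolve step and the decode-on-display step:

```scala
import org.apache.hadoop.fs.Path

object EncodingSketch extends App {
  // On disk, the ':' in the timestamp partition value is stored escaped as %3A.
  val onDisk = "/warehouse/f/test3=2018-11-17 13%3A33%3A33"

  // Resolving the path string to a java.net.URI escapes '%' itself ('%' -> %25),
  // producing the 2x-encoded fragment 13%253A33%253A33.
  val uri = new Path(onDisk).toUri
  println(uri)           // .../test3=2018-11-17%2013%253A33%253A33

  // Rendering the URI back through Path decodes %25 -> %, restoring the
  // single-encoded form that should be shown to users.
  println(new Path(uri)) // .../test3=2018-11-17 13%3A33%3A33
}
```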
1 parent 0b88c4d

5 files changed: +141 -17 lines

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala

Lines changed: 1 addition & 1 deletion
```diff
@@ -76,7 +76,7 @@ case class CatalogStorageFormat(
 
   def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
     val map = new mutable.LinkedHashMap[String, String]()
-    locationUri.foreach(l => map.put("Location", l.toString))
+    locationUri.foreach(l => map.put("Location", CatalogUtils.URIToString(l)))
     serde.foreach(map.put("Serde Library", _))
     inputFormat.foreach(map.put("InputFormat", _))
     outputFormat.foreach(map.put("OutputFormat", _))
```
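
For context, a hedged before/after sketch of this one-line change (the sample location URI is invented; CatalogUtils is org.apache.spark.sql.catalyst.catalog.CatalogUtils):

```scala
import java.net.URI

import org.apache.spark.sql.catalyst.catalog.CatalogUtils

object DisplaySketch extends App {
  // A location URI carrying a double-escaped partition path, as stored before the fix.
  val locationUri = new URI("file:/warehouse/f/C=2018-11-17%2013%253A33%253A33")

  // Before: URI.toString keeps the raw, double-escaped form.
  println(locationUri.toString)                  // file:/warehouse/f/C=2018-11-17%2013%253A33%253A33

  // After: CatalogUtils.URIToString decodes it once for display.
  println(CatalogUtils.URIToString(locationUri)) // file:/warehouse/f/C=2018-11-17 13%3A33%3A33
}
```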

sql/core/src/test/resources/sql-tests/analyzer-results/describe.sql.out

Lines changed: 51 additions & 16 deletions
```diff
@@ -318,14 +318,7 @@ DropTableCommand `spark_catalog`.`default`.`v`, false, true, false
 -- !query
 CREATE TABLE d (a STRING DEFAULT 'default-value', b INT DEFAULT 42) USING parquet COMMENT 'table_comment'
 -- !query analysis
-org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
-{
-  "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS",
-  "sqlState" : "42P07",
-  "messageParameters" : {
-    "relationName" : "`spark_catalog`.`default`.`d`"
-  }
-}
+CreateDataSourceTableCommand `spark_catalog`.`default`.`d`, false
 
 
 -- !query
@@ -355,14 +348,7 @@ DescribeTableCommand `spark_catalog`.`default`.`d`, true, [col_name#x, data_type
 -- !query
 CREATE TABLE e (a STRING DEFAULT CONCAT('a\n b\n ', 'c\n d'), b INT DEFAULT 42) USING parquet COMMENT 'table_comment'
 -- !query analysis
-org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
-{
-  "errorClass" : "TABLE_OR_VIEW_ALREADY_EXISTS",
-  "sqlState" : "42P07",
-  "messageParameters" : {
-    "relationName" : "`spark_catalog`.`default`.`e`"
-  }
-}
+CreateDataSourceTableCommand `spark_catalog`.`default`.`e`, false
 
 
 -- !query
@@ -387,3 +373,52 @@ DescribeTableCommand `spark_catalog`.`default`.`e`, true, [col_name#x, data_type
 DESC FORMATTED e
 -- !query analysis
 DescribeTableCommand `spark_catalog`.`default`.`e`, true, [col_name#x, data_type#x, comment#x]
+
+
+-- !query
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
+-- !query analysis
+CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`f`, ErrorIfExists, [A, B, C]
+   +- Project [APACHE AS A#x, cast(SPARK as binary) AS B#x, 2018-11-17 13:33:33 AS C#x]
+      +- OneRowRelation
+
+
+-- !query
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
+-- !query analysis
+DescribeTableCommand `spark_catalog`.`default`.`f`, [B=SPARK, C=2018-11-17 13:33:33], true, [col_name#x, data_type#x, comment#x]
+
+
+-- !query
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
+-- !query analysis
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "error" : "'JSON'",
+    "hint" : ": extra input 'JSON'"
+  }
+}
+
+
+-- !query
+DROP TABLE d
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.d
+
+
+-- !query
+DROP TABLE e
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.e
+
+
+-- !query
+DROP TABLE f
+-- !query analysis
+DropTable false, false
++- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.f
```

sql/core/src/test/resources/sql-tests/inputs/describe.sql

Lines changed: 12 additions & 0 deletions
```diff
@@ -119,3 +119,15 @@ DESC EXTENDED e;
 DESC TABLE EXTENDED e;
 
 DESC FORMATTED e;
+
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C;
+
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33');
+
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON;
+
+DROP TABLE d;
+
+DROP TABLE e;
+
+DROP TABLE f;
```

sql/core/src/test/resources/sql-tests/results/describe.sql.out

Lines changed: 74 additions & 0 deletions
```diff
@@ -864,3 +864,77 @@ Location [not included in comparison]/{warehouse_dir}/e
 # Column Default Values
 a string CONCAT('a\n b\n ', 'c\n d')
 b int 42
+
+
+-- !query
+CREATE TABLE f USING json PARTITIONED BY (B, C) AS SELECT 'APACHE' A, CAST('SPARK' AS BINARY) B, TIMESTAMP'2018-11-17 13:33:33' C
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DESC FORMATTED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33')
+-- !query schema
+struct<col_name:string,data_type:string,comment:string>
+-- !query output
+A string
+B binary
+C timestamp
+# Partition Information
+# col_name data_type comment
+B binary
+C timestamp
+
+# Detailed Partition Information
+Database default
+Table f
+Partition Values [B=SPARK, C=2018-11-17 13:33:33]
+Location [not included in comparison]/{warehouse_dir}/f/B=SPARK/C=2018-11-17 13%3A33%3A33
+Partition Parameters {numFiles=1, totalSize=15, transient_lastDdlTime=[not included in comparison]}
+Created Time [not included in comparison]
+Last Access [not included in comparison]
+
+# Storage Information
+Location [not included in comparison]/{warehouse_dir}/f
+
+
+-- !query
+DESC TABLE EXTENDED f PARTITION (B='SPARK', C=TIMESTAMP'2018-11-17 13:33:33') AS JSON
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.catalyst.parser.ParseException
+{
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42601",
+  "messageParameters" : {
+    "error" : "'JSON'",
+    "hint" : ": extra input 'JSON'"
+  }
+}
+
+
+-- !query
+DROP TABLE d
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE e
+-- !query schema
+struct<>
+-- !query output
+
+
+
+-- !query
+DROP TABLE f
+-- !query schema
+struct<>
+-- !query output
+
```

sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestHelper.scala

Lines changed: 3 additions & 0 deletions
```diff
@@ -48,6 +48,9 @@ trait SQLQueryTestHelper extends Logging {
       .replaceAll(s"file:[^\\s,]*$clsName", s"file:$notIncludedMsg/{warehouse_dir}")
       .replaceAll("Created By.*", s"Created By $notIncludedMsg")
       .replaceAll("Created Time.*", s"Created Time $notIncludedMsg")
+      .replaceAll(s"transient_lastDdlTime=\\d+", s"transient_lastDdlTime=$notIncludedMsg")
+      .replaceAll(s""""transient_lastDdlTime":"\\d+"""",
+        s""""transient_lastDdlTime $notIncludedMsg":"None"""")
       .replaceAll("Last Access.*", s"Last Access $notIncludedMsg")
       .replaceAll("Owner\t.*", s"Owner\t$notIncludedMsg")
       .replaceAll("Partition Statistics\t\\d+", s"Partition Statistics\t$notIncludedMsg")
```
