Skip to content

Commit 550d0e7

Browse files
gatorsmile
authored and yhuai committed
[SPARK-16482][SQL] Describe Table Command for Tables Requiring Runtime Inferred Schema
#### What changes were proposed in this pull request? If we create a table pointing to a parquet/json datasets without specifying the schema, describe table command does not show the schema at all. It only shows `# Schema of this table is inferred at runtime`. In 1.6, describe table does show the schema of such a table. ~~For data source tables, to infer the schema, we need to load the data source tables at runtime. Thus, this PR calls the function `lookupRelation`.~~ For data source tables, we infer the schema before table creation. Thus, this PR set the inferred schema as the table schema when table creation. #### How was this patch tested? Added test cases Author: gatorsmile <gatorsmile@gmail.com> Closes #14148 from gatorsmile/describeSchema. (cherry picked from commit c5ec879) Signed-off-by: Yin Huai <yhuai@databricks.com>
1 parent 9e3a598 commit 550d0e7

File tree

2 files changed

+22
-22
lines changed

2 files changed

+22
-22
lines changed

sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -413,38 +413,36 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
413413
} else {
414414
val metadata = catalog.getTableMetadata(table)
415415

416+
if (DDLUtils.isDatasourceTable(metadata)) {
417+
DDLUtils.getSchemaFromTableProperties(metadata) match {
418+
case Some(userSpecifiedSchema) => describeSchema(userSpecifiedSchema, result)
419+
case None => describeSchema(catalog.lookupRelation(table).schema, result)
420+
}
421+
} else {
422+
describeSchema(metadata.schema, result)
423+
}
424+
416425
if (isExtended) {
417426
describeExtended(metadata, result)
418427
} else if (isFormatted) {
419428
describeFormatted(metadata, result)
420429
} else {
421-
describe(metadata, result)
430+
describePartitionInfo(metadata, result)
422431
}
423432
}
424433

425434
result
426435
}
427436

428-
// Shows data columns and partitioned columns (if any)
429-
private def describe(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
437+
private def describePartitionInfo(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
430438
if (DDLUtils.isDatasourceTable(table)) {
431-
val schema = DDLUtils.getSchemaFromTableProperties(table)
432-
433-
if (schema.isEmpty) {
434-
append(buffer, "# Schema of this table is inferred at runtime", "", "")
435-
} else {
436-
schema.foreach(describeSchema(_, buffer))
437-
}
438-
439439
val partCols = DDLUtils.getPartitionColumnsFromTableProperties(table)
440440
if (partCols.nonEmpty) {
441441
append(buffer, "# Partition Information", "", "")
442442
append(buffer, s"# ${output.head.name}", "", "")
443443
partCols.foreach(col => append(buffer, col, "", ""))
444444
}
445445
} else {
446-
describeSchema(table.schema, buffer)
447-
448446
if (table.partitionColumns.nonEmpty) {
449447
append(buffer, "# Partition Information", "", "")
450448
append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
@@ -454,14 +452,14 @@ case class DescribeTableCommand(table: TableIdentifier, isExtended: Boolean, isF
454452
}
455453

456454
private def describeExtended(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
457-
describe(table, buffer)
455+
describePartitionInfo(table, buffer)
458456

459457
append(buffer, "", "", "")
460458
append(buffer, "# Detailed Table Information", table.toString, "")
461459
}
462460

463461
private def describeFormatted(table: CatalogTable, buffer: ArrayBuffer[Row]): Unit = {
464-
describe(table, buffer)
462+
describePartitionInfo(table, buffer)
465463

466464
append(buffer, "", "", "")
467465
append(buffer, "# Detailed Table Information", "", "")

sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -612,15 +612,17 @@ class HiveDDLSuite
612612
}
613613

614614
test("desc table for data source table - no user-defined schema") {
615-
withTable("t1") {
616-
withTempPath { dir =>
617-
val path = dir.getCanonicalPath
618-
spark.range(1).write.parquet(path)
619-
sql(s"CREATE TABLE t1 USING parquet OPTIONS (PATH '$path')")
615+
Seq("parquet", "json", "orc").foreach { fileFormat =>
616+
withTable("t1") {
617+
withTempPath { dir =>
618+
val path = dir.getCanonicalPath
619+
spark.range(1).write.format(fileFormat).save(path)
620+
sql(s"CREATE TABLE t1 USING $fileFormat OPTIONS (PATH '$path')")
620621

621-
val desc = sql("DESC FORMATTED t1").collect().toSeq
622+
val desc = sql("DESC FORMATTED t1").collect().toSeq
622623

623-
assert(desc.contains(Row("# Schema of this table is inferred at runtime", "", "")))
624+
assert(desc.contains(Row("id", "bigint", "")))
625+
}
624626
}
625627
}
626628
}

0 commit comments

Comments (0)