diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
index 3d79a7113e0d5..a225dffb075b9 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DescribeTableExec.scala
@@ -24,8 +24,10 @@ import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.catalog.CatalogTableType
 import org.apache.spark.sql.catalyst.expressions.Attribute
 import org.apache.spark.sql.catalyst.util.{quoteIfNeeded, ResolveDefaultColumns}
-import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, Table, TableCatalog}
+import org.apache.spark.sql.connector.catalog.{CatalogV2Util, SupportsMetadataColumns, SupportsRead, Table, TableCatalog}
 import org.apache.spark.sql.connector.expressions.IdentityTransform
+import org.apache.spark.sql.connector.read.SupportsReportStatistics
+import org.apache.spark.sql.util.CaseInsensitiveStringMap
 import org.apache.spark.util.ArrayImplicits._
 
 case class DescribeTableExec(
@@ -40,6 +42,7 @@ case class DescribeTableExec(
     if (isExtended) {
       addMetadataColumns(rows)
       addTableDetails(rows)
+      addTableStats(rows)
     }
     rows.toSeq
   }
@@ -96,6 +99,23 @@ case class DescribeTableExec(
     case _ =>
   }
 
+  private def addTableStats(rows: ArrayBuffer[InternalRow]): Unit = table match {
+    case read: SupportsRead =>
+      read.newScanBuilder(CaseInsensitiveStringMap.empty()).build() match {
+        case s: SupportsReportStatistics =>
+          val stats = s.estimateStatistics()
+          val statsComponents = Seq(
+            Option.when(stats.sizeInBytes().isPresent)(s"${stats.sizeInBytes().getAsLong} bytes"),
+            Option.when(stats.numRows().isPresent)(s"${stats.numRows().getAsLong} rows")
+          ).flatten
+          if (statsComponents.nonEmpty) {
+            rows += toCatalystRow("Statistics", statsComponents.mkString(", "), null)
+          }
+        case _ =>
+      }
+    case _ =>
+  }
+
   private def addPartitioning(rows: ArrayBuffer[InternalRow]): Unit = {
     if (table.partitioning.nonEmpty) {
       val partitionColumnsOnly = table.partitioning.forall(t => t.isInstanceOf[IdentityTransform])
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
index 589283a29b852..f92a9a827b1ca 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala
@@ -3342,7 +3342,8 @@ class DataSourceV2SQLSuiteV1Filter
         Row("# Column Default Values", "", ""),
         Row("# Metadata Columns", "", ""),
         Row("id", "bigint", "42"),
-        Row("id", "bigint", null)
+        Row("id", "bigint", null),
+        Row("Statistics", "0 bytes, 0 rows", null)
       ))
     }
   }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
index a21baebe24d8f..cfd26c09bf3e5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/v2/DescribeTableSuite.scala
@@ -90,7 +90,8 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase
         Row("Location", "file:/tmp/testcat/table_name", ""),
         Row("Provider", "_", ""),
         Row(TableCatalog.PROP_OWNER.capitalize, Utils.getCurrentUserName(), ""),
-        Row("Table Properties", "[bar=baz]", "")))
+        Row("Table Properties", "[bar=baz]", ""),
+        Row("Statistics", "0 bytes, 0 rows", null)))
     }
   }
 
@@ -196,4 +197,20 @@ class DescribeTableSuite extends command.DescribeTableSuiteBase
         Row("comment", "column_comment")))
     }
   }
+
+  test("describe extended table with stats") {
+    withNamespaceAndTable("ns", "tbl") { tbl =>
+      sql(
+        s"""
+           |CREATE TABLE $tbl
+           |(key INT, col STRING)
+           |$defaultUsing""".stripMargin)
+
+      sql(s"INSERT INTO $tbl values (1, 'aaa'), (2, 'bbb'), (3, 'ccc'), (null, 'ddd')")
+      val descriptionDf = sql(s"DESCRIBE TABLE EXTENDED $tbl")
+      val stats = descriptionDf.filter("col_name == 'Statistics'").head()
+        .getAs[String]("data_type")
+      assert("""\d+\s+bytes,\s+4\s+rows""".r.matches(stats))
+    }
+  }
 }