From 2568d507f7878c92b803eacf2f9b5029b5fd7708 Mon Sep 17 00:00:00 2001
From: s71955
Date: Sat, 7 Sep 2019 23:26:02 +0530
Subject: [PATCH 1/3] [SPARK-28930][SQL] Last Access Time shall display
 'UNKNOWN' since the system cannot currently evaluate it, and null comment
 values shall display in their capital form 'NULL' in the SQL client to
 match Hive beeline/Spark beeline/Spark SQL

What changes were proposed in this pull request?

When a column has no comment, the Spark Scala shell shows "null" in
lowercase, while everywhere else (Hive beeline, Spark beeline, Spark SQL)
it is shown in capitals as "NULL". This patch shows the value in its
capital form "NULL" in the SQL client so that the display format matches
Hive beeline/Spark beeline/Spark SQL.

It also corrects the Last Access time: the value shall display "UNKNOWN",
since the system does not currently support evaluating the last access
time.

Regarding issue 2 mentioned in the JIRA, Spark SQL "DESC FORMATTED
tablename" not showing the "# col_name, data_type, comment" header: the
header seems to have been removed intentionally as part of SPARK-20954.

Does this PR introduce any user-facing change?

No

How was this patch tested?

Tested locally and corrected a unit test.
---
 .../apache/spark/sql/catalyst/catalog/interface.scala | 11 +++++++----
 .../apache/spark/sql/execution/command/tables.scala   |  3 ++-
 .../spark/sql/hive/execution/HiveDDLSuite.scala       | 10 +++++-----
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index ce8c23ac6dce..dc43c3434b80 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -104,7 +104,7 @@ case class CatalogTablePartition(
     storage: CatalogStorageFormat,
     parameters: Map[String, String] = Map.empty,
     createTime: Long = System.currentTimeMillis,
-    lastAccessTime: Long = -1,
+    lastAccessTime: Long = 0,
     stats: Option[CatalogStatistics] = None) {
 
   def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
@@ -117,7 +117,7 @@ case class CatalogTablePartition(
     }
     map.put("Created Time", new Date(createTime).toString)
     val lastAccess = {
-      if (-1 == lastAccessTime) "UNKNOWN" else new Date(lastAccessTime).toString
+      if (0 == lastAccessTime) "UNKNOWN" else new Date(lastAccessTime).toString
     }
     map.put("Last Access", lastAccess)
     stats.foreach(s => map.put("Partition Statistics", s.simpleString))
@@ -236,7 +236,7 @@ case class CatalogTable(
     bucketSpec: Option[BucketSpec] = None,
     owner: String = "",
     createTime: Long = System.currentTimeMillis,
-    lastAccessTime: Long = -1,
+    lastAccessTime: Long = 0,
     createVersion: String = "",
     properties: Map[String, String] = Map.empty,
     stats: Option[CatalogStatistics] = None,
@@ -320,12 +320,15 @@ case class CatalogTable(
     val map = new mutable.LinkedHashMap[String, String]()
     val tableProperties = properties.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]")
     val partitionColumns = partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]")
+    val lastAccess = {
+      if (0 == lastAccessTime) "UNKNOWN" else new Date(lastAccessTime).toString
+    }
 
     identifier.database.foreach(map.put("Database", _))
     map.put("Table", identifier.table)
     if (owner != null && owner.nonEmpty) map.put("Owner", owner)
     map.put("Created Time", new Date(createTime).toString)
-    map.put("Last Access", new Date(lastAccessTime).toString)
+    map.put("Last Access", lastAccess)
map.put("Created By", "Spark " + createVersion) map.put("Type", tableType.name) provider.foreach(map.put("Provider", _)) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala index 9377cb017467..acb6e7a17548 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -510,7 +510,8 @@ abstract class DescribeCommandBase extends RunnableCommand { append(buffer, s"# ${output.head.name}", output(1).name, output(2).name) } schema.foreach { column => - append(buffer, column.name, column.dataType.simpleString, column.getComment().orNull) + append(buffer, column.name, column.dataType.simpleString, + column.getComment().getOrElse("NULL")) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index cd8e2eaa2b4d..090d0779be82 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -1042,11 +1042,11 @@ class HiveDDLSuite assert(sql("DESC tbl").collect().containsSlice( Seq( - Row("a", "int", null), - Row("b", "int", null), + Row("a", "int", "NULL"), + Row("b", "int", "NULL"), Row("# Partition Information", "", ""), Row("# col_name", "data_type", "comment"), - Row("b", "int", null) + Row("b", "int", "NULL") ) )) } @@ -1617,7 +1617,7 @@ class HiveDDLSuite val desc = sql("DESC FORMATTED t1").collect().toSeq - assert(desc.contains(Row("id", "bigint", null))) + assert(desc.contains(Row("id", "bigint", "NULL"))) } } } @@ -2435,7 +2435,7 @@ class HiveDDLSuite .select("data_type") // check if the last access time doesnt have the default date of year // 1970 as its a wrong access time - assert(!(desc.first.toString.contains("1970"))) + assert((desc.first.toString.contains("UNKNOWN"))) } } From dc11f3aa5712a2d224d5dfd386b2c2995ea91be0 Mon Sep 17 00:00:00 2001 From: s71955 Date: Wed, 11 Sep 2019 13:00:14 +0530 Subject: [PATCH 2/3] [SPARK-28930][SQL] Last Access Time value shall display 'UNKNOWN' as currently system cannot evaluate the last access time, and 'null' values will be shown in its capital form 'NULL' for SQL client to make the display format similar to spark-shell. What changes were proposed in this pull request? If there is no comment for spark scala shell shows "null" in small letters but all other places Hive beeline/Spark beeline/Spark SQL it is showing in CAPITAL "NULL". In this patch shown in its capital form 'NULL' for SQL client to make the display format similar to Hive beeline/Spark beeline/Spark SQL. Also corrected the Last Access time, the value shall display 'UNKNOWN' as currently system wont support the last access time evaluation. Issue 2 mentioned in JIRA Spark SQL "desc formatted tablename" is not showing the header # col_name,data_type,comment , seems to be the header has been removed knowingly as part of SPARK-20954. Does this PR introduce any user-facing change? No How was this patch tested? Locally and corrected a ut. 
---
 .../apache/spark/sql/catalyst/trees/TreeNodeSuite.scala | 2 +-
 .../org/apache/spark/sql/execution/command/tables.scala | 3 +--
 .../apache/spark/sql/hive/execution/HiveDDLSuite.scala  | 8 ++++----
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
index fbaa5527a705..d0c9c4917698 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
@@ -513,7 +513,7 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper {
       "partitionColumnNames" -> List.empty[String],
       "owner" -> "",
       "createTime" -> 0,
-      "lastAccessTime" -> -1,
+      "lastAccessTime" -> 0,
       "createVersion" -> "2.x",
       "tracksPartitionsInCatalog" -> false,
       "properties" -> JNull,
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index acb6e7a17548..9377cb017467 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -510,8 +510,7 @@ abstract class DescribeCommandBase extends RunnableCommand {
       append(buffer, s"# ${output.head.name}", output(1).name, output(2).name)
     }
     schema.foreach { column =>
-      append(buffer, column.name, column.dataType.simpleString,
-        column.getComment().getOrElse("NULL"))
+      append(buffer, column.name, column.dataType.simpleString, column.getComment().orNull)
     }
   }
 
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
index 090d0779be82..2af04cd7b5fa 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala
@@ -1042,11 +1042,11 @@ class HiveDDLSuite
 
     assert(sql("DESC tbl").collect().containsSlice(
       Seq(
-        Row("a", "int", "NULL"),
-        Row("b", "int", "NULL"),
+        Row("a", "int", null),
+        Row("b", "int", null),
         Row("# Partition Information", "", ""),
         Row("# col_name", "data_type", "comment"),
-        Row("b", "int", "NULL")
+        Row("b", "int", null)
       )
     ))
   }
@@ -1617,7 +1617,7 @@ class HiveDDLSuite
 
         val desc = sql("DESC FORMATTED t1").collect().toSeq
 
-        assert(desc.contains(Row("id", "bigint", "NULL")))
+        assert(desc.contains(Row("id", "bigint", null)))
       }
     }
   }

From b9f20ba9f8b25cb6b0403b2c5f2025b4bd1fd443 Mon Sep 17 00:00:00 2001
From: s71955
Date: Mon, 16 Sep 2019 20:16:09 +0530
Subject: [PATCH 3/3] [SPARK-28930][SQL] Last Access Time shall display
 'UNKNOWN' since the system cannot currently evaluate it, and null comment
 values shall display in their capital form 'NULL' in the SQL client to
 match Hive beeline/Spark beeline/Spark SQL

What changes were proposed in this pull request?

When a column has no comment, the Spark Scala shell shows "null" in
lowercase, while everywhere else (Hive beeline, Spark beeline, Spark SQL)
it is shown in capitals as "NULL". This patch shows the value in its
capital form "NULL" in the SQL client so that the display format matches
Hive beeline/Spark beeline/Spark SQL.

It also corrects the Last Access time: the value shall display "UNKNOWN",
since the system does not currently support evaluating the last access
time.
Regarding issue 2 mentioned in the JIRA, Spark SQL "DESC FORMATTED
tablename" not showing the "# col_name, data_type, comment" header: the
header seems to have been removed intentionally as part of SPARK-20954.

Does this PR introduce any user-facing change?

No

How was this patch tested?

Tested locally and corrected a unit test.
---
 .../org/apache/spark/sql/catalyst/catalog/interface.scala | 8 ++++----
 .../apache/spark/sql/catalyst/trees/TreeNodeSuite.scala   | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
index dc43c3434b80..01b21feab0dd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/interface.scala
@@ -104,7 +104,7 @@ case class CatalogTablePartition(
     storage: CatalogStorageFormat,
     parameters: Map[String, String] = Map.empty,
     createTime: Long = System.currentTimeMillis,
-    lastAccessTime: Long = 0,
+    lastAccessTime: Long = -1,
     stats: Option[CatalogStatistics] = None) {
 
   def toLinkedHashMap: mutable.LinkedHashMap[String, String] = {
@@ -117,7 +117,7 @@ case class CatalogTablePartition(
     }
     map.put("Created Time", new Date(createTime).toString)
     val lastAccess = {
-      if (0 == lastAccessTime) "UNKNOWN" else new Date(lastAccessTime).toString
+      if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString
     }
     map.put("Last Access", lastAccess)
     stats.foreach(s => map.put("Partition Statistics", s.simpleString))
@@ -236,7 +236,7 @@ case class CatalogTable(
     bucketSpec: Option[BucketSpec] = None,
     owner: String = "",
     createTime: Long = System.currentTimeMillis,
-    lastAccessTime: Long = 0,
+    lastAccessTime: Long = -1,
     createVersion: String = "",
     properties: Map[String, String] = Map.empty,
     stats: Option[CatalogStatistics] = None,
@@ -321,7 +321,7 @@ case class CatalogTable(
     val tableProperties = properties.map(p => p._1 + "=" + p._2).mkString("[", ", ", "]")
     val partitionColumns = partitionColumnNames.map(quoteIdentifier).mkString("[", ", ", "]")
     val lastAccess = {
-      if (0 == lastAccessTime) "UNKNOWN" else new Date(lastAccessTime).toString
+      if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString
     }
 
     identifier.database.foreach(map.put("Database", _))
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
index d0c9c4917698..fbaa5527a705 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/trees/TreeNodeSuite.scala
@@ -513,7 +513,7 @@ class TreeNodeSuite extends SparkFunSuite with SQLHelper {
       "partitionColumnNames" -> List.empty[String],
       "owner" -> "",
       "createTime" -> 0,
-      "lastAccessTime" -> 0,
+      "lastAccessTime" -> -1,
       "createVersion" -> "2.x",
       "tracksPartitionsInCatalog" -> false,
       "properties" -> JNull,
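
Net effect of the three patches: -1 stays the "never accessed" sentinel for
lastAccessTime, and any non-positive value is rendered as "UNKNOWN", so the
bogus 1970 epoch date can no longer appear in DESC output. Below is a
minimal, self-contained Scala sketch of that final display rule; the
LastAccessFormatter and demo object names are illustrative only, not part
of Spark's API.

import java.util.Date

// Mirrors the `lastAccessTime <= 0` check that patch 3 of this series
// settles on: treat both the -1 sentinel and a stray 0 as "unknown".
object LastAccessFormatter {
  def formatLastAccess(lastAccessTime: Long): String =
    if (lastAccessTime <= 0) "UNKNOWN" else new Date(lastAccessTime).toString
}

// Usage: any non-positive time prints UNKNOWN instead of an epoch date.
object LastAccessFormatterDemo extends App {
  println(LastAccessFormatter.formatLastAccess(-1L))                       // UNKNOWN
  println(LastAccessFormatter.formatLastAccess(0L))                        // UNKNOWN
  println(LastAccessFormatter.formatLastAccess(System.currentTimeMillis))  // current date
}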