Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/sql-migration-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,8 @@ license: |

- In Spark 3.0.2, `PARTITION(col=null)` is always parsed as a null literal in the partition spec. In Spark 3.0.1 or earlier, it is parsed as a string literal of its text representation, e.g., string "null", if the partition column is string type. To restore the legacy behavior, you can set `spark.sql.legacy.parseNullPartitionSpecAsStringLiteral` as true.

- In Spark 3.0.0, the output schema of `SHOW DATABASES` becomes `namespace: string`. In Spark version 2.4 and earlier, the schema was `databaseName: string`. Since Spark 3.0.2, you can restore the old schema by setting `spark.sql.legacy.keepCommandOutputSchema` to `true`.

## Upgrading from Spark SQL 3.0 to 3.0.1

- In Spark 3.0, JSON datasource and JSON function `schema_of_json` infer TimestampType from string values if they match the pattern defined by the JSON option `timestampFormat`. Since version 3.0.1, the timestamp type inference is disabled by default. Set the JSON option `inferTimestamp` to `true` to enable such type inference.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -325,11 +325,13 @@ case class AlterNamespaceSetLocation(
*/
case class ShowNamespaces(
namespace: LogicalPlan,
pattern: Option[String]) extends Command {
pattern: Option[String],
override val output: Seq[Attribute] = ShowNamespaces.OUTPUT) extends Command {
Copy link
Contributor Author

@cloud-fan cloud-fan Feb 4, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to put the output field in the constructor so that it's more stable (copying the node will not regenerate the output attributes again). It also helps if we want to support self-join later.

override def children: Seq[LogicalPlan] = Seq(namespace)
}

override val output: Seq[Attribute] = Seq(
AttributeReference("namespace", StringType, nullable = false)())
/**
 * Companion object holding the default output schema of the `ShowNamespaces` command.
 *
 * The schema is supplied as a constructor default on the case class (rather than a
 * method override) so that copying the plan node does not regenerate the output
 * attributes, which keeps attribute ids stable (see SPARK-34359).
 */
object ShowNamespaces {
  // Single non-nullable string column named "namespace". Prior to Spark 3.0 the
  // column was named "databaseName"; the legacy name can be restored via
  // spark.sql.legacy.keepCommandOutputSchema (handled in ResolveSessionCatalog).
  val OUTPUT = Seq(AttributeReference("namespace", StringType, nullable = false)())
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3069,6 +3069,15 @@ object SQLConf {
.booleanConf
.createWithDefault(false)

  // SPARK-34359: legacy flag that restores the pre-3.0 output schema of commands.
  // When enabled, e.g. `SHOW DATABASES` reports its output column as `databaseName`
  // (the Spark 2.4 name) instead of `namespace`. Internal because only users pinned
  // to old BI tools that depend on the legacy column names should need it.
  val LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA =
    buildConf("spark.sql.legacy.keepCommandOutputSchema")
      .internal()
      .doc("When true, Spark will keep the output schema of commands such as SHOW DATABASES " +
        "unchanged, for v1 catalog and/or table.")
      .version("3.0.2")
      .booleanConf
      .createWithDefault(false)

/**
* Holds information about keys that have been deprecated.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ class ResolveSessionCatalog(val catalogManager: CatalogManager)
case AlterNamespaceSetLocation(DatabaseInSessionCatalog(db), location) =>
AlterDatabaseSetLocationCommand(db, location)

case s @ ShowNamespaces(ResolvedNamespace(cata, _), _, output) if isSessionCatalog(cata) =>
if (conf.getConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA)) {
assert(output.length == 1)
s.copy(output = Seq(output.head.withName("databaseName")))
} else {
s
}

case RenameTable(ResolvedV1TableOrViewIdentifier(oldName), newName, isView) =>
AlterTableRenameCommand(oldName.asTableIdentifier, newName.asTableIdentifier, isView)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,8 @@ class DataSourceV2Strategy(session: SparkSession) extends Strategy with Predicat
case DropNamespace(ResolvedNamespace(catalog, ns), ifExists, cascade) =>
DropNamespaceExec(catalog, ns, ifExists, cascade) :: Nil

case r @ ShowNamespaces(ResolvedNamespace(catalog, ns), pattern) =>
ShowNamespacesExec(r.output, catalog.asNamespaceCatalog, ns, pattern) :: Nil
case ShowNamespaces(ResolvedNamespace(catalog, ns), pattern, output) =>
ShowNamespacesExec(output, catalog.asNamespaceCatalog, ns, pattern) :: Nil

case r @ ShowTables(ResolvedNamespace(catalog, ns), pattern) =>
ShowTablesExec(r.output, catalog.asTableCatalog, ns, pattern) :: Nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ trait ShowNamespacesSuiteBase extends command.ShowNamespacesSuiteBase {
}.getMessage
assert(errMsg.contains("Namespace 'dummy' not found"))
}

test("SPARK-34359: keep the legacy output schema") {
withSQLConf(SQLConf.LEGACY_KEEP_COMMAND_OUTPUT_SCHEMA.key -> "true") {
assert(sql("SHOW NAMESPACES").schema.fieldNames.toSeq == Seq("databaseName"))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usually, people don't rely on the output schema of commands, but some BI tools may rely on it and users are not able to update the BI tools. That's why I think it deserves a legacy config.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usually, people don't rely on the output schema of commands, but some BI tools may rely on it and users are not able to update the BI tools. That's why I think it deserves a legacy config.

That's an important reason. +1 for this.

}
}
}

class ShowNamespacesSuite extends ShowNamespacesSuiteBase with CommandSuiteBase {
Expand Down