-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-17541][SQL] fix some DDL bugs about table management when same-name temp view exists #15099
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4ab777e
1c57262
c5513d5
b48e4ae
0223fd3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -325,9 +325,9 @@ class SessionCatalog( | |
| new Path(new Path(dbLocation), formatTableName(tableIdent.table)).toString | ||
| } | ||
|
|
||
| // ------------------------------------------------------------- | ||
| // | Methods that interact with temporary and metastore tables | | ||
| // ------------------------------------------------------------- | ||
| // ---------------------------------------------- | ||
| // | Methods that interact with temp views only | | ||
| // ---------------------------------------------- | ||
|
|
||
| /** | ||
| * Create a temporary table. | ||
|
|
@@ -343,6 +343,24 @@ class SessionCatalog( | |
| tempTables.put(table, tableDefinition) | ||
| } | ||
|
|
||
| /** | ||
| * Return a temporary view exactly as it was stored. | ||
| */ | ||
| def getTempView(name: String): Option[LogicalPlan] = synchronized { | ||
| tempTables.get(formatTableName(name)) | ||
| } | ||
|
|
||
| /** | ||
| * Drop a temporary view. | ||
| */ | ||
| def dropTempView(name: String): Unit = synchronized { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add the description for this function too? |
||
| tempTables.remove(formatTableName(name)) | ||
| } | ||
|
|
||
| // ------------------------------------------------------------- | ||
| // | Methods that interact with temporary and metastore tables | | ||
| // ------------------------------------------------------------- | ||
|
|
||
| /** | ||
| * Rename a table. | ||
| * | ||
|
|
@@ -492,14 +510,6 @@ class SessionCatalog( | |
| tempTables.clear() | ||
| } | ||
|
|
||
| /** | ||
| * Return a temporary table exactly as it was stored. | ||
| * For testing only. | ||
| */ | ||
| private[catalog] def getTempTable(name: String): Option[LogicalPlan] = synchronized { | ||
| tempTables.get(formatTableName(name)) | ||
| } | ||
|
|
||
| // ---------------------------------------------------------------------------- | ||
| // Partitions | ||
| // ---------------------------------------------------------------------------- | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -47,11 +47,15 @@ case class CreateDataSourceTableCommand(table: CatalogTable, ignoreIfExists: Boo | |
| assert(table.provider.isDefined) | ||
|
|
||
| val sessionState = sparkSession.sessionState | ||
| if (sessionState.catalog.tableExists(table.identifier)) { | ||
| val db = table.identifier.database.getOrElse(sessionState.catalog.getCurrentDatabase) | ||
| val tableIdentWithDB = table.identifier.copy(database = Some(db)) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we make these three lines a util in TableIdentifier?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CreateDataSourceTableCommand is planned in physical plan stage. Maybe we should generate correct table identifier in logical plan stage? |
||
| // Pass a table identifier with database part, so that `tableExists` won't check temp views | ||
| // unexpectedly. | ||
| if (sessionState.catalog.tableExists(tableIdentWithDB)) { | ||
| if (ignoreIfExists) { | ||
| return Seq.empty[Row] | ||
| } else { | ||
| throw new AnalysisException(s"Table ${table.identifier.unquotedString} already exists.") | ||
| throw new AnalysisException(s"Table ${tableIdentWithDB.unquotedString} already exists.") | ||
| } | ||
| } | ||
|
|
||
|
|
@@ -128,9 +132,11 @@ case class CreateDataSourceTableAsSelectCommand( | |
| assert(table.provider.isDefined) | ||
| assert(table.schema.isEmpty) | ||
|
|
||
| val tableName = table.identifier.unquotedString | ||
| val provider = table.provider.get | ||
| val sessionState = sparkSession.sessionState | ||
| val db = table.identifier.database.getOrElse(sessionState.catalog.getCurrentDatabase) | ||
| val tableIdentWithDB = table.identifier.copy(database = Some(db)) | ||
| val tableName = tableIdentWithDB.unquotedString | ||
|
|
||
| val optionsWithPath = if (table.tableType == CatalogTableType.MANAGED) { | ||
| table.storage.properties + ("path" -> sessionState.catalog.defaultTablePath(table.identifier)) | ||
|
|
@@ -140,7 +146,9 @@ case class CreateDataSourceTableAsSelectCommand( | |
|
|
||
| var createMetastoreTable = false | ||
| var existingSchema = Option.empty[StructType] | ||
| if (sparkSession.sessionState.catalog.tableExists(table.identifier)) { | ||
| // Pass a table identifier with database part, so that `tableExists` won't check temp views | ||
| // unexpectedly. | ||
| if (sparkSession.sessionState.catalog.tableExists(tableIdentWithDB)) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For This is kind of hack to me. Do you have ways to avoid hacking table identifier?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have, in my previous refactor PR: #14962 This PR aims to just fix these bugs surgically, so that it's easy to backport to 2.0 |
||
| // Check if we need to throw an exception or just return. | ||
| mode match { | ||
| case SaveMode.ErrorIfExists => | ||
|
|
@@ -165,7 +173,7 @@ case class CreateDataSourceTableAsSelectCommand( | |
| // inserting into (i.e. using the same compression). | ||
|
|
||
| EliminateSubqueryAliases( | ||
| sessionState.catalog.lookupRelation(table.identifier)) match { | ||
| sessionState.catalog.lookupRelation(tableIdentWithDB)) match { | ||
| case l @ LogicalRelation(_: InsertableRelation | _: HadoopFsRelation, _, _) => | ||
| // check if the file formats match | ||
| l.relation match { | ||
|
|
@@ -188,7 +196,7 @@ case class CreateDataSourceTableAsSelectCommand( | |
| throw new AnalysisException(s"Saving data in ${o.toString} is not supported.") | ||
| } | ||
| case SaveMode.Overwrite => | ||
| sparkSession.sql(s"DROP TABLE IF EXISTS $tableName") | ||
| sessionState.catalog.dropTable(tableIdentWithDB, ignoreIfNotExists = true, purge = false) | ||
| // Need to create the table again. | ||
| createMetastoreTable = true | ||
| } | ||
|
|
@@ -230,7 +238,7 @@ case class CreateDataSourceTableAsSelectCommand( | |
| } | ||
|
|
||
| // Refresh the cache of the table in the catalog. | ||
| sessionState.catalog.refreshTable(table.identifier) | ||
| sessionState.catalog.refreshTable(tableIdentWithDB) | ||
| Seq.empty[Row] | ||
| } | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the same file, there are also interfaces like isTemporaryTable, tableExists, refreshTable, clearTempTables, which can accept a temporary view, but using the name "table" instead of "view"
I am not sure introducing getTempView and dropTempView will make the class SessionCatalog's interface simpler or more complicated.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we already have
createTempView, I don't think the new APIs makeSessionCatalogmore complicated.