[SPARK-34302][SQL] Migrate ALTER TABLE ... CHANGE COLUMN command to use UnresolvedTable to resolve the identifier #33113
Changes from all commits: ce47f89, 2b908f8, 727aa78, a19f7b4, 4a97b36, 808ab97, 0c1a5e1, 6a75c92
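For context, the DDL being migrated is `ALTER TABLE ... ALTER/CHANGE COLUMN`. Below is a hedged sketch of how it is exercised end to end; the `testcat` catalog, namespace, table, and `USING foo` provider are hypothetical and not taken from this PR:

```scala
// Assumes "testcat" is a registered v2 catalog (spark.sql.catalog.testcat=...).
// After this migration both spellings resolve the table identifier via UnresolvedTable
// and produce an AlterTableAlterColumn command checked by the code in this diff.
spark.sql("CREATE TABLE testcat.ns.tbl (id INT, name STRING) USING foo")
spark.sql("ALTER TABLE testcat.ns.tbl ALTER COLUMN id TYPE BIGINT")
spark.sql("ALTER TABLE testcat.ns.tbl CHANGE COLUMN name COMMENT 'customer name'")
```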
```diff
@@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans._
 import org.apache.spark.sql.catalyst.plans.logical._
 import org.apache.spark.sql.catalyst.util.{CharVarcharUtils, TypeUtils}
 import org.apache.spark.sql.connector.catalog.{LookupCatalog, SupportsPartitionManagement}
-import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn, UpdateColumnComment, UpdateColumnNullability, UpdateColumnPosition, UpdateColumnType}
+import org.apache.spark.sql.connector.catalog.TableChange.{AddColumn, After, ColumnPosition, DeleteColumn}
 import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
```
```diff
@@ -444,12 +444,42 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
         write.query.schema.foreach(f => TypeUtils.failWithIntervalType(f.dataType))

       case alter: AlterTableCommand if alter.table.resolved =>
+        val table = alter.table.asInstanceOf[ResolvedTable]
+        def findField(fieldName: Seq[String]): StructField = {
+          // Include collections because structs nested in maps and arrays may be altered.
+          val field = table.schema.findNestedField(fieldName, includeCollections = true)
+          if (field.isEmpty) {
+            alter.failAnalysis(s"Cannot ${alter.operation} missing field ${fieldName.quoted} " +
+              s"in ${table.name} schema: ${table.schema.treeString}")
+          }
+          field.get._2
+        }
+        def findParentStruct(fieldNames: Seq[String]): StructType = {
+          val parent = fieldNames.init
+          val field = if (parent.nonEmpty) {
+            findField(parent).dataType
+          } else {
+            table.schema
+          }
+          field match {
+            case s: StructType => s
+            case o => alter.failAnalysis(s"Cannot ${alter.operation} ${fieldNames.quoted}, " +
+              s"because its parent is not a StructType. Found $o")
+          }
+        }
         alter.transformExpressions {
-          case u: UnresolvedFieldName =>
-            val table = alter.table.asInstanceOf[ResolvedTable]
-            alter.failAnalysis(
-              s"Cannot ${alter.operation} missing field ${u.name.quoted} in ${table.name} " +
-              s"schema: ${table.schema.treeString}")
+          case UnresolvedFieldName(name) =>
+            alter.failAnalysis(s"Cannot ${alter.operation} missing field ${name.quoted} in " +
+              s"${table.name} schema: ${table.schema.treeString}")
+          case UnresolvedFieldPosition(fieldName, position: After) =>
+            val parent = findParentStruct(fieldName)
+            val allFields = parent match {
+              case s: StructType => s.fieldNames
+              case o => alter.failAnalysis(s"Cannot ${alter.operation} ${fieldName.quoted}, " +
+                s"because its parent is not a StructType. Found $o")
+            }
+            alter.failAnalysis(s"Couldn't resolve positional argument $position amongst " +
+              s"${allFields.mkString("[", ", ", "]")}")
         }
         checkAlterTableCommand(alter)

```
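To make the new checks concrete, here is a hedged example of what a user would hit; the catalog and table names are hypothetical and the error text is paraphrased from the messages built in the block above:

```scala
spark.sql("CREATE TABLE testcat.ns.tbl (id INT, point STRUCT<x: DOUBLE, y: DOUBLE>) USING foo")

// Hits the UnresolvedFieldName branch; fails analysis with something like:
//   Cannot update missing field bad_name in testcat.ns.tbl schema: ...
spark.sql("ALTER TABLE testcat.ns.tbl ALTER COLUMN bad_name TYPE DOUBLE")

// Hits the UnresolvedFieldPosition branch; fails with something like:
//   Couldn't resolve positional argument AFTER missing_col amongst [id, point]
spark.sql("ALTER TABLE testcat.ns.tbl ALTER COLUMN id AFTER missing_col")
```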
```diff
@@ -522,66 +552,6 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
             positionArgumentExists(add.position(), parent, fieldsAdded)
             TypeUtils.failWithIntervalType(add.dataType())
             colsToAdd(parentName) = fieldsAdded :+ add.fieldNames().last
-          case update: UpdateColumnType =>
-            val field = {
-              val f = findField("update", update.fieldNames)
-              CharVarcharUtils.getRawType(f.metadata)
-                .map(dt => f.copy(dataType = dt))
-                .getOrElse(f)
-            }
-            val fieldName = update.fieldNames.quoted
-            update.newDataType match {
-              case _: StructType =>
-                alter.failAnalysis(s"Cannot update ${table.name} field $fieldName type: " +
-                  s"update a struct by updating its fields")
-              case _: MapType =>
-                alter.failAnalysis(s"Cannot update ${table.name} field $fieldName type: " +
-                  s"update a map by updating $fieldName.key or $fieldName.value")
-              case _: ArrayType =>
-                alter.failAnalysis(s"Cannot update ${table.name} field $fieldName type: " +
-                  s"update the element by updating $fieldName.element")
-              case u: UserDefinedType[_] =>
-                alter.failAnalysis(s"Cannot update ${table.name} field $fieldName type: " +
-                  s"update a UserDefinedType[${u.sql}] by updating its fields")
-              case _: CalendarIntervalType | _: YearMonthIntervalType |
-                   _: DayTimeIntervalType =>
-                alter.failAnalysis(s"Cannot update ${table.name} field $fieldName to " +
-                  s"interval type")
-              case _ =>
-                // update is okay
-            }
-
-            // We don't need to handle nested types here which shall fail before
```
Comment on lines -533 to -554 (Contributor, Author): These changes are moved below to `checkAlterTableCommand`.
```diff
-            def canAlterColumnType(from: DataType, to: DataType): Boolean = (from, to) match {
-              case (CharType(l1), CharType(l2)) => l1 == l2
-              case (CharType(l1), VarcharType(l2)) => l1 <= l2
-              case (VarcharType(l1), VarcharType(l2)) => l1 <= l2
-              case _ => Cast.canUpCast(from, to)
-            }
-
-            if (!canAlterColumnType(field.dataType, update.newDataType)) {
-              alter.failAnalysis(
-                s"Cannot update ${table.name} field $fieldName: " +
-                  s"${field.dataType.simpleString} cannot be cast to " +
-                  s"${update.newDataType.simpleString}")
-            }
-          case update: UpdateColumnNullability =>
-            val field = findField("update", update.fieldNames)
-            val fieldName = update.fieldNames.quoted
-            if (!update.nullable && field.nullable) {
-              alter.failAnalysis(
-                s"Cannot change nullable column to non-nullable: $fieldName")
-            }
-          case updatePos: UpdateColumnPosition =>
-            findField("update", updatePos.fieldNames)
-            val parent = findParentStruct("update", updatePos.fieldNames())
-            val parentName = updatePos.fieldNames().init
-            positionArgumentExists(
-              updatePos.position(),
-              parent,
-              colsToAdd.getOrElse(parentName, Nil))
```
Contributor (Author): I don't think we should consider …
```diff
-          case update: UpdateColumnComment =>
-            findField("update", update.fieldNames)
           case delete: DeleteColumn =>
             findField("delete", delete.fieldNames)
             // REPLACE COLUMNS has deletes followed by adds. Remember the deleted columns
```
```diff
@@ -1088,8 +1058,51 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog {
    }

    alter match {
-      case AlterTableRenameColumn(table: ResolvedTable, ResolvedFieldName(name), newName) =>
-        checkColumnNotExists(name.init :+ newName, table.schema)
+      case AlterTableRenameColumn(table: ResolvedTable, col: ResolvedFieldName, newName) =>
+        checkColumnNotExists(col.path :+ newName, table.schema)
+      case a @ AlterTableAlterColumn(table: ResolvedTable, col: ResolvedFieldName, _, _, _, _) =>
+        val fieldName = col.name.quoted
+        if (a.dataType.isDefined) {
+          val field = CharVarcharUtils.getRawType(col.field.metadata)
+            .map(dt => col.field.copy(dataType = dt))
+            .getOrElse(col.field)
+          val newDataType = a.dataType.get
+          newDataType match {
```
Contributor: Not related to this PR: I think there is a small bug here. If the data type is not changed, we shouldn't fail even if the new data type is struct/array/map. @imback82 can you help to fix it?

Contributor: Actually, this is OK. In the analyzer, we will set the …
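For what it's worth, the guard in the added code only runs when a new data type is actually supplied, so a change that touches only the comment (or nullability) of a struct column never reaches the struct/map/array rejection. A hedged illustration with hypothetical names:

```scala
spark.sql("CREATE TABLE testcat.ns.tbl (s STRUCT<x: INT, y: INT>) USING foo")

// dataType is None in the resulting AlterTableAlterColumn, so a.dataType.isDefined
// is false and the type checks in the remainder of the diff below are skipped.
spark.sql("ALTER TABLE testcat.ns.tbl ALTER COLUMN s COMMENT 'a point'")
```

The rest of the added block follows: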
```diff
+            case _: StructType =>
+              alter.failAnalysis(s"Cannot update ${table.name} field $fieldName type: " +
+                "update a struct by updating its fields")
+            case _: MapType =>
+              alter.failAnalysis(s"Cannot update ${table.name} field $fieldName type: " +
+                s"update a map by updating $fieldName.key or $fieldName.value")
+            case _: ArrayType =>
+              alter.failAnalysis(s"Cannot update ${table.name} field $fieldName type: " +
+                s"update the element by updating $fieldName.element")
+            case u: UserDefinedType[_] =>
+              alter.failAnalysis(s"Cannot update ${table.name} field $fieldName type: " +
+                s"update a UserDefinedType[${u.sql}] by updating its fields")
+            case _: CalendarIntervalType | _: YearMonthIntervalType | _: DayTimeIntervalType =>
+              alter.failAnalysis(s"Cannot update ${table.name} field $fieldName to interval type")
+            case _ => // update is okay
+          }
+
+          // We don't need to handle nested types here which shall fail before.
+          def canAlterColumnType(from: DataType, to: DataType): Boolean = (from, to) match {
+            case (CharType(l1), CharType(l2)) => l1 == l2
+            case (CharType(l1), VarcharType(l2)) => l1 <= l2
+            case (VarcharType(l1), VarcharType(l2)) => l1 <= l2
+            case _ => Cast.canUpCast(from, to)
+          }
+
+          if (!canAlterColumnType(field.dataType, newDataType)) {
+            alter.failAnalysis(s"Cannot update ${table.name} field $fieldName: " +
+              s"${field.dataType.simpleString} cannot be cast to ${newDataType.simpleString}")
+          }
+        }
+        if (a.nullable.isDefined) {
+          if (!a.nullable.get && col.field.nullable) {
+            alter.failAnalysis(s"Cannot change nullable column to non-nullable: $fieldName")
+          }
+        }
       case _ =>
    }
  }
```
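The type-change rule itself is compact enough to restate on its own. A minimal sketch, assuming only spark-catalyst on the classpath; the assertions reflect my reading of `Cast.canUpCast` and are not taken from this PR:

```scala
import org.apache.spark.sql.catalyst.expressions.Cast
import org.apache.spark.sql.types._

// Same shape as the rule added above: char lengths must match exactly,
// char/varchar may only widen, anything else must be a safe up-cast.
def canAlterColumnType(from: DataType, to: DataType): Boolean = (from, to) match {
  case (CharType(l1), CharType(l2)) => l1 == l2
  case (CharType(l1), VarcharType(l2)) => l1 <= l2
  case (VarcharType(l1), VarcharType(l2)) => l1 <= l2
  case _ => Cast.canUpCast(from, to)
}

assert(canAlterColumnType(IntegerType, LongType))           // numeric widening is an up-cast
assert(!canAlterColumnType(LongType, IntegerType))          // narrowing is rejected
assert(canAlterColumnType(VarcharType(5), VarcharType(10))) // varchar may grow
assert(!canAlterColumnType(CharType(5), CharType(10)))      // char length must stay the same
```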
Do we need to put `fieldName` in `UnresolvedFieldPosition`? We can easily get it via `AlterTableAlterColumn.column.name`.

Reply: #33213