-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-14543] [SQL] Improve InsertIntoTable column resolution. #12313
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
06c3536
9a8cbc2
bb8e7e7
13a6950
c820846
d577aed
906e68d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -359,30 +359,43 @@ case class InsertIntoTable( | |
| partition: Map[String, Option[String]], | ||
| child: LogicalPlan, | ||
| overwrite: Boolean, | ||
| ifNotExists: Boolean) | ||
| ifNotExists: Boolean, | ||
| options: Map[String, String]) | ||
|
||
| extends LogicalPlan { | ||
|
|
||
| override def children: Seq[LogicalPlan] = child :: Nil | ||
| override def output: Seq[Attribute] = Seq.empty | ||
|
|
||
| private[spark] def isMatchByName: Boolean = { | ||
| options.get("matchByName").map(_.toBoolean).getOrElse(false) | ||
| } | ||
|
|
||
| private[spark] lazy val expectedColumns = { | ||
| if (table.output.isEmpty) { | ||
| None | ||
| } else { | ||
| val numDynamicPartitions = partition.values.count(_.isEmpty) | ||
| val dynamicPartitionNames = partition.filter { | ||
| case (name, Some(_)) => false | ||
| case (name, None) => true | ||
| }.keySet | ||
| val (partitionColumns, dataColumns) = table.output | ||
| .partition(a => partition.keySet.contains(a.name)) | ||
| Some(dataColumns ++ partitionColumns.takeRight(numDynamicPartitions)) | ||
| Some(dataColumns ++ partitionColumns.filter(col => dynamicPartitionNames.contains(col.name))) | ||
| } | ||
| } | ||
|
|
||
| assert(overwrite || !ifNotExists) | ||
| override lazy val resolved: Boolean = | ||
| childrenResolved && table.resolved && expectedColumns.forall { expected => | ||
| child.output.size == expected.size && child.output.zip(expected).forall { | ||
| case (childAttr, tableAttr) => | ||
| DataType.equalsIgnoreCompatibleNullability(childAttr.dataType, tableAttr.dataType) | ||
| } | ||
| childrenResolved && table.resolved && { | ||
| expectedColumns match { | ||
| case Some(expected) => | ||
| child.output.size == expected.size && child.output.zip(expected).forall { | ||
| case (childAttr, tableAttr) => | ||
| childAttr.name == tableAttr.name && // required by some relations | ||
| DataType.equalsIgnoreCompatibleNullability(childAttr.dataType, tableAttr.dataType) | ||
| } | ||
| case None => true | ||
| } | ||
| } | ||
| } | ||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`partitionData.contains`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using `contains` would include partitions that are set statically for this query, which have values like `Some("static-val")`. This doesn't happen through the `DataFrameWriter`, but is valid HiveQL.