Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -754,11 +754,15 @@ class Analyzer(
* a logical plan node's children.
*/
object ResolveReferences extends Rule[LogicalPlan] {
private val emptyAttrMap = new AttributeMap[Attribute](Map.empty)

/**
* Generate a new logical plan for the right child with different expression IDs
* for all conflicting attributes.
*/
private def dedupRight (left: LogicalPlan, right: LogicalPlan): LogicalPlan = {
private def dedupRight(
left: LogicalPlan,
right: LogicalPlan): (LogicalPlan, AttributeMap[Attribute]) = {
val conflictingAttributes = left.outputSet.intersect(right.outputSet)
logDebug(s"Conflicting attributes ${conflictingAttributes.mkString(",")} " +
s"between $left and $right")
Expand Down Expand Up @@ -805,10 +809,10 @@ class Analyzer(
* that this rule cannot handle. When that is the case, there must be another rule
* that resolves these conflicts. Otherwise, the analysis will fail.
*/
right
(right, emptyAttrMap)
case Some((oldRelation, newRelation)) =>
val attributeRewrites = AttributeMap(oldRelation.output.zip(newRelation.output))
right transformUp {
val newRight = right transformUp {
case r if r == oldRelation => newRelation
} transformUp {
case other => other transformExpressions {
Expand All @@ -818,6 +822,7 @@ class Analyzer(
s.withNewPlan(dedupOuterReferencesInSubquery(s.plan, attributeRewrites))
}
}
(newRight, attributeRewrites)
}
}

Expand Down Expand Up @@ -921,12 +926,18 @@ class Analyzer(
failAnalysis("Invalid usage of '*' in explode/json_tuple/UDTF")

// To resolve duplicate expression IDs for Join and Intersect
case j @ Join(left, right, _, _) if !j.duplicateResolved =>
j.copy(right = dedupRight(left, right))
case j @ Join(left, right, _, condition) if !j.duplicateResolved =>
val (dedupedRight, attributeRewrites) = dedupRight(left, right)
val changedCondition = condition.map(_.transform {
case attr: Attribute if attr.resolved => dedupAttr(attr, attributeRewrites)
})
j.copy(right = dedupedRight, condition = changedCondition)
case i @ Intersect(left, right, _) if !i.duplicateResolved =>
i.copy(right = dedupRight(left, right))
val (dedupedRight, _) = dedupRight(left, right)
i.copy(right = dedupedRight)
case e @ Except(left, right, _) if !e.duplicateResolved =>
e.copy(right = dedupRight(left, right))
val (dedupedRight, _) = dedupRight(left, right)
e.copy(right = dedupedRight)
// When resolve `SortOrder`s in Sort based on child, don't report errors as
// we still have chance to resolve it based on its descendants
case s @ Sort(ordering, global, child) if child.resolved && !s.resolved =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -295,4 +295,14 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext {
df.join(df, df("id") <=> df("id")).queryExecution.optimizedPlan
}
}

test("SPARK-25150: Attribute deduplication handles attributes in join condition properly") {
val a = spark.range(1, 5)
val b = spark.range(10)
val c = b.filter($"id" % 2 === 0)

val r = a.join(b, a("id") === b("id"), "inner").join(c, a("id") === c("id"), "inner")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why isn't this the simpler `a.join(b, "id").join(c, "id")`?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That simpler join doesn't hit the issue: it is handled by a different rule, `ResolveNaturalAndUsingJoin`.


checkAnswer(r, Row(2, 2, 2) :: Row(4, 4, 4) :: Nil)
}
}