-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-33152][SQL] Improve the performance of constraint propagation for Project and Aggregate #30894
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-33152][SQL] Improve the performance of constraint propagation for Project and Aggregate #30894
Changes from all commits
03c6e56
119e057
08dc723
4bd8c06
e554605
22546e2
ccabb69
8cf2da9
0420514
0c156f7
f1332eb
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -176,20 +176,58 @@ trait UnaryNode extends LogicalPlan with UnaryLike[LogicalPlan] { | |
| */ | ||
| protected def getAllValidConstraints(projectList: Seq[NamedExpression]): ExpressionSet = { | ||
| var allConstraints = child.constraints | ||
| projectList.foreach { | ||
| case a @ Alias(l: Literal, _) => | ||
| allConstraints += EqualNullSafe(a.toAttribute, l) | ||
| case a @ Alias(e, _) => | ||
| // For every alias in `projectList`, replace the reference in constraints by its attribute. | ||
| allConstraints ++= allConstraints.map(_ transform { | ||
| case expr: Expression if expr.semanticEquals(e) => | ||
| a.toAttribute | ||
|
|
||
| // For each expression collect its aliases | ||
| val aliasMap = projectList.collect { | ||
| case alias @ Alias(expr, _) if !expr.foldable && expr.deterministic => | ||
| (expr.canonicalized, alias) | ||
| }.groupBy(_._1).mapValues(_.map(_._2)) | ||
| val remainingExpressions = collection.mutable.Set(aliasMap.keySet.toSeq: _*) | ||
|
|
||
| /** | ||
| * Filtering allConstraints between each iteration is necessary, because | ||
| * otherwise collecting valid constraints could in the worst case have exponential | ||
| * time and memory complexity. Each replaced alias could double the number of constraints, | ||
| * because we would keep both the original constraint and the one with alias. | ||
| */ | ||
| def shouldBeKept(expr: Expression): Boolean = { | ||
| expr.references.subsetOf(outputSet) || | ||
| remainingExpressions.contains(expr.canonicalized) || | ||
| (expr.children.nonEmpty && expr.children.forall(shouldBeKept)) | ||
| } | ||
|
|
||
| // Replace expressions with aliases | ||
| for ((expr, aliases) <- aliasMap) { | ||
| allConstraints ++= allConstraints.flatMap(constraint => { | ||
| aliases.map(alias => { | ||
| constraint transform { | ||
| case e: Expression if e.semanticEquals(expr) => | ||
| alias.toAttribute | ||
| } | ||
| }) | ||
| allConstraints += EqualNullSafe(e, a.toAttribute) | ||
| }) | ||
|
|
||
| remainingExpressions.remove(expr) | ||
| allConstraints = allConstraints.filter(shouldBeKept) | ||
| } | ||
|
|
||
| // Equality between aliases for the same expression | ||
| aliasMap.values.foreach(_.combinations(2).foreach { | ||
| case Seq(a1, a2) => | ||
| allConstraints += EqualNullSafe(a1.toAttribute, a2.toAttribute) | ||
| }) | ||
|
|
||
| /** | ||
| * We keep the child constraints and equality between original and aliased attributes, | ||
| * so [[ConstraintHelper.inferAdditionalConstraints]] would have the full information available. | ||
| */ | ||
| projectList.foreach { | ||
| case alias @ Alias(expr, _) => | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. maybe we just need to handle
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. You might be right that only the literal aliases are used currently, but all aliases were kept in the previous code (lines 180 & 187) and when somebody wants to improve |
||
| allConstraints += EqualNullSafe(alias.toAttribute, expr) | ||
| case _ => // Don't change. | ||
| } | ||
|
|
||
| allConstraints | ||
| allConstraints ++ child.constraints | ||
| } | ||
|
|
||
| override protected lazy val validConstraints: ExpressionSet = child.constraints | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.