-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-11275][SQL] Reimplement Expand as a Generator and fix existing implementation bugs #9429
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -205,45 +205,30 @@ class Analyzer( | |
| GroupingSets(bitmasks(a), a.groupByExprs, a.child, a.aggregations) | ||
| case x: GroupingSets => | ||
| val gid = AttributeReference(VirtualColumn.groupingIdName, IntegerType, false)() | ||
| // We will insert another Projection if the GROUP BY keys contains the | ||
| // non-attribute expressions. And the top operators can references those | ||
| // expressions by its alias. | ||
| // e.g. SELECT key%5 as c1 FROM src GROUP BY key%5 ==> | ||
| // SELECT a as c1 FROM (SELECT key%5 AS a FROM src) GROUP BY a | ||
|
|
||
| // find all of the non-attribute expressions in the GROUP BY keys | ||
| val nonAttributeGroupByExpressions = new ArrayBuffer[Alias]() | ||
|
|
||
| // The pair of (the original GROUP BY key, associated attribute) | ||
| val groupByExprPairs = x.groupByExprs.map(_ match { | ||
| case e: NamedExpression => (e, e.toAttribute) | ||
| case other => { | ||
| val alias = Alias(other, other.toString)() | ||
| nonAttributeGroupByExpressions += alias // add the non-attributes expression alias | ||
| (other, alias.toAttribute) | ||
| } | ||
| }) | ||
|
|
||
| // substitute the non-attribute expressions for aggregations. | ||
| val aggregation = x.aggregations.map(expr => expr.transformDown { | ||
| case e => groupByExprPairs.find(_._1.semanticEquals(e)).map(_._2).getOrElse(e) | ||
| }.asInstanceOf[NamedExpression]) | ||
| val aliasedGroupByExprPairs = x.groupByExprs.map{ | ||
| case a @ Alias(expr, _) => (expr, a) | ||
| case expr: NamedExpression => (expr, Alias(expr, expr.name)()) | ||
| case expr => (expr, Alias(expr, expr.prettyString)()) | ||
| } | ||
|
|
||
| // substitute the group by expressions. | ||
| val newGroupByExprs = groupByExprPairs.map(_._2) | ||
| val aliasedGroupByExprs = aliasedGroupByExprPairs.map(_._2) | ||
| val aliasedGroupByAttr = aliasedGroupByExprs.map(_.toAttribute) | ||
|
|
||
| val child = if (nonAttributeGroupByExpressions.length > 0) { | ||
| // insert additional projection if contains the | ||
| // non-attribute expressions in the GROUP BY keys | ||
| Project(x.child.output ++ nonAttributeGroupByExpressions, x.child) | ||
| } else { | ||
| x.child | ||
| // substitute group by expressions in aggregation list with appropriate attribute | ||
| val aggregations = x.aggregations.map{ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. expr => expr.transformDown {
..
}Otherwise it's not able to substitute the expression like
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @chenghao-intel actually that change would bring back the bug in question since it would do the substitutions in situations like below and the aggregations would be computed off the manipulated (nulls inserted) values. In general anything below an AggregateExpression we don't want to transform, but above we do. So really I need a transformDownUntil method. BTW making this change does fix the |
||
| case a @ Alias(e, name) => | ||
| aliasedGroupByExprPairs.find(_._1.semanticEquals(e)) | ||
| .map(_._2.toAttribute.withName(name)).getOrElse(a) | ||
| case e => | ||
| aliasedGroupByExprPairs.find(_._1.semanticEquals(e)).map(_._2.toAttribute).getOrElse(e) | ||
| } | ||
|
|
||
| Aggregate( | ||
| newGroupByExprs :+ VirtualColumn.groupingIdAttribute, | ||
| aggregation, | ||
| Expand(x.bitmasks, newGroupByExprs, gid, child)) | ||
| aliasedGroupByAttr :+ gid, aggregations, | ||
| Generate(ExpandGroupingSets(aliasedGroupByExprs, x.bitmasks), | ||
| join = true, outer = false, qualifier = None, aliasedGroupByAttr :+ gid, x.child) | ||
| ) | ||
| } | ||
| } | ||
|
|
||
|
|
||
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could also use
expr.toAttributefor aNamedExpressioninstead of creating an Alias.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I believe I need a new Alias here since we really have two versions of the expression -- the original and the version manipulated by the Generator with nulls inserted per the bitmask. In the Aggregate 'aggregation' list the grouping columns need to refer to the manipulated version and 'real' aggregates need to refer to the original version.