Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[8.x] ESQL: Disable pushdown of WHERE past STATS (#115308) #115419

Merged
merged 1 commit into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/115308.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 115308
summary: "ESQL: Disable pushdown of WHERE past STATS"
area: ES|QL
type: bug
issues:
- 115281
Original file line number Diff line number Diff line change
Expand Up @@ -2291,6 +2291,33 @@ m:integer |a:double |x:integer
74999 |48249.0 |0
;

statsWithFilterOnGroups
required_capability: fix_filter_pushdown_past_stats
FROM employees
| STATS v = VALUES(emp_no) by job_positions | WHERE job_positions == "Accountant" | MV_EXPAND v | SORT v
;

v:integer | job_positions:keyword
10001 | Accountant
10012 | Accountant
10016 | Accountant
10023 | Accountant
10025 | Accountant
10028 | Accountant
10034 | Accountant
10037 | Accountant
10044 | Accountant
10045 | Accountant
10050 | Accountant
10051 | Accountant
10066 | Accountant
10081 | Accountant
10085 | Accountant
10089 | Accountant
10092 | Accountant
10094 | Accountant
;


statsWithFiltering
required_capability: per_agg_filtering
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -405,7 +405,12 @@ public enum Cap {
/**
* Support for semantic_text field mapping
*/
SEMANTIC_TEXT_TYPE(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG);
SEMANTIC_TEXT_TYPE(EsqlCorePlugin.SEMANTIC_TEXT_FEATURE_FLAG),
/**
* Fix for an optimization that caused wrong results
* https://github.com/elastic/elasticsearch/issues/115281
*/
FIX_FILTER_PUSHDOWN_PAST_STATS;

private final boolean enabled;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
import org.elasticsearch.xpack.esql.core.expression.Expressions;
import org.elasticsearch.xpack.esql.core.expression.ReferenceAttribute;
import org.elasticsearch.xpack.esql.core.expression.predicate.Predicates;
import org.elasticsearch.xpack.esql.expression.function.aggregate.AggregateFunction;
import org.elasticsearch.xpack.esql.plan.logical.Aggregate;
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
import org.elasticsearch.xpack.esql.plan.logical.Eval;
import org.elasticsearch.xpack.esql.plan.logical.Filter;
Expand All @@ -38,18 +36,13 @@ protected LogicalPlan rule(Filter filter) {
LogicalPlan child = filter.child();
Expression condition = filter.condition();

// TODO: Push down past STATS if the filter is only on the groups; but take into account how `STATS ... BY field` handles
// multi-values: It seems to be equivalent to `EVAL field = MV_DEDUPE(field) | MV_EXPAND(field) | STATS ... BY field`, where the
// last `STATS ... BY field` can assume that `field` is single-valued (to be checked more thoroughly).
// https://github.com/elastic/elasticsearch/issues/115311
if (child instanceof Filter f) {
// combine nodes into a single Filter with updated ANDed condition
plan = f.with(Predicates.combineAnd(List.of(f.condition(), condition)));
} else if (child instanceof Aggregate agg) { // TODO: re-evaluate along with multi-value support
// Only push [parts of] a filter past an agg if these/it operates on agg's grouping[s], not output.
plan = maybePushDownPastUnary(
filter,
agg,
e -> e instanceof Attribute && agg.output().contains(e) && agg.groupings().contains(e) == false
|| e instanceof AggregateFunction,
NO_OP
);
} else if (child instanceof Eval eval) {
// Don't push if Filter (still) contains references to Eval's fields.
// Account for simple aliases in the Eval, though - these shouldn't stop us.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -738,6 +738,7 @@ public void testMultipleCombineLimits() {
);
}

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311")
public void testSelectivelyPushDownFilterPastRefAgg() {
// expected plan: "from test | where emp_no > 1 and emp_no < 3 | stats x = count(1) by emp_no | where x > 7"
LogicalPlan plan = optimizedPlan("""
Expand Down Expand Up @@ -790,6 +791,7 @@ public void testNoPushDownOrFilterPastAgg() {
assertTrue(stats.child() instanceof EsRelation);
}

@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311")
public void testSelectivePushDownComplexFilterPastAgg() {
// expected plan: from test | emp_no > 0 | stats x = count(1) by emp_no | where emp_no < 3 or x > 9
LogicalPlan plan = optimizedPlan("""
Expand Down Expand Up @@ -1393,13 +1395,15 @@ public void testPushDownLimitThroughMultipleSort_AfterMvExpand2() {
}

/**
* TODO: Push down the filter correctly https://github.com/elastic/elasticsearch/issues/115311
*
* Expected
* Limit[5[INTEGER]]
* \_Aggregate[[first_name{f}#232],[MAX(salary{f}#233) AS max_s, first_name{f}#232]]
* \_Filter[ISNOTNULL(first_name{f}#232)]
* \_MvExpand[first_name{f}#232]
* \_TopN[[Order[emp_no{f}#231,ASC,LAST]],50[INTEGER]]
* \_EsRelation[employees][emp_no{f}#231, first_name{f}#232, salary{f}#233]
* \_Filter[ISNOTNULL(first_name{r}#23)]
* \_Aggregate[STANDARD,[first_name{r}#23],[MAX(salary{f}#18,true[BOOLEAN]) AS max_s, first_name{r}#23]]
* \_MvExpand[first_name{f}#14,first_name{r}#23]
* \_TopN[[Order[emp_no{f}#13,ASC,LAST]],50[INTEGER]]
* \_EsRelation[test][_meta_field{f}#19, emp_no{f}#13, first_name{f}#14, ..]
*/
public void testDontPushDownLimitPastAggregate_AndMvExpand() {
LogicalPlan plan = optimizedPlan("""
Expand All @@ -1413,25 +1417,27 @@ public void testDontPushDownLimitPastAggregate_AndMvExpand() {
| limit 5""");

var limit = as(plan, Limit.class);
var filter = as(limit.child(), Filter.class);
assertThat(limit.limit().fold(), equalTo(5));
var agg = as(limit.child(), Aggregate.class);
var filter = as(agg.child(), Filter.class);
var mvExp = as(filter.child(), MvExpand.class);
var agg = as(filter.child(), Aggregate.class);
var mvExp = as(agg.child(), MvExpand.class);
var topN = as(mvExp.child(), TopN.class);
assertThat(topN.limit().fold(), equalTo(50));
assertThat(orderNames(topN), contains("emp_no"));
as(topN.child(), EsRelation.class);
}

/**
* TODO: Push down the filter correctly https://github.com/elastic/elasticsearch/issues/115311
*
* Expected
* Limit[5[INTEGER]]
* \_Aggregate[[first_name{f}#262],[MAX(salary{f}#263) AS max_s, first_name{f}#262]]
* \_Filter[ISNOTNULL(first_name{f}#262)]
* \_Limit[50[INTEGER]]
* \_MvExpand[first_name{f}#262]
* \_Limit[50[INTEGER]]
* \_EsRelation[employees][emp_no{f}#261, first_name{f}#262, salary{f}#263]
* \_Filter[ISNOTNULL(first_name{r}#22)]
* \_Aggregate[STANDARD,[first_name{r}#22],[MAX(salary{f}#17,true[BOOLEAN]) AS max_s, first_name{r}#22]]
* \_Limit[50[INTEGER]]
* \_MvExpand[first_name{f}#13,first_name{r}#22]
* \_Limit[50[INTEGER]]
* \_EsRelation[test][_meta_field{f}#18, emp_no{f}#12, first_name{f}#13, ..]
*/
public void testPushDown_TheRightLimit_PastMvExpand() {
LogicalPlan plan = optimizedPlan("""
Expand All @@ -1445,9 +1451,9 @@ public void testPushDown_TheRightLimit_PastMvExpand() {

var limit = as(plan, Limit.class);
assertThat(limit.limit().fold(), equalTo(5));
var agg = as(limit.child(), Aggregate.class);
var filter = as(agg.child(), Filter.class);
limit = as(filter.child(), Limit.class);
var filter = as(limit.child(), Filter.class);
var agg = as(filter.child(), Aggregate.class);
limit = as(agg.child(), Limit.class);
assertThat(limit.limit().fold(), equalTo(50));
var mvExp = as(limit.child(), MvExpand.class);
limit = as(mvExp.child(), Limit.class);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ public void testPushDownLikeRlikeFilter() {

// from ... | where a > 1 | stats count(1) by b | where count(1) >= 3 and b < 2
// => ... | where a > 1 and b < 2 | stats count(1) by b | where count(1) >= 3
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/115311")
public void testSelectivelyPushDownFilterPastFunctionAgg() {
EsRelation relation = relation();
GreaterThan conditionA = greaterThanOf(getFieldAttribute("a"), ONE);
Expand Down