Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL: Generate relevant error message when grouping functions are not used in GROUP BY #38017

Merged
merged 10 commits into from
Feb 2, 2019
Merged
23 changes: 23 additions & 0 deletions x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,29 @@ SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h
0 |10
;

histogramGroupByWithoutAlias
schema::h:ts|c:l
SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY HISTOGRAM(birth_date, INTERVAL 1 YEAR) ORDER BY h DESC;

h | c
--------------------+---------------
1964-02-02T00:00:00Z|5
1963-02-07T00:00:00Z|7
1962-02-12T00:00:00Z|6
1961-02-17T00:00:00Z|8
1960-02-23T00:00:00Z|7
1959-02-28T00:00:00Z|9
1958-03-05T00:00:00Z|6
1957-03-10T00:00:00Z|6
1956-03-15T00:00:00Z|4
1955-03-21T00:00:00Z|4
1954-03-26T00:00:00Z|7
1953-03-31T00:00:00Z|10
1952-04-05T00:00:00Z|10
1951-04-11T00:00:00Z|1
null |10
;

countAll
schema::all_names:l|c:l
SELECT COUNT(ALL first_name) all_names, COUNT(*) c FROM test_emp;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.elasticsearch.xpack.sql.expression.function.aggregate.Max;
import org.elasticsearch.xpack.sql.expression.function.aggregate.Min;
import org.elasticsearch.xpack.sql.expression.function.aggregate.TopHits;
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunction;
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction;
import org.elasticsearch.xpack.sql.expression.predicate.conditional.ConditionalFunction;
Expand Down Expand Up @@ -229,6 +230,7 @@ Collection<Failure> verify(LogicalPlan plan) {
validateInExpression(p, localFailures);
validateConditional(p, localFailures);

checkGroupingFunctionInGroupBy(p, localFailures);
checkFilterOnAggs(p, localFailures);
checkFilterOnGrouping(p, localFailures);

Expand Down Expand Up @@ -586,6 +588,24 @@ private static boolean checkGroupMatch(Expression e, Node<?> source, List<Expres
}
return false;
}

private static void checkGroupingFunctionInGroupBy(LogicalPlan p, Set<Failure> localFailures) {
// check if the query has a grouping function (Histogram) but no GROUP BY
if (p instanceof Project) {
Project proj = (Project) p;
proj.projections().forEach(e -> e.forEachDown(f ->
localFailures.add(fail(f, "[{}] needs to be part of the grouping", Expressions.name(f))), GroupingFunction.class));
} else if (p instanceof Aggregate) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this block is a bit condensed and could use some whitespace and indentation to make it more readable.
For example put localFailures on a new line.

// if it does have a GROUP BY, check if the groupings contain the grouping functions (Histograms)
Aggregate a = (Aggregate) p;
a.aggregates().forEach(agg -> agg.forEachDown(e -> {
if (a.groupings().size() == 0
|| Expressions.anyMatch(a.groupings(), g -> g instanceof Function && e.functionEquals((Function) g)) == false) {
localFailures.add(fail(e, "[{}] needs to be part of the grouping", Expressions.name(e)));
}
}, GroupingFunction.class));
}
}

private static void checkFilterOnAggs(LogicalPlan p, Set<Failure> localFailures) {
if (p instanceof Filter) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,6 @@ public GroupingFunctionAttribute toAttribute() {
return lazyAttribute;
}

@Override
public final GroupingFunction replaceChildren(List<Expression> newChildren) {
if (newChildren.size() != 1) {
throw new IllegalArgumentException("expected [1] child but received [" + newChildren.size() + "]");
}
return replaceChild(newChildren.get(0));
}

protected abstract GroupingFunction replaceChild(Expression newChild);

@Override
protected Pipe makePipe() {
// unresolved AggNameInput (should always get replaced by the folder)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
import org.elasticsearch.xpack.sql.expression.Expressions;
import org.elasticsearch.xpack.sql.expression.Expressions.ParamOrdinal;
import org.elasticsearch.xpack.sql.expression.Literal;
import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.tree.Source;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypes;

import java.time.ZoneId;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;

public class Histogram extends GroupingFunction {
Expand All @@ -24,8 +26,8 @@ public class Histogram extends GroupingFunction {
private final ZoneId zoneId;

public Histogram(Source source, Expression field, Expression interval, ZoneId zoneId) {
super(source, field);
this.interval = (Literal) interval;
super(source, field, Arrays.asList(interval));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not have this passed through parameters instead of extraChildren?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor improvement, use: Collections.singletonList(interval)

this.interval = Literal.of(interval);
this.zoneId = zoneId;
}

Expand All @@ -51,10 +53,13 @@ protected TypeResolution resolveType() {

return resolution;
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really minor: remove the whitespaces.

@Override
protected GroupingFunction replaceChild(Expression newChild) {
return new Histogram(source(), newChild, interval, zoneId);
public final GroupingFunction replaceChildren(List<Expression> newChildren) {
if (newChildren.size() != 2) {
throw new IllegalArgumentException("expected [2] children but received [" + newChildren.size() + "]");
}
return new Histogram(source(), newChildren.get(0), newChildren.get(1), zoneId);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,41 @@ public void testAggsInHistogram() {
assertEquals("1:47: Cannot use an aggregate [MAX] for grouping",
error("SELECT MAX(date) FROM test GROUP BY HISTOGRAM(MAX(int), 1)"));
}

public void testHistogramNotInGrouping() {
assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h FROM test"));
}

public void testHistogramNotInGroupingWithCount() {
assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h, COUNT(*) FROM test"));
}

public void testHistogramNotInGroupingWithMaxFirst() {
assertEquals("1:19: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT MAX(date), HISTOGRAM(date, INTERVAL 1 MONTH) AS h FROM test"));
}

public void testHistogramWithoutAliasNotInGrouping() {
assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) FROM test"));
}

public void testTwoHistogramsNotInGrouping() {
assertEquals("1:48: [HISTOGRAM(date, INTERVAL 1 DAY)] needs to be part of the grouping",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h, HISTOGRAM(date, INTERVAL 1 DAY) FROM test GROUP BY h"));
}

public void testHistogramNotInGrouping_WithGroupByField() {
assertEquals("1:8: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) FROM test GROUP BY date"));
}

public void testScalarOfHistogramNotInGrouping() {
assertEquals("1:14: [HISTOGRAM(date, INTERVAL 1 MONTH)] needs to be part of the grouping",
error("SELECT MONTH(HISTOGRAM(date, INTERVAL 1 MONTH)) FROM test"));
}

public void testErrorMessageForPercentileWithSecondArgBasedOnAField() {
assertEquals("1:8: Second argument of PERCENTILE must be a constant, received [ABS(int)]",
Expand Down