Skip to content

Commit

Permalink
SQL: Generate relevant error message when grouping functions are not …
Browse files Browse the repository at this point in the history
…used in GROUP BY (#38017)

* Add checks enforcing the restriction that grouping functions must be placed inside GROUP BY
* Fix a bug where GROUP BY HISTOGRAM (not using an alias) wasn't recognized
properly in the Verifier because function equality was not working correctly.

(cherry picked from commit 6968f09)
  • Loading branch information
astefan committed Feb 4, 2019
1 parent de7accb commit d05c24c
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 19 deletions.
23 changes: 23 additions & 0 deletions x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,29 @@ SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h
0 |10
;

histogramGroupByWithoutAlias
schema::h:ts|c:l
SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY HISTOGRAM(birth_date, INTERVAL 1 YEAR) ORDER BY h DESC;

h | c
--------------------+---------------
1964-02-02T00:00:00Z|5
1963-02-07T00:00:00Z|7
1962-02-12T00:00:00Z|6
1961-02-17T00:00:00Z|8
1960-02-23T00:00:00Z|7
1959-02-28T00:00:00Z|9
1958-03-05T00:00:00Z|6
1957-03-10T00:00:00Z|6
1956-03-15T00:00:00Z|4
1955-03-21T00:00:00Z|4
1954-03-26T00:00:00Z|7
1953-03-31T00:00:00Z|10
1952-04-05T00:00:00Z|10
1951-04-11T00:00:00Z|1
null |10
;

countAll
schema::all_names:l|c:l
SELECT COUNT(ALL first_name) all_names, COUNT(*) c FROM test_emp;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.elasticsearch.xpack.sql.expression.function.Functions;
import org.elasticsearch.xpack.sql.expression.function.Score;
import org.elasticsearch.xpack.sql.expression.function.aggregate.AggregateFunctionAttribute;
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunction;
import org.elasticsearch.xpack.sql.expression.function.grouping.GroupingFunctionAttribute;
import org.elasticsearch.xpack.sql.expression.function.scalar.ScalarFunction;
import org.elasticsearch.xpack.sql.expression.predicate.conditional.ConditionalFunction;
Expand Down Expand Up @@ -225,6 +226,7 @@ Collection<Failure> verify(LogicalPlan plan) {
validateInExpression(p, localFailures);
validateConditional(p, localFailures);

checkGroupingFunctionInGroupBy(p, localFailures);
checkFilterOnAggs(p, localFailures);
checkFilterOnGrouping(p, localFailures);

Expand Down Expand Up @@ -560,6 +562,24 @@ private static boolean checkGroupMatch(Expression e, Node<?> source, List<Expres
}
return false;
}

/**
 * Reports every grouping function (e.g. HISTOGRAM) that appears in the output of a query
 * without also being one of its GROUP BY keys.
 *
 * @param p             the plan node under verification; only {@code Project} and {@code Aggregate} nodes are inspected
 * @param localFailures collector that receives one failure per offending grouping function
 */
private static void checkGroupingFunctionInGroupBy(LogicalPlan p, Set<Failure> localFailures) {
    if (p instanceof Project) {
        // a plain projection means the query has no GROUP BY, so any grouping function in it is misplaced
        for (Expression projection : ((Project) p).projections()) {
            projection.forEachDown(
                    grouping -> localFailures.add(
                            fail(grouping, "[%s] needs to be part of the grouping", Expressions.name(grouping))),
                    GroupingFunction.class);
        }
        return;
    }

    if (p instanceof Aggregate == false) {
        return;
    }

    // the query does have a GROUP BY: each grouping function in the output must match one of the grouping keys
    Aggregate aggregate = (Aggregate) p;
    for (Expression output : aggregate.aggregates()) {
        output.forEachDown(grouping -> {
            boolean partOfGroupBy = aggregate.groupings().isEmpty() == false
                    && Expressions.anyMatch(aggregate.groupings(),
                            key -> key instanceof Function && grouping.functionEquals((Function) key));
            if (partOfGroupBy == false) {
                localFailures.add(fail(grouping, "[%s] needs to be part of the grouping", Expressions.name(grouping)));
            }
        }, GroupingFunction.class);
    }
}

private static void checkFilterOnAggs(LogicalPlan p, Set<Failure> localFailures) {
if (p instanceof Filter) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,6 @@ public GroupingFunctionAttribute toAttribute() {
return lazyAttribute;
}

/**
 * Rebuilds this grouping function around a single replacement child.
 *
 * @param newChildren replacement children; must contain exactly one expression
 * @return the result of {@link #replaceChild(Expression)} applied to that expression
 * @throws IllegalArgumentException if the list does not hold exactly one element
 */
@Override
public final GroupingFunction replaceChildren(List<Expression> newChildren) {
    final int received = newChildren.size();
    if (received == 1) {
        return replaceChild(newChildren.get(0));
    }
    throw new IllegalArgumentException("expected [1] child but received [" + received + "]");
}

/**
 * Returns the grouping function to use once the single child has been replaced by {@code newChild}.
 * {@link #replaceChildren(List)} delegates here after checking that exactly one child was supplied.
 */
protected abstract GroupingFunction replaceChild(Expression newChild);

@Override
protected Pipe makePipe() {
// unresolved AggNameInput (should always get replaced by the folder)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,14 @@
import org.elasticsearch.xpack.sql.expression.Expressions;
import org.elasticsearch.xpack.sql.expression.Expressions.ParamOrdinal;
import org.elasticsearch.xpack.sql.expression.Literal;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.tree.NodeInfo;
import org.elasticsearch.xpack.sql.tree.Location;
import org.elasticsearch.xpack.sql.type.DataType;
import org.elasticsearch.xpack.sql.type.DataTypes;

import java.time.ZoneId;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

public class Histogram extends GroupingFunction {
Expand All @@ -24,8 +26,8 @@ public class Histogram extends GroupingFunction {
private final ZoneId zoneId;

public Histogram(Location location, Expression field, Expression interval, ZoneId zoneId) {
super(location, field);
this.interval = (Literal) interval;
super(location, field, Collections.singletonList(interval));
this.interval = Literal.of(interval);
this.zoneId = zoneId;
}

Expand All @@ -51,10 +53,13 @@ protected TypeResolution resolveType() {

return resolution;
}

@Override
protected GroupingFunction replaceChild(Expression newChild) {
return new Histogram(location(), newChild, interval, zoneId);
public final GroupingFunction replaceChildren(List<Expression> newChildren) {
if (newChildren.size() != 2) {
throw new IllegalArgumentException("expected [2] children but received [" + newChildren.size() + "]");
}
return new Histogram(location(), newChildren.get(0), newChildren.get(1), zoneId);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ public void testAggsInWhere() {
}

public void testHistogramInFilter() {
assertEquals("1:63: Cannot filter on grouping function [HISTOGRAM(date)], use its argument instead",
assertEquals("1:63: Cannot filter on grouping function [HISTOGRAM(date,INTERVAL 1 MONTH)], use its argument instead",
error("SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h FROM test WHERE "
+ "HISTOGRAM(date, INTERVAL 1 MONTH) > CAST('2000-01-01' AS DATE) GROUP BY h"));
}
Expand All @@ -520,15 +520,50 @@ public void testHistogramInHaving() {

public void testGroupByScalarOnTopOfGrouping() {
assertEquals(
"1:14: Cannot combine [HISTOGRAM(date)] grouping function inside GROUP BY, "
+ "found [MONTH_OF_YEAR(HISTOGRAM(date) [Z])]; consider moving the expression inside the histogram",
"1:14: Cannot combine [HISTOGRAM(date,INTERVAL 1 MONTH)] grouping function inside GROUP BY, "
+ "found [MONTH_OF_YEAR(HISTOGRAM(date,INTERVAL 1 MONTH) [Z])]; consider moving the expression inside the histogram",
error("SELECT MONTH(HISTOGRAM(date, INTERVAL 1 MONTH)) AS h FROM test GROUP BY h"));
}

/** An aggregate (MAX) used as the HISTOGRAM argument must be rejected. */
public void testAggsInHistogram() {
    String query = "SELECT MAX(date) FROM test GROUP BY HISTOGRAM(MAX(int), 1)";
    String expected = "1:47: Cannot use an aggregate [MAX] for grouping";
    assertEquals(expected, error(query));
}

/** An aliased HISTOGRAM selected from a query that has no GROUP BY at all must be rejected. */
public void testHistogramNotInGrouping() {
    String query = "SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h FROM test";
    String expected = "1:8: [HISTOGRAM(date,INTERVAL 1 MONTH)] needs to be part of the grouping";
    assertEquals(expected, error(query));
}

/** Selecting COUNT(*) next to the HISTOGRAM does not lift the requirement for a GROUP BY. */
public void testHistogramNotInGroupingWithCount() {
    String query = "SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h, COUNT(*) FROM test";
    String expected = "1:8: [HISTOGRAM(date,INTERVAL 1 MONTH)] needs to be part of the grouping";
    assertEquals(expected, error(query));
}

/** With MAX listed first, the error position (column 19) still points at the HISTOGRAM. */
public void testHistogramNotInGroupingWithMaxFirst() {
    String query = "SELECT MAX(date), HISTOGRAM(date, INTERVAL 1 MONTH) AS h FROM test";
    String expected = "1:19: [HISTOGRAM(date,INTERVAL 1 MONTH)] needs to be part of the grouping";
    assertEquals(expected, error(query));
}

/** Same as the aliased case, but the HISTOGRAM is selected without an alias. */
public void testHistogramWithoutAliasNotInGrouping() {
    String query = "SELECT HISTOGRAM(date, INTERVAL 1 MONTH) FROM test";
    String expected = "1:8: [HISTOGRAM(date,INTERVAL 1 MONTH)] needs to be part of the grouping";
    assertEquals(expected, error(query));
}

/** Of two selected histograms, only the one missing from GROUP BY (the DAY one, at column 48) is reported. */
public void testTwoHistogramsNotInGrouping() {
    String query = "SELECT HISTOGRAM(date, INTERVAL 1 MONTH) AS h, HISTOGRAM(date, INTERVAL 1 DAY) FROM test GROUP BY h";
    String expected = "1:48: [HISTOGRAM(date,INTERVAL 1 DAY)] needs to be part of the grouping";
    assertEquals(expected, error(query));
}

/** Grouping by the plain field is not enough: the HISTOGRAM itself has to be a grouping key. */
public void testHistogramNotInGrouping_WithGroupByField() {
    String query = "SELECT HISTOGRAM(date, INTERVAL 1 MONTH) FROM test GROUP BY date";
    String expected = "1:8: [HISTOGRAM(date,INTERVAL 1 MONTH)] needs to be part of the grouping";
    assertEquals(expected, error(query));
}

/** A HISTOGRAM nested inside a scalar function (MONTH) is still detected; column 14 is the nested call. */
public void testScalarOfHistogramNotInGrouping() {
    String query = "SELECT MONTH(HISTOGRAM(date, INTERVAL 1 MONTH)) FROM test";
    String expected = "1:14: [HISTOGRAM(date,INTERVAL 1 MONTH)] needs to be part of the grouping";
    assertEquals(expected, error(query));
}

public void testErrorMessageForPercentileWithSecondArgBasedOnAField() {
assertEquals("1:8: Second argument of PERCENTILE must be a constant, received [ABS(int)]",
Expand Down

0 comments on commit d05c24c

Please sign in to comment.