Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

use aggregation engine over distinct engine when overlapping order by #14359

Merged
merged 1 commit into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 46 additions & 2 deletions go/vt/vtgate/planbuilder/operators/queryprojection.go
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,13 @@ func (qp *QueryProjection) addOrderBy(ctx *plancontext.PlanningContext, orderBy

func (qp *QueryProjection) calculateDistinct(ctx *plancontext.PlanningContext) {
if qp.Distinct && !qp.HasAggr {
// grouping and distinct both lead to unique results, so we don't need
qp.groupByExprs = nil
if qp.useGroupingOverDistinct(ctx) {
// if order by exists with overlap with select expressions, we can use the aggregation with ordering over distinct.
qp.Distinct = false
} else {
// grouping and distinct both lead to unique results, so we don't need both - drop the grouping and keep distinct
qp.groupByExprs = nil
}
}

if qp.HasAggr && len(qp.groupByExprs) == 0 {
Expand Down Expand Up @@ -850,6 +855,45 @@ func (qp *QueryProjection) GetColumnCount() int {
return len(qp.SelectExprs) - qp.AddedColumn
}

// orderByOverlapWithSelectExpr reports whether at least one ORDER BY
// expression can be located among the select expressions.
//
// Each entry of qp.OrderExprs is looked up by its simplified expression via
// FindSelectExprIndexForExpr; the first lookup that yields a non-nil index
// makes the method return true. Only the index is inspected here, the second
// value returned by the lookup is not needed.
func (qp *QueryProjection) orderByOverlapWithSelectExpr(ctx *plancontext.PlanningContext) bool {
	for _, ordering := range qp.OrderExprs {
		if selectIdx, _ := qp.FindSelectExprIndexForExpr(ctx, ordering.SimplifiedExpr); selectIdx != nil {
			return true
		}
	}
	// no ORDER BY expression matched any select expression.
	return false
}

// useGroupingOverDistinct decides whether the select expressions should be
// turned into grouping expressions, and performs that conversion when so.
//
// It returns false, leaving qp.groupByExprs untouched, when no ORDER BY
// expression overlaps with the select expressions or when any select
// expression cannot be obtained as an aliased expression. Otherwise it appends
// one GroupBy per select expression that is not already present in
// qp.groupByExprs (compared on the simplified expression) and returns true.
func (qp *QueryProjection) useGroupingOverDistinct(ctx *plancontext.PlanningContext) bool {
	if overlaps := qp.orderByOverlapWithSelectExpr(ctx); !overlaps {
		return false
	}

	// Collected separately so that qp.groupByExprs is only modified once every
	// select expression has been accepted.
	var pending []GroupBy
	for pos, sel := range qp.SelectExprs {
		aliased, err := sel.GetAliasedExpr()
		if err != nil {
			// not an aliased expression, so grouping cannot be used.
			return false
		}

		simplified := qp.GetSimplifiedExpr(aliased.Expr)
		sameExpr := func(existing GroupBy) bool {
			return ctx.SemTable.EqualsExprWithDeps(existing.SimplifiedExpr, simplified)
		}
		// skip columns that are already grouped on.
		if slices.IndexFunc(qp.groupByExprs, sameExpr) >= 0 {
			continue
		}

		// take a per-iteration copy so the stored pointer does not alias the loop variable.
		selectPos := pos
		gb := NewGroupBy(aliased.Expr, simplified, aliased)
		gb.InnerIndex = &selectPos
		pending = append(pending, gb)
	}

	qp.groupByExprs = append(qp.groupByExprs, pending...)
	return true
}

func checkForInvalidGroupingExpressions(expr sqlparser.Expr) error {
return sqlparser.Walk(func(node sqlparser.SQLNode) (bool, error) {
if _, isAggregate := node.(sqlparser.AggrFunc); isAggregate {
Expand Down
33 changes: 13 additions & 20 deletions go/vt/vtgate/planbuilder/testdata/oltp_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -106,28 +106,21 @@
"QueryType": "SELECT",
"Original": "SELECT DISTINCT c FROM sbtest30 WHERE id BETWEEN 1 AND 10 ORDER BY c",
"Instructions": {
"OperatorType": "Sort",
"Variant": "Memory",
"OrderBy": "0 ASC COLLATE latin1_swedish_ci",
"OperatorType": "Aggregate",
"Variant": "Ordered",
"GroupBy": "0 COLLATE latin1_swedish_ci",
"Inputs": [
{
"OperatorType": "Distinct",
"Collations": [
"0: latin1_swedish_ci"
],
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "main",
"Sharded": true
},
"FieldQuery": "select c from sbtest30 where 1 != 1",
"Query": "select distinct c from sbtest30 where id between 1 and 10",
"Table": "sbtest30"
}
]
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "main",
"Sharded": true
},
"FieldQuery": "select c from sbtest30 where 1 != 1 group by c",
"OrderBy": "0 ASC COLLATE latin1_swedish_ci",
"Query": "select c from sbtest30 where id between 1 and 10 group by c order by c asc",
"Table": "sbtest30"
}
]
},
Expand Down
100 changes: 100 additions & 0 deletions go/vt/vtgate/planbuilder/testdata/postprocess_cases.json
Original file line number Diff line number Diff line change
Expand Up @@ -2081,5 +2081,105 @@
"user.user"
]
}
},
{
"comment": "distinct with order by using aggregation engine",
"query": "select distinct col from user where id between :vtg1 and :vtg2 order by col asc",
"plan": {
"QueryType": "SELECT",
"Original": "select distinct col from user where id between :vtg1 and :vtg2 order by col asc",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
"GroupBy": "0",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select col from `user` where 1 != 1 group by col",
"OrderBy": "0 ASC",
"Query": "select col from `user` where id between :vtg1 and :vtg2 group by col order by col asc",
"Table": "`user`"
}
]
},
"TablesUsed": [
"user.user"
]
}
},
{
"comment": "distinct with order by having additional non-order by columns in the selection using aggregation engine",
"query": "select distinct foo, col from user where id between :vtg1 and :vtg2 order by col asc",
"plan": {
"QueryType": "SELECT",
"Original": "select distinct foo, col from user where id between :vtg1 and :vtg2 order by col asc",
"Instructions": {
"OperatorType": "Aggregate",
"Variant": "Ordered",
"GroupBy": "1, (0|2)",
"ResultColumns": 2,
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select foo, col, weight_string(foo) from `user` where 1 != 1 group by col, foo, weight_string(foo)",
"OrderBy": "1 ASC, (0|2) ASC",
"Query": "select foo, col, weight_string(foo) from `user` where id between :vtg1 and :vtg2 group by col, foo, weight_string(foo) order by col asc, foo asc",
"Table": "`user`"
}
]
},
"TablesUsed": [
"user.user"
]
}
},
{
"comment": "distinct with order by having no overlap with the selection columns - using distinct engine",
"query": "select distinct foo from user where id between :vtg1 and :vtg2 order by col asc",
"plan": {
"QueryType": "SELECT",
"Original": "select distinct foo from user where id between :vtg1 and :vtg2 order by col asc",
"Instructions": {
"OperatorType": "Sort",
"Variant": "Memory",
"OrderBy": "1 ASC",
"ResultColumns": 1,
"Inputs": [
{
"OperatorType": "Distinct",
"Collations": [
"(0:2)",
"1"
],
"Inputs": [
{
"OperatorType": "Route",
"Variant": "Scatter",
"Keyspace": {
"Name": "user",
"Sharded": true
},
"FieldQuery": "select foo, col, weight_string(foo) from `user` where 1 != 1",
"Query": "select distinct foo, col, weight_string(foo) from `user` where id between :vtg1 and :vtg2",
"Table": "`user`"
}
]
}
]
},
"TablesUsed": [
"user.user"
]
}
}
]
Loading