Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

plan: add hint to force to choose hash join. #5315

Merged
merged 9 commits into from
Dec 8, 2017
Merged
4 changes: 4 additions & 0 deletions executor/join_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,10 @@ func (s *testSuite) TestJoin(c *C) {
// Test that two conflict hints will return error.
_, err = tk.Exec("select /*+ TIDB_INLJ(t) TIDB_SMJ(t) */ * from t join t1 on t.a=t1.a")
c.Assert(err, NotNil)
// The select list ("*") is required: without it the statement fails in the
// parser and the conflicting-hint check in the planner is never exercised.
_, err = tk.Exec("select /*+ TIDB_INLJ(t) TIDB_HJ(t) */ * from t join t1 on t.a=t1.a")
c.Assert(err, NotNil)
_, err = tk.Exec("select /*+ TIDB_SMJ(t) TIDB_HJ(t) */ * from t join t1 on t.a=t1.a")
c.Assert(err, NotNil)

tk.MustExec("drop table if exists t")
tk.MustExec("create table t(a int)")
Expand Down
1 change: 1 addition & 0 deletions parser/misc.go
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ var tokenMap = map[string]int{
"THAN": than,
"THEN": then,
"TIDB": tidb,
"TIDB_HJ": tidbHJ,
"TIDB_INLJ": tidbINLJ,
"TIDB_SMJ": tidbSMJ,
"TIME": timeType,
Expand Down
7 changes: 6 additions & 1 deletion parser/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ import (
statsHistograms "STATS_HISTOGRAMS"
statsBuckets "STATS_BUCKETS"
tidb "TIDB"
tidbHJ "TIDB_HJ"
tidbSMJ "TIDB_SMJ"
tidbINLJ "TIDB_INLJ"

Expand Down Expand Up @@ -2500,7 +2501,7 @@ UnReservedKeyword:
| "MAX_USER_CONNECTIONS" | "REPLICATION" | "CLIENT" | "SLAVE" | "RELOAD" | "TEMPORARY" | "ROUTINE" | "EVENT" | "ALGORITHM" | "DEFINER" | "INVOKER" | "MERGE" | "TEMPTABLE" | "UNDEFINED" | "SECURITY" | "CASCADED"

TiDBKeyword:
"ADMIN" | "CANCEL" | "DDL" | "JOBS" | "STATS" | "STATS_META" | "STATS_HISTOGRAMS" | "STATS_BUCKETS" | "TIDB" | "TIDB_SMJ" | "TIDB_INLJ"
"ADMIN" | "CANCEL" | "DDL" | "JOBS" | "STATS" | "STATS_META" | "STATS_HISTOGRAMS" | "STATS_BUCKETS" | "TIDB" | "TIDB_HJ" | "TIDB_SMJ" | "TIDB_INLJ"

NotKeywordToken:
"ADDDATE" | "BIT_AND" | "BIT_OR" | "BIT_XOR" | "CAST" | "COUNT" | "CURTIME" | "DATE_ADD" | "DATE_SUB" | "EXTRACT" | "GET_FORMAT" | "GROUP_CONCAT" | "MIN" | "MAX" | "NOW" | "POSITION"
Expand Down Expand Up @@ -4299,6 +4300,10 @@ TableOptimizerHintOpt:
{
$$ = &ast.TableOptimizerHint{HintName: model.NewCIStr($1), Tables: $3.([]model.CIStr)}
}
| tidbHJ '(' HintTableList ')'
{
$$ = &ast.TableOptimizerHint{HintName: model.NewCIStr($1), Tables: $3.([]model.CIStr)}
}

SelectStmtCalcFoundRows:
{
Expand Down
15 changes: 15 additions & 0 deletions parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1589,6 +1589,21 @@ func (s *testParserSuite) TestOptimizerHints(c *C) {
c.Assert(hints[1].HintName.L, Equals, "tidb_inlj")
c.Assert(hints[1].Tables[0].L, Equals, "t3")
c.Assert(hints[1].Tables[1].L, Equals, "t4")

stmt, err = parser.Parse("select /*+ TIDB_HJ(t1, T2) tidb_hj(t3, t4) */ c1, c2 from t1, t2 where t1.c1 = t2.c1", "", "")
c.Assert(err, IsNil)
selectStmt = stmt[0].(*ast.SelectStmt)

hints = selectStmt.TableHints
c.Assert(len(hints), Equals, 2)
c.Assert(hints[0].HintName.L, Equals, "tidb_hj")
c.Assert(len(hints[0].Tables), Equals, 2)
c.Assert(hints[0].Tables[0].L, Equals, "t1")
c.Assert(hints[0].Tables[1].L, Equals, "t2")

c.Assert(hints[1].HintName.L, Equals, "tidb_hj")
c.Assert(hints[1].Tables[0].L, Equals, "t3")
c.Assert(hints[1].Tables[1].L, Equals, "t4")
}

func (s *testParserSuite) TestType(c *C) {
Expand Down
5 changes: 5 additions & 0 deletions plan/dag_plan_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,11 @@ func (s *testPlanSuite) TestDAGPlanBuilderJoin(c *C) {
sql: "select * from t t1 join t t2 on t1.b = t2.a order by t1.a limit 1",
best: "IndexJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.b,t2.a)->Limit",
},
// Test hash join's hint.
{
sql: "select /*+ TIDB_HJ(t1, t2) */ * from t t1 join t t2 on t1.b = t2.a order by t1.a limit 1",
best: "LeftHashJoin{TableReader(Table(t))->TableReader(Table(t))}(t1.b,t2.a)->TopN([t1.a],0,1)",
},
{
sql: "select * from t t1 left join t t2 on t1.b = t2.a where 1 = 1 limit 1",
best: "IndexJoin{TableReader(Table(t)->Limit)->TableReader(Table(t))}(t1.b,t2.a)->Limit",
Expand Down
32 changes: 21 additions & 11 deletions plan/gen_physical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,20 @@ func (p *LogicalJoin) getHashSemiJoin() PhysicalPlan {
return semiJoin
}

// getHashJoins enumerates the hash join candidates for this logical join.
// Each candidate is built by getHashJoin, whose argument (named smallTable
// there) is chosen from the join type:
//   - semi-join variants and left outer join: 1
//   - right outer join: 0
//   - inner join: both, 1 first and then 0
//
// Any other join type yields an empty, non-nil slice.
func (p *LogicalJoin) getHashJoins() []PhysicalPlan {
	var smallTables []int
	switch p.JoinType {
	case InnerJoin:
		smallTables = []int{1, 0}
	case RightOuterJoin:
		smallTables = []int{0}
	case SemiJoin, AntiSemiJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin, LeftOuterJoin:
		smallTables = []int{1}
	}

	candidates := make([]PhysicalPlan, 0, 2)
	for _, idx := range smallTables {
		candidates = append(candidates, p.getHashJoin(idx))
	}
	return candidates
}

func (p *LogicalJoin) getHashJoin(smallTable int) PhysicalPlan {
hashJoin := PhysicalHashJoin{
EqualConditions: p.EqualConditions,
Expand Down Expand Up @@ -252,8 +266,8 @@ func (p *LogicalJoin) tryToGetIndexJoin() ([]PhysicalPlan, bool) {
return nil, false
}
plans := make([]PhysicalPlan, 0, 2)
leftOuter := (p.preferINLJ & preferLeftAsOuter) > 0
rightOuter := (p.preferINLJ & preferRightAsOuter) > 0
leftOuter := (p.preferJoinType & preferLeftAsIndexOuter) > 0
rightOuter := (p.preferJoinType & preferRightAsIndexOuter) > 0
switch p.JoinType {
case SemiJoin, AntiSemiJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin, LeftOuterJoin:
join := p.getIndexJoinByOuterIdx(0)
Expand Down Expand Up @@ -296,7 +310,7 @@ func (p *LogicalJoin) tryToGetIndexJoin() ([]PhysicalPlan, bool) {

func (p *LogicalJoin) generatePhysicalPlans() []PhysicalPlan {
mergeJoins := p.getMergeJoin()
if p.preferMergeJoin && len(mergeJoins) > 0 {
if (p.preferJoinType&preferMergeJoin) > 0 && len(mergeJoins) > 0 {
return mergeJoins
}
joins := make([]PhysicalPlan, 0, 5)
Expand All @@ -308,15 +322,11 @@ func (p *LogicalJoin) generatePhysicalPlans() []PhysicalPlan {
}
joins = append(joins, indexJoins...)

switch p.JoinType {
case SemiJoin, AntiSemiJoin, LeftOuterSemiJoin, AntiLeftOuterSemiJoin, LeftOuterJoin:
joins = append(joins, p.getHashJoin(1))
case RightOuterJoin:
joins = append(joins, p.getHashJoin(0))
case InnerJoin:
joins = append(joins, p.getHashJoin(1))
joins = append(joins, p.getHashJoin(0))
hashJoins := p.getHashJoins()
if (p.preferJoinType & preferHashJoin) > 0 {
return hashJoins
}
joins = append(joins, hashJoins...)
return joins
}

Expand Down
6 changes: 3 additions & 3 deletions plan/join_reorder.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package plan

import (
"math/bits"
"sort"

log "github.com/Sirupsen/logrus"
Expand All @@ -27,9 +28,8 @@ func tryToGetJoinGroup(j *LogicalJoin) ([]LogicalPlan, bool) {
// Ignore reorder if:
// 1. already reordered
// 2. not inner join
// 3. forced merge join
// 4. forced index nested loop join
if j.reordered || !j.cartesianJoin || j.preferMergeJoin || j.preferINLJ > 0 {
// 3. forced to choose join type
if j.reordered || !j.cartesianJoin || bits.OnesCount(j.preferJoinType) > 0 {
return nil, false
}
lChild := j.children[0].(LogicalPlan)
Expand Down
40 changes: 31 additions & 9 deletions plan/logical_plan_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package plan
import (
"fmt"
"math"
"math/bits"
"reflect"
"strings"
"unicode"
Expand All @@ -42,6 +43,8 @@ const (
TiDBMergeJoin = "tidb_smj"
// TiDBIndexNestedLoopJoin is hint enforce index nested loop join.
TiDBIndexNestedLoopJoin = "tidb_inlj"
// TiDBHashJoin is hint enforce hash join.
TiDBHashJoin = "tidb_hj"
)

const (
Expand Down Expand Up @@ -257,15 +260,21 @@ func (b *planBuilder) buildJoin(join *ast.Join) LogicalPlan {
if b.TableHints() != nil {
leftAlias := extractTableAlias(leftPlan)
rightAlias := extractTableAlias(rightPlan)
joinPlan.preferMergeJoin = b.TableHints().ifPreferMergeJoin(leftAlias, rightAlias)
if b.TableHints().ifPreferMergeJoin(leftAlias, rightAlias) {
joinPlan.preferJoinType |= preferMergeJoin
}
if b.TableHints().ifPreferHashJoin(leftAlias, rightAlias) {
joinPlan.preferJoinType |= preferHashJoin
}
if b.TableHints().ifPreferINLJ(leftAlias) {
joinPlan.preferINLJ = joinPlan.preferINLJ | preferLeftAsOuter
joinPlan.preferJoinType |= preferLeftAsIndexOuter
}
if b.TableHints().ifPreferINLJ(rightAlias) {
joinPlan.preferINLJ = joinPlan.preferINLJ | preferRightAsOuter
joinPlan.preferJoinType |= preferRightAsIndexOuter
}
if joinPlan.preferMergeJoin && joinPlan.preferINLJ > 0 {
b.err = errors.New("Optimizer Hints is conflict")
// If there're multiple join type and one of them is not the index join hints, then is conflict.
if bits.OnesCount(joinPlan.preferJoinType) > 1 && (joinPlan.preferJoinType^preferRightAsIndexOuter^preferLeftAsIndexOuter) > 0 {
b.err = errors.New("Join hints are conflict, you can only specify one type of join")
return nil
}
}
Expand Down Expand Up @@ -1411,21 +1420,24 @@ func (b *planBuilder) unfoldWildStar(p LogicalPlan, selectFields []*ast.SelectFi
}

func (b *planBuilder) pushTableHints(hints []*ast.TableOptimizerHint) bool {
var sortMergeTables, INLJTables []model.CIStr
var sortMergeTables, INLJTables, hashJoinTables []model.CIStr
for _, hint := range hints {
switch hint.HintName.L {
case TiDBMergeJoin:
sortMergeTables = append(sortMergeTables, hint.Tables...)
case TiDBIndexNestedLoopJoin:
INLJTables = append(INLJTables, hint.Tables...)
case TiDBHashJoin:
hashJoinTables = append(hashJoinTables, hint.Tables...)
default:
// ignore hints that not implemented
}
}
if len(sortMergeTables) != 0 || len(INLJTables) != 0 {
if len(sortMergeTables)+len(INLJTables)+len(hashJoinTables) > 0 {
b.tableHintInfo = append(b.tableHintInfo, tableHintInfo{
sortMergeJoinTables: sortMergeTables,
indexNestedLoopJoinTables: INLJTables,
hashJoinTables: hashJoinTables,
})
return true
}
Expand Down Expand Up @@ -1800,10 +1812,20 @@ func (b *planBuilder) buildSemiJoin(outerPlan, innerPlan LogicalPlan, onConditio
if b.TableHints() != nil {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this branch be extracted into a function?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@winoros Can this branch be extracted into a function?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think there is much need to do this.

outerAlias := extractTableAlias(outerPlan)
innerAlias := extractTableAlias(innerPlan)
joinPlan.preferMergeJoin = b.TableHints().ifPreferMergeJoin(outerAlias, innerAlias)
if b.TableHints().ifPreferMergeJoin(outerAlias, innerAlias) {
joinPlan.preferJoinType |= preferMergeJoin
}
if b.TableHints().ifPreferHashJoin(outerAlias, innerAlias) {
joinPlan.preferJoinType |= preferHashJoin
}
// semi join's outer is always the left side.
if b.TableHints().ifPreferINLJ(outerAlias) {
joinPlan.preferINLJ = preferLeftAsOuter
// Accumulate with |= instead of overwriting: a merge/hash preference recorded
// above must stay visible so the OnesCount conflict check below can reject
// an INLJ hint combined with a TIDB_SMJ/TIDB_HJ hint.
joinPlan.preferJoinType |= preferLeftAsIndexOuter
}
// If there're multiple join hints, they're conflict.
if bits.OnesCount(joinPlan.preferJoinType) > 1 {
b.err = errors.New("Join hints are conflict, you can only specify one type of join")
return nil
}
}
return joinPlan
Expand Down
15 changes: 8 additions & 7 deletions plan/logical_plans.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,20 +80,21 @@ func (tp JoinType) String() string {
}

const (
preferLeftAsOuter = 1 << iota
preferRightAsOuter
preferLeftAsIndexOuter = 1 << iota
preferRightAsIndexOuter
preferHashJoin
preferMergeJoin
)

// LogicalJoin is the logical join plan.
type LogicalJoin struct {
*basePlan
baseLogicalPlan

JoinType JoinType
reordered bool
cartesianJoin bool
preferINLJ int
preferMergeJoin bool
JoinType JoinType
reordered bool
cartesianJoin bool
preferJoinType uint

EqualConditions []*expression.ScalarFunction
LeftConditions expression.CNFExprs
Expand Down
40 changes: 20 additions & 20 deletions plan/planbuilder.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,35 +86,35 @@ type visitInfo struct {
// tableHintInfo records the table names listed in join optimizer hints,
// grouped by the join algorithm the hint asks for.
type tableHintInfo struct {
// indexNestedLoopJoinTables holds the tables named in TIDB_INLJ hints.
indexNestedLoopJoinTables []model.CIStr
// sortMergeJoinTables holds the tables named in TIDB_SMJ hints.
sortMergeJoinTables []model.CIStr
// hashJoinTables holds the tables named in TIDB_HJ hints.
hashJoinTables []model.CIStr
}

func (info *tableHintInfo) ifPreferMergeJoin(tableNames ...*model.CIStr) bool {
// Only need either side matches one on the list.
// Even though you can put 2 tables on the list,
// it doesn't mean optimizer will reorder to make them
// join directly.
// Which it joins on with depend on sequence of traverse
// and without reorder, user might adjust themselves.
// This is similar to MySQL hints.
for _, tableName := range tableNames {
if tableName == nil {
continue
}
for _, curEntry := range info.sortMergeJoinTables {
if curEntry.L == tableName.L {
return true
}
}
}
return false
return info.matchTableName(tableNames, info.sortMergeJoinTables)
}

// ifPreferHashJoin reports whether any of the given table names is listed
// in a TIDB_HJ hint. The name comparison is delegated to matchTableName.
func (info *tableHintInfo) ifPreferHashJoin(tableNames ...*model.CIStr) bool {
	hinted := info.hashJoinTables
	return info.matchTableName(tableNames, hinted)
}

func (info *tableHintInfo) ifPreferINLJ(tableNames ...*model.CIStr) bool {
for _, tableName := range tableNames {
return info.matchTableName(tableNames, info.indexNestedLoopJoinTables)
}

// matchTableName checks whether any of the given tables is named in tablesInHints.
// It is enough for either side of the join to match one entry on the list.
// Even though you can put 2 tables on the list,
// it doesn't mean the optimizer will reorder joins to make them
// join directly.
// Which tables end up joined depends on the traversal order,
// and since no reordering happens, users may need to adjust the query themselves.
// This is similar to MySQL hints.
func (info *tableHintInfo) matchTableName(tables []*model.CIStr, tablesInHints []model.CIStr) bool {
for _, tableName := range tables {
if tableName == nil {
continue
}
for _, curEntry := range info.indexNestedLoopJoinTables {
for _, curEntry := range tablesInHints {
if curEntry.L == tableName.L {
return true
}
Expand Down