Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

planner: complete the memo/group-expression/group logic and add test for derive stats #57973

Merged
merged 25 commits into from
Dec 11, 2024
4 changes: 2 additions & 2 deletions pkg/planner/cascades/memo/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ go_library(
importpath = "github.com/pingcap/tidb/pkg/planner/cascades/memo",
visibility = ["//visibility:public"],
deps = [
"//pkg/expression",
"//pkg/planner/cascades/base",
"//pkg/planner/cascades/pattern",
"//pkg/planner/cascades/util",
"//pkg/planner/core/base",
"//pkg/planner/property",
"//pkg/sessionctx",
"//pkg/util/intest",
],
)
Expand All @@ -36,9 +36,9 @@ go_test(
deps = [
"//pkg/expression",
"//pkg/planner/cascades/base",
"//pkg/planner/core",
"//pkg/planner/core/operator/logicalop",
"//pkg/testkit/testsetup",
"//pkg/util/mock",
"@com_github_stretchr_testify//require",
"@org_uber_go_goleak//:goleak",
],
Expand Down
42 changes: 40 additions & 2 deletions pkg/planner/cascades/memo/group.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@ package memo
import (
"container/list"
"fmt"
"strconv"

"github.com/pingcap/tidb/pkg/planner/cascades/base"
"github.com/pingcap/tidb/pkg/planner/cascades/pattern"
"github.com/pingcap/tidb/pkg/planner/cascades/util"
"github.com/pingcap/tidb/pkg/planner/property"
"github.com/pingcap/tidb/pkg/util/intest"
)

var _ base.HashEquals = &Group{}
Expand Down Expand Up @@ -127,9 +127,47 @@ func (g *Group) GetFirstElem(operand pattern.Operand) *list.Element {
return g.Operand2FirstExpr[operand]
}

// HasLogicalProperty reports whether a logical property has already been
// attached to this group.
func (g *Group) HasLogicalProperty() bool {
	attached := g.logicalProp != nil
	return attached
}

// GetLogicalProperty returns the logical property attached to this group.
// The property is expected to be set already; intest.Assert is used to flag a
// call made before SetLogicalProperty. Use HasLogicalProperty to check first.
func (g *Group) GetLogicalProperty() *property.LogicalProperty {
	prop := g.logicalProp
	intest.Assert(prop != nil)
	return prop
}

// SetLogicalProperty attaches prop to this group as its logical property,
// replacing whatever was stored before.
func (g *Group) SetLogicalProperty(prop *property.LogicalProperty) {
	g.logicalProp = prop
}

// IsExplored reports whether this group has been marked as explored.
func (g *Group) IsExplored() bool {
	explored := g.explored
	return explored
}

// SetExplored marks this group as explored. There is no way to clear the mark
// through this method.
func (g *Group) SetExplored() {
	g.explored = true
}

// String writes a textual form of this group's ID into w.
// Despite its name it does not satisfy fmt.Stringer: it takes a writer and
// returns nothing instead of returning a string.
// NOTE(review): the two writes below look like the removed ("inputs:") and the
// added ("GID:") side of a single changed line — confirm which one should remain.
func (g *Group) String(w util.StrBufferWriter) {
w.WriteString(fmt.Sprintf("inputs:%s", strconv.Itoa(int(g.groupID))))
w.WriteString(fmt.Sprintf("GID:%d", int(g.groupID)))
}

// ForEachGE calls f on every group expression stored in this group, in list
// order, and stops at the first one for which f returns false.
func (g *Group) ForEachGE(f func(ge *GroupExpression) bool) {
	cur := g.logicalExpressions.Front()
	for cur != nil {
		ge := cur.Value.(*GroupExpression)
		if keepGoing := f(ge); !keepGoing {
			return
		}
		cur = cur.Next()
	}
}

// NewGroup creates a new Group with given logical prop.
Expand Down
37 changes: 37 additions & 0 deletions pkg/planner/cascades/memo/group_expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@
package memo

import (
"github.com/pingcap/tidb/pkg/expression"
base2 "github.com/pingcap/tidb/pkg/planner/cascades/base"
"github.com/pingcap/tidb/pkg/planner/cascades/pattern"
"github.com/pingcap/tidb/pkg/planner/cascades/util"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/planner/property"
"github.com/pingcap/tidb/pkg/util/intest"
)

Expand Down Expand Up @@ -122,3 +124,38 @@ func (e *GroupExpression) Init(h base2.Hasher) {
e.Hash64(h)
e.hash64 = h.Sum64()
}

// DeriveLogicalProp derives the logical property (schema and stats) of the
// group that owns this group expression.
//
// It is a no-op when the group already has a logical property. It is not
// recursive: groups are examined and initialized bottom-up, so every input
// group is expected to carry its logical property already, and that property
// is read here to collect the child stats and child schemas.
//
// The new property is attached to the group only after derivation succeeded.
// A DeriveStats failure therefore does not leave a half-initialized property
// (nil schema and stats) on the group, which would make every later call
// return early from the HasLogicalProperty check.
//
// It returns the error reported by the logical plan's DeriveStats, if any.
func (e *GroupExpression) DeriveLogicalProp() (err error) {
	group := e.GetGroup()
	if group.HasLogicalProperty() {
		return nil
	}
	childStats := make([]*property.StatsInfo, 0, len(e.Inputs))
	childSchema := make([]*expression.Schema, 0, len(e.Inputs))
	for _, childG := range e.Inputs {
		childGProp := childG.GetLogicalProperty()
		childStats = append(childStats, childGProp.Stats)
		childSchema = append(childSchema, childGProp.Schema)
	}
	// Currently the schema-producer side of the logical op is still the source
	// of the group schema. Mocked logical plans carry an ID less than 0 (see
	// the ID check below).
	// todo: functional dependency
	tmpSchema := e.LogicalPlan.Schema()
	tmpStats := e.LogicalPlan.StatsInfo()
	// Only logical ops newly created by an XForm need their stats rebuilt; in
	// the memo init phase every logical op already maintains its stats, so
	// they are used as they are.
	if tmpStats == nil && e.LogicalPlan.ID() > 0 {
		// Only the basic stats can be derived bottom-up here; no column groups
		// required by parents can be passed down.
		tmpStats, err = e.LogicalPlan.DeriveStats(childStats, tmpSchema, childSchema, nil)
		if err != nil {
			return err
		}
	}
	// Publish the property only once it is fully populated.
	prop := property.NewLogicalProp()
	prop.Schema = tmpSchema
	prop.Stats = tmpStats
	group.SetLogicalProperty(prop)
	return nil
}
124 changes: 84 additions & 40 deletions pkg/planner/cascades/memo/memo.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,13 @@ import (

base2 "github.com/pingcap/tidb/pkg/planner/cascades/base"
"github.com/pingcap/tidb/pkg/planner/core/base"
"github.com/pingcap/tidb/pkg/sessionctx"
"github.com/pingcap/tidb/pkg/util/intest"
)

// Memo is the main structure of the memo package.
type Memo struct {
// ctx is the context of the memo.
sCtx sessionctx.Context

// groupIDGen is the incremental group id for internal usage.
groupIDGen GroupIDGenerator
groupIDGen *GroupIDGenerator
Copy link
Contributor Author

@AilinKid AilinKid Dec 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the detailed design, the memo and the rules should not be aware of the session context; only the scheduler and the tasks should be. So it is removed here.


// rootGroup is the root group of the memo.
rootGroup *Group
Expand All @@ -48,84 +44,132 @@ type Memo struct {
}

// NewMemo creates an empty memo: a group ID generator created with id 0, an
// empty group list, an empty group-ID index, and a hash-equaler that is later
// handed out through GetHasher.
func NewMemo(ctx sessionctx.Context) *Memo {
func NewMemo() *Memo {
return &Memo{
sCtx: ctx,
groupIDGen: GroupIDGenerator{id: 0},
groupIDGen: &GroupIDGenerator{id: 0},
groups: list.New(),
groupID2Group: make(map[GroupID]*list.Element),
hasher: base2.NewHashEqualer(),
}
}

// GetHasher resets the memo's hasher and returns it, ready to use.
// Every call hands back the same memo-owned hasher, so whatever was fed into
// it by a previous user is discarded by the reset.
func (m *Memo) GetHasher() base2.Hasher {
m.hasher.Reset()
return m.hasher
func (mm *Memo) GetHasher() base2.Hasher {
mm.hasher.Reset()
return mm.hasher
}

// CopyIn copies a logical plan into the memo, storing it as a GroupExpression.
func (m *Memo) CopyIn(target *Group, lp base.LogicalPlan) *GroupExpression {
// CopyIn copies a logical plan tree into the memo, where it is stored as group expressions.
// The logical forest inside the memo is represented as a tree of group expressions. What is handed
// to CopyIn, both when entering the memo and when feeding results back into it, is a logical plan
// whose children may already be group expressions:
//
// entering (init memo)
//
//       lp                          ┌──────────┐
//      /  \                         │ memo:    │
//    lp    lp       --copyIN->      │  G(ge)   │
//   /  \                            │   /  \   │
// ...  ...                          │  G    G  │
//                                   └──────────┘
//
// re-feeding (intake of XForm output)
//
//       lp                          ┌──────────┐
//      /  \                         │ memo:    │
//    GE    lp       --copyIN->      │  G(ge)   │
//          |                        │   /  \   │
//          GE                       │  G    G  │
//                                   └──────────┘
//
// A bare lp is either a newly created logical op, or one whose child has changed so that the group it
// used to belong to is no longer valid; it is turned back into a bare lp so that CopyIn inserts it again.
// A child that already is a *GroupExpression is not copied again: its group is reused directly.
// When target is nil and the expression is inserted, the logical property of the new group is derived
// before returning; an error from that derivation, or from copying a child, is returned to the caller.
func (mm *Memo) CopyIn(target *Group, lp base.LogicalPlan) (*GroupExpression, error) {
// Group the children first.
childGroups := make([]*Group, 0, len(lp.Children()))
for _, child := range lp.Children() {
// todo: child.getGroupExpression.GetGroup directly
groupExpr := m.CopyIn(nil, child)
group := groupExpr.group
intest.Assert(group != nil)
intest.Assert(group != target)
childGroups = append(childGroups, group)
var currentChildG *Group
if ge, ok := child.(*GroupExpression); ok {
// The child is a GroupExpression left unchanged by the rule XForm: reuse its group.
currentChildG = ge.GetGroup()
} else {
// The child is a new or changed logical op: recurse to copy it in and obtain its group.
ge, err := mm.CopyIn(nil, child)
if err != nil {
return nil, err
}
currentChildG = ge.GetGroup()
}
intest.Assert(currentChildG != nil)
intest.Assert(currentChildG != target)
childGroups = append(childGroups, currentChildG)
}

hasher := m.GetHasher()
hasher := mm.GetHasher()
groupExpr := NewGroupExpression(lp, childGroups)
groupExpr.Init(hasher)
m.insertGroupExpression(groupExpr, target)
// todo: new group need to derive the logical property.
return groupExpr
if mm.InsertGroupExpression(groupExpr, target) && target == nil {
// Derive the logical property for the newly created group.
err := groupExpr.DeriveLogicalProp()
if err != nil {
return nil, err
}
}
return groupExpr, nil
}

// GetGroups returns the list of all groups in the memo.
// The memo's own list is returned, not a copy.
func (m *Memo) GetGroups() *list.List {
return m.groups
func (mm *Memo) GetGroups() *list.List {
return mm.groups
}

// GetGroupID2Group returns the memo's index from group ID to the element of
// the group list that holds that group. The memo's own map is returned, not a copy.
func (m *Memo) GetGroupID2Group() map[GroupID]*list.Element {
return m.groupID2Group
func (mm *Memo) GetGroupID2Group() map[GroupID]*list.Element {
return mm.groupID2Group
}

// GetRootGroup returns the root group of the memo.
// The root group is assigned by Init; NewMemo leaves it unset.
func (m *Memo) GetRootGroup() *Group {
return m.rootGroup
func (mm *Memo) GetRootGroup() *Group {
return mm.rootGroup
}

// InsertGroupExpression inserts groupExpr into the target group. When target is nil,
// a new group is created, appended to the memo's group list, registered in the
// group-ID index, and used as the target.
// The returned bool reports whether groupExpr was inserted.
// NOTE(review): it is always true for now, since duplicate detection and group merging
// are not implemented yet (see the todo below); callers tell "new group" apart by
// checking whether they passed a nil target.
func (m *Memo) insertGroupExpression(groupExpr *GroupExpression, target *Group) bool {
func (mm *Memo) InsertGroupExpression(groupExpr *GroupExpression, target *Group) bool {
// For group merging: groupExpr is a new group expression whose owning group is not determined yet.
// Its hash should be used to find out whether an identical group expression already exists.
// If one exists and its group differs from target, the two groups should be merged.
// todo: merge group
if target == nil {
target = m.NewGroup()
m.groups.PushBack(target)
m.groupID2Group[target.groupID] = m.groups.Back()
target = mm.NewGroup()
mm.groups.PushBack(target)
mm.groupID2Group[target.groupID] = mm.groups.Back()
}
target.Insert(groupExpr)
return true
}

// NewGroup creates a new group with no logical property (nil is passed to the
// package-level NewGroup) and assigns it the next ID from the memo's group ID generator.
// The group is not added to the memo's group list or ID index here.
func (m *Memo) NewGroup() *Group {
func (mm *Memo) NewGroup() *Group {
group := NewGroup(nil)
group.groupID = m.groupIDGen.NextGroupID()
group.groupID = mm.groupIDGen.NextGroupID()
return group
}

// Init initializes the memo with a logical plan, converting the logical plan tree into a group tree.
// The group of the resulting root group expression becomes the memo's root group, and that
// group expression is returned. The memo is expected to be empty when Init is called.
func (m *Memo) Init(plan base.LogicalPlan) *GroupExpression {
intest.Assert(m.groups.Len() == 0)
gE := m.CopyIn(nil, plan)
m.rootGroup = gE.GetGroup()
func (mm *Memo) Init(plan base.LogicalPlan) *GroupExpression {
intest.Assert(mm.groups.Len() == 0)
// NOTE(review): the error returned by CopyIn is discarded; on failure gE is nil and is
// still used below — confirm CopyIn cannot fail during init, or surface the error.
gE, _ := mm.CopyIn(nil, plan)
mm.rootGroup = gE.GetGroup()
return gE
}

// ForEachGroup calls f on every group of the memo, in list order, and stops at
// the first group for which f returns false.
func (mm *Memo) ForEachGroup(f func(g *Group) bool) {
	cur := mm.GetGroups().Front()
	for cur != nil {
		group := cur.Value.(*Group)
		if keepGoing := f(group); !keepGoing {
			return
		}
		cur = cur.Next()
	}
}
Loading