-
Notifications
You must be signed in to change notification settings - Fork 5.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
executor/join : use shallow copy for join. #7433
Changes from 42 commits
a576cc0
79b74e7
d9fe98b
8fa2ad8
33ae5a0
255c64b
0973373
055a41a
c5eeff9
e7c6c64
ce8eb4f
499b6f9
2a295a1
9d82447
ef85948
45b4631
5bf279f
9690506
8db639f
7a55ff5
05c1273
66a133c
dadb047
b4192e4
4096997
b802941
c5cfdf1
947f9d4
24ab90e
2b8d896
3f82d2b
604e49d
e5f4cbe
abbc2c9
e1dd31d
593b31c
3a6fbb7
600fdc3
f4fbd70
23eaf1e
e9ef7dd
21b5417
0aadbf6
0de2063
c681658
f8ccdf2
c7b2301
1e8a9f0
b939b3b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -91,6 +91,7 @@ func newJoiner(ctx sessionctx.Context, joinType plan.JoinType, | |
colTypes := make([]*types.FieldType, 0, len(lhsColTypes)+len(rhsColTypes)) | ||
colTypes = append(colTypes, lhsColTypes...) | ||
colTypes = append(colTypes, rhsColTypes...) | ||
base.shadowRow = chunk.MutRowFromTypes(colTypes) | ||
base.chk = chunk.NewChunkWithCapacity(colTypes, ctx.GetSessionVars().MaxChunkSize) | ||
base.selected = make([]bool, 0, chunk.InitialCapacity) | ||
if joinType == plan.LeftOuterJoin || joinType == plan.RightOuterJoin { | ||
|
@@ -125,6 +126,7 @@ type baseJoiner struct { | |
defaultInner chunk.Row | ||
outerIsRight bool | ||
chk *chunk.Chunk | ||
shadowRow chunk.MutRow | ||
selected []bool | ||
maxChunkSize int | ||
} | ||
|
@@ -142,6 +144,16 @@ func (j *baseJoiner) makeJoinRowToChunk(chk *chunk.Chunk, lhs, rhs chunk.Row) { | |
chk.AppendPartialRow(lhs.Len(), rhs) | ||
} | ||
|
||
// makeJoinRow combines the inner and outer rows into shadowRow. | ||
// It shallow copies the inner and outer row data into shadowRow instead of deep copying it. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. makeJoinRow shallow copies `inner` and `outer` into `shallowRow`. |
||
func (j *baseJoiner) makeJoinRow(isRightJoin bool, inner, outer chunk.Row) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done. |
||
if !isRightJoin { | ||
inner, outer = outer, inner | ||
} | ||
j.shadowRow.ShallowCopyPartialRow(0, inner) | ||
j.shadowRow.ShallowCopyPartialRow(inner.Len(), outer) | ||
} | ||
|
||
func (j *baseJoiner) filter(input, output *chunk.Chunk) (matched bool, err error) { | ||
j.selected, err = expression.VectorizedFilter(j.ctx, j.conditions, chunk.NewIterator4Chunk(input), j.selected) | ||
if err != nil { | ||
|
@@ -173,14 +185,9 @@ func (j *semiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk *chu | |
} | ||
|
||
for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { | ||
j.chk.Reset() | ||
if j.outerIsRight { | ||
j.makeJoinRowToChunk(j.chk, inner, outer) | ||
} else { | ||
j.makeJoinRowToChunk(j.chk, outer, inner) | ||
} | ||
j.makeJoinRow(j.outerIsRight, inner, outer) | ||
|
||
matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0)) | ||
matched, err = expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) | ||
if err != nil { | ||
return false, errors.Trace(err) | ||
} | ||
|
@@ -212,14 +219,9 @@ func (j *antiSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk | |
} | ||
|
||
for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { | ||
j.chk.Reset() | ||
if j.outerIsRight { | ||
j.makeJoinRowToChunk(j.chk, inner, outer) | ||
} else { | ||
j.makeJoinRowToChunk(j.chk, outer, inner) | ||
} | ||
j.makeJoinRow(j.outerIsRight, inner, outer) | ||
|
||
matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0)) | ||
matched, err = expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) | ||
if err != nil { | ||
return false, errors.Trace(err) | ||
} | ||
|
@@ -252,10 +254,9 @@ func (j *leftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, | |
} | ||
|
||
for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { | ||
j.chk.Reset() | ||
j.makeJoinRowToChunk(j.chk, outer, inner) | ||
j.makeJoinRow(false, inner, outer) | ||
|
||
matched, err = expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0)) | ||
matched, err = expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) | ||
if err != nil { | ||
return false, errors.Trace(err) | ||
} | ||
|
@@ -295,10 +296,9 @@ func (j *antiLeftOuterSemiJoiner) tryToMatch(outer chunk.Row, inners chunk.Itera | |
} | ||
|
||
for inner := inners.Current(); inner != inners.End(); inner = inners.Next() { | ||
j.chk.Reset() | ||
j.makeJoinRowToChunk(j.chk, outer, inner) | ||
matched, err := expression.EvalBool(j.ctx, j.conditions, j.chk.GetRow(0)) | ||
j.makeJoinRow(false, inner, outer) | ||
|
||
matched, err := expression.EvalBool(j.ctx, j.conditions, j.shadowRow.ToRow()) | ||
if err != nil { | ||
return false, errors.Trace(err) | ||
} | ||
|
@@ -330,7 +330,6 @@ func (j *leftOuterJoiner) tryToMatch(outer chunk.Row, inners chunk.Iterator, chk | |
if inners.Len() == 0 { | ||
return false, nil | ||
} | ||
|
||
j.chk.Reset() | ||
chkForJoin := j.chk | ||
if len(j.conditions) == 0 { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -346,3 +346,26 @@ func setMutRowJSON(col *column, j json.BinaryJSON) { | |
copy(col.data[1:], j.Value) | ||
col.offsets[1] = int32(dataLen) | ||
} | ||
|
||
// ShallowCopyPartialRow shallow copies the data of `row` to MutRow. | ||
func (mr MutRow) ShallowCopyPartialRow(colIdx int, row Row) { | ||
chk := mr.c | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
for i, rowCol := range row.c.columns { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how about: |
||
chkCol := chk.columns[colIdx+i] | ||
if !rowCol.isNull(row.idx) { | ||
chkCol.nullBitmap[0] = 1 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's better to add some comments about why we set the whole byte to |
||
} else { | ||
chkCol.nullBitmap[0] = 0 | ||
} | ||
|
||
if rowCol.isFixed() { | ||
elemLen := len(rowCol.elemBuf) | ||
offset := row.idx * elemLen | ||
chkCol.data = rowCol.data[offset : offset+elemLen] | ||
} else { | ||
start, end := rowCol.offsets[row.idx], rowCol.offsets[row.idx+1] | ||
chkCol.data = rowCol.data[start:end] | ||
chkCol.offsets[1] = int32(len(chkCol.data)) | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,6 +15,7 @@ package chunk | |
|
||
import ( | ||
"testing" | ||
"time" | ||
|
||
"github.com/pingcap/check" | ||
"github.com/pingcap/tidb/mysql" | ||
|
@@ -134,3 +135,64 @@ func BenchmarkMutRowFromValues(b *testing.B) { | |
MutRowFromValues(values) | ||
} | ||
} | ||
|
||
func (s *testChunkSuite) TestMutRowShadowCopyPartialRow(c *check.C) { | ||
colTypes := make([]*types.FieldType, 0, 3) | ||
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) | ||
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) | ||
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeTimestamp}) | ||
|
||
mutRow := MutRowFromTypes(colTypes) | ||
row := MutRowFromValues("abc", 123, types.ZeroTimestamp).ToRow() | ||
mutRow.ShallowCopyPartialRow(0, row) | ||
c.Assert(row.GetString(0), check.Equals, mutRow.ToRow().GetString(0)) | ||
c.Assert(row.GetInt64(1), check.Equals, mutRow.ToRow().GetInt64(1)) | ||
c.Assert(row.GetTime(2), check.DeepEquals, mutRow.ToRow().GetTime(2)) | ||
|
||
row.c.Reset() | ||
d := types.NewStringDatum("dfg") | ||
row.c.AppendDatum(0, &d) | ||
d = types.NewIntDatum(567) | ||
row.c.AppendDatum(1, &d) | ||
d = types.NewTimeDatum(types.Time{Time: types.FromGoTime(time.Now()), Fsp: 6, Type: mysql.TypeTimestamp}) | ||
row.c.AppendDatum(2, &d) | ||
|
||
c.Assert(d.GetMysqlTime(), check.DeepEquals, mutRow.ToRow().GetTime(2)) | ||
c.Assert(row.GetString(0), check.Equals, mutRow.ToRow().GetString(0)) | ||
c.Assert(row.GetInt64(1), check.Equals, mutRow.ToRow().GetInt64(1)) | ||
c.Assert(row.GetTime(2), check.DeepEquals, mutRow.ToRow().GetTime(2)) | ||
} | ||
|
||
var rowsNum = 1024 | ||
|
||
func BenchmarkMutRowShadowCopyPartialRow(b *testing.B) { | ||
b.ReportAllocs() | ||
colTypes := make([]*types.FieldType, 0, 8) | ||
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) | ||
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeVarString}) | ||
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) | ||
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeLonglong}) | ||
colTypes = append(colTypes, &types.FieldType{Tp: mysql.TypeDatetime}) | ||
|
||
mutRow := MutRowFromTypes(colTypes) | ||
row := MutRowFromValues("abc", "abcdefg", 123, 456, types.ZeroDatetime).ToRow() | ||
b.ResetTimer() | ||
for i := 0; i < b.N; i++ { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. b.ResetTimer() before this loop. |
||
for j := 0; j < rowsNum; j++ { | ||
mutRow.ShallowCopyPartialRow(0, row) | ||
} | ||
} | ||
} | ||
|
||
func BenchmarkChunkAppendPartialRow(b *testing.B) { | ||
b.ReportAllocs() | ||
chk := newChunkWithInitCap(rowsNum, 0, 0, 8, 8, 16) | ||
row := MutRowFromValues("abc", "abcdefg", 123, 456, types.ZeroDatetime).ToRow() | ||
b.ResetTimer() | ||
for i := 0; i < b.N; i++ { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
chk.Reset() | ||
for j := 0; j < rowsNum; j++ { | ||
chk.AppendPartialRow(0, row) | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this be removed?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No. Inner join, left outer join, and right outer join still use
chk
to do a deep copy. Deep copy + vectorized filter + batch copy has better performance.