Skip to content

Commit 07976dd

Browse files
authored
*: make chunk.SwapColumn private (#57274) (#57369)
close #55885
1 parent a86fc5a commit 07976dd

12 files changed

+249
-181
lines changed

pkg/executor/builder.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -798,12 +798,15 @@ func (b *executorBuilder) buildLimit(v *plannercore.PhysicalLimit) exec.Executor
798798
end: v.Offset + v.Count,
799799
}
800800

801-
childUsedSchemaLen := v.Children()[0].Schema().Len()
801+
childSchemaLen := v.Children()[0].Schema().Len()
802802
childUsedSchema := markChildrenUsedCols(v.Schema().Columns, v.Children()[0].Schema())[0]
803803
e.columnIdxsUsedByChild = make([]int, 0, len(childUsedSchema))
804804
e.columnIdxsUsedByChild = append(e.columnIdxsUsedByChild, childUsedSchema...)
805-
if len(e.columnIdxsUsedByChild) == childUsedSchemaLen {
805+
if len(e.columnIdxsUsedByChild) == childSchemaLen {
806806
e.columnIdxsUsedByChild = nil // indicates that all columns are used. LimitExec will improve performance for this condition.
807+
} else {
808+
// construct a project evaluator to do the inline projection
809+
e.columnSwapHelper = chunk.NewColumnSwapHelper(e.columnIdxsUsedByChild)
807810
}
808811
return e
809812
}

pkg/executor/executor.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -1321,6 +1321,7 @@ type LimitExec struct {
13211321

13221322
// columnIdxsUsedByChild keep column indexes of child executor used for inline projection
13231323
columnIdxsUsedByChild []int
1324+
columnSwapHelper *chunk.ColumnSwapHelper
13241325

13251326
// Log the close time when opentracing is enabled.
13261327
span opentracing.Span
@@ -1382,10 +1383,9 @@ func (e *LimitExec) Next(ctx context.Context, req *chunk.Chunk) error {
13821383
e.cursor += batchSize
13831384

13841385
if e.columnIdxsUsedByChild != nil {
1385-
for i, childIdx := range e.columnIdxsUsedByChild {
1386-
if err = req.SwapColumn(i, e.childResult, childIdx); err != nil {
1387-
return err
1388-
}
1386+
err = e.columnSwapHelper.SwapColumns(e.childResult, req)
1387+
if err != nil {
1388+
return err
13891389
}
13901390
} else {
13911391
req.SwapColumns(e.childResult)

pkg/expression/BUILD.bazel

-1
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ go_library(
9797
"//pkg/util/encrypt",
9898
"//pkg/util/generatedexpr",
9999
"//pkg/util/hack",
100-
"//pkg/util/intest",
101100
"//pkg/util/intset",
102101
"//pkg/util/logutil",
103102
"//pkg/util/mathutil",

pkg/expression/evaluator.go

+9-124
Original file line numberDiff line numberDiff line change
@@ -15,127 +15,10 @@
1515
package expression
1616

1717
import (
18-
"sync/atomic"
19-
2018
"github.com/pingcap/tidb/pkg/sessionctx"
2119
"github.com/pingcap/tidb/pkg/util/chunk"
22-
"github.com/pingcap/tidb/pkg/util/disjointset"
23-
"github.com/pingcap/tidb/pkg/util/intest"
2420
)
2521

26-
type columnEvaluator struct {
27-
inputIdxToOutputIdxes map[int][]int
28-
// mergedInputIdxToOutputIdxes is only determined in runtime when saw the input chunk.
29-
mergedInputIdxToOutputIdxes atomic.Pointer[map[int][]int]
30-
}
31-
32-
// run evaluates "Column" expressions.
33-
// NOTE: It should be called after all the other expressions are evaluated
34-
//
35-
// since it will change the content of the input Chunk.
36-
func (e *columnEvaluator) run(ctx sessionctx.Context, input, output *chunk.Chunk) error {
37-
// mergedInputIdxToOutputIdxes only can be determined in runtime when we saw the input chunk structure.
38-
if e.mergedInputIdxToOutputIdxes.Load() == nil {
39-
e.mergeInputIdxToOutputIdxes(input, e.inputIdxToOutputIdxes)
40-
}
41-
for inputIdx, outputIdxes := range *e.mergedInputIdxToOutputIdxes.Load() {
42-
if err := output.SwapColumn(outputIdxes[0], input, inputIdx); err != nil {
43-
return err
44-
}
45-
for i, length := 1, len(outputIdxes); i < length; i++ {
46-
output.MakeRef(outputIdxes[0], outputIdxes[i])
47-
}
48-
}
49-
return nil
50-
}
51-
52-
// mergeInputIdxToOutputIdxes merges separate inputIdxToOutputIdxes entries when column references
53-
// are detected within the input chunk. This process ensures consistent handling of columns derived
54-
// from the same original source.
55-
//
56-
// Consider the following scenario:
57-
//
58-
// Initial scan operation produces a column 'a':
59-
//
60-
// scan: a (addr: ???)
61-
//
62-
// This column 'a' is used in the first projection (proj1) to create two columns a1 and a2, both referencing 'a':
63-
//
64-
// proj1
65-
// / \
66-
// / \
67-
// / \
68-
// a1 (addr: 0xe) a2 (addr: 0xe)
69-
// / \
70-
// / \
71-
// / \
72-
// proj2 proj2
73-
// / \ / \
74-
// / \ / \
75-
// a3 a4 a5 a6
76-
//
77-
// (addr: 0xe) (addr: 0xe) (addr: 0xe) (addr: 0xe)
78-
//
79-
// Here, a1 and a2 share the same address (0xe), indicating they reference the same data from the original 'a'.
80-
//
81-
// When moving to the second projection (proj2), the system tries to project these columns further:
82-
// - The first set (left side) consists of a3 and a4, derived from a1, both retaining the address (0xe).
83-
// - The second set (right side) consists of a5 and a6, derived from a2, also starting with address (0xe).
84-
//
85-
// When proj1 is complete, the output chunk contains two columns [a1, a2], both derived from the single column 'a' from the scan.
86-
// Since both a1 and a2 are column references with the same address (0xe), they are treated as referencing the same data.
87-
//
88-
// In proj2, two separate <inputIdx, []outputIdxes> items are created:
89-
// - <0, [0,1]>: This means the 0th input column (a1) is projected twice, into the 0th and 1st columns of the output chunk.
90-
// - <1, [2,3]>: This means the 1st input column (a2) is projected twice, into the 2nd and 3rd columns of the output chunk.
91-
//
92-
// Due to the column swapping logic in each projection, after applying the <0, [0,1]> projection,
93-
// the addresses for a1 and a2 may become swapped or invalid:
94-
//
95-
// proj1: a1 (addr: invalid) a2 (addr: invalid)
96-
//
97-
// This can lead to issues in proj2, where further operations on these columns may be unsafe:
98-
//
99-
// proj2: a3 (addr: 0xe) a4 (addr: 0xe) a5 (addr: ???) a6 (addr: ???)
100-
//
101-
// Therefore, it's crucial to identify and merge the original column references early, ensuring
102-
// the final inputIdxToOutputIdxes mapping accurately reflects the shared origins of the data.
103-
// For instance, <0, [0,1,2,3]> indicates that the 0th input column (original 'a') is referenced
104-
// by all four output columns in the final output.
105-
//
106-
// mergeInputIdxToOutputIdxes merges inputIdxToOutputIdxes based on detected column references.
107-
// This ensures that columns with the same reference are correctly handled in the output chunk.
108-
func (e *columnEvaluator) mergeInputIdxToOutputIdxes(input *chunk.Chunk, inputIdxToOutputIdxes map[int][]int) {
109-
originalDJSet := disjointset.NewSet[int](4)
110-
flag := make([]bool, input.NumCols())
111-
// Detect self column-references inside the input chunk by comparing column addresses
112-
for i := 0; i < input.NumCols(); i++ {
113-
if flag[i] {
114-
continue
115-
}
116-
for j := i + 1; j < input.NumCols(); j++ {
117-
if input.Column(i) == input.Column(j) {
118-
flag[j] = true
119-
originalDJSet.Union(i, j)
120-
}
121-
}
122-
}
123-
// Merge inputIdxToOutputIdxes based on the detected column references.
124-
newInputIdxToOutputIdxes := make(map[int][]int, len(inputIdxToOutputIdxes))
125-
for inputIdx := range inputIdxToOutputIdxes {
126-
// Root idx is internal offset, not the right column index.
127-
originalRootIdx := originalDJSet.FindRoot(inputIdx)
128-
originalVal, ok := originalDJSet.FindVal(originalRootIdx)
129-
intest.Assert(ok)
130-
mergedOutputIdxes := newInputIdxToOutputIdxes[originalVal]
131-
mergedOutputIdxes = append(mergedOutputIdxes, inputIdxToOutputIdxes[inputIdx]...)
132-
newInputIdxToOutputIdxes[originalVal] = mergedOutputIdxes
133-
}
134-
// Update the merged inputIdxToOutputIdxes automatically.
135-
// Once failed, it means other worker has done this job at meantime.
136-
e.mergedInputIdxToOutputIdxes.CompareAndSwap(nil, &newInputIdxToOutputIdxes)
137-
}
138-
13922
type defaultEvaluator struct {
14023
outputIdxes []int
14124
exprs []Expression
@@ -176,8 +59,8 @@ func (e *defaultEvaluator) run(ctx sessionctx.Context, input, output *chunk.Chun
17659
// It separates them to "column" and "other" expressions and evaluates "other"
17760
// expressions before "column" expressions.
17861
type EvaluatorSuite struct {
179-
*columnEvaluator // Evaluator for column expressions.
180-
*defaultEvaluator // Evaluator for other expressions.
62+
ColumnSwapHelper *chunk.ColumnSwapHelper // Evaluator for column expressions.
63+
*defaultEvaluator // Evaluator for other expressions.
18164
}
18265

18366
// NewEvaluatorSuite creates an EvaluatorSuite to evaluate all the exprs.
@@ -187,11 +70,11 @@ func NewEvaluatorSuite(exprs []Expression, avoidColumnEvaluator bool) *Evaluator
18770

18871
for i := 0; i < len(exprs); i++ {
18972
if col, isCol := exprs[i].(*Column); isCol && !avoidColumnEvaluator {
190-
if e.columnEvaluator == nil {
191-
e.columnEvaluator = &columnEvaluator{inputIdxToOutputIdxes: make(map[int][]int)}
73+
if e.ColumnSwapHelper == nil {
74+
e.ColumnSwapHelper = &chunk.ColumnSwapHelper{InputIdxToOutputIdxes: make(map[int][]int)}
19275
}
19376
inputIdx, outputIdx := col.Index, i
194-
e.columnEvaluator.inputIdxToOutputIdxes[inputIdx] = append(e.columnEvaluator.inputIdxToOutputIdxes[inputIdx], outputIdx)
77+
e.ColumnSwapHelper.InputIdxToOutputIdxes[inputIdx] = append(e.ColumnSwapHelper.InputIdxToOutputIdxes[inputIdx], outputIdx)
19578
continue
19679
}
19780
if e.defaultEvaluator == nil {
@@ -225,8 +108,10 @@ func (e *EvaluatorSuite) Run(ctx sessionctx.Context, input, output *chunk.Chunk)
225108
}
226109
}
227110

228-
if e.columnEvaluator != nil {
229-
return e.columnEvaluator.run(ctx, input, output)
111+
// NOTE: It should be called after all the other expressions are evaluated
112+
// since it will change the content of the input Chunk.
113+
if e.ColumnSwapHelper != nil {
114+
return e.ColumnSwapHelper.SwapColumns(input, output)
230115
}
231116
return nil
232117
}

pkg/expression/evaluator_test.go

-40
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
package expression
1616

1717
import (
18-
"slices"
1918
"sync/atomic"
2019
"testing"
2120
"time"
@@ -594,42 +593,3 @@ func TestMod(t *testing.T) {
594593
require.NoError(t, err)
595594
require.Equal(t, types.NewDatum(1.5), r)
596595
}
597-
598-
func TestMergeInputIdxToOutputIdxes(t *testing.T) {
599-
ctx := createContext(t)
600-
inputIdxToOutputIdxes := make(map[int][]int)
601-
// input 0th should be column referred as 0th and 1st in output columns.
602-
inputIdxToOutputIdxes[0] = []int{0, 1}
603-
// input 1th should be column referred as 2nd and 3rd in output columns.
604-
inputIdxToOutputIdxes[1] = []int{2, 3}
605-
columnEval := columnEvaluator{inputIdxToOutputIdxes: inputIdxToOutputIdxes}
606-
607-
input := chunk.NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeLonglong), types.NewFieldType(mysql.TypeLonglong)}, 2)
608-
input.AppendInt64(0, 99)
609-
// input chunk's 0th and 1st are column referred itself.
610-
input.MakeRef(0, 1)
611-
612-
// chunk: col1 <---(ref) col2
613-
// ____________/ \___________/ \___
614-
// proj: col1 col2 col3 col4
615-
//
616-
// original case after inputIdxToOutputIdxes[0], the original col2 will be nil pointer
617-
// cause consecutive col3,col4 ref projection are invalid.
618-
//
619-
// after fix, the new inputIdxToOutputIdxes should be: inputIdxToOutputIdxes[0]: {0, 1, 2, 3}
620-
621-
output := chunk.NewChunkWithCapacity([]*types.FieldType{types.NewFieldType(mysql.TypeLonglong), types.NewFieldType(mysql.TypeLonglong),
622-
types.NewFieldType(mysql.TypeLonglong), types.NewFieldType(mysql.TypeLonglong)}, 2)
623-
624-
err := columnEval.run(ctx, input, output)
625-
require.NoError(t, err)
626-
// all four columns are column-referred, pointing to the first one.
627-
require.Equal(t, output.Column(0), output.Column(1))
628-
require.Equal(t, output.Column(1), output.Column(2))
629-
require.Equal(t, output.Column(2), output.Column(3))
630-
require.Equal(t, output.GetRow(0).GetInt64(0), int64(99))
631-
632-
require.Equal(t, len(*columnEval.mergedInputIdxToOutputIdxes.Load()), 1)
633-
slices.Sort((*columnEval.mergedInputIdxToOutputIdxes.Load())[0])
634-
require.Equal(t, (*columnEval.mergedInputIdxToOutputIdxes.Load())[0], []int{0, 1, 2, 3})
635-
}

pkg/expression/integration_test/BUILD.bazel

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ go_test(
88
"main_test.go",
99
],
1010
flaky = True,
11-
shard_count = 29,
11+
shard_count = 30,
1212
deps = [
1313
"//pkg/config",
1414
"//pkg/domain",

pkg/expression/integration_test/integration_test.go

+49
Original file line numberDiff line numberDiff line change
@@ -3295,3 +3295,52 @@ func TestIssue43527(t *testing.T) {
32953295
"SELECT @total := @total + d FROM (SELECT d FROM test) AS temp, (SELECT @total := b FROM test) AS T1 where @total >= 100",
32963296
).Check(testkit.Rows("200", "300", "400", "500"))
32973297
}
3298+
3299+
func TestIssue55885(t *testing.T) {
3300+
store := testkit.CreateMockStore(t)
3301+
tk := testkit.NewTestKit(t, store)
3302+
tk.MustExec("use test")
3303+
tk.MustExec("create table t_jg8o (c_s int not null unique ,c__qy double ,c_z int not null ,c_a90ol text not null);")
3304+
tk.MustExec("insert into t_jg8o (c_s, c__qy, c_z, c_a90ol) values" +
3305+
"(-975033779, 85.65, -355481284, 'gnip' ),(-2018599732, 85.86, 1617093413, 'm' )," +
3306+
"(-960107027, 4.6, -2042358076, 'y1q')," +
3307+
"(-3, 38.1, -1528586343, 'ex_2')," +
3308+
"(69386953, 32768.0, -62220810, 'tfkxjj5c')," +
3309+
"(587181689, -9223372036854775806.3, -1666156943, 'queemvgj')," +
3310+
"(-218561796, 85.2, -670390288, 'nf990nol')," +
3311+
"(858419954, 2147483646.0, -1649362344, 'won_9')," +
3312+
"(-1120115215, 22.100, 1509989939, 'w')," +
3313+
"(-1388119356, 94.32, -1694148464, 'gu4i4knyhm')," +
3314+
"(-1016230734, -4294967295.8, 1430313391, 's')," +
3315+
"(-1861825796, 36.52, -1457928755, 'j')," +
3316+
"(1963621165, 88.87, 18928603, 'gxbsloff' )," +
3317+
"(1492879828, cast(null as double), 759883041, 'zwue')," +
3318+
"(-1607192175, 12.36, 1669523024, 'qt5zch71a')," +
3319+
"(1534068569, 46.79, -392085130, 'bc')," +
3320+
"(155707446, 9223372036854775809.4, 1727199557, 'qyghenu9t6')," +
3321+
"(-1524976778, 75.99, 335492222, 'sdgde0z')," +
3322+
"(175403335, cast(null as double), -69711503, 'ja')," +
3323+
"(-272715456, 48.62, 753928713, 'ur')," +
3324+
"(-2035825967, 257.3, -1598426762, 'lmqmn')," +
3325+
"(-1178957955, 2147483648.100000, 1432554380, 'dqpb210')," +
3326+
"(-2056628646, 254.5, -1476177588, 'k41ajpt7x')," +
3327+
"(-914210874, 126.7, -421919910, 'x57ud7oy1')," +
3328+
"(-88586773, 1.2, 1568247510, 'drmxi8')," +
3329+
"(-834563269, -4294967296.7, 1163133933, 'wp')," +
3330+
"(-84490060, 54.13, -630289437, '_3_twecg5h')," +
3331+
" (267700893, 54.75, 370343042, 'n72')," +
3332+
"(552106333, 32766.2, 2365745, 's7tt')," +
3333+
"(643440707, 65536.8, -850412592, 'wmluxa9a')," +
3334+
"(1709853766, -4294967296.5, -21041749, 'obqj0uu5v')," +
3335+
"(-7, 80.88, 528792379, 'n5qr9m26i')," +
3336+
"(-456431629, 28.43, 1958788149, 'b')," +
3337+
"(-28841240, 11.86, -1089765168, 'pqg')," +
3338+
"(-807839288, 25.89, 504535500, 'cs3tkhs')," +
3339+
"(-52910064, 85.16, 354032882, '_ffjo67yxe')," +
3340+
"(1919869830, 81.81, -272247558, 'aj')," +
3341+
"(165434725, -2147483648.0, 11, 'xxnsf5')," +
3342+
"(3, -2147483648.7, 1616632952, 'g7t8tqyi')," +
3343+
"(1851859144, 70.73, -1105664209, 'qjfhjr');")
3344+
3345+
tk.MustQuery("SELECT subq_0.c3 as c1 FROM (select c_a90ol as c3, c_a90ol as c4, var_pop(cast(c__qy as double)) over (partition by c_a90ol, c_s order by c_z) as c5 from t_jg8o limit 65) as subq_0 LIMIT 37")
3346+
}

pkg/util/chunk/BUILD.bazel

+2
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,11 @@ go_library(
2626
"//pkg/parser/terror",
2727
"//pkg/types",
2828
"//pkg/util/checksum",
29+
"//pkg/util/disjointset",
2930
"//pkg/util/disk",
3031
"//pkg/util/encrypt",
3132
"//pkg/util/hack",
33+
"//pkg/util/intest",
3234
"//pkg/util/logutil",
3335
"//pkg/util/mathutil",
3436
"//pkg/util/memory",

pkg/util/chunk/chunk.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -199,10 +199,12 @@ func (c *Chunk) MakeRefTo(dstColIdx int, src *Chunk, srcColIdx int) error {
199199
return nil
200200
}
201201

202-
// SwapColumn swaps Column "c.columns[colIdx]" with Column
202+
// swapColumn swaps Column "c.columns[colIdx]" with Column
203203
// "other.columns[otherIdx]". If there exists columns refer to the Column to be
204204
// swapped, we need to re-build the reference.
205-
func (c *Chunk) SwapColumn(colIdx int, other *Chunk, otherIdx int) error {
205+
// this function should not be used directly, if you wants to swap columns between two chunks,
206+
// use ColumnSwapHelper.SwapColumns instead.
207+
func (c *Chunk) swapColumn(colIdx int, other *Chunk, otherIdx int) error {
206208
if c.sel != nil || other.sel != nil {
207209
return errors.New(msgErrSelNotNil)
208210
}

pkg/util/chunk/chunk_test.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -642,25 +642,25 @@ func TestSwapColumn(t *testing.T) {
642642
checkRef()
643643

644644
// swap two chunk's columns
645-
require.NoError(t, chk1.SwapColumn(0, chk2, 0))
645+
require.NoError(t, chk1.swapColumn(0, chk2, 0))
646646
checkRef()
647647

648-
require.NoError(t, chk1.SwapColumn(0, chk2, 0))
648+
require.NoError(t, chk1.swapColumn(0, chk2, 0))
649649
checkRef()
650650

651651
// swap reference and referenced columns
652-
require.NoError(t, chk2.SwapColumn(1, chk2, 0))
652+
require.NoError(t, chk2.swapColumn(1, chk2, 0))
653653
checkRef()
654654

655655
// swap the same column in the same chunk
656-
require.NoError(t, chk2.SwapColumn(1, chk2, 1))
656+
require.NoError(t, chk2.swapColumn(1, chk2, 1))
657657
checkRef()
658658

659659
// swap reference and another column
660-
require.NoError(t, chk2.SwapColumn(1, chk2, 2))
660+
require.NoError(t, chk2.swapColumn(1, chk2, 2))
661661
checkRef()
662662

663-
require.NoError(t, chk2.SwapColumn(2, chk2, 0))
663+
require.NoError(t, chk2.swapColumn(2, chk2, 0))
664664
checkRef()
665665
}
666666

0 commit comments

Comments
 (0)