-
Notifications
You must be signed in to change notification settings - Fork 5.8k
/
session.go
3958 lines (3372 loc) · 144 KB
/
session.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2015 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package variable
import (
"bytes"
"context"
"crypto/tls"
"encoding/binary"
"encoding/json"
"fmt"
"math"
"math/rand"
"net"
"slices"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/pingcap/errors"
"github.com/pingcap/kvproto/pkg/kvrpcpb"
"github.com/pingcap/tidb/pkg/config"
"github.com/pingcap/tidb/pkg/domain/resourcegroup"
"github.com/pingcap/tidb/pkg/kv"
"github.com/pingcap/tidb/pkg/metrics"
"github.com/pingcap/tidb/pkg/parser"
"github.com/pingcap/tidb/pkg/parser/ast"
"github.com/pingcap/tidb/pkg/parser/auth"
"github.com/pingcap/tidb/pkg/parser/charset"
"github.com/pingcap/tidb/pkg/parser/model"
"github.com/pingcap/tidb/pkg/parser/mysql"
ptypes "github.com/pingcap/tidb/pkg/parser/types"
"github.com/pingcap/tidb/pkg/sessionctx/sessionstates"
"github.com/pingcap/tidb/pkg/sessionctx/stmtctx"
pumpcli "github.com/pingcap/tidb/pkg/tidb-binlog/pump_client"
"github.com/pingcap/tidb/pkg/types"
"github.com/pingcap/tidb/pkg/util/chunk"
"github.com/pingcap/tidb/pkg/util/dbterror/plannererrors"
"github.com/pingcap/tidb/pkg/util/disk"
"github.com/pingcap/tidb/pkg/util/execdetails"
"github.com/pingcap/tidb/pkg/util/intest"
"github.com/pingcap/tidb/pkg/util/kvcache"
"github.com/pingcap/tidb/pkg/util/mathutil"
"github.com/pingcap/tidb/pkg/util/memory"
"github.com/pingcap/tidb/pkg/util/redact"
"github.com/pingcap/tidb/pkg/util/replayer"
"github.com/pingcap/tidb/pkg/util/rowcodec"
"github.com/pingcap/tidb/pkg/util/sqlkiller"
"github.com/pingcap/tidb/pkg/util/stringutil"
"github.com/pingcap/tidb/pkg/util/tableutil"
"github.com/pingcap/tidb/pkg/util/tiflash"
"github.com/pingcap/tidb/pkg/util/tiflashcompute"
"github.com/pingcap/tidb/pkg/util/timeutil"
"github.com/pingcap/tipb/go-tipb"
tikvstore "github.com/tikv/client-go/v2/kv"
"github.com/tikv/client-go/v2/tikv"
"github.com/twmb/murmur3"
atomic2 "go.uber.org/atomic"
"golang.org/x/exp/maps"
)
var (
	// PreparedStmtCount is exported for test.
	PreparedStmtCount int64
	// enableAdaptiveReplicaRead records whether closest adaptive replica read
	// may be enabled (1 = enabled, 0 = disabled). Replica read is forcibly
	// disabled when the TiDB server is missing in regions that contain TiKV
	// servers, to avoid skewing read traffic. Always access it atomically.
	enableAdaptiveReplicaRead uint32 = 1
)

// ConnStatusShutdown indicates that the connection status is closed by server.
// This code is put here because of package imports, and this value is the original server.connStatusShutdown.
const ConnStatusShutdown int32 = 2

// SetEnableAdaptiveReplicaRead sets `enableAdaptiveReplicaRead` to the given
// value and reports whether the stored value actually changed.
func SetEnableAdaptiveReplicaRead(enabled bool) bool {
	var next uint32
	if enabled {
		next = 1
	}
	// Swap returns the previous value; a change happened iff they differ.
	prev := atomic.SwapUint32(&enableAdaptiveReplicaRead, next)
	return prev != next
}

// IsAdaptiveReplicaReadEnabled reports whether adaptive closest replica read can be enabled.
func IsAdaptiveReplicaReadEnabled() bool {
	return atomic.LoadUint32(&enableAdaptiveReplicaRead) != 0
}
// RetryInfo saves retry information.
type RetryInfo struct {
	Retrying               bool
	DroppedPreparedStmtIDs []uint32
	autoIncrementIDs       retryInfoAutoIDs
	autoRandomIDs          retryInfoAutoIDs
	LastRcReadTS           uint64
}

// Clean does some clean work: it drops all recorded auto IDs and dropped
// prepared-statement IDs while keeping the backing storage for reuse.
func (r *RetryInfo) Clean() {
	r.autoIncrementIDs.clean()
	r.autoRandomIDs.clean()
	// Truncating instead of nil-ing keeps the allocated capacity.
	r.DroppedPreparedStmtIDs = r.DroppedPreparedStmtIDs[:0]
}

// ResetOffset resets the current retry offset.
func (r *RetryInfo) ResetOffset() {
	r.autoIncrementIDs.resetOffset()
	r.autoRandomIDs.resetOffset()
}

// AddAutoIncrementID adds id to autoIncrementIDs.
func (r *RetryInfo) AddAutoIncrementID(id int64) {
	ids := &r.autoIncrementIDs
	ids.autoIDs = append(ids.autoIDs, id)
}

// GetCurrAutoIncrementID gets the current autoIncrementID and advances the offset.
func (r *RetryInfo) GetCurrAutoIncrementID() (int64, bool) {
	return r.autoIncrementIDs.getCurrent()
}

// AddAutoRandomID adds id to autoRandomIDs.
func (r *RetryInfo) AddAutoRandomID(id int64) {
	ids := &r.autoRandomIDs
	ids.autoIDs = append(ids.autoIDs, id)
}

// GetCurrAutoRandomID gets the current AutoRandomID and advances the offset.
func (r *RetryInfo) GetCurrAutoRandomID() (int64, bool) {
	return r.autoRandomIDs.getCurrent()
}

// retryInfoAutoIDs records a sequence of auto-generated IDs so they can be
// replayed in order during a retry.
type retryInfoAutoIDs struct {
	currentOffset int
	autoIDs       []int64
}

func (r *retryInfoAutoIDs) resetOffset() {
	r.currentOffset = 0
}

func (r *retryInfoAutoIDs) clean() {
	r.currentOffset = 0
	r.autoIDs = r.autoIDs[:0]
}

// getCurrent returns the ID at the current offset (advancing it), or false
// when the recorded sequence is exhausted.
func (r *retryInfoAutoIDs) getCurrent() (int64, bool) {
	if r.currentOffset < len(r.autoIDs) {
		id := r.autoIDs[r.currentOffset]
		r.currentOffset++
		return id, true
	}
	return 0, false
}
// TransactionContext is used to store variables that have transaction scope.
// It is split into two embedded parts: the fields that must be restored when
// rolling back to a savepoint, and the fields that must not.
type TransactionContext struct {
	TxnCtxNoNeedToRestore
	TxnCtxNeedToRestore
}
// TxnCtxNeedToRestore stores transaction variables which need to be restored when rolling back to a savepoint.
type TxnCtxNeedToRestore struct {
	// TableDeltaMap is used in the schema validator for DDL changes in one table not to block others.
	// It's also used in the statistics updating.
	// Note: for the partitioned table, it stores all the partition IDs.
	TableDeltaMap map[int64]TableDelta

	// pessimisticLockCache is the cache for pessimistic locked keys.
	// The value never changes during the transaction.
	pessimisticLockCache map[string][]byte

	// CachedTables is not nil if the transaction writes on a cached table.
	CachedTables map[int64]any

	// InsertTTLRowsCount counts how many rows are inserted in this statement.
	InsertTTLRowsCount int
}
// TxnCtxNoNeedToRestore stores transaction variables which do not need to be restored when rolling back to a savepoint.
type TxnCtxNoNeedToRestore struct {
	// forUpdateTS is the for-update timestamp. It only moves forward (see
	// SetForUpdateTS), and GetForUpdateTS falls back to StartTS while it is not newer.
	forUpdateTS uint64
	Binlog      any
	InfoSchema  any
	History     any
	StartTS     uint64
	StaleReadTs uint64

	// unchangedKeys is used to store the unchanged keys that need to be locked for a pessimistic transaction.
	unchangedKeys map[string]struct{}

	// PessimisticCacheHit counts lookups served from the pessimistic lock caches.
	PessimisticCacheHit int

	// CreateTime is for metrics.
	CreateTime     time.Time
	StatementCount int
	CouldRetry     bool
	IsPessimistic  bool
	// IsStaleness indicates whether the txn is a read-only staleness txn.
	IsStaleness bool
	// IsExplicit indicates whether the txn is an interactive txn, which is typically started with a BEGIN
	// or START TRANSACTION statement, or by setting autocommit to 0.
	IsExplicit bool
	Isolation  string
	LockExpire uint32
	ForUpdate  uint32
	// TxnScope indicates the value of txn_scope.
	TxnScope string
	// Savepoints contains all definitions of the savepoints of a transaction at runtime; the order of the
	// SavepointRecords is the same as that of the SAVEPOINT statements.
	// It is used for a lookup when running a `ROLLBACK TO` statement.
	Savepoints []SavepointRecord
	// tdmLock protects TableDeltaMap (and relatedTableForMDL during cleanup) against potential data races.
	tdmLock sync.Mutex
	// TemporaryTables is used to store transaction-specific information for global temporary tables.
	// It can also be stored in sessionCtx with local temporary tables, but it's easier to clean this data after the transaction ends.
	TemporaryTables map[int64]tableutil.TempTable
	// EnableMDL indicates whether to enable the MDL lock for the transaction.
	EnableMDL bool
	// relatedTableForMDL records the `lock` table for metadata lock. It maps from int64 to int64(version).
	relatedTableForMDL *sync.Map
	// FairLockingUsed marks whether at least one of the statements in the transaction was executed in
	// fair locking mode.
	FairLockingUsed bool
	// FairLockingEffective marks whether at least one of the statements in the transaction was executed in
	// fair locking mode, and it took effect (which is determined according to whether lock-with-conflict
	// occurred during execution of any statement).
	FairLockingEffective bool
	// CurrentStmtPessimisticLockCache is the cache for pessimistic locked keys in the current statement.
	// It is merged into `pessimisticLockCache` after a statement finishes.
	// Read results cannot be directly written into pessimisticLockCache because a failed statement needs
	// to roll back its pessimistic locks.
	CurrentStmtPessimisticLockCache map[string][]byte
}
// SavepointRecord indicates a transaction's savepoint record.
type SavepointRecord struct {
	// Name is the name of the savepoint (stored lower-cased by AddSavepoint).
	Name string
	// MemDBCheckpoint is the transaction's memdb checkpoint.
	MemDBCheckpoint *tikv.MemDBCheckpoint
	// TxnCtxSavepoint is the savepoint of the TransactionContext.
	TxnCtxSavepoint TxnCtxNeedToRestore
}
// RowIDShardGenerator is used to generate shard for row id.
type RowIDShardGenerator struct {
	// shardRand is the random source used to derive new shard values.
	shardRand *rand.Rand
	// shardStep indicates the max size of continuous rowid shard in one transaction.
	shardStep    int
	shardRemain  int
	currentShard int64
}

// NewRowIDShardGenerator creates a new RowIDShardGenerator.
func NewRowIDShardGenerator(shardRand *rand.Rand, step int) *RowIDShardGenerator {
	intest.AssertNotNil(shardRand)
	g := &RowIDShardGenerator{
		shardRand: shardRand,
		shardStep: step,
	}
	return g
}

// SetShardStep sets the step of shard and invalidates the remaining budget.
func (s *RowIDShardGenerator) SetShardStep(step int) {
	s.shardStep, s.shardRemain = step, 0
}

// GetShardStep returns the shard step.
func (s *RowIDShardGenerator) GetShardStep() int {
	return s.shardStep
}

// GetCurrentShard returns the shard for the next `count` IDs.
func (s *RowIDShardGenerator) GetCurrentShard(count int) int64 {
	if s.shardRemain <= 0 {
		// The current shard budget is exhausted: derive a fresh shard value
		// and refill the budget to the configured step.
		s.updateShard(s.shardRand)
		s.shardRemain = s.GetShardStep()
	}
	s.shardRemain -= count
	return s.currentShard
}

// updateShard derives the next shard value by hashing a random 64-bit value.
func (s *RowIDShardGenerator) updateShard(shardRand *rand.Rand) {
	var buf [8]byte
	binary.LittleEndian.PutUint64(buf[:], shardRand.Uint64())
	s.currentShard = int64(murmur3.Sum32(buf[:]))
}
// GetRowIDShardGenerator returns the session's row-id shard generator,
// lazily creating one seeded with the current transaction's start TS.
func (s *SessionVars) GetRowIDShardGenerator() *RowIDShardGenerator {
	if s.shardGenerator == nil {
		intest.Assert(s.TxnCtx.StartTS > 0)
		r := rand.New(rand.NewSource(int64(s.TxnCtx.StartTS))) // #nosec G404
		s.shardGenerator = NewRowIDShardGenerator(r, int(s.ShardAllocateStep))
	}
	return s.shardGenerator
}
// AddUnchangedKeyForLock adds an unchanged key for pessimistic lock.
func (tc *TransactionContext) AddUnchangedKeyForLock(key []byte) {
	if tc.unchangedKeys == nil {
		// Lazily allocate the set on first use.
		tc.unchangedKeys = make(map[string]struct{})
	}
	tc.unchangedKeys[string(key)] = struct{}{}
}
// CollectUnchangedKeysForLock appends all recorded unchanged keys to buf and
// clears the recorded set, returning the extended buffer.
func (tc *TransactionContext) CollectUnchangedKeysForLock(buf []kv.Key) []kv.Key {
	for k := range tc.unchangedKeys {
		buf = append(buf, kv.Key(k))
	}
	// The set is consumed once collected.
	tc.unchangedKeys = nil
	return buf
}
// ColSize is a data struct to store the delta information for a table.
type ColSize struct {
	// ColID is the column ID.
	ColID int64
	// Size is the delta size for the column.
	Size int64
}

// DeltaCols is used to update the delta size for cols.
type DeltaCols interface {
	// UpdateColSizeMap is used to update the delta map for cols.
	UpdateColSizeMap(m map[int64]int64) map[int64]int64
}
// DeltaColsMap implements DeltaCols as a plain column-ID -> delta-size map.
type DeltaColsMap map[int64]int64

// UpdateColSizeMap implements DeltaCols. It adds this map's deltas into m,
// allocating m lazily only when there is something to add, and returns m.
func (cols DeltaColsMap) UpdateColSizeMap(m map[int64]int64) map[int64]int64 {
	if len(cols) == 0 {
		// Nothing to merge; leave m untouched (possibly nil).
		return m
	}
	if m == nil {
		m = make(map[int64]int64, len(cols))
	}
	for colID, size := range cols {
		m[colID] += size
	}
	return m
}
// UpdateDeltaForTable updates the delta info for some table.
// The `cols` argument is used to update the delta size for cols.
// If `cols` is nil, the delta size for cols is left unchanged.
func (tc *TransactionContext) UpdateDeltaForTable(
	physicalTableID int64, delta int64,
	count int64, cols DeltaCols,
) {
	tc.tdmLock.Lock()
	defer tc.tdmLock.Unlock()
	if tc.TableDeltaMap == nil {
		tc.TableDeltaMap = make(map[int64]TableDelta)
	}
	// Read-modify-write the per-table entry (TableDelta is stored by value).
	item := tc.TableDeltaMap[physicalTableID]
	item.TableID = physicalTableID
	item.Delta += delta
	item.Count += count
	if cols != nil {
		item.ColSize = cols.UpdateColSizeMap(item.ColSize)
	}
	tc.TableDeltaMap[physicalTableID] = item
}
// GetKeyInPessimisticLockCache gets a key in the pessimistic lock caches.
// The statement-level cache is consulted before the transaction-level cache;
// every hit bumps PessimisticCacheHit.
func (tc *TransactionContext) GetKeyInPessimisticLockCache(key kv.Key) (val []byte, ok bool) {
	// Lookups on a nil map are safe and simply miss.
	if v, hit := tc.CurrentStmtPessimisticLockCache[string(key)]; hit {
		tc.PessimisticCacheHit++
		return v, true
	}
	if v, hit := tc.pessimisticLockCache[string(key)]; hit {
		tc.PessimisticCacheHit++
		return v, true
	}
	return nil, false
}
// SetPessimisticLockCache sets a key value pair in the pessimistic lock cache.
// The value is buffered in the statement-level cache until the current statement finishes.
func (tc *TransactionContext) SetPessimisticLockCache(key kv.Key, val []byte) {
	cache := tc.CurrentStmtPessimisticLockCache
	if cache == nil {
		cache = make(map[string][]byte)
		tc.CurrentStmtPessimisticLockCache = cache
	}
	cache[string(key)] = val
}
// Cleanup clears up transaction info that is no longer used.
func (tc *TransactionContext) Cleanup() {
	// tc.InfoSchema = nil; we cannot do it now, because some operation like handleFieldList depend on this.
	tc.Binlog = nil
	tc.History = nil
	tc.pessimisticLockCache = nil
	tc.CurrentStmtPessimisticLockCache = nil
	tc.IsStaleness = false
	tc.Savepoints = nil
	tc.EnableMDL = false

	// TableDeltaMap and relatedTableForMDL are cleared under tdmLock to avoid
	// racing with concurrent readers.
	tc.tdmLock.Lock()
	tc.TableDeltaMap = nil
	tc.relatedTableForMDL = nil
	tc.tdmLock.Unlock()
}
// ClearDelta clears the delta map.
func (tc *TransactionContext) ClearDelta() {
	tc.tdmLock.Lock()
	defer tc.tdmLock.Unlock()
	tc.TableDeltaMap = nil
}
// GetForUpdateTS returns the ts for update: the for-update timestamp when it
// is newer than the transaction start TS, otherwise the start TS itself.
func (tc *TransactionContext) GetForUpdateTS() uint64 {
	return max(tc.forUpdateTS, tc.StartTS)
}
// SetForUpdateTS sets the ts for update. The stored timestamp is monotonic:
// an older value than the current one is ignored.
func (tc *TransactionContext) SetForUpdateTS(forUpdateTS uint64) {
	tc.forUpdateTS = max(tc.forUpdateTS, forUpdateTS)
}
// GetCurrentSavepoint gets TransactionContext's savepoint: a snapshot of the
// restorable part of the context, deep-copying the contained maps so later
// mutations do not leak into the snapshot.
func (tc *TransactionContext) GetCurrentSavepoint() TxnCtxNeedToRestore {
	tableDeltaMap := make(map[int64]TableDelta, len(tc.TableDeltaMap))
	for id, delta := range tc.TableDeltaMap {
		tableDeltaMap[id] = delta.Clone()
	}
	pessimisticLockCache := make(map[string][]byte, len(tc.pessimisticLockCache))
	maps.Copy(pessimisticLockCache, tc.pessimisticLockCache)
	cachedTables := make(map[int64]any, len(tc.CachedTables))
	maps.Copy(cachedTables, tc.CachedTables)
	// NOTE(review): a copy of CurrentStmtPessimisticLockCache used to be built
	// here but was never stored in the returned struct (TxnCtxNeedToRestore has
	// no such field), so that dead copy has been removed.
	return TxnCtxNeedToRestore{
		TableDeltaMap:        tableDeltaMap,
		pessimisticLockCache: pessimisticLockCache,
		CachedTables:         cachedTables,
		InsertTTLRowsCount:   tc.InsertTTLRowsCount,
	}
}
// RestoreBySavepoint restores the TransactionContext to the specified savepoint
// by overwriting the embedded restorable section wholesale.
func (tc *TransactionContext) RestoreBySavepoint(savepoint TxnCtxNeedToRestore) {
	tc.TxnCtxNeedToRestore = savepoint
}
// AddSavepoint adds a new savepoint. Savepoint names are case-insensitive;
// an existing savepoint with the same name is replaced.
func (tc *TransactionContext) AddSavepoint(name string, memdbCheckpoint *tikv.MemDBCheckpoint) {
	name = strings.ToLower(name)
	// Redefining a savepoint drops the previous record with the same name first.
	tc.DeleteSavepoint(name)
	tc.Savepoints = append(tc.Savepoints, SavepointRecord{
		Name:            name,
		MemDBCheckpoint: memdbCheckpoint,
		TxnCtxSavepoint: tc.GetCurrentSavepoint(),
	})
}
// DeleteSavepoint deletes the savepoint with the given (case-insensitive) name.
// It returns false when no savepoint with that name exists.
func (tc *TransactionContext) DeleteSavepoint(name string) bool {
	name = strings.ToLower(name)
	for i, sp := range tc.Savepoints {
		if sp.Name == name {
			// slices.Delete removes exactly the matched record in place.
			tc.Savepoints = slices.Delete(tc.Savepoints, i, i+1)
			return true
		}
	}
	return false
}
// ReleaseSavepoint deletes the named savepoint and every savepoint created
// after it. It returns false when no savepoint with that name exists.
func (tc *TransactionContext) ReleaseSavepoint(name string) bool {
	name = strings.ToLower(name)
	for i := range tc.Savepoints {
		if tc.Savepoints[i].Name == name {
			// Truncate from the matched record onwards (inclusive).
			tc.Savepoints = tc.Savepoints[:i]
			return true
		}
	}
	return false
}
// RollbackToSavepoint rolls back to the specified savepoint by name: it
// restores the restorable transaction context, drops every savepoint created
// after the target (keeping the target itself), and returns the matched
// record. It returns nil when no savepoint has that name.
func (tc *TransactionContext) RollbackToSavepoint(name string) *SavepointRecord {
	name = strings.ToLower(name)
	for i := range tc.Savepoints {
		if tc.Savepoints[i].Name != name {
			continue
		}
		tc.RestoreBySavepoint(tc.Savepoints[i].TxnCtxSavepoint)
		tc.Savepoints = tc.Savepoints[:i+1]
		return &tc.Savepoints[i]
	}
	return nil
}
// FlushStmtPessimisticLockCache merges the current statement pessimistic lock
// cache into the transaction pessimistic lock cache and resets the statement
// cache. The caller may need to clear the stmt cache itself.
func (tc *TransactionContext) FlushStmtPessimisticLockCache() {
	if tc.CurrentStmtPessimisticLockCache == nil {
		return
	}
	if tc.pessimisticLockCache == nil {
		// Pre-size for the entries about to be merged.
		tc.pessimisticLockCache = make(map[string][]byte, len(tc.CurrentStmtPessimisticLockCache))
	}
	// Consistent with GetCurrentSavepoint: use maps.Copy for bulk merging.
	maps.Copy(tc.pessimisticLockCache, tc.CurrentStmtPessimisticLockCache)
	tc.CurrentStmtPessimisticLockCache = nil
}
// WriteStmtBufs can be used by insert/replace/delete/update statements.
// TODO: use a common memory pool to replace this.
type WriteStmtBufs struct {
	// RowValBuf is used by tablecodec.EncodeRow, to reduce runtime.growslice.
	RowValBuf []byte
	// AddRowValues is used to store temporary insert row values, to reduce memory allocations when importing data.
	AddRowValues []types.Datum
	// IndexValsBuf is used by index.FetchValues.
	IndexValsBuf []types.Datum
	// IndexKeyBuf is used by index.GenIndexKey.
	IndexKeyBuf []byte
}
// clean drops every buffer so the memory can be reclaimed by the GC.
func (ib *WriteStmtBufs) clean() {
	ib.RowValBuf, ib.IndexKeyBuf = nil, nil
	ib.AddRowValues, ib.IndexValsBuf = nil, nil
}
// TableSnapshot represents a data snapshot of the table contained in `information_schema`.
type TableSnapshot struct {
	// Rows holds the snapshotted rows.
	Rows [][]types.Datum
	// Err is the error associated with the snapshot, if any.
	Err error
}

// txnIsolationLevelOneShotState tracks the state of the one-shot
// "SET TRANSACTION ISOLATION LEVEL" setting; see the oneShot* constants below.
type txnIsolationLevelOneShotState uint
// RewritePhaseInfo records some information about the rewrite phase
type RewritePhaseInfo struct {
// DurationRewrite is the duration of rewriting the SQL.
DurationRewrite time.Duration
// DurationPreprocessSubQuery is the duration of pre-processing sub-queries.
DurationPreprocessSubQuery time.Duration
// PreprocessSubQueries is the number of pre-processed sub-queries.
PreprocessSubQueries int
}
// Reset resets all fields in RewritePhaseInfo.
func (r *RewritePhaseInfo) Reset() {
r.DurationRewrite = 0
r.DurationPreprocessSubQuery = 0
r.PreprocessSubQueries = 0
}
// TemporaryTableData is an interface to maintain temporary data in a session.
type TemporaryTableData interface {
	kv.Retriever
	// Staging creates a new staging buffer inside the MemBuffer.
	// Subsequent writes will be temporarily stored in this new staging buffer.
	// When all modifications look good, call `Release` to publish them to the upper level buffer.
	Staging() kv.StagingHandle
	// Release publishes all modifications in the latest staging buffer to the upper level.
	Release(kv.StagingHandle)
	// Cleanup cleans up the resources referenced by the StagingHandle.
	// If the changes are not published by `Release`, they will be discarded.
	Cleanup(kv.StagingHandle)
	// GetTableSize gets the size of a table.
	GetTableSize(tblID int64) int64
	// DeleteTableKey removes the entry for key k from the table.
	DeleteTableKey(tblID int64, k kv.Key) error
	// SetTableKey sets the entry for k in the table.
	SetTableKey(tblID int64, k kv.Key, val []byte) error
}
// temporaryTableData is used to store temporary table data in a session.
// It tracks a per-table size delta on top of the embedded MemBuffer.
type temporaryTableData struct {
	kv.MemBuffer
	tblSize map[int64]int64
}

// NewTemporaryTableData creates a new TemporaryTableData.
func NewTemporaryTableData(memBuffer kv.MemBuffer) TemporaryTableData {
	return &temporaryTableData{
		MemBuffer: memBuffer,
		tblSize:   map[int64]int64{},
	}
}

// GetTableSize gets the size of a table; unknown tables report 0.
func (d *temporaryTableData) GetTableSize(tblID int64) int64 {
	return d.tblSize[tblID]
}

// DeleteTableKey removes the entry for key k from the table and updates the
// table's tracked size by the MemBuffer size delta.
func (d *temporaryTableData) DeleteTableKey(tblID int64, k kv.Key) error {
	before := d.MemBuffer.Size()
	err := d.MemBuffer.Delete(k)
	d.updateTblSize(tblID, before)
	return err
}

// SetTableKey sets the entry for k in the table and updates the table's
// tracked size by the MemBuffer size delta.
func (d *temporaryTableData) SetTableKey(tblID int64, k kv.Key, val []byte) error {
	before := d.MemBuffer.Size()
	err := d.MemBuffer.Set(k, val)
	d.updateTblSize(tblID, before)
	return err
}

// updateTblSize folds the MemBuffer growth since beforeSize into the table's size.
func (d *temporaryTableData) updateTblSize(tblID int64, beforeSize int) {
	d.tblSize[tblID] += int64(d.MemBuffer.Size() - beforeSize)
}
const (
	// oneShotDef means default, that is tx_isolation_one_shot not set.
	oneShotDef txnIsolationLevelOneShotState = iota
	// oneShotSet means it's set in current transaction.
	oneShotSet
	// oneShotUse means it should be used in current transaction.
	oneShotUse
)
// ReadConsistencyLevel is the level of read consistency.
type ReadConsistencyLevel string

const (
	// ReadConsistencyStrict means read by strict consistency, default value.
	ReadConsistencyStrict ReadConsistencyLevel = "strict"
	// ReadConsistencyWeak means read can be weak consistency.
	ReadConsistencyWeak ReadConsistencyLevel = "weak"
)

// IsWeak reports whether r is the weak consistency level.
func (r ReadConsistencyLevel) IsWeak() bool {
	return r == ReadConsistencyWeak
}
// validateReadConsistencyLevel returns nil when val (case-insensitively) names
// a known read consistency level, and a wrong-type error otherwise.
func validateReadConsistencyLevel(val string) error {
	switch ReadConsistencyLevel(strings.ToLower(val)) {
	case ReadConsistencyStrict, ReadConsistencyWeak:
		return nil
	}
	return ErrWrongTypeForVar.GenWithStackByArgs(TiDBReadConsistency)
}
// SetUserVarVal sets the value of the named user-defined variable.
func (s *SessionVars) SetUserVarVal(name string, dt types.Datum) {
	uv := &s.userVars
	uv.lock.Lock()
	defer uv.lock.Unlock()
	uv.values[name] = dt
}
// GetUserVarVal returns the value of the named user-defined variable and
// whether the variable exists.
func (s *SessionVars) GetUserVarVal(name string) (types.Datum, bool) {
	uv := &s.userVars
	uv.lock.RLock()
	defer uv.lock.RUnlock()
	val, found := uv.values[name]
	return val, found
}
// SetUserVarType sets the field type of the named user-defined variable.
func (s *SessionVars) SetUserVarType(name string, ft *types.FieldType) {
	uv := &s.userVars
	uv.lock.Lock()
	defer uv.lock.Unlock()
	uv.types[name] = ft
}
// GetUserVarType returns the field type of the named user-defined variable and
// whether the type has been recorded.
func (s *SessionVars) GetUserVarType(name string) (*types.FieldType, bool) {
	uv := &s.userVars
	uv.lock.RLock()
	defer uv.lock.RUnlock()
	ft, found := uv.types[name]
	return ft, found
}
// HookContext contains the necessary variables for executing set/get hooks.
type HookContext interface {
	// GetStore returns the kv.Storage the session runs on.
	GetStore() kv.Storage
}

// SessionVarsProvider provides the session variables.
type SessionVarsProvider interface {
	GetSessionVars() *SessionVars
}
// SessionVars is to handle user-defined or global variables in the current session.
type SessionVars struct {
Concurrency
MemQuota
BatchSize
// DMLBatchSize indicates the number of rows batch-committed for a statement.
// It will be used when using LOAD DATA or BatchInsert or BatchDelete is on.
DMLBatchSize int
RetryLimit int64
DisableTxnAutoRetry bool
userVars struct {
// lock is for user defined variables. values and types is read/write protected.
lock sync.RWMutex
// values stores the Datum for user variables
values map[string]types.Datum
// types stores the FieldType for user variables, it cannot be inferred from values when values have not been set yet.
types map[string]*types.FieldType
}
// systems variables, don't modify it directly, use GetSystemVar/SetSystemVar method.
systems map[string]string
// SysWarningCount is the system variable "warning_count", because it is on the hot path, so we extract it from the systems
SysWarningCount int
// SysErrorCount is the system variable "error_count", because it is on the hot path, so we extract it from the systems
SysErrorCount uint16
// nonPreparedPlanCacheStmts stores PlanCacheStmts for non-prepared plan cache.
nonPreparedPlanCacheStmts *kvcache.SimpleLRUCache
// PreparedStmts stores prepared statement.
PreparedStmts map[uint32]any
PreparedStmtNameToID map[string]uint32
// preparedStmtID is id of prepared statement.
preparedStmtID uint32
// Parameter values for plan cache.
PlanCacheParams *PlanCacheParamList
LastUpdateTime4PC types.Time
// ActiveRoles stores active roles for current user
ActiveRoles []*auth.RoleIdentity
RetryInfo *RetryInfo
// TxnCtx Should be reset on transaction finished.
TxnCtx *TransactionContext
// TxnCtxMu is used to protect TxnCtx.
TxnCtxMu sync.Mutex
// TxnManager is used to manage txn context in session
TxnManager any
// KVVars is the variables for KV storage.
KVVars *tikvstore.Variables
// txnIsolationLevelOneShot is used to implements "set transaction isolation level ..."
txnIsolationLevelOneShot struct {
state txnIsolationLevelOneShotState
value string
}
// status stands for the session status. e.g. in transaction or not, auto commit is on or off, and so on.
status atomic.Uint32
// ClientCapability is client's capability.
ClientCapability uint32
// TLSConnectionState is the TLS connection state (nil if not using TLS).
TLSConnectionState *tls.ConnectionState
// ConnectionID is the connection id of the current session.
ConnectionID uint64
// PlanID is the unique id of logical and physical plan.
PlanID atomic.Int32
// PlanColumnID is the unique id for column when building plan.
PlanColumnID atomic.Int64
// MapScalarSubQ maps the scalar sub queries from its ID to its struct.
MapScalarSubQ []any
// MapHashCode2UniqueID4ExtendedCol map the expr's hash code to specified unique ID.
MapHashCode2UniqueID4ExtendedCol map[string]int
// User is the user identity with which the session login.
User *auth.UserIdentity
// Port is the port of the connected socket
Port string
// CurrentDB is the default database of this session.
CurrentDB string
// CurrentDBChanged indicates if the CurrentDB has been updated, and if so we should print it into
// the slow log to make it be compatible with MySQL, https://github.com/pingcap/tidb/issues/17846.
CurrentDBChanged bool
// CommonGlobalLoaded indicates if common global variables have been loaded for this session.
CommonGlobalLoaded bool
// InRestrictedSQL indicates if the session is handling restricted SQL execution.
InRestrictedSQL bool
// SnapshotTS is used for reading history data. For simplicity, SnapshotTS only supports distsql requests.
SnapshotTS uint64
// TxnReadTS is used for staleness transactions; it provides the next staleness transaction startTS.
TxnReadTS *TxnReadTS
// SnapshotInfoschema is used with SnapshotTS; when the schema version at SnapshotTS is less than the
// current schema version, we load an old version schema for the query.
SnapshotInfoschema any
// BinlogClient is used to write binlog.
BinlogClient *pumpcli.PumpsClient
// GlobalVarsAccessor is used to set and get global variables.
GlobalVarsAccessor GlobalVarAccessor
// LastFoundRows is the number of found rows of the last query statement.
LastFoundRows uint64
// StmtCtx holds variables for the currently executing statement.
StmtCtx *stmtctx.StatementContext
// RefCountOfStmtCtx indicates the reference count of StmtCtx. When the
// StmtCtx is accessed by other sessions, e.g. oom-alarm-handler/expensive-query-handler, add one first.
// Note: this variable should be accessed and updated by atomic operations.
RefCountOfStmtCtx stmtctx.ReferenceCount
// AllowAggPushDown can be set to false to forbid aggregation push down.
AllowAggPushDown bool
// AllowDeriveTopN is used to enable/disable derived TopN optimization.
AllowDeriveTopN bool
// AllowCartesianBCJ controls broadcast CARTESIAN join: 0 means not allowed, 1 means allow broadcast
// CARTESIAN join but the table size should be under the broadcast threshold, 2 means allow broadcast
// CARTESIAN join even if the table size exceeds the broadcast threshold.
AllowCartesianBCJ int
// MPPOuterJoinFixedBuildSide means in MPP plan, always use the right(left) table as build side for left(right) outer join.
MPPOuterJoinFixedBuildSide bool
// AllowDistinctAggPushDown can be set true to allow agg with distinct push down to tikv/tiflash.
AllowDistinctAggPushDown bool
// EnableSkewDistinctAgg can be set true to allow skew distinct aggregate rewrite.
EnableSkewDistinctAgg bool
// Enable3StageDistinctAgg indicates whether to allow 3 stage distinct aggregate.
Enable3StageDistinctAgg bool
// Enable3StageMultiDistinctAgg indicates whether to allow 3 stage multi distinct aggregate.
Enable3StageMultiDistinctAgg bool
// ExplainNonEvaledSubQuery indicates whether subqueries that are not evaluated should still be shown
// in EXPLAIN output. NOTE(review): inferred from the name — confirm against usage.
ExplainNonEvaledSubQuery bool
// MultiStatementMode permits incorrect client library usage. Not recommended to be turned on.
MultiStatementMode int
// InMultiStmts indicates whether the statement is a multi-statement like `update t set a=1; update t set b=2;`.
InMultiStmts bool
// AllowWriteRowID variable is currently not recommended to be turned on.
AllowWriteRowID bool
// AllowBatchCop means if we should send batch coprocessor to TiFlash. Default value is 1, means to use batch cop in case of aggregation and join.
// Value set to 2 means to force to send batch cop for any query. Value set to 0 means never use batch cop.
AllowBatchCop int
// allowMPPExecution means if we should use mpp way to execute query.
// Default value is `true`, means to be determined by the optimizer.
// Value set to `false` means never use mpp.
allowMPPExecution bool
// allowTiFlashCop means if we must use mpp way to execute query.
// Default value is `false`, means to be determined by the optimizer.
// Value set to `true` means we may fall back to TiFlash cop if possible.
// NOTE(review): the first sentence seems inconsistent with the two lines above it — confirm the
// intended semantics (it reads as if `true` permits cop fallback rather than enforcing MPP).
allowTiFlashCop bool
// HashExchangeWithNewCollation means if we support hash exchange when new collation is enabled.
// Default value is `true`, means support hash exchange when new collation is enabled.
// Value set to `false` means not use hash exchange when new collation is enabled.
HashExchangeWithNewCollation bool
// enforceMPPExecution means if we should enforce mpp way to execute query.
// Default value is `false`, means to be determined by variable `allowMPPExecution`.
// Value set to `true` means enforce use mpp.
// Note if you want to set `enforceMPPExecution` to `true`, you must set `allowMPPExecution` to `true` first.
enforceMPPExecution bool
// TiFlashMaxThreads is the maximum number of threads to execute the request which is pushed down to tiflash.
// Default value is -1, means it will not be pushed down to tiflash.
// If the value is bigger than -1, it will be pushed down to tiflash and used to create db context in tiflash.
TiFlashMaxThreads int64
// TiFlashMaxBytesBeforeExternalJoin is the maximum bytes used by a TiFlash join before spill to disk.
// Default value is -1, means it will not be pushed down to TiFlash.
// If the value is bigger than -1, it will be pushed down to TiFlash, and if the value is 0, it means
// no limit and spill will never happen.
TiFlashMaxBytesBeforeExternalJoin int64
// TiFlashMaxBytesBeforeExternalGroupBy is the maximum bytes used by a TiFlash hash aggregation before spill to disk.
// Default value is -1, means it will not be pushed down to TiFlash.
// If the value is bigger than -1, it will be pushed down to TiFlash, and if the value is 0, it means
// no limit and spill will never happen.
TiFlashMaxBytesBeforeExternalGroupBy int64
// TiFlashMaxBytesBeforeExternalSort is the maximum bytes used by a TiFlash sort/TopN before spill to disk.
// Default value is -1, means it will not be pushed down to TiFlash.
// If the value is bigger than -1, it will be pushed down to TiFlash, and if the value is 0, it means
// no limit and spill will never happen.
TiFlashMaxBytesBeforeExternalSort int64
// TiFlashMaxQueryMemoryPerNode is the TiFlash max query memory per node; -1 and 0 mean no limit, and the default value is 0.
// If TiFlashMaxQueryMemoryPerNode > 0 && TiFlashQuerySpillRatio > 0, it will trigger auto spill in TiFlash side, and when auto spill
// is triggered, per executor's memory usage threshold set by TiFlashMaxBytesBeforeExternalJoin/TiFlashMaxBytesBeforeExternalGroupBy/TiFlashMaxBytesBeforeExternalSort will be ignored.
TiFlashMaxQueryMemoryPerNode int64
// TiFlashQuerySpillRatio is the percentage threshold to trigger auto spill in TiFlash if TiFlashMaxQueryMemoryPerNode is set.
TiFlashQuerySpillRatio float64
// AllowAutoRandExplicitInsert indicates whether explicit insertion on an auto_random column is allowed
// (session variable TiDBAllowAutoRandExplicitInsert).
AllowAutoRandExplicitInsert bool
// BroadcastJoinThresholdSize is used to limit the size of the smaller table.
// Its unit is bytes; if the size of the small table is larger than it, we will not use bcj.
BroadcastJoinThresholdSize int64
// BroadcastJoinThresholdCount is used to limit the total row count of the smaller table.
// If we can't estimate the size of one side of join child, we will check if its row number exceeds this limitation.
BroadcastJoinThresholdCount int64
// PreferBCJByExchangeDataSize indicates the method used to choose mpp broadcast join:
// false: choose mpp broadcast join by `BroadcastJoinThresholdSize` and `BroadcastJoinThresholdCount`
// true: compare data exchange size of join and choose the smallest one
PreferBCJByExchangeDataSize bool
// LimitPushDownThreshold determines if push Limit or TopN down to TiKV forcibly.
LimitPushDownThreshold int64
// CorrelationThreshold is the guard to enable row count estimation using column order correlation.
CorrelationThreshold float64
// EnableCorrelationAdjustment is used to indicate if correlation adjustment is enabled.
EnableCorrelationAdjustment bool
// CorrelationExpFactor is used to control the heuristic approach of row count estimation when CorrelationThreshold is not met.
CorrelationExpFactor int
// cpuFactor is the CPU cost of processing one expression for one row.
cpuFactor float64
// copCPUFactor is the CPU cost of processing one expression for one row in coprocessor.
copCPUFactor float64
// networkFactor is the network cost of transferring 1 byte data.
networkFactor float64
// scanFactor is the IO cost of scanning 1 byte data on TiKV and TiFlash.
scanFactor float64
// descScanFactor is the IO cost of scanning 1 byte data on TiKV and TiFlash in desc order.
descScanFactor float64
// seekFactor is the IO cost of seeking the start value of a range in TiKV or TiFlash.
seekFactor float64
// memoryFactor is the memory cost of storing one tuple.
memoryFactor float64
// diskFactor is the IO cost of reading/writing one byte to temporary disk.
diskFactor float64
// concurrencyFactor is the CPU cost of additional one goroutine.
concurrencyFactor float64