-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
index_encoding.go
1830 lines (1701 loc) · 65.8 KB
/
index_encoding.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright 2018 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
package rowenc
import (
"context"
"fmt"
"sort"
"unsafe"
"github.com/cockroachdb/cockroach/pkg/geo/geoindex"
"github.com/cockroachdb/cockroach/pkg/geo/geopb"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/catalog"
"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/types"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
"github.com/cockroachdb/cockroach/pkg/util/json"
"github.com/cockroachdb/cockroach/pkg/util/mon"
"github.com/cockroachdb/cockroach/pkg/util/unique"
"github.com/cockroachdb/errors"
)
// This file contains facilities to encode primary and secondary
// indexes on SQL tables.
// MakeIndexKeyPrefix returns the key prefix used for the index's data. If you
// need the corresponding Span, prefer desc.IndexSpan(indexID) or
// desc.PrimaryIndexSpan().
func MakeIndexKeyPrefix(
	codec keys.SQLCodec, desc catalog.TableDescriptor, indexID descpb.IndexID,
) []byte {
	// An interleaved index lives inside the key space of its root interleave
	// ancestor, so the prefix must name that ancestor's table/index pair
	// rather than this one.
	if idx, err := desc.FindIndexWithID(indexID); err == nil && idx.NumInterleaveAncestors() > 0 {
		root := idx.GetInterleaveAncestor(0)
		return codec.IndexPrefix(uint32(root.TableID), uint32(root.IndexID))
	}
	return codec.IndexPrefix(uint32(desc.GetID()), uint32(indexID))
}
// EncodeIndexKey creates a key by concatenating keyPrefix with the
// encodings of the columns in the index, and returns the key and
// whether any of the encoded values were NULLs.
//
// If a table or index is interleaved, `encoding.interleavedSentinel`
// is used in place of the family id (a varint) to signal the next
// component of the key. An example of one level of interleaving (a
// parent):
// /<parent_table_id>/<parent_index_id>/<field_1>/<field_2>/NullDesc/<table_id>/<index_id>/<field_3>/<family>
//
// Note that ExtraColumnIDs are not encoded, so the result isn't always a
// full index key.
func EncodeIndexKey(
	tableDesc catalog.TableDescriptor,
	index *descpb.IndexDescriptor,
	colMap catalog.TableColMap,
	values []tree.Datum,
	keyPrefix []byte,
) (key []byte, containsNull bool, err error) {
	// Encoding every indexed column is simply the "partial" encoding taken to
	// its full length.
	numIndexedCols := len(index.ColumnIDs)
	return EncodePartialIndexKey(tableDesc, index, numIndexedCols, colMap, values, keyPrefix)
}
// EncodePartialIndexSpan creates the minimal key span for the key specified by the
// given table, index, and values, with the same method as
// EncodePartialIndexKey.
func EncodePartialIndexSpan(
	tableDesc catalog.TableDescriptor,
	index *descpb.IndexDescriptor,
	numCols int,
	colMap catalog.TableColMap,
	values []tree.Datum,
	keyPrefix []byte,
) (span roachpb.Span, containsNull bool, err error) {
	var startKey roachpb.Key
	startKey, containsNull, err = EncodePartialIndexKey(tableDesc, index, numCols, colMap, values, keyPrefix)
	if err != nil {
		return roachpb.Span{}, containsNull, err
	}
	span.Key = startKey
	if numCols != len(index.ColumnIDs) {
		// A partial key: scan everything sharing this prefix.
		span.EndKey = startKey.PrefixEnd()
	} else {
		// If all values in the input index were specified, append an interleave
		// marker instead of PrefixEnding the key, to avoid including any child
		// interleaves of the input key.
		span.EndKey = encoding.EncodeInterleavedSentinel(startKey)
	}
	return span, containsNull, nil
}
// EncodePartialIndexKey encodes a partial index key; only the first numCols of
// the index key columns are encoded. The index key columns are
// - index.ColumnIDs for unique indexes, and
// - append(index.ColumnIDs, index.ExtraColumnIDs) for non-unique indexes.
//
// It returns the encoded key, whether any encoded value was NULL, and any
// encoding error. Interleave ancestors (if any) are encoded in order before
// this index's own table id / index id pair.
func EncodePartialIndexKey(
	tableDesc catalog.TableDescriptor,
	index *descpb.IndexDescriptor,
	numCols int,
	colMap catalog.TableColMap,
	values []tree.Datum,
	keyPrefix []byte,
) (key []byte, containsNull bool, err error) {
	var colIDs, extraColIDs []descpb.ColumnID
	if numCols <= len(index.ColumnIDs) {
		colIDs = index.ColumnIDs[:numCols]
	} else {
		// Columns beyond the indexed set must come from the "extra" (primary
		// key suffix) columns, which only non-unique indexes have.
		if index.Unique || numCols > len(index.ColumnIDs)+len(index.ExtraColumnIDs) {
			return nil, false, errors.Errorf("encoding too many columns (%d)", numCols)
		}
		colIDs = index.ColumnIDs
		extraColIDs = index.ExtraColumnIDs[:numCols-len(index.ColumnIDs)]
	}
	// We know we will append to the key which will cause the capacity to grow so
	// make it bigger from the get-go.
	// Add the length of the key prefix as an initial guess.
	// Add 3 bytes for every ancestor: table,index id + interleave sentinel.
	// Add 2 bytes for every column value. An underestimate for all but low integers.
	key = growKey(keyPrefix, len(keyPrefix)+3*len(index.Interleave.Ancestors)+2*len(values))
	dirs := directions(index.ColumnDirections)
	if len(index.Interleave.Ancestors) > 0 {
		for i, ancestor := range index.Interleave.Ancestors {
			// The first ancestor is assumed to already be encoded in keyPrefix.
			if i != 0 {
				key = EncodePartialTableIDIndexID(key, ancestor.TableID, ancestor.IndexID)
			}
			partial := false
			length := int(ancestor.SharedPrefixLen)
			// If fewer columns remain than this ancestor's shared prefix, we
			// can only encode what we have and must stop afterwards.
			if length > len(colIDs) {
				length = len(colIDs)
				partial = true
			}
			var n bool
			key, n, err = EncodeColumns(colIDs[:length], dirs[:length], colMap, values, key)
			if err != nil {
				return nil, false, err
			}
			containsNull = containsNull || n
			if partial {
				// Early stop. Note that if we had exactly SharedPrefixLen columns
				// remaining, we want to append the next tableID/indexID pair because
				// that results in a more specific key.
				return key, containsNull, nil
			}
			// Advance past the columns consumed by this ancestor.
			colIDs, dirs = colIDs[length:], dirs[length:]
			// Each ancestor is separated by an interleaved
			// sentinel (0xfe).
			key = encoding.EncodeInterleavedSentinel(key)
		}
		key = EncodePartialTableIDIndexID(key, tableDesc.GetID(), index.ID)
	}
	var n bool
	key, n, err = EncodeColumns(colIDs, dirs, colMap, values, key)
	if err != nil {
		return nil, false, err
	}
	containsNull = containsNull || n
	// Extra (primary key suffix) columns are always encoded ascending.
	key, n, err = EncodeColumns(extraColIDs, nil /* directions */, colMap, values, key)
	if err != nil {
		return nil, false, err
	}
	containsNull = containsNull || n
	return key, containsNull, nil
}
// directions wraps a list of index column directions and provides lookup with
// an implicit Ascending default for positions past the end of the list.
type directions []descpb.IndexDescriptor_Direction

// get returns the encoding direction for column i. Columns without an
// explicit direction default to encoding.Ascending.
func (d directions) get(i int) (encoding.Direction, error) {
	if i >= len(d) {
		return encoding.Ascending, nil
	}
	return d[i].ToEncodingDirection()
}
// MakeSpanFromEncDatums creates a minimal index key span on the input
// values. A minimal index key span is a span that includes the fewest possible
// keys after the start key generated by the input values.
//
// The start key is generated by concatenating keyPrefix with the encodings of
// the given EncDatum values. The values, types, and dirs parameters should be
// specified in the same order as the index key columns and may be a prefix.
//
// If a table or index is interleaved, `encoding.interleavedSentinel` is used
// in place of the family id (a varint) to signal the next component of the
// key. An example of one level of interleaving (a parent):
// /<parent_table_id>/<parent_index_id>/<field_1>/<field_2>/NullDesc/<table_id>/<index_id>/<field_3>/<family>
func MakeSpanFromEncDatums(
	values EncDatumRow,
	types []*types.T,
	dirs []descpb.IndexDescriptor_Direction,
	tableDesc catalog.TableDescriptor,
	index *descpb.IndexDescriptor,
	alloc *DatumAlloc,
	keyPrefix []byte,
) (_ roachpb.Span, containsNull bool, _ error) {
	startKey, complete, containsNull, err := makeKeyFromEncDatums(values, types, dirs, tableDesc, index, alloc, keyPrefix)
	if err != nil {
		return roachpb.Span{}, false, err
	}
	// Default to the prefix range; a fully-constrained unique index key is
	// tightened below.
	endKey := startKey.PrefixEnd()
	if complete && index.Unique {
		// If all values in the input index were specified and the input index is
		// unique, indicating that it might have child interleaves, append an
		// interleave marker instead of PrefixEnding the key, to avoid including
		// any child interleaves of the input key.
		//
		// Note that currently only primary indexes can contain interleaved
		// tables or indexes, so this condition is broader than necessary in
		// case one day we permit interleaving into arbitrary unique indexes.
		// Note also that we could precisely only emit an interleaved sentinel
		// if this index does in fact have interleaves - we choose not to do
		// that to make testing simpler and traces and spans more consistent.
		endKey = encoding.EncodeInterleavedSentinel(startKey)
	}
	return roachpb.Span{Key: startKey, EndKey: endKey}, containsNull, nil
}
// NeededColumnFamilyIDs returns the minimal set of column families required to
// retrieve neededCols for the specified table and index. The returned descpb.FamilyIDs
// are in sorted order.
func NeededColumnFamilyIDs(
	neededColOrdinals util.FastIntSet, table catalog.TableDescriptor, index *descpb.IndexDescriptor,
) []descpb.FamilyID {
	// Fast path: with a single family there is nothing to narrow down.
	if table.NumFamilies() == 1 {
		return []descpb.FamilyID{table.GetFamilies()[0].ID}
	}
	// Build some necessary data structures for column metadata.
	columns := table.ColumnsWithMutations(true /* includeMutations */)
	colIdxMap := table.ColumnIdxMapWithMutations(true)
	// Classify each index column ordinal: key columns, composite-encoded
	// columns, and "extra" (primary key suffix) columns.
	var indexedCols util.FastIntSet
	var compositeCols util.FastIntSet
	var extraCols util.FastIntSet
	for _, columnID := range index.ColumnIDs {
		columnOrdinal := colIdxMap.GetDefault(columnID)
		indexedCols.Add(columnOrdinal)
	}
	for _, columnID := range index.CompositeColumnIDs {
		columnOrdinal := colIdxMap.GetDefault(columnID)
		compositeCols.Add(columnOrdinal)
	}
	for _, columnID := range index.ExtraColumnIDs {
		columnOrdinal := colIdxMap.GetDefault(columnID)
		extraCols.Add(columnOrdinal)
	}
	// The column family with ID 0 is special because it always has a KV entry.
	// Other column families will omit a value if all their columns are null, so
	// we may need to retrieve family 0 to use as a sentinel for distinguishing
	// between null values and the absence of a row. Also, secondary indexes store
	// values here for composite and "extra" columns. ("Extra" means primary key
	// columns which are not indexed.)
	var family0 *descpb.ColumnFamilyDescriptor
	hasSecondaryEncoding := index.GetEncodingType(table.GetPrimaryIndexID()) == descpb.SecondaryIndexEncoding
	// First iterate over the needed columns and look for a few special cases:
	// * columns which can be decoded from the key and columns whose value is stored
	//   in family 0.
	// * certain system columns, like the MVCC timestamp column require all of the
	//   column families to be scanned to produce a value.
	family0Needed := false
	mvccColumnRequested := false
	// nc tracks the needed columns that still require a family lookup; columns
	// satisfiable from the key or family 0 are removed from it below.
	nc := neededColOrdinals.Copy()
	neededColOrdinals.ForEach(func(columnOrdinal int) {
		if indexedCols.Contains(columnOrdinal) && !compositeCols.Contains(columnOrdinal) {
			// We can decode this column from the index key, so no particular family
			// is needed.
			nc.Remove(columnOrdinal)
		}
		if hasSecondaryEncoding && (compositeCols.Contains(columnOrdinal) ||
			extraCols.Contains(columnOrdinal)) {
			// Secondary indexes store composite and "extra" column values in family
			// 0.
			family0Needed = true
			nc.Remove(columnOrdinal)
		}
		// System column ordinals are larger than the number of columns.
		if columnOrdinal >= len(columns) {
			mvccColumnRequested = true
		}
	})
	// If the MVCC timestamp column was requested, then bail out: every family
	// must be scanned to produce it.
	if mvccColumnRequested {
		families := make([]descpb.FamilyID, 0, table.NumFamilies())
		_ = table.ForeachFamily(func(family *descpb.ColumnFamilyDescriptor) error {
			families = append(families, family.ID)
			return nil
		})
		return families
	}
	// Iterate over the column families to find which ones contain needed columns.
	// We also keep track of whether all of the needed families' columns are
	// nullable, since this means we need column family 0 as a sentinel, even if
	// none of its columns are needed.
	var neededFamilyIDs []descpb.FamilyID
	allFamiliesNullable := true
	_ = table.ForeachFamily(func(family *descpb.ColumnFamilyDescriptor) error {
		needed := false
		nullable := true
		if family.ID == 0 {
			// Set column family 0 aside in case we need it as a sentinel.
			family0 = family
			if family0Needed {
				needed = true
			}
			// Family 0 always has a KV entry, so it is never "nullable".
			nullable = false
		}
		for _, columnID := range family.ColumnIDs {
			if needed && !nullable {
				// Nothing left to check.
				break
			}
			columnOrdinal := colIdxMap.GetDefault(columnID)
			if nc.Contains(columnOrdinal) {
				needed = true
			}
			if !columns[columnOrdinal].Nullable && (!indexedCols.Contains(columnOrdinal) ||
				compositeCols.Contains(columnOrdinal) && !hasSecondaryEncoding) {
				// The column is non-nullable and cannot be decoded from a different
				// family, so this column family must have a KV entry for every row.
				nullable = false
			}
		}
		if needed {
			neededFamilyIDs = append(neededFamilyIDs, family.ID)
			if !nullable {
				allFamiliesNullable = false
			}
		}
		return nil
	})
	if family0 == nil {
		panic(errors.AssertionFailedf("column family 0 not found"))
	}
	// If all the needed families are nullable, we also need family 0 as a
	// sentinel. Note that this is only the case if family 0 was not already added
	// to neededFamilyIDs.
	if allFamiliesNullable {
		// Prepend family 0: grow by one, shift everything right, and place
		// family 0's ID (which is 0, keeping the result sorted) at the front.
		neededFamilyIDs = append(neededFamilyIDs, 0)
		copy(neededFamilyIDs[1:], neededFamilyIDs)
		neededFamilyIDs[0] = family0.ID
	}
	return neededFamilyIDs
}
// SplitSpanIntoSeparateFamilies splits a span representing a single row point
// lookup into separate disjoint spans that request only the particular column
// families from neededFamilies instead of requesting all the families. It is up
// to the client to ensure the requested span represents a single row lookup and
// that the span splitting is appropriate (see CanSplitSpanIntoSeparateFamilies).
//
// The function accepts a slice of spans to append to.
func SplitSpanIntoSeparateFamilies(
	appendTo roachpb.Spans, span roachpb.Span, neededFamilies []descpb.FamilyID,
) roachpb.Spans {
	// Clamp the capacity so appends while building family keys cannot mutate
	// or alias the caller's key bytes.
	rowKey := span.Key[:len(span.Key):len(span.Key)]
	for i, familyID := range neededFamilies {
		famKey := roachpb.Key(keys.MakeFamilyKey(rowKey, uint32(familyID)))
		famEnd := famKey.PrefixEnd()
		if i > 0 && familyID == neededFamilies[i-1]+1 {
			// This column family is adjacent to the previous one. We can merge
			// the two spans into one.
			appendTo[len(appendTo)-1].EndKey = famEnd
			continue
		}
		appendTo = append(appendTo, roachpb.Span{Key: famKey, EndKey: famEnd})
	}
	return appendTo
}
// makeKeyFromEncDatums creates an index key by concatenating keyPrefix with the
// encodings of the given EncDatum values. The values, types, and dirs
// parameters should be specified in the same order as the index key columns and
// may be a prefix. The complete return value is true if the resultant key
// fully constrains the index.
//
// If a table or index is interleaved, `encoding.interleavedSentinel` is used
// in place of the family id (a varint) to signal the next component of the
// key. An example of one level of interleaving (a parent):
// /<parent_table_id>/<parent_index_id>/<field_1>/<field_2>/NullDesc/<table_id>/<index_id>/<field_3>/<family>
func makeKeyFromEncDatums(
	values EncDatumRow,
	types []*types.T,
	dirs []descpb.IndexDescriptor_Direction,
	tableDesc catalog.TableDescriptor,
	index *descpb.IndexDescriptor,
	alloc *DatumAlloc,
	keyPrefix []byte,
) (_ roachpb.Key, complete bool, containsNull bool, _ error) {
	// Values may be a prefix of the index columns.
	if len(values) > len(dirs) {
		return nil, false, false, errors.Errorf("%d values, %d directions", len(values), len(dirs))
	}
	if len(values) != len(types) {
		return nil, false, false, errors.Errorf("%d values, %d types", len(values), len(types))
	}
	// We know we will append to the key which will cause the capacity to grow
	// so make it bigger from the get-go.
	key := make(roachpb.Key, len(keyPrefix), len(keyPrefix)*2)
	copy(key, keyPrefix)
	if len(index.Interleave.Ancestors) > 0 {
		for i, ancestor := range index.Interleave.Ancestors {
			// The first ancestor is assumed to already be encoded in keyPrefix.
			if i != 0 {
				key = EncodePartialTableIDIndexID(key, ancestor.TableID, ancestor.IndexID)
			}
			partial := false
			length := int(ancestor.SharedPrefixLen)
			// If fewer values remain than this ancestor's shared prefix, encode
			// what we have and stop early.
			if length > len(types) {
				length = len(types)
				partial = true
			}
			var (
				err error
				n   bool
			)
			key, n, err = appendEncDatumsToKey(key, types[:length], values[:length], dirs[:length], alloc)
			if err != nil {
				return nil, false, false, err
			}
			containsNull = containsNull || n
			if partial {
				// Early stop - the number of desired columns was fewer than the number
				// left in the current interleave.
				// NOTE(review): this returns containsNull=false even though a NULL
				// may have been encoded just above — confirm whether callers rely
				// on containsNull for partial keys.
				return key, false, false, nil
			}
			// Advance past the values consumed by this ancestor.
			types, values, dirs = types[length:], values[length:], dirs[length:]
			// Each ancestor is separated by an interleaved
			// sentinel (0xfe).
			key = encoding.EncodeInterleavedSentinel(key)
		}
		key = EncodePartialTableIDIndexID(key, tableDesc.GetID(), index.ID)
	}
	var (
		err error
		n   bool
	)
	key, n, err = appendEncDatumsToKey(key, types, values, dirs, alloc)
	if err != nil {
		return key, false, false, err
	}
	containsNull = containsNull || n
	// The key is complete iff every indexed column was encoded.
	return key, len(types) == len(index.ColumnIDs), containsNull, err
}
// findColumnValue returns the value corresponding to the column. If
// the column isn't present return a NULL value.
func findColumnValue(
	column descpb.ColumnID, colMap catalog.TableColMap, values []tree.Datum,
) tree.Datum {
	i, ok := colMap.Get(column)
	if !ok {
		return tree.DNull
	}
	// TODO(pmattis): Need to convert the values[i] value to the type
	// expected by the column.
	return values[i]
}
// appendEncDatumsToKey concatenates the encoded representations of
// the datums at the end of the given roachpb.Key.
func appendEncDatumsToKey(
	key roachpb.Key,
	types []*types.T,
	values EncDatumRow,
	dirs []descpb.IndexDescriptor_Direction,
	alloc *DatumAlloc,
) (_ roachpb.Key, containsNull bool, _ error) {
	for i := range values {
		// Pick the key encoding that matches this column's index direction.
		// (Local is named enc rather than encoding to avoid shadowing the
		// encoding package.)
		enc := descpb.DatumEncoding_ASCENDING_KEY
		if dirs[i] == descpb.IndexDescriptor_DESC {
			enc = descpb.DatumEncoding_DESCENDING_KEY
		}
		containsNull = containsNull || values[i].IsNull()
		var err error
		if key, err = values[i].Encode(types[i], alloc, enc, key); err != nil {
			return nil, false, err
		}
	}
	return key, containsNull, nil
}
// EncodePartialTableIDIndexID encodes a table id followed by an index id to an
// existing key. The key must already contain a tenant id.
func EncodePartialTableIDIndexID(key []byte, tableID descpb.ID, indexID descpb.IndexID) []byte {
	tID, iID := uint32(tableID), uint32(indexID)
	return keys.MakeTableIDIndexID(key, tID, iID)
}
// DecodePartialTableIDIndexID decodes a table id followed by an index id. The
// input key must already have its tenant id removed.
func DecodePartialTableIDIndexID(key []byte) ([]byte, descpb.ID, descpb.IndexID, error) {
	remaining, rawTableID, rawIndexID, err := keys.DecodeTableIDIndexID(key)
	return remaining, descpb.ID(rawTableID), descpb.IndexID(rawIndexID), err
}
// DecodeIndexKeyPrefix decodes the prefix of an index key and returns the
// index id and a slice for the rest of the key.
//
// Don't use this function in the scan "hot path".
func DecodeIndexKeyPrefix(
	codec keys.SQLCodec, desc catalog.TableDescriptor, key []byte,
) (indexID descpb.IndexID, remaining []byte, err error) {
	key, err = codec.StripTenantPrefix(key)
	if err != nil {
		return 0, nil, err
	}
	// TODO(dan): This whole operation is n^2 because of the interleaves
	// bookkeeping. We could improve it to n with a prefix tree of components.
	//
	// Start with every active index as a candidate interleave match and prune
	// the set as each key component is decoded.
	interleaves := make([]catalog.Index, 0, len(desc.ActiveIndexes()))
	interleaves = append(interleaves, desc.ActiveIndexes()...)
	for component := 0; ; component++ {
		var tableID descpb.ID
		key, tableID, indexID, err = DecodePartialTableIDIndexID(key)
		if err != nil {
			return 0, nil, err
		}
		if tableID == desc.GetID() {
			// Once desc's table id has been decoded, there can be no more
			// interleaves.
			break
		}
		// Prune candidates whose ancestor at this depth doesn't match the
		// decoded table/index pair. Iterate backwards so removal by shifting
		// doesn't skip elements.
		for i := len(interleaves) - 1; i >= 0; i-- {
			if interleaves[i].NumInterleaveAncestors() <= component ||
				interleaves[i].GetInterleaveAncestor(component).TableID != tableID ||
				interleaves[i].GetInterleaveAncestor(component).IndexID != indexID {
				// This component, and thus this interleave, doesn't match what was
				// decoded, remove it.
				copy(interleaves[i:], interleaves[i+1:])
				interleaves = interleaves[:len(interleaves)-1]
			}
		}
		// The decoded key doesn't match any known interleaves.
		if len(interleaves) == 0 {
			return 0, nil, errors.Errorf("no known interleaves for key")
		}
		// Anything left has the same SharedPrefixLen at index `component`, so just
		// use the first one.
		for i := uint32(0); i < interleaves[0].GetInterleaveAncestor(component).SharedPrefixLen; i++ {
			l, err := encoding.PeekLength(key)
			if err != nil {
				return 0, nil, err
			}
			key = key[l:]
		}
		// Consume the interleaved sentinel.
		var ok bool
		key, ok = encoding.DecodeIfInterleavedSentinel(key)
		if !ok {
			return 0, nil, errors.Errorf("invalid interleave key")
		}
	}
	return indexID, key, err
}
// DecodeIndexKey decodes the values that are a part of the specified index
// key (setting vals).
//
// The remaining bytes in the index key are returned which will either be an
// encoded column ID for the primary key index, the primary key suffix for
// non-unique secondary indexes or unique secondary indexes containing NULL or
// empty. If the given descriptor does not match the key, false is returned with
// no error.
func DecodeIndexKey(
	codec keys.SQLCodec,
	desc catalog.TableDescriptor,
	index *descpb.IndexDescriptor,
	types []*types.T,
	vals []EncDatum,
	colDirs []descpb.IndexDescriptor_Direction,
	key []byte,
) (remainingKey []byte, matches bool, foundNull bool, _ error) {
	// Strip the tenant prefix and the leading table id / index id pair, then
	// delegate to the prefix-free variant.
	stripped, err := codec.StripTenantPrefix(key)
	if err != nil {
		return nil, false, false, err
	}
	stripped, _, _, err = DecodePartialTableIDIndexID(stripped)
	if err != nil {
		return nil, false, false, err
	}
	return DecodeIndexKeyWithoutTableIDIndexIDPrefix(desc, index, types, vals, colDirs, stripped)
}
// DecodeIndexKeyWithoutTableIDIndexIDPrefix is the same as DecodeIndexKey,
// except it expects its index key is missing in its tenant id and first table
// id / index id key prefix.
func DecodeIndexKeyWithoutTableIDIndexIDPrefix(
	desc catalog.TableDescriptor,
	index *descpb.IndexDescriptor,
	types []*types.T,
	vals []EncDatum,
	colDirs []descpb.IndexDescriptor_Direction,
	key []byte,
) (remainingKey []byte, matches bool, foundNull bool, _ error) {
	var decodedTableID descpb.ID
	var decodedIndexID descpb.IndexID
	var err error
	if len(index.Interleave.Ancestors) > 0 {
		// Walk each interleave ancestor, decoding its shared-prefix columns and
		// consuming the interleave sentinel that separates components. A
		// mismatch at any step means the key isn't for this index; return
		// matches=false without error.
		for i, ancestor := range index.Interleave.Ancestors {
			// Our input key had its first table id / index id chopped off, so
			// don't try to decode those for the first ancestor.
			if i != 0 {
				key, decodedTableID, decodedIndexID, err = DecodePartialTableIDIndexID(key)
				if err != nil {
					return nil, false, false, err
				}
				if decodedTableID != ancestor.TableID || decodedIndexID != ancestor.IndexID {
					return nil, false, false, nil
				}
			}
			length := int(ancestor.SharedPrefixLen)
			var isNull bool
			key, isNull, err = DecodeKeyVals(types[:length], vals[:length], colDirs[:length], key)
			if err != nil {
				return nil, false, false, err
			}
			// Advance past the slots consumed by this ancestor.
			types, vals, colDirs = types[length:], vals[length:], colDirs[length:]
			foundNull = foundNull || isNull
			// Consume the interleaved sentinel.
			var ok bool
			key, ok = encoding.DecodeIfInterleavedSentinel(key)
			if !ok {
				return nil, false, false, nil
			}
		}
		key, decodedTableID, decodedIndexID, err = DecodePartialTableIDIndexID(key)
		if err != nil {
			return nil, false, false, err
		}
		if decodedTableID != desc.GetID() || decodedIndexID != index.ID {
			return nil, false, false, nil
		}
	}
	var isNull bool
	key, isNull, err = DecodeKeyVals(types, vals, colDirs, key)
	if err != nil {
		return nil, false, false, err
	}
	foundNull = foundNull || isNull
	// We're expecting a column family id next (a varint). If
	// interleavedSentinel is actually next, then this key is for a child
	// table.
	if _, ok := encoding.DecodeIfInterleavedSentinel(key); ok {
		return nil, false, false, nil
	}
	return key, true, foundNull, nil
}
// DecodeKeyVals decodes the values that are part of the key. The decoded
// values are stored in the vals. If this slice is nil, the direction
// used will default to encoding.Ascending.
// DecodeKeyVals returns whether or not NULL was encountered in the key.
func DecodeKeyVals(
	types []*types.T, vals []EncDatum, directions []descpb.IndexDescriptor_Direction, key []byte,
) ([]byte, bool, error) {
	if directions != nil && len(directions) != len(vals) {
		return nil, false, errors.Errorf("encoding directions doesn't parallel vals: %d vs %d.",
			len(directions), len(vals))
	}
	foundNull := false
	for i := range vals {
		// Default to ascending; honor an explicit DESC direction when given.
		enc := descpb.DatumEncoding_ASCENDING_KEY
		if directions != nil && directions[i] == descpb.IndexDescriptor_DESC {
			enc = descpb.DatumEncoding_DESCENDING_KEY
		}
		var err error
		if vals[i], key, err = EncDatumFromBuffer(types[i], enc, key); err != nil {
			return nil, false, err
		}
		foundNull = foundNull || vals[i].IsNull()
	}
	return key, foundNull, nil
}
// IndexEntry represents an encoded key/value for an index entry.
type IndexEntry struct {
	// Key is the encoded index key.
	Key roachpb.Key
	// Value is the value stored at Key.
	Value roachpb.Value
	// Family is the column family of this entry.
	// Only used for forward indexes.
	Family descpb.FamilyID
}
// valueEncodedColumn represents a composite or stored column of a secondary
// index.
type valueEncodedColumn struct {
	// id is the column's descriptor ID.
	id descpb.ColumnID
	// isComposite distinguishes composite columns (true) from stored
	// columns (false).
	isComposite bool
}
// byID implements sort.Interface for []valueEncodedColumn based on the id
// field.
type byID []valueEncodedColumn

// Len reports the number of columns.
func (a byID) Len() int { return len(a) }

// Swap exchanges the columns at i and j.
func (a byID) Swap(i, j int) { a[j], a[i] = a[i], a[j] }

// Less orders columns by ascending column ID.
func (a byID) Less(i, j int) bool { return a[j].id > a[i].id }
// EncodeInvertedIndexKeys creates a list of inverted index keys by
// concatenating keyPrefix with the encodings of the column in the
// index.
func EncodeInvertedIndexKeys(
	index *descpb.IndexDescriptor, colMap catalog.TableColMap, values []tree.Datum, keyPrefix []byte,
) (key [][]byte, err error) {
	numColumns := len(index.ColumnIDs)
	// If the index is a multi-column inverted index, we encode the non-inverted
	// columns in the key prefix. The trailing column is the inverted one and is
	// handled after this block.
	if numColumns > 1 {
		prefixIDs := index.ColumnIDs[:numColumns-1]
		prefixDirs := directions(index.ColumnDirections)
		// Double the size of the key to make the imminent appends more
		// efficient.
		keyPrefix = growKey(keyPrefix, len(keyPrefix))
		if keyPrefix, _, err = EncodeColumns(prefixIDs, prefixDirs, colMap, values, keyPrefix); err != nil {
			return nil, err
		}
	}
	// Look up the inverted column's datum, defaulting to NULL when absent.
	invertedVal := findColumnValue(index.ColumnIDs[numColumns-1], colMap, values)
	if !geoindex.IsEmptyConfig(&index.GeoConfig) {
		return EncodeGeoInvertedIndexTableKeys(invertedVal, keyPrefix, index)
	}
	return EncodeInvertedIndexTableKeys(invertedVal, keyPrefix, index.Version)
}
// EncodeInvertedIndexTableKeys produces one inverted index key per element in
// the input datum, which should be a container (either JSON or Array). For
// JSON, "element" means unique path through the document. Each output key is
// prefixed by inKey, and is guaranteed to be lexicographically sortable, but
// not guaranteed to be round-trippable during decoding. If the input Datum
// is (SQL) NULL, no inverted index keys will be produced, because inverted
// indexes cannot and do not need to satisfy the predicate col IS NULL.
//
// This function does not return keys for empty arrays or for NULL array
// elements unless the version is at least
// descpb.EmptyArraysInInvertedIndexesVersion. (Note that this only applies
// to arrays, not JSONs. This function returns keys for all non-null JSONs
// regardless of the version.)
func EncodeInvertedIndexTableKeys(
	val tree.Datum, inKey []byte, version descpb.IndexDescriptorVersion,
) (key [][]byte, err error) {
	if val == tree.DNull {
		return nil, nil
	}
	// Use the unwrapped datum for both the type dispatch and the assertions
	// below; the original code asserted on the wrapped val, which would fail
	// for a datum arriving inside a wrapper even though its underlying value
	// is a supported container type.
	datum := tree.UnwrapDatum(nil, val)
	switch datum.ResolvedType().Family() {
	case types.JsonFamily:
		// We do not need to pass the version for JSON types, since all prior
		// versions of JSON inverted indexes include keys for empty objects and
		// arrays.
		return json.EncodeInvertedIndexKeys(inKey, datum.(*tree.DJSON).JSON)
	case types.ArrayFamily:
		return encodeArrayInvertedIndexTableKeys(datum.(*tree.DArray), inKey, version)
	}
	return nil, errors.AssertionFailedf("trying to apply inverted index to unsupported type %s", datum.ResolvedType())
}
// EncodeContainingInvertedIndexSpans takes in a key prefix and returns the
// spans that must be scanned in the inverted index to evaluate a contains (@>)
// predicate with the given datum, which should be a container (either JSON
// or Array). These spans should be used to find the objects in the index that
// contain the given json or array. In other words, if we have a predicate
// x @> y, this function should use the value of y to find the spans to scan
// in an inverted index on x.
//
// The spans returned by EncodeContainingInvertedIndexSpans represent the
// intersection of unions. For example, if the returned results are:
//
//   { {["a", "b"), ["c", "d")}, {["e", "f")} }
//
// the expression should be evaluated as:
//
//             INTERSECTION
//              /        \
//           UNION    ["e", "f")
//          /     \
//   ["a", "b") ["c", "d")
//
// The input inKey is prefixed to the keys in all returned spans.
//
// Returns tight=true if the returned spans are tight and cannot produce false
// positives. Otherwise, returns tight=false.
//
// Returns unique=true if the spans are guaranteed not to produce duplicate
// primary keys. Otherwise, returns unique=false. This distinction is important
// for the case where the length of spans is 1, and the single element has
// length greater than 0. Per the above description, this would represent a
// UNION over the returned spans, with no INTERSECTION. If unique is true, that
// allows the optimizer to remove the UNION altogether (implemented
// with the invertedFilterer), and simply return the results of the constrained
// scan. unique is always false if the length of spans is greater than 1.
//
// The spans are not guaranteed to be sorted, so the caller must sort them if
// needed.
func EncodeContainingInvertedIndexSpans(
	evalCtx *tree.EvalContext, val tree.Datum, inKey []byte, version descpb.IndexDescriptorVersion,
) (spans []roachpb.Spans, tight, unique bool, err error) {
	if val == tree.DNull {
		return nil, false, false, nil
	}
	// Unwrap any wrapper datum (e.g. a DOidWrapper) so that the type
	// assertions below see the underlying container datum. Asserting on the
	// wrapped val directly would panic for wrapped datums.
	datum := tree.UnwrapDatum(evalCtx, val)
	switch datum.ResolvedType().Family() {
	case types.JsonFamily:
		return json.EncodeContainingInvertedIndexSpans(inKey, datum.(*tree.DJSON).JSON)
	case types.ArrayFamily:
		spans, unique, err := encodeContainingArrayInvertedIndexSpans(datum.(*tree.DArray), inKey, version)
		if err != nil {
			return nil, false, false, err
		}
		// Spans for array inverted indexes are always tight. err is known to
		// be nil here, so return it explicitly as such.
		return spans, true /* tight */, unique, nil
	}
	return nil, false, false, errors.AssertionFailedf(
		"trying to apply inverted index to unsupported type %s", datum.ResolvedType(),
	)
}
// encodeArrayInvertedIndexTableKeys returns a list of inverted index keys for
// the given input array, one per entry in the array. The input inKey is
// prefixed to all returned keys.
//
// This function does not return keys for empty arrays or for NULL array elements
// unless the version is at least descpb.EmptyArraysInInvertedIndexesVersion.
func encodeArrayInvertedIndexTableKeys(
	val *tree.DArray, inKey []byte, version descpb.IndexDescriptorVersion,
) (key [][]byte, err error) {
	// Newer index versions emit keys for empty arrays and NULL elements;
	// older ones omit both.
	emptyAndNullIndexed := version >= descpb.EmptyArraysInInvertedIndexesVersion
	if val.Array.Len() == 0 && emptyAndNullIndexed {
		return [][]byte{encoding.EncodeEmptyArray(inKey)}, nil
	}
	outKeys := make([][]byte, 0, len(val.Array))
	for _, elem := range val.Array {
		if elem == tree.DNull && !emptyAndNullIndexed {
			// Older versions did not index NULL elements. Newer versions must,
			// since `SELECT ARRAY[NULL] @> ARRAY[]` returns true.
			continue
		}
		// Copy the prefix so each output key owns its own storage.
		prefix := make([]byte, len(inKey))
		copy(prefix, inKey)
		encoded, err := EncodeTableKey(prefix, elem, encoding.Ascending)
		if err != nil {
			return nil, err
		}
		outKeys = append(outKeys, encoded)
	}
	// Deduplicate, since repeated array elements produce identical keys.
	return unique.UniquifyByteSlices(outKeys), nil
}
// encodeContainingArrayInvertedIndexSpans returns the spans that must be
// scanned in the inverted index to evaluate a contains (@>) predicate with
// the given array, one slice of spans per entry in the array. The input
// inKey is prefixed to all returned keys.
//
// Returns unique=true if the spans are guaranteed not to produce
// duplicate primary keys. Otherwise, returns unique=false.
func encodeContainingArrayInvertedIndexSpans(
	val *tree.DArray, inKey []byte, version descpb.IndexDescriptorVersion,
) (spans []roachpb.Spans, unique bool, err error) {
	switch {
	case val.Array.Len() == 0:
		// All arrays contain the empty array, so scan the whole prefix.
		full := roachpb.Span{Key: inKey, EndKey: roachpb.Key(inKey).PrefixEnd()}
		return []roachpb.Spans{{full}}, false, nil
	case val.HasNulls:
		// If there are any nulls, return empty spans. This is needed to ensure
		// that `SELECT ARRAY[NULL, 2] @> ARRAY[NULL, 2]` is false.
		return nil, true, nil
	}
	keys, err := encodeArrayInvertedIndexTableKeys(val, inKey, version)
	if err != nil {
		return nil, false, err
	}
	spans = make([]roachpb.Spans, len(keys))
	for i := range keys {
		k := keys[i]
		spans[i] = roachpb.Spans{{Key: k, EndKey: roachpb.Key(k).PrefixEnd()}}
	}
	// A single key cannot yield duplicate primary keys; more than one key can.
	return spans, len(keys) == 1, nil
}
// EncodeGeoInvertedIndexTableKeys is the equivalent of EncodeInvertedIndexTableKeys
// for Geography and Geometry. Each returned key is prefixed by inKey. A (SQL)
// NULL input produces no keys.
func EncodeGeoInvertedIndexTableKeys(
	val tree.Datum, inKey []byte, index *descpb.IndexDescriptor,
) (key [][]byte, err error) {
	if val == tree.DNull {
		return nil, nil
	}
	switch val.ResolvedType().Family() {
	case types.GeographyFamily:
		// Renamed from `index` to avoid shadowing the descriptor parameter.
		geogIdx := geoindex.NewS2GeographyIndex(*index.GeoConfig.S2Geography)
		intKeys, bbox, err := geogIdx.InvertedIndexKeys(context.TODO(), val.(*tree.DGeography).Geography)
		if err != nil {
			return nil, err
		}
		return encodeGeoKeys(encoding.EncodeGeoInvertedAscending(inKey), intKeys, bbox)
	case types.GeometryFamily:
		geomIdx := geoindex.NewS2GeometryIndex(*index.GeoConfig.S2Geometry)
		intKeys, bbox, err := geomIdx.InvertedIndexKeys(context.TODO(), val.(*tree.DGeometry).Geometry)
		if err != nil {
			return nil, err
		}
		return encodeGeoKeys(encoding.EncodeGeoInvertedAscending(inKey), intKeys, bbox)
	default:
		// Use AssertionFailedf for consistency with the other inverted index
		// encoders, which treat an unsupported type as an internal invariant
		// violation.
		return nil, errors.AssertionFailedf("unexpected type: %s", val.ResolvedType().Family())
	}
}
// encodeGeoKeys encodes one inverted index key per entry in geoKeys: each key
// is inKey, followed by the varint-encoded geo key, followed by the encoded
// bounding box. All keys are carved out of a single backing buffer to avoid
// per-key heap allocations.
func encodeGeoKeys(
	inKey []byte, geoKeys []geoindex.Key, bbox geopb.BoundingBox,
) (keys [][]byte, err error) {
	// Encode the bounding box once; it is identical for every key.
	encodedBBox := make([]byte, 0, encoding.MaxGeoInvertedBBoxLen)
	encodedBBox = encoding.EncodeGeoInvertedBBox(encodedBBox, bbox.LoX, bbox.LoY, bbox.HiX, bbox.HiY)
	// Avoid per-key heap allocations.
	// NOTE(review): if an append below grows b past its capacity, earlier keys
	// keep pointing into the previous backing array, which remains valid — the
	// pre-sizing is a best-effort optimization, not a correctness requirement.
	b := make([]byte, 0, len(geoKeys)*(len(inKey)+encoding.MaxVarintLen+len(encodedBBox)))
	keys = make([][]byte, len(geoKeys))
	for i, k := range geoKeys {
		prev := len(b)
		b = append(b, inKey...)
		b = encoding.EncodeUvarintAscending(b, uint64(k))
		b = append(b, encodedBBox...)
		// Set capacity so that the caller appending does not corrupt later keys.
		newKey := b[prev:len(b):len(b)]
		keys[i] = newKey
	}
	return keys, nil