Skip to content

Commit

Permalink
colblk: add isObsolete bitmap to data block
Browse files (browse the repository at this point in the history)
Add an isObsolete bitmap to the colblk data block schema, encoding whether a
key is obsolete (i.e. shadowed by an identical point key or range deletion with
a higher sequence number).

Iteration-time filtering of obsolete keys is not yet implemented.
  • Loading branch information
jbowens committed Sep 14, 2024
1 parent 21897bd commit 139ee80
Show file tree
Hide file tree
Showing 10 changed files with 2,047 additions and 1,819 deletions.
6 changes: 3 additions & 3 deletions sstable/colblk/cockroach_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ func TestCockroachDataBlock(t *testing.T) {
for w.Size() < targetBlockSize {
ik := base.MakeInternalKey(keys[count], base.SeqNum(rng.Uint64n(uint64(base.SeqNumMax))), base.InternalKeyKindSet)
kcmp := w.KeyWriter.ComparePrev(ik.UserKey)
w.Add(ik, values[count], block.InPlaceValuePrefix(kcmp.PrefixEqual()), kcmp)
w.Add(ik, values[count], block.InPlaceValuePrefix(kcmp.PrefixEqual()), kcmp, false /* isObsolete */)
count++
}
serializedBlock, _ := w.Finish(w.Rows(), w.Size())
Expand Down Expand Up @@ -415,7 +415,7 @@ func benchmarkCockroachDataBlockWriter(b *testing.B, keyConfig crdbtest.KeyConfi
for w.Size() < targetBlockSize {
ik := base.MakeInternalKey(keys[count], base.SeqNum(rng.Uint64n(uint64(base.SeqNumMax))), base.InternalKeyKindSet)
kcmp := w.KeyWriter.ComparePrev(ik.UserKey)
w.Add(ik, values[count], block.InPlaceValuePrefix(kcmp.PrefixEqual()), kcmp)
w.Add(ik, values[count], block.InPlaceValuePrefix(kcmp.PrefixEqual()), kcmp, false /* isObsolete */)
count++
}
_, _ = w.Finish(w.Rows(), w.Size())
Expand Down Expand Up @@ -459,7 +459,7 @@ func benchmarkCockroachDataBlockIter(b *testing.B, keyConfig crdbtest.KeyConfig,
for w.Size() < targetBlockSize {
ik := base.MakeInternalKey(keys[count], base.SeqNum(rng.Uint64n(uint64(base.SeqNumMax))), base.InternalKeyKindSet)
kcmp := w.KeyWriter.ComparePrev(ik.UserKey)
w.Add(ik, values[count], block.InPlaceValuePrefix(kcmp.PrefixEqual()), kcmp)
w.Add(ik, values[count], block.InPlaceValuePrefix(kcmp.PrefixEqual()), kcmp, false /* isObsolete */)
count++
}
serializedBlock, _ := w.Finish(w.Rows(), w.Size())
Expand Down
28 changes: 25 additions & 3 deletions sstable/colblk/data_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,10 @@ type DataBlockWriter struct {
// isValueExternal is the column writer for the is-value-external bitmap
// that indicates when a value is stored out-of-band in a value block.
isValueExternal BitmapBuilder
// isObsolete is the column writer for the is-obsolete bitmap that indicates
// when a key is known to be obsolete/non-live (i.e., shadowed by another
// identical point key or range deletion with a higher sequence number).
isObsolete BitmapBuilder

enc blockEncoder
rows int
Expand All @@ -379,13 +383,12 @@ type DataBlockWriter struct {
lastUserKeyTmp []byte
}

// TODO(jackson): Add an isObsolete bitmap column.

// Indices of the columns that a data block stores in addition to the columns
// defined by the key schema. Each constant is an offset relative to the key
// schema's columns: the absolute column index is computed as
// len(schema.ColumnTypes) + <constant>.
//
// The values are assigned by iota, so the declaration order below determines
// each column's index. NOTE(review): reordering or inserting entries anywhere
// other than directly before dataBlockColumnMax presumably changes the encoded
// block layout -- confirm before doing so.
const (
// dataBlockColumnTrailer is the offset of the column holding each row's
// internal key trailer.
dataBlockColumnTrailer = iota
// dataBlockColumnPrefixChanged is the offset of the bitmap column that the
// reader loads as its prefix-changed bitmap.
dataBlockColumnPrefixChanged
// dataBlockColumnValue is the offset of the raw-bytes column holding each
// row's value.
dataBlockColumnValue
// dataBlockColumnIsValueExternal is the offset of the bitmap column marking
// rows whose value is stored out-of-band in a value block.
dataBlockColumnIsValueExternal
// dataBlockColumnIsObsolete is the offset of the bitmap column marking rows
// whose key is obsolete (shadowed by an identical point key or range
// deletion with a higher sequence number).
dataBlockColumnIsObsolete
// dataBlockColumnMax is one past the last column offset above; presumably it
// is used as the count of data-block-specific columns (its uses are not
// visible here).
dataBlockColumnMax
)

Expand All @@ -403,6 +406,7 @@ func (w *DataBlockWriter) Init(schema KeySchema) {
w.prefixSame.Reset()
w.values.Init()
w.isValueExternal.Reset()
w.isObsolete.Reset()
w.rows = 0
w.maximumKeyLength = 0
w.lastUserKeyTmp = w.lastUserKeyTmp[:0]
Expand All @@ -416,6 +420,7 @@ func (w *DataBlockWriter) Reset() {
w.prefixSame.Reset()
w.values.Reset()
w.isValueExternal.Reset()
w.isObsolete.Reset()
w.rows = 0
w.maximumKeyLength = 0
w.lastUserKeyTmp = w.lastUserKeyTmp[:0]
Expand Down Expand Up @@ -445,6 +450,10 @@ func (w *DataBlockWriter) String() string {
w.isValueExternal.WriteDebug(&buf, w.rows)
fmt.Fprintln(&buf)

fmt.Fprintf(&buf, "%d: is-obsolete: ", len(w.Schema.ColumnTypes)+dataBlockColumnIsObsolete)
w.isObsolete.WriteDebug(&buf, w.rows)
fmt.Fprintln(&buf)

return buf.String()
}

Expand All @@ -457,12 +466,19 @@ func (w *DataBlockWriter) String() string {
// The caller is required to pass this in because in expected use cases, the
// caller will also require the same information.
func (w *DataBlockWriter) Add(
ikey base.InternalKey, value []byte, valuePrefix block.ValuePrefix, kcmp KeyComparison,
ikey base.InternalKey,
value []byte,
valuePrefix block.ValuePrefix,
kcmp KeyComparison,
isObsolete bool,
) {
w.KeyWriter.WriteKey(w.rows, ikey.UserKey, kcmp.PrefixLen, kcmp.CommonPrefixLen)
if kcmp.PrefixEqual() {
w.prefixSame.Set(w.rows)
}
if isObsolete {
w.isObsolete.Set(w.rows)
}
w.trailers.Set(w.rows, uint64(ikey.Trailer))
if valuePrefix.IsValueHandle() {
w.isValueExternal.Set(w.rows)
Expand Down Expand Up @@ -493,6 +509,7 @@ func (w *DataBlockWriter) Size() int {
off = w.prefixSame.InvertedSize(w.rows, off)
off = w.values.Size(w.rows, off)
off = w.isValueExternal.Size(w.rows, off)
off = w.isObsolete.Size(w.rows, off)
off++ // trailer padding byte
return int(off)
}
Expand Down Expand Up @@ -532,6 +549,7 @@ func (w *DataBlockWriter) Finish(rows, size int) (finished []byte, lastKey base.
w.enc.encode(rows, &w.prefixSame)
w.enc.encode(rows, &w.values)
w.enc.encode(rows, &w.isValueExternal)
w.enc.encode(rows, &w.isObsolete)
finished = w.enc.finish()

w.lastUserKeyTmp = w.lastUserKeyTmp[:0]
Expand Down Expand Up @@ -568,6 +586,9 @@ type DataBlockReader struct {
// true, the value contains a ValuePrefix byte followed by an encoded value
// handle indicating the value's location within the value block(s).
isValueExternal Bitmap
// isObsolete is the column reader for the is-obsolete bitmap that indicates
// whether a key is obsolete/non-live (i.e., shadowed by an identical point
// key or range deletion with a higher sequence number).
isObsolete Bitmap
// maximumKeyLength is the maximum length of a user key in the block.
// Iterators may use it to allocate a sufficiently large buffer up front,
// and elide size checks during iteration.
Expand All @@ -586,6 +607,7 @@ func (r *DataBlockReader) Init(schema KeySchema, data []byte) {
r.prefixChanged = r.r.Bitmap(len(schema.ColumnTypes) + dataBlockColumnPrefixChanged)
r.values = r.r.RawBytes(len(schema.ColumnTypes) + dataBlockColumnValue)
r.isValueExternal = r.r.Bitmap(len(schema.ColumnTypes) + dataBlockColumnIsValueExternal)
r.isObsolete = r.r.Bitmap(len(schema.ColumnTypes) + dataBlockColumnIsObsolete)
r.maximumKeyLength = binary.LittleEndian.Uint32(data[:dataBlockCustomHeaderSize])
}

Expand Down
9 changes: 7 additions & 2 deletions sstable/colblk/data_block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ func TestDataBlock(t *testing.T) {
return buf.String()
case "write":
for _, line := range strings.Split(td.Input, "\n") {
isObsolete := strings.HasSuffix(line, "obsolete")
if isObsolete {
line = strings.TrimSuffix(line, "obsolete")
}

j := strings.IndexRune(line, ':')
ik := base.ParseInternalKey(line[:j])

Expand All @@ -55,7 +60,7 @@ func TestDataBlock(t *testing.T) {
vp = block.ValueHandlePrefix(kcmp.PrefixEqual(), 0)
}
v := []byte(line[j+1:])
w.Add(ik, v, vp, kcmp)
w.Add(ik, v, vp, kcmp, isObsolete)
sizes = append(sizes, w.Size())
}
fmt.Fprint(&buf, &w)
Expand Down Expand Up @@ -108,7 +113,7 @@ func benchmarkDataBlockWriter(b *testing.B, prefixSize, valueSize int) {
ik := base.MakeInternalKey(keys[j], base.SeqNum(rng.Uint64n(uint64(base.SeqNumMax))), base.InternalKeyKindSet)
kcmp := w.KeyWriter.ComparePrev(ik.UserKey)
vp := block.InPlaceValuePrefix(kcmp.PrefixEqual())
w.Add(ik, values[j], vp, kcmp)
w.Add(ik, values[j], vp, kcmp, false /* isObsolete */)
j++
}
w.Finish(w.Rows(), w.Size())
Expand Down
Loading

0 comments on commit 139ee80

Please sign in to comment.