Skip to content

Commit

Permalink
colblk: use crlib/crbytes.CommonPrefix
Browse files Browse the repository at this point in the history
Update crlib and use its faster implementation for finding the longest
shared prefix of two byte slices.
  • Loading branch information
RaduBerinde committed Aug 19, 2024
1 parent f90b350 commit c2749cc
Show file tree
Hide file tree
Showing 7 changed files with 16 additions and 28 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ require (
github.com/DataDog/zstd v1.5.6-0.20230824185856-869dae002e5e
github.com/HdrHistogram/hdrhistogram-go v1.1.2
github.com/cespare/xxhash/v2 v2.2.0
github.com/cockroachdb/crlib v0.0.0-20240729155931-991150b7e290
github.com/cockroachdb/crlib v0.0.0-20240816115810-1c502cdb7c1d
github.com/cockroachdb/datadriven v1.0.3-0.20240530155848-7682d40af056
github.com/cockroachdb/errors v1.11.3
github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5P
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/cockroachdb/crlib v0.0.0-20240729155931-991150b7e290 h1:oJGWhlrgtcPSzJEgFHo6VIQW2TCFR/OiByQr6yzjDkU=
github.com/cockroachdb/crlib v0.0.0-20240729155931-991150b7e290/go.mod h1:Gq51ZeKaFCXk6QwuGM0w1dnaOqc/F5zKT2zA9D6Xeac=
github.com/cockroachdb/crlib v0.0.0-20240816115810-1c502cdb7c1d h1:IgQRpDBWe+tThngFMwhWpRJ/oEe5ZUJHq5nBNqeiIeI=
github.com/cockroachdb/crlib v0.0.0-20240816115810-1c502cdb7c1d/go.mod h1:Gq51ZeKaFCXk6QwuGM0w1dnaOqc/F5zKT2zA9D6Xeac=
github.com/cockroachdb/datadriven v1.0.3-0.20240530155848-7682d40af056 h1:slXychO2uDM6hYRu4c0pD0udNI8uObfeKN6UInWViS8=
github.com/cockroachdb/datadriven v1.0.3-0.20240530155848-7682d40af056/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU=
github.com/cockroachdb/errors v1.11.3 h1:5bA+k2Y6r+oz/6Z/RFlNeVCesGARKuC6YymtcDrbC/I=
Expand Down
3 changes: 2 additions & 1 deletion sstable/colblk/block_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"testing"
"time"

"github.com/cockroachdb/crlib/crbytes"
"github.com/cockroachdb/datadriven"
"golang.org/x/exp/rand"
)
Expand Down Expand Up @@ -244,7 +245,7 @@ func buildBlock(schema []testColumnSpec, rows int, data []interface{}) []byte {
for r, v := range colData {
sharedPrefix := 0
if r > 0 {
sharedPrefix = bytesSharedPrefix(colData[r-1], v)
sharedPrefix = crbytes.CommonPrefix(colData[r-1], v)
}
pbb.Put(v, sharedPrefix)
}
Expand Down
3 changes: 2 additions & 1 deletion sstable/colblk/cockroach_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"time"
"unsafe"

"github.com/cockroachdb/crlib/crbytes"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/crdbtest"
"github.com/cockroachdb/pebble/sstable/block"
Expand Down Expand Up @@ -62,7 +63,7 @@ func (kw *cockroachKeyWriter) ComparePrev(key []byte) KeyComparison {
lp := kw.prefixes.LastKey()
var cmpv KeyComparison
cmpv.PrefixLen = int32(crdbtest.Split(key)) // TODO(jackson): Inline
cmpv.CommonPrefixLen = int32(bytesSharedPrefix(lp, key[:cmpv.PrefixLen]))
cmpv.CommonPrefixLen = int32(crbytes.CommonPrefix(lp, key[:cmpv.PrefixLen]))
if cmpv.CommonPrefixLen == cmpv.PrefixLen {
cmpv.UserKeyComparison = int32(crdbtest.CompareSuffixes(key[cmpv.PrefixLen:], kw.prevSuffix))
return cmpv
Expand Down
3 changes: 2 additions & 1 deletion sstable/colblk/data_block.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (
"sync"
"unsafe"

"github.com/cockroachdb/crlib/crbytes"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/binfmt"
Expand Down Expand Up @@ -163,7 +164,7 @@ func (w *defaultKeyWriter) ComparePrev(key []byte) KeyComparison {

var cmpv KeyComparison
cmpv.PrefixLen = int32(w.comparer.Split(key))
cmpv.CommonPrefixLen = int32(bytesSharedPrefix(lp, key[:cmpv.PrefixLen]))
cmpv.CommonPrefixLen = int32(crbytes.CommonPrefix(lp, key[:cmpv.PrefixLen]))
if len(lp) == 0 {
// The first key has no previous key to compare to.
return cmpv
Expand Down
22 changes: 3 additions & 19 deletions sstable/colblk/prefix_bytes.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@ package colblk

import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math/bits"
"slices"
"strings"
"unsafe"

"github.com/cockroachdb/crlib/crbytes"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/binfmt"
"github.com/cockroachdb/pebble/internal/invariants"
Expand Down Expand Up @@ -757,9 +757,9 @@ func (b *PrefixBytesBuilder) Put(key []byte, bytesSharedWithPrev int) {
if bytes.Compare(key, b.data[len(b.data)-prev.lastKeyLen:]) < 0 {
panic(errors.AssertionFailedf("keys must be added in order: %q < %q", key, b.data[len(b.data)-prev.lastKeyLen:]))
}
if bytesSharedWithPrev != bytesSharedPrefix(key, b.data[len(b.data)-prev.lastKeyLen:]) {
if bytesSharedWithPrev != crbytes.CommonPrefix(key, b.data[len(b.data)-prev.lastKeyLen:]) {
panic(errors.AssertionFailedf("bytesSharedWithPrev %d != %d", bytesSharedWithPrev,
bytesSharedPrefix(key, b.data[len(b.data)-prev.lastKeyLen:])))
crbytes.CommonPrefix(key, b.data[len(b.data)-prev.lastKeyLen:])))
}
}
}
Expand Down Expand Up @@ -1091,19 +1091,3 @@ func (b bundleCalc) offsetIndexByBundleIndex(bi int) int {
func (b bundleCalc) bundleCount(rows int) int {
	// Shift the index of the last row (rows-1) right by bundleShift, then add
	// one to turn that shifted index into a count.
	lastRow := rows - 1
	return (lastRow >> b.bundleShift) + 1
}

// bytesSharedPrefix reports how many leading bytes a and b have in common.
// The result is never larger than the length of the shorter slice.
func bytesSharedPrefix(a, b []byte) int {
	limit := min(len(a), len(b))
	i := 0
	// Word phase: while at least eight bytes remain in both slices, compare
	// them as a single uint64. Only equality is tested, so the byte order used
	// to load the words does not affect the result.
	for ; i+8 <= limit; i += 8 {
		if binary.LittleEndian.Uint64(a[i:]) != binary.LittleEndian.Uint64(b[i:]) {
			break
		}
	}
	// Byte phase: locate the exact mismatch inside the word that differed, or
	// consume the tail of fewer than eight bytes.
	for ; i < limit; i++ {
		if a[i] != b[i] {
			break
		}
	}
	return i
}
7 changes: 4 additions & 3 deletions sstable/colblk/prefix_bytes_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"testing"
"time"

"github.com/cockroachdb/crlib/crbytes"
"github.com/cockroachdb/datadriven"
"github.com/cockroachdb/pebble/internal/binfmt"
"github.com/cockroachdb/pebble/internal/invariants"
Expand Down Expand Up @@ -46,7 +47,7 @@ func TestPrefixBytes(t *testing.T) {
for _, k := range inputKeys {
keyPrefixLenSharedWithPrev := len(k)
if builder.nKeys > 0 {
keyPrefixLenSharedWithPrev = bytesSharedPrefix(builder.LastKey(), k)
keyPrefixLenSharedWithPrev = crbytes.CommonPrefix(builder.LastKey(), k)
}
p := []byte(k)
builder.Put(p, keyPrefixLenSharedWithPrev)
Expand Down Expand Up @@ -149,7 +150,7 @@ func TestPrefixBytesRandomized(t *testing.T) {
for i := 0; i < len(userKeys); i++ {
keyPrefixLenSharedWithPrev := 0
if i > 0 {
keyPrefixLenSharedWithPrev = bytesSharedPrefix(userKeys[i-1], userKeys[i])
keyPrefixLenSharedWithPrev = crbytes.CommonPrefix(userKeys[i-1], userKeys[i])
}
pbb.Put(userKeys[i], keyPrefixLenSharedWithPrev)
}
Expand Down Expand Up @@ -247,7 +248,7 @@ func BenchmarkPrefixBytes(b *testing.B) {
for i := 0; i < n; i++ {
keyPrefixLenSharedWithPrev := 0
if i > 0 {
keyPrefixLenSharedWithPrev = bytesSharedPrefix(userKeys[i-1], userKeys[i])
keyPrefixLenSharedWithPrev = crbytes.CommonPrefix(userKeys[i-1], userKeys[i])
}
pbb.Put(userKeys[i], keyPrefixLenSharedWithPrev)
}
Expand Down

0 comments on commit c2749cc

Please sign in to comment.