Skip to content

Commit

Permalink
kvserver: reenable splitting of snapshot sstables
Browse files Browse the repository at this point in the history
This change updates the snapshot strategy's sender side
to iterate over points and ranges together, instead of only
iterating on points first, then only ranges. This allows us to
more efficiently split snapshot sstables on the receiver side.
To avoid the need to add a version gate on the receiver side, we
propagate a bool, RangeKeysInOrder, to the receiver which is a signal
to it to enable sstable splits.

Fixes cockroachdb#129026.

Epic: none

Release note: None
  • Loading branch information
itsbilal committed Sep 30, 2024
1 parent 57d918b commit 6abec1b
Show file tree
Hide file tree
Showing 28 changed files with 158 additions and 122 deletions.
9 changes: 5 additions & 4 deletions pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,10 +515,10 @@ func runDebugRangeData(cmd *cobra.Command, args []string) error {
var results int
return rditer.IterateReplicaKeySpans(cmd.Context(), &desc, snapshot, debugCtx.replicated,
rditer.ReplicatedSpansAll,
func(iter storage.EngineIterator, _ roachpb.Span, keyType storage.IterKeyType) error {
func(iter storage.EngineIterator, _ roachpb.Span) error {
for ok := true; ok && err == nil; ok, err = iter.NextEngineKey() {
switch keyType {
case storage.IterKeyTypePointsOnly:
hasPoint, hasRange := iter.HasPointAndRange()
if hasPoint {
key, err := iter.UnsafeEngineKey()
if err != nil {
return err
Expand All @@ -532,8 +532,9 @@ func runDebugRangeData(cmd *cobra.Command, args []string) error {
if results == debugCtx.maxResults {
return iterutil.StopIteration()
}
}

case storage.IterKeyTypeRangesOnly:
if hasRange && iter.RangeKeyChanged() {
bounds, err := iter.EngineRangeBounds()
if err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/client_merge_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3903,7 +3903,7 @@ func TestStoreRangeMergeRaftSnapshot(t *testing.T) {

err := rditer.IterateReplicaKeySpans(
context.Background(), inSnap.Desc, sendingEngSnapshot, true /* replicatedOnly */, rditer.ReplicatedSpansAll,
func(iter storage.EngineIterator, span roachpb.Span, keyType storage.IterKeyType) error {
func(iter storage.EngineIterator, span roachpb.Span) error {
fw, ok := sstFileWriters[string(span.Key)]
if !ok || !fw.span.Equal(span) {
return errors.Errorf("unexpected span %s", span)
Expand Down
6 changes: 6 additions & 0 deletions pkg/kv/kvserver/kvserverpb/raft.proto
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,12 @@ message SnapshotRequest {
// file contents.
bool external_replicate = 13;

// If true, the snapshot is iterating over range keys and point
// keys in key order, as opposed to iterating over point keys first
// and range keys second. The receiver can take advantage of this
// to split points/range keys into multiple sstables for ingestion.
bool range_keys_in_order = 14;

reserved 1, 4, 6, 7, 8, 9;
}

Expand Down
37 changes: 17 additions & 20 deletions pkg/kv/kvserver/rditer/replica_data_iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ func IterateReplicaKeySpans(
reader storage.Reader,
replicatedOnly bool,
replicatedSpansFilter ReplicatedSpansFilter,
visitor func(storage.EngineIterator, roachpb.Span, storage.IterKeyType) error,
visitor func(storage.EngineIterator, roachpb.Span) error,
) error {
if !reader.ConsistentIterators() {
panic("reader must provide consistent iterators")
Expand All @@ -425,28 +425,25 @@ func IterateReplicaKeySpans(
UnreplicatedByRangeID: true,
})
}
keyTypes := []storage.IterKeyType{storage.IterKeyTypePointsOnly, storage.IterKeyTypeRangesOnly}
for _, span := range spans {
for _, keyType := range keyTypes {
err := func() error {
iter, err := reader.NewEngineIterator(ctx, storage.IterOptions{
KeyTypes: keyType,
LowerBound: span.Key,
UpperBound: span.EndKey,
})
if err != nil {
return err
}
defer iter.Close()
ok, err := iter.SeekEngineKeyGE(storage.EngineKey{Key: span.Key})
if err == nil && ok {
err = visitor(iter, span, keyType)
}
return err
}()
err := func() error {
iter, err := reader.NewEngineIterator(ctx, storage.IterOptions{
KeyTypes: storage.IterKeyTypePointsAndRanges,
LowerBound: span.Key,
UpperBound: span.EndKey,
})
if err != nil {
return iterutil.Map(err)
return err
}
defer iter.Close()
ok, err := iter.SeekEngineKeyGE(storage.EngineKey{Key: span.Key})
if err == nil && ok {
err = visitor(iter, span)
}
return err
}()
if err != nil {
return iterutil.Map(err)
}
}
return nil
Expand Down
21 changes: 10 additions & 11 deletions pkg/kv/kvserver/rditer/replica_data_iter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func verifyIterateReplicaKeySpans(

require.NoError(t, IterateReplicaKeySpans(context.Background(), desc, readWriter, replicatedOnly,
replicatedSpansFilter,
func(iter storage.EngineIterator, span roachpb.Span, keyType storage.IterKeyType) error {
func(iter storage.EngineIterator, span roachpb.Span) error {
var err error
for ok := true; ok && err == nil; ok, err = iter.NextEngineKey() {
// Span should not be empty.
Expand All @@ -186,8 +186,8 @@ func verifyIterateReplicaKeySpans(
require.True(t, span.ContainsKey(key.Key), "%s not in %s", key, span)
require.True(t, key.IsLockTableKey() || key.IsMVCCKey(), "%s neither lock nor MVCC", key)

switch keyType {
case storage.IterKeyTypePointsOnly:
hasPoint, hasRange := iter.HasPointAndRange()
if hasPoint {
var mvccKey storage.MVCCKey
if key.IsMVCCKey() {
var err error
Expand All @@ -213,8 +213,8 @@ func verifyIterateReplicaKeySpans(
fmt.Sprintf("%x", key.Version),
mvccKey.String(),
})

case storage.IterKeyTypeRangesOnly:
}
if hasRange && iter.RangeKeyChanged() {
bounds, err := iter.EngineRangeBounds()
require.NoError(t, err)
require.True(t, span.Contains(bounds), "%s not contained in %s", bounds, span)
Expand All @@ -234,9 +234,6 @@ func verifyIterateReplicaKeySpans(
mvccRangeKey.String(),
})
}

default:
t.Fatalf("unexpected key type %v", keyType)
}
}
return err
Expand Down Expand Up @@ -482,9 +479,11 @@ func TestReplicaDataIteratorGlobalRangeKey(t *testing.T) {
var actualSpans []roachpb.Span
require.NoError(t, IterateReplicaKeySpans(
context.Background(), &desc, snapshot, replicatedOnly, ReplicatedSpansAll,
func(iter storage.EngineIterator, span roachpb.Span, keyType storage.IterKeyType) error {
func(iter storage.EngineIterator, span roachpb.Span) error {
// We should never see any point keys.
require.Equal(t, storage.IterKeyTypeRangesOnly, keyType)
hasPoint, hasRange := iter.HasPointAndRange()
require.False(t, hasPoint)
require.True(t, hasRange)

// The iterator should already be positioned on the range key, which should
// span the entire key span and be the only range key.
Expand Down Expand Up @@ -590,7 +589,7 @@ func benchReplicaEngineDataIterator(b *testing.B, numRanges, numKeysPerRange, va
for _, desc := range descs {
err := IterateReplicaKeySpans(
context.Background(), &desc, snapshot, false /* replicatedOnly */, ReplicatedSpansAll,
func(iter storage.EngineIterator, _ roachpb.Span, _ storage.IterKeyType) error {
func(iter storage.EngineIterator, _ roachpb.Span) error {
var err error
for ok := true; ok && err == nil; ok, err = iter.NextEngineKey() {
_, _ = iter.UnsafeEngineKey()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,25 @@ echo
+---------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+
| SPAN | KEY HEX | ENDKEY HEX | VERSION HEX | PRETTY |
+---------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+
| /Local/RangeID/1/{r""-s""} | 016989723a61 | 016989723a78 | 000000000000000109 | /Local/RangeID/1/r":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{r""-s""} | 016989726162632d120ce61c175eb445878c36dcf4062ada4c0001 | | | /Local/RangeID/1/r/AbortSpan/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" |
| /Local/RangeID/1/{r""-s""} | 016989726162632d129855a1ef8eb94c06a106cab1dda78a2b0001 | | | /Local/RangeID/1/r/AbortSpan/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" |
| /Local/RangeID/1/{r""-s""} | 016989726c67632d | | | /Local/RangeID/1/r/RangeGCThreshold |
| /Local/RangeID/1/{r""-s""} | 016989727261736b | | | /Local/RangeID/1/r/RangeAppliedState |
| /Local/RangeID/1/{r""-s""} | 01698972726c6c2d | | | /Local/RangeID/1/r/RangeLease |
| /Local/RangeID/1/{r""-s""} | 016989723a61 | 016989723a78 | 000000000000000109 | /Local/RangeID/1/r":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{u""-v""} | 016989753a61 | 016989753a78 | 000000000000000109 | /Local/RangeID/1/u":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{u""-v""} | 0169897572667462 | | | /Local/RangeID/1/u/RangeTombstone |
| /Local/RangeID/1/{u""-v""} | 0169897572667468 | | | /Local/RangeID/1/u/RaftHardState |
| /Local/RangeID/1/{u""-v""} | 016989757266746c0000000000000001 | | | /Local/RangeID/1/u/RaftLog/logIndex:1 |
| /Local/RangeID/1/{u""-v""} | 016989757266746c0000000000000002 | | | /Local/RangeID/1/u/RaftLog/logIndex:2 |
| /Local/RangeID/1/{u""-v""} | 01698975726c7274 | | | /Local/RangeID/1/u/RangeLastReplicaGCTimestamp |
| /Local/RangeID/1/{u""-v""} | 016989753a61 | 016989753a78 | 000000000000000109 | /Local/RangeID/1/u":{a"-x"}/0.000000001,0 |
| /Local/Range"{a"-b"} | 016b1261000172647363 | | 0000000000000001 | /Local/Range"a"/RangeDescriptor/0.000000001,0 |
| /Local/Range"{a"-b"} | 016b1261000174786e2d0ce61c175eb445878c36dcf4062ada4c | | | /Local/Range"a"/Transaction/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" |
| /Local/Range"{a"-b"} | 016b126100ff000174786e2d9855a1ef8eb94c06a106cab1dda78a2b | | | /Local/Range"a\x00"/Transaction/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" |
| /Local/Range"{a"-b"} | 016b1261ffffffff000174786e2d295e727c8ca9437cbb5e8e2ebbad996f | | | /Local/Range"a\xff\xff\xff\xff"/Transaction/"295e727c-8ca9-437c-bb5e-8e2ebbad996f" |
| /Local/Lock/Local/Range"{a"-b"} | 017a6b12016b126100ff01726473630001 | | 030ce61c175eb445878c36dcf4062ada4c | /Local/Range"a"/RangeDescriptor |
| /Local/Lock"{a"-b"} | 017a6b12610001 | | 030ce61c175eb445878c36dcf4062ada4c | "a" |
| {a-b} | 61 | 62 | 000000000000000109 | {a-b}/0.000000001,0 |
| {a-b} | 61 | | 0000000000000001 | "a"/0.000000001,0 |
| {a-b} | 61ffffffff | | 0000000000000001 | "a\xff\xff\xff\xff"/0.000000001,0 |
| {a-b} | 61 | 62 | 000000000000000109 | {a-b}/0.000000001,0 |
+---------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,18 @@ echo
+---------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+
| SPAN | KEY HEX | ENDKEY HEX | VERSION HEX | PRETTY |
+---------------------------------+--------------------------------------------------------------+--------------+------------------------------------+------------------------------------------------------------------------------------+
| /Local/RangeID/1/{r""-s""} | 016989723a61 | 016989723a78 | 000000000000000109 | /Local/RangeID/1/r":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{r""-s""} | 016989726162632d120ce61c175eb445878c36dcf4062ada4c0001 | | | /Local/RangeID/1/r/AbortSpan/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" |
| /Local/RangeID/1/{r""-s""} | 016989726162632d129855a1ef8eb94c06a106cab1dda78a2b0001 | | | /Local/RangeID/1/r/AbortSpan/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" |
| /Local/RangeID/1/{r""-s""} | 016989726c67632d | | | /Local/RangeID/1/r/RangeGCThreshold |
| /Local/RangeID/1/{r""-s""} | 016989727261736b | | | /Local/RangeID/1/r/RangeAppliedState |
| /Local/RangeID/1/{r""-s""} | 01698972726c6c2d | | | /Local/RangeID/1/r/RangeLease |
| /Local/RangeID/1/{r""-s""} | 016989723a61 | 016989723a78 | 000000000000000109 | /Local/RangeID/1/r":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{u""-v""} | 016989753a61 | 016989753a78 | 000000000000000109 | /Local/RangeID/1/u":{a"-x"}/0.000000001,0 |
| /Local/RangeID/1/{u""-v""} | 0169897572667462 | | | /Local/RangeID/1/u/RangeTombstone |
| /Local/RangeID/1/{u""-v""} | 0169897572667468 | | | /Local/RangeID/1/u/RaftHardState |
| /Local/RangeID/1/{u""-v""} | 016989757266746c0000000000000001 | | | /Local/RangeID/1/u/RaftLog/logIndex:1 |
| /Local/RangeID/1/{u""-v""} | 016989757266746c0000000000000002 | | | /Local/RangeID/1/u/RaftLog/logIndex:2 |
| /Local/RangeID/1/{u""-v""} | 01698975726c7274 | | | /Local/RangeID/1/u/RangeLastReplicaGCTimestamp |
| /Local/RangeID/1/{u""-v""} | 016989753a61 | 016989753a78 | 000000000000000109 | /Local/RangeID/1/u":{a"-x"}/0.000000001,0 |
| /Local/Range"{a"-b"} | 016b1261000172647363 | | 0000000000000001 | /Local/Range"a"/RangeDescriptor/0.000000001,0 |
| /Local/Range"{a"-b"} | 016b1261000174786e2d0ce61c175eb445878c36dcf4062ada4c | | | /Local/Range"a"/Transaction/"0ce61c17-5eb4-4587-8c36-dcf4062ada4c" |
| /Local/Range"{a"-b"} | 016b126100ff000174786e2d9855a1ef8eb94c06a106cab1dda78a2b | | | /Local/Range"a\x00"/Transaction/"9855a1ef-8eb9-4c06-a106-cab1dda78a2b" |
Expand Down
Loading

0 comments on commit 6abec1b

Please sign in to comment.