diff --git a/pkg/keys/constants.go b/pkg/keys/constants.go index fce1908c3299..2d29ed116728 100644 --- a/pkg/keys/constants.go +++ b/pkg/keys/constants.go @@ -76,10 +76,10 @@ var ( // localStoreSuggestedCompactionSuffix stores suggested compactions to // be aggregated and processed on the store. localStoreSuggestedCompactionSuffix = []byte("comp") - // localRemovedLeakedRaftEntriesSuffix marks that a store has completed - // its migration to remove all possibly-leaked Raft entries. - // TODO(nvanbenschoten): This can be removed in 2.2. + + // localRemovedLeakedRaftEntriesSuffix is DEPRECATED and remains to prevent reuse. localRemovedLeakedRaftEntriesSuffix = []byte("dlre") + _ = localRemovedLeakedRaftEntriesSuffix // LocalStoreSuggestedCompactionsMin is the start of the span of // possible suggested compaction keys for a store. diff --git a/pkg/keys/keys.go b/pkg/keys/keys.go index 8f3a48608469..c742b34a173b 100644 --- a/pkg/keys/keys.go +++ b/pkg/keys/keys.go @@ -115,13 +115,6 @@ func DecodeStoreSuggestedCompactionKey(key roachpb.Key) (start, end roachpb.Key, return start, end, nil } -// StoreRemovedLeakedRaftEntriesKey returns a store-local key that marks -// when a store has completed its migration to remove all possibly-leaked -// Raft entries on all replicas. -func StoreRemovedLeakedRaftEntriesKey() roachpb.Key { - return MakeStoreKey(localRemovedLeakedRaftEntriesSuffix, nil) -} - // NodeLivenessKey returns the key for the node liveness record. func NodeLivenessKey(nodeID roachpb.NodeID) roachpb.Key { key := make(roachpb.Key, 0, len(NodeLivenessPrefix)+9) diff --git a/pkg/storage/migrations.go b/pkg/storage/migrations.go index 808d471c7fe0..19f5fbe88df4 100644 --- a/pkg/storage/migrations.go +++ b/pkg/storage/migrations.go @@ -13,140 +13,3 @@ // permissions and limitations under the License. package storage - -import ( - "context" - "time" - - "github.com/cockroachdb/cockroach/pkg/keys" - "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/storage/engine" - "github.com/cockroachdb/cockroach/pkg/util/hlc" - "github.com/cockroachdb/cockroach/pkg/util/log" - "github.com/cockroachdb/cockroach/pkg/util/timeutil" -) - -// clearLegacyTombstone removes the legacy tombstone for the given rangeID. -func clearLegacyTombstone(eng engine.Writer, rangeID roachpb.RangeID) error { - return eng.Clear(engine.MakeMVCCMetadataKey(keys.RaftTombstoneIncorrectLegacyKey(rangeID))) -} - -// migrateLegacyTombstones rewrites all legacy tombstones into the correct key. -// It can be removed in binaries post v2.1. -func migrateLegacyTombstones(ctx context.Context, eng engine.Engine) error { - var tombstone roachpb.RaftTombstone - handleTombstone := func(rangeID roachpb.RangeID) (more bool, _ error) { - batch := eng.NewBatch() - defer batch.Close() - - tombstoneKey := keys.RaftTombstoneKey(rangeID) - - { - // If there's already a new-style tombstone, pick the larger NextReplicaID - // (this will be the new-style tombstone's). - var exTombstone roachpb.RaftTombstone - ok, err := engine.MVCCGetProto(ctx, batch, tombstoneKey, - hlc.Timestamp{}, true, nil, &exTombstone) - if err != nil { - return false, err - } - if ok && exTombstone.NextReplicaID > tombstone.NextReplicaID { - tombstone.NextReplicaID = exTombstone.NextReplicaID - } - } - - if err := engine.MVCCPutProto(ctx, batch, nil, tombstoneKey, - hlc.Timestamp{}, nil, &tombstone); err != nil { - return false, err - } - if err := clearLegacyTombstone(batch, rangeID); err != nil { - return false, err - } - // Specify sync==false because we don't want to sync individually, - // but see the end of the surrounding method where we sync explicitly. - err := batch.Commit(false /* sync */) - return err == nil, err - } - err := IterateIDPrefixKeys(ctx, eng, keys.RaftTombstoneIncorrectLegacyKey, &tombstone, - handleTombstone) - if err != nil { - return err - } - - // Write a final bogus batch so that we get to do a sync commit, which - // implicitly also syncs everything written before. - batch := eng.NewBatch() - defer batch.Close() - - if err := clearLegacyTombstone(batch, 1 /* rangeID */); err != nil { - return err - } - return batch.Commit(true /* sync */) -} - -// removeLeakedRaftEntries iterates over all replicas and ensures that all -// Raft entries that are beneath a replica's truncated index are removed. -// Earlier versions of Cockroach permitted a race where a replica's truncated -// index could be moved forward without the corresponding Raft entries being -// deleted atomically. This introduced a window in which an untimely crash -// could abandon Raft entries until the next log truncation. -// TODO(nvanbenschoten): It can be removed in binaries post v2.1. -func removeLeakedRaftEntries( - ctx context.Context, clock *hlc.Clock, eng engine.Engine, v *storeReplicaVisitor, -) error { - // Check if migration has already been performed. - marker := keys.StoreRemovedLeakedRaftEntriesKey() - found, err := engine.MVCCGetProto(ctx, eng, marker, hlc.Timestamp{}, false, nil, nil) - if found || err != nil { - return err - } - - // Iterate over replicas and clear out any leaked raft entries. Visit - // them in increasing rangeID order so all accesses to the engine are in - // increasing order, which experimentally speeds this up by about 35%. - tBegin := timeutil.Now() - leaked := 0 - v.InOrder().Visit(func(r *Replica) bool { - var ts roachpb.RaftTruncatedState - ts, err = r.raftTruncatedState(ctx) - if err != nil { - return false - } - - // If any Raft entries were leaked then it must be true that the last - // entry that was truncated was also leaked. We use this to create a - // fast-path to rule out replicas that have not leaked any entries. - last := keys.RaftLogKey(r.RangeID, ts.Index) - found, err = engine.MVCCGetProto(ctx, eng, last, hlc.Timestamp{}, false, nil, nil) - if !found || err != nil { - return err == nil - } - leaked++ - - // Start at index zero and clear entries up through the truncated index. - start := engine.MakeMVCCMetadataKey(keys.RaftLogKey(r.RangeID, 0)) - end := engine.MakeMVCCMetadataKey(last.PrefixEnd()) - - iter := eng.NewIterator(engine.IterOptions{UpperBound: end.Key}) - defer iter.Close() - - err = eng.ClearIterRange(iter, start, end) - return err == nil - }) - if err != nil { - return err - } - - f := log.Eventf - dur := timeutil.Since(tBegin) - if leaked > 0 || dur > 2*time.Second { - f = log.Infof - } - f(ctx, "found %d replicas with abandoned raft entries in %s", leaked, dur) - - // Set the migration marker so that we can avoid checking for leaked entries - // again in the future. It doesn't matter what we actually use as the value, - // so just use the current time. - now := clock.Now() - return engine.MVCCPutProto(ctx, eng, nil, marker, hlc.Timestamp{}, nil, &now) -} diff --git a/pkg/storage/replica_raftstorage.go b/pkg/storage/replica_raftstorage.go index 2ee0e3e55dba..890199c9a03b 100644 --- a/pkg/storage/replica_raftstorage.go +++ b/pkg/storage/replica_raftstorage.go @@ -332,15 +332,6 @@ func (r *Replica) raftTruncatedStateLocked( return ts, nil } -// raftTruncatedState returns metadata about the log that preceded the first -// current entry. This includes both entries that have been compacted away and -// the dummy entries that make up the starting point of an empty log. -func (r *Replica) raftTruncatedState(ctx context.Context) (roachpb.RaftTruncatedState, error) { - r.mu.Lock() - defer r.mu.Unlock() - return r.raftTruncatedStateLocked(ctx) -} - // FirstIndex implements the raft.Storage interface. func (r *replicaRaftStorage) FirstIndex() (uint64, error) { ctx := r.AnnotateCtx(context.TODO()) @@ -858,19 +849,6 @@ func (r *Replica) applySnapshot( } } - // Nodes running v2.0 and earlier may send an incorrect Raft tombstone (see - // #12154) that was supposed to be unreplicated. Simply remove it. - // - // NB: this can be removed post v2.1. This is because when we are running a - // binary at v2.2, we know that peers are at least running v2.1, which will - // never send out snapshots with incorrect tombstones. v2.0 nodes can send out - // these incorrect snapshots if they were upgraded from a v1.1 store with - // incorrect tombstones and never rebooted while - // VersionUnreplicatedTombstoneKey was active. - if err := clearLegacyTombstone(batch, r.RangeID); err != nil { - return errors.Wrap(err, "while clearing legacy tombstone key") - } - // The log entries are all written to distinct keys so we can use a // distinct batch. distinctBatch := batch.Distinct() diff --git a/pkg/storage/store.go b/pkg/storage/store.go index 4b1618a5ed64..8599e3631018 100644 --- a/pkg/storage/store.go +++ b/pkg/storage/store.go @@ -1247,30 +1247,6 @@ func (s *Store) Start(ctx context.Context, stopper *stop.Stopper) error { now := s.cfg.Clock.Now() s.startedAt = now.WallTime - // Migrate legacy tombstones away. This is safe to do unconditionally: this is - // a post-v2.0 binary, so we're guaranteed that every node in the cluster is - // running a version that understands the non-legacy tombstones (v2.0 or - // later). - // - // We want to run this migration the first time the node boots with this - // binary version so that we can assume local data never contains legacy range - // tombstones. For simplicity, we do it on *every* boot. Should this be found - // to impact startup times too much, we can make it only run the first time - // this binary version is booted. - { - tBegin := timeutil.Now() - if err := migrateLegacyTombstones(ctx, s.engine); err != nil { - return errors.Wrapf(err, "migrating legacy tombstones for %v", s.engine) - } - f := log.Eventf - - dur := timeutil.Since(tBegin) - if dur > 10*time.Second { - f = log.Infof - } - f(ctx, "ran legacy tombstone migration in %s", dur) - } - // Iterate over all range descriptors, ignoring uncommitted versions // (consistent=false). Uncommitted intents which have been abandoned // due to a split crashing halfway will simply be resolved on the @@ -1330,15 +1306,6 @@ func (s *Store) Start(ctx context.Context, stopper *stop.Stopper) error { return err } - // Ensure that no Raft entries were abandoned by previous versions of Cockroach - // that did not delete Raft entries atomically with applying log truncation - // Raft commands. This will only be performed once, after which this call will - // see a migration marker and quickly no-op. - err = removeLeakedRaftEntries(ctx, s.Clock(), s.engine, newStoreReplicaVisitor(s)) - if err != nil { - return errors.Wrapf(err, "checking for leaked raft entries for %v", s.engine) - } - // Start Raft processing goroutines. s.cfg.Transport.Listen(s.StoreID(), s) s.processRaft(ctx) diff --git a/pkg/storage/store_test.go b/pkg/storage/store_test.go index 46659ccf8ac7..f4f0a6b744f4 100644 --- a/pkg/storage/store_test.go +++ b/pkg/storage/store_test.go @@ -392,72 +392,6 @@ func TestStoreInitAndBootstrap(t *testing.T) { } } - // Put down a few fake legacy tombstones (#12154) to jog `migrateLegacyTombstones`, including - // one for the existing range, but also some for non-existing ones. We'll check that these get - // migrated properly. - // - // Range 1 actually exists (since we will bootstrap it). The rest don't. - legacyTombstones := []struct { - rangeID roachpb.RangeID - legacyTombstone roachpb.ReplicaID // legacy tombstone's NextReplicaID - newTombstone roachpb.ReplicaID // new-style tombstone's NextReplicaID (if nonzero) - exNewTombstone roachpb.ReplicaID // resulting new-style tombstone - }{ - {rangeID: 1, legacyTombstone: 1, exNewTombstone: 1}, - {rangeID: 200, legacyTombstone: 123, newTombstone: 122, exNewTombstone: 123}, - {rangeID: 300, legacyTombstone: 333, newTombstone: 335, exNewTombstone: 335}, - } - - for _, stone := range legacyTombstones { - legacyTombstoneKey := keys.RaftTombstoneIncorrectLegacyKey(stone.rangeID) - - if err := engine.MVCCPutProto( - ctx, eng, nil /* ms */, legacyTombstoneKey, hlc.Timestamp{}, nil, /* txn */ - &roachpb.RaftTombstone{NextReplicaID: stone.legacyTombstone}, - ); err != nil { - t.Fatal(err) - } - if stone.newTombstone == 0 { - // No new-style tombstone for this key is present. - continue - } - - tombstoneKey := keys.RaftTombstoneKey(stone.rangeID) - if err := engine.MVCCPutProto( - ctx, eng, nil /* ms */, tombstoneKey, hlc.Timestamp{}, - nil /* txn */, &roachpb.RaftTombstone{NextReplicaID: stone.newTombstone}, - ); err != nil { - t.Fatal(err) - } - } - - // Lay down a few fake Raft entries for Range 1 to jog `removeLeakedRaftEntries`. - { - ts, err := stateloader.Make(nil /* st */, 1).LoadTruncatedState(ctx, eng) - if err != nil { - t.Fatal(err) - } - - const entryCount = 4 - for i := 0; i < entryCount; i++ { - if ts.Index < uint64(i) { - t.Fatal("index cannot be negative") - } - key := keys.RaftLogKey(1, ts.Index-uint64(i)) - - var ent raftpb.Entry - var value roachpb.Value - if err := value.SetProto(&ent); err != nil { - t.Fatal(err) - } - if err := engine.MVCCPut( - ctx, eng, nil, key, hlc.Timestamp{}, value, nil, /* txn */ - ); err != nil { - t.Fatal(err) - } - } - } - // Now, attempt to initialize a store with a now-bootstrapped range. { store := NewStore(cfg, eng, &roachpb.NodeDescriptor{NodeID: 1}) @@ -465,33 +399,6 @@ func TestStoreInitAndBootstrap(t *testing.T) { t.Fatalf("failure initializing bootstrapped store: %s", err) } - // Check that `migrateLegacyTombstone` did what it was supposed to. - for _, stone := range legacyTombstones { - legacyTombstoneKey := keys.RaftTombstoneIncorrectLegacyKey(stone.rangeID) - if ok, err := engine.MVCCGetProto( - ctx, store.engine, legacyTombstoneKey, hlc.Timestamp{}, true /* consistent */, nil /* txn */, nil, /* msg */ - ); err != nil { - t.Fatal(err) - } else if ok { - t.Fatalf("unexpectedly found legacy tombstone key at %s", legacyTombstoneKey) - } - - tombstoneKey := keys.RaftTombstoneKey(stone.rangeID) - var tombstone roachpb.RaftTombstone - ok, err := engine.MVCCGetProto( - ctx, store.engine, tombstoneKey, hlc.Timestamp{}, true /* consistent */, nil /* txn */, &tombstone, /* msg */ - ) - if err != nil { - t.Fatal(err) - } - if !ok { - t.Fatalf("unexpectedly did not find migrated tombstone key at %s", legacyTombstoneKey) - } - if tombstone.NextReplicaID != stone.exNewTombstone { - t.Fatalf("tombstone at %d is %d, but wanted %d", stone.rangeID, tombstone.NextReplicaID, stone.exNewTombstone) - } - } - // 1st range should be available. r, err := store.GetReplica(1) if err != nil { @@ -499,22 +406,6 @@ func TestStoreInitAndBootstrap(t *testing.T) { } rs := r.GetMVCCStats() - // Check that `removeLeakedRaftEntries` did what it was supposed to. - ts, err := r.raftTruncatedState(ctx) - if err != nil { - t.Fatalf("failure fetching raft truncated state: %s", err) - } - for i := uint64(0); i <= ts.Index; i++ { - key := keys.RaftLogKey(1, i) - if found, err := engine.MVCCGetProto( - ctx, eng, key, hlc.Timestamp{}, false, nil /* txn */, nil, /* msg */ - ); err != nil { - t.Fatal(err) - } else if found { - t.Fatalf("found unexpected raft entry for key %v", key) - } - } - // Stats should agree with a recomputation. now := r.store.Clock().Now() if ms, err := rditer.ComputeStatsForRange(r.Desc(), eng, now.WallTime); err != nil { @@ -799,7 +690,7 @@ func TestStoreReplicaVisitor(t *testing.T) { } } - // Verify two passes of the visit. + // Verify two passes of the visit, the second one in-order. visitor := newStoreReplicaVisitor(store) exp := make(map[roachpb.RangeID]struct{}) for i := 0; i < newCount; i++ { @@ -812,7 +703,19 @@ func TestStoreReplicaVisitor(t *testing.T) { } i := 1 seen := make(map[roachpb.RangeID]struct{}) + + // Ensure that our next pass is done in-order. + if pass == 1 { + _ = visitor.InOrder() + } + var lastRangeID roachpb.RangeID visitor.Visit(func(repl *Replica) bool { + if pass == 1 { + if repl.RangeID <= lastRangeID { + t.Fatalf("on second pass, expect ranges to be visited in ascending range ID order; %d !> %d", repl.RangeID, lastRangeID) + } + lastRangeID = repl.RangeID + } _, ok := seen[repl.RangeID] if ok { t.Fatalf("already saw %d", repl.RangeID)