Skip to content

Commit

Permalink
kvserver: more Raft closed timestamp assertions
Browse files Browse the repository at this point in the history
In cockroachdb#62655 we see that there appears to be something wrong with the Raft
closed timestamp. That issue shows an assertion failure about a command
trying to write below a timestamp that was promised to have been closed
by a previous command. This patch includes a little bit more info in
that assertion (the current lease) and adds another two assertions:
- an assertion that the closed timestamp itself does not regress. This
assertion already existed in stageTrivialReplicatedEvalResult(), but
that comes after the failure we've seen. The point of copying this
assertion up is to ascertain whether we're screwing up by regressing the
closed timestamp, or whether a particular request/command is at fault
for not obeying the closed timestamp.
- an assertion against closed ts regression when copying the replica
state from a staging batch to the replica.

All these assertions can be disabled with an env var if things go bad.

Fixes cockroachdb#62765

Release note: None
  • Loading branch information
andreimatei committed Apr 21, 2021
1 parent 5334f33 commit 22d737a
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 20 deletions.
97 changes: 78 additions & 19 deletions pkg/kv/kvserver/replica_application_state_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
Expand Down Expand Up @@ -454,20 +455,11 @@ func (b *replicaAppBatch) Stage(cmdI apply.Command) (apply.CheckedCommand, error
cmd.raftCmd.LogicalOpLog = nil
cmd.raftCmd.ClosedTimestamp = nil
} else {
// Assert that we're not writing under the closed timestamp. We can only do
// these checks on IsIntentWrite requests, since others (for example,
// EndTxn) can operate below the closed timestamp. In turn, this means that
// we can only assert on the leaseholder, as only that replica has
// cmd.proposal.Request filled in.
if cmd.IsLocal() && cmd.proposal.Request.IsIntentWrite() {
wts := cmd.raftCmd.ReplicatedEvalResult.WriteTimestamp
if !wts.IsEmpty() && wts.LessEq(b.state.RaftClosedTimestamp) {
wts := wts // Shadow variable that escapes to the heap.
return nil, wrapWithNonDeterministicFailure(
errors.AssertionFailedf("writing at %s below closed ts: %s (%s)",
wts, b.state.RaftClosedTimestamp.String(), cmd.proposal.Request),
"attempting to write below closed timestamp")
}
if err := b.assertNoCmdClosedTimestampRegression(cmd); err != nil {
return nil, err
}
if err := b.assertNoWriteBelowClosedTimestamp(cmd); err != nil {
return nil, err
}
log.Event(ctx, "applying command")
}
Expand Down Expand Up @@ -834,11 +826,6 @@ func (b *replicaAppBatch) stageTrivialReplicatedEvalResult(
b.state.LeaseAppliedIndex = leaseAppliedIndex
}
if cts := cmd.raftCmd.ClosedTimestamp; cts != nil && !cts.IsEmpty() {
if cts.Less(b.state.RaftClosedTimestamp) {
log.Fatalf(ctx,
"closed timestamp regressing from %s to %s when applying command %x",
b.state.RaftClosedTimestamp, cts, cmd.idKey)
}
b.state.RaftClosedTimestamp = *cts
if clockTS, ok := cts.TryToClockTimestamp(); ok {
b.maxTS.Forward(clockTS)
Expand Down Expand Up @@ -902,6 +889,16 @@ func (b *replicaAppBatch) ApplyToStateMachine(ctx context.Context) error {
r.mu.Lock()
r.mu.state.RaftAppliedIndex = b.state.RaftAppliedIndex
r.mu.state.LeaseAppliedIndex = b.state.LeaseAppliedIndex

// Sanity check that the RaftClosedTimestamp doesn't go backwards.
existingClosed := r.mu.state.RaftClosedTimestamp
newClosed := b.state.RaftClosedTimestamp
if !newClosed.IsEmpty() && newClosed.Less(existingClosed) && raftClosedTimestampAssertionsEnabled {
return errors.AssertionFailedf(
"raft closed timestamp regression; replica has: %s, new batch has: %s.",
existingClosed.String(), newClosed.String())
}

closedTimestampUpdated := r.mu.state.RaftClosedTimestamp.Forward(b.state.RaftClosedTimestamp)
prevStats := *r.mu.state.Stats
*r.mu.state.Stats = *b.state.Stats
Expand Down Expand Up @@ -1015,6 +1012,68 @@ func (b *replicaAppBatch) Close() {
*b = replicaAppBatch{}
}

// raftClosedTimestampAssertionsEnabled provides an emergency way of shutting
// down assertions.
var raftClosedTimestampAssertionsEnabled = envutil.EnvOrDefaultBool("COCKROACH_RAFT_CLOSEDTS_ASSERTIONS_ENABLED", true)

// Assert that the current command is not writing under the closed timestamp.
// This check only applies to IntentWrite commands, since others (for example,
// EndTxn) can operate below the closed timestamp.
//
// Note that we check that we're we're writing under b.state.RaftClosedTimestamp
// (i.e. below the timestamp closed by previous commands), not below
// cmd.raftCmd.ClosedTimestamp. A command is allowed to write below the closed
// timestamp carried by itself; in other words cmd.raftCmd.ClosedTimestamp is a
// promise about future commands, not the command carrying it.
func (b *replicaAppBatch) assertNoWriteBelowClosedTimestamp(cmd *replicatedCmd) error {
if !cmd.IsLocal() || !cmd.proposal.Request.IsIntentWrite() {
return nil
}
if !raftClosedTimestampAssertionsEnabled {
return nil
}
wts := cmd.raftCmd.ReplicatedEvalResult.WriteTimestamp
if !wts.IsEmpty() && wts.LessEq(b.state.RaftClosedTimestamp) {
wts := wts // Make a shadow variable that escapes to the heap.
var req string
if cmd.proposal != nil {
req = cmd.proposal.Request.String()
} else {
req = "request unknown; not leaseholder"
}
return wrapWithNonDeterministicFailure(errors.AssertionFailedf(
"command writing below closed timestamp; cmd: %x, write ts: %s, "+
"batch state closed: %s, command closed: %s, request: %s, lease: %s",
cmd.idKey, wts,
b.state.RaftClosedTimestamp.String(), cmd.raftCmd.ClosedTimestamp,
req, b.state.Lease),
"command writing below closed timestamp")
}
return nil
}

// Assert that the closed timestamp carried by the command is not below one from
// previous commands.
func (b *replicaAppBatch) assertNoCmdClosedTimestampRegression(cmd *replicatedCmd) error {
if !raftClosedTimestampAssertionsEnabled {
return nil
}
existingClosed := b.state.RaftClosedTimestamp
newClosed := cmd.raftCmd.ClosedTimestamp
if newClosed != nil && !newClosed.IsEmpty() && newClosed.Less(existingClosed) {
var req string
if cmd.IsLocal() && cmd.proposal.Request.IsIntentWrite() {
req = cmd.proposal.Request.String()
} else {
req = "<unknown; not leaseholder>"
}
return errors.AssertionFailedf(
"raft closed timestamp regression in cmd: %x; batch state: %s, command: %s, lease: %s, req: %s",
cmd.idKey, existingClosed.String(), newClosed.String(), b.state.Lease, req)
}
return nil
}

// ephemeralReplicaAppBatch implements the apply.Batch interface.
//
// The batch performs the bare-minimum amount of work to be able to
Expand Down
2 changes: 1 addition & 1 deletion pkg/util/hlc/timestamp.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func (t Timestamp) AsOfSystemTime() string {
return fmt.Sprintf("%d.%010d%s", t.WallTime, t.Logical, syn)
}

// IsEmpty retruns true if t is an empty Timestamp.
// IsEmpty returns true if t is an empty Timestamp.
func (t Timestamp) IsEmpty() bool {
return t == Timestamp{}
}
Expand Down

0 comments on commit 22d737a

Please sign in to comment.