diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index b58befa9f02a..8f346df6a298 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -245,6 +245,9 @@ const ( // via their IDs instead of their names, which leads to allowing such // sequences to be renamed. SequencesRegclass + // ClosedTimestampsRaftTransport enables the Raft transport for closed + // timestamps and disables the previous per-node transport. + ClosedTimestampsRaftTransport // Step (1): Add new versions here. ) @@ -411,6 +414,10 @@ var versionsSingleton = keyedVersions([]keyedVersion{ Key: SequencesRegclass, Version: roachpb.Version{Major: 20, Minor: 2, Internal: 30}, }, + { + Key: ClosedTimestampsRaftTransport, + Version: roachpb.Version{Major: 20, Minor: 2, Internal: 32}, + }, // Step (2): Add new versions here. }) diff --git a/pkg/kv/kvserver/BUILD.bazel b/pkg/kv/kvserver/BUILD.bazel index 402d71680912..ca306e3d1fc1 100644 --- a/pkg/kv/kvserver/BUILD.bazel +++ b/pkg/kv/kvserver/BUILD.bazel @@ -117,6 +117,7 @@ go_library( "//pkg/kv/kvserver/closedts/container", "//pkg/kv/kvserver/closedts/ctpb", "//pkg/kv/kvserver/closedts/storage", + "//pkg/kv/kvserver/closedts/tracker", "//pkg/kv/kvserver/concurrency", "//pkg/kv/kvserver/constraint", "//pkg/kv/kvserver/gc", @@ -299,6 +300,7 @@ go_test( "//pkg/kv/kvserver/batcheval/result", "//pkg/kv/kvserver/closedts", "//pkg/kv/kvserver/closedts/ctpb", + "//pkg/kv/kvserver/closedts/tracker", "//pkg/kv/kvserver/concurrency", "//pkg/kv/kvserver/concurrency/lock", "//pkg/kv/kvserver/constraint", diff --git a/pkg/kv/kvserver/below_raft_protos_test.go b/pkg/kv/kvserver/below_raft_protos_test.go index a3e04b28ea0e..00652ba4853e 100644 --- a/pkg/kv/kvserver/below_raft_protos_test.go +++ b/pkg/kv/kvserver/below_raft_protos_test.go @@ -77,13 +77,8 @@ var belowRaftGoldenProtos = map[reflect.Type]fixture{ populatedConstructor: func(r *rand.Rand) protoutil.Message { return 
enginepb.NewPopulatedRangeAppliedState(r, false) }, - // The populatedSum has changed from 10390885694280604642 to - // 7958815789228166749, as of 21.1, due to the addition of the - // SeparatedIntentCount field in MVCCStats. This field will not actually - // be populated until all nodes are on 21.1, so there isn't a risk of - // divergence. emptySum: 615555020845646359, - populatedSum: 7958815789228166749, + populatedSum: 3253881774919630461, }, reflect.TypeOf(&raftpb.HardState{}): { populatedConstructor: func(r *rand.Rand) protoutil.Message { diff --git a/pkg/kv/kvserver/client_replica_test.go b/pkg/kv/kvserver/client_replica_test.go index 50836e8ec237..d453d9fbee53 100644 --- a/pkg/kv/kvserver/client_replica_test.go +++ b/pkg/kv/kvserver/client_replica_test.go @@ -3332,7 +3332,7 @@ func TestProposalOverhead(t *testing.T) { // overhead is that users ranges do not have rangefeeds on by default whereas // system ranges do. const ( - expectedUserOverhead uint32 = 42 + expectedUserOverhead uint32 = 45 ) t.Run("user-key overhead", func(t *testing.T) { userKey := tc.ScratchRange(t) diff --git a/pkg/kv/kvserver/closedts/tracker/heap_tracker.go b/pkg/kv/kvserver/closedts/tracker/heap_tracker.go index 620bd05462a6..a78a90caea83 100644 --- a/pkg/kv/kvserver/closedts/tracker/heap_tracker.go +++ b/pkg/kv/kvserver/closedts/tracker/heap_tracker.go @@ -124,3 +124,10 @@ func (h *heapTracker) LowerBound(ctx context.Context) hlc.Timestamp { } return h.mu.rs[0].ts } + +// Count is part of the Tracker interface. 
+func (h *heapTracker) Count() int { + h.mu.Lock() + defer h.mu.Unlock() + return h.mu.rs.Len() +} diff --git a/pkg/kv/kvserver/closedts/tracker/lockfree_tracker.go b/pkg/kv/kvserver/closedts/tracker/lockfree_tracker.go index b9cd8bced181..86cdb0ff9c57 100644 --- a/pkg/kv/kvserver/closedts/tracker/lockfree_tracker.go +++ b/pkg/kv/kvserver/closedts/tracker/lockfree_tracker.go @@ -16,6 +16,7 @@ import ( "sync/atomic" "github.com/cockroachdb/cockroach/pkg/util/hlc" + "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/timeutil" ) @@ -169,6 +170,9 @@ func (t *lockfreeTracker) Untrack(ctx context.Context, tok RemovalToken) { b := tok.(lockfreeToken).b // Note that atomic ops are not required here, as we hold the exclusive lock. b.refcnt-- + if b.refcnt < 0 { + log.Fatalf(ctx, "negative bucket refcount: %d", b.refcnt) + } if b.refcnt == 0 { // Reset the bucket, so that future Track() calls can create a new one. b.ts = 0 @@ -198,6 +202,11 @@ func (t *lockfreeTracker) LowerBound(ctx context.Context) hlc.Timestamp { } } +// Count is part of the Tracker interface. +func (t *lockfreeTracker) Count() int { + return int(t.b1.refcnt) + int(t.b2.refcnt) +} + // bucket represent a Tracker bucket: a data structure that coalesces a number // of timestamps, keeping track only of their count and minimum. // diff --git a/pkg/kv/kvserver/closedts/tracker/tracker.go b/pkg/kv/kvserver/closedts/tracker/tracker.go index 4fc41d429bff..e00f1b41af7f 100644 --- a/pkg/kv/kvserver/closedts/tracker/tracker.go +++ b/pkg/kv/kvserver/closedts/tracker/tracker.go @@ -85,6 +85,11 @@ type Tracker interface { // make is that, if no synthethic timestamp is inserted into the tracked set // for a while, eventually the LowerBound value will not be synthetic. LowerBound(context.Context) hlc.Timestamp + + // Count returns the current size of the tracked set. + // + // Count cannot be called concurrently with other methods. 
+ Count() int } // RemovalToken represents the result of Track: a token to be later used with diff --git a/pkg/kv/kvserver/kvserverpb/proposer_kv.go b/pkg/kv/kvserver/kvserverpb/proposer_kv.go index 04a4000420e2..14fc1ca2d064 100644 --- a/pkg/kv/kvserver/kvserverpb/proposer_kv.go +++ b/pkg/kv/kvserver/kvserverpb/proposer_kv.go @@ -10,18 +10,36 @@ package kvserverpb -import "math" +import ( + "math" + + "github.com/cockroachdb/cockroach/pkg/util/hlc" +) var maxRaftCommandFooterSize = (&RaftCommandFooter{ MaxLeaseIndex: math.MaxUint64, }).Size() +var maxClosedTimestampFooterSize = (&ClosedTimestampFooter{ + ClosedTimestamp: hlc.Timestamp{ + WallTime: math.MaxInt64, + Logical: math.MaxInt32, + Synthetic: true, + }, +}).Size() + // MaxRaftCommandFooterSize returns the maximum possible size of an // encoded RaftCommandFooter proto. func MaxRaftCommandFooterSize() int { return maxRaftCommandFooterSize } +// MaxClosedTimestampFooterSize returns the maximum possible size of an encoded +// ClosedTimestampFooter. +func MaxClosedTimestampFooterSize() int { + return maxClosedTimestampFooterSize +} + // IsZero returns whether all fields are set to their zero value. 
func (r ReplicatedEvalResult) IsZero() bool { return r == ReplicatedEvalResult{} diff --git a/pkg/kv/kvserver/kvserverpb/proposer_kv.pb.go b/pkg/kv/kvserver/kvserverpb/proposer_kv.pb.go index 8b43941018d9..ae64003650e3 100644 --- a/pkg/kv/kvserver/kvserverpb/proposer_kv.pb.go +++ b/pkg/kv/kvserver/kvserverpb/proposer_kv.pb.go @@ -48,7 +48,7 @@ func (m *Split) Reset() { *m = Split{} } func (m *Split) String() string { return proto.CompactTextString(m) } func (*Split) ProtoMessage() {} func (*Split) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{0} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{0} } func (m *Split) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -83,7 +83,7 @@ func (m *Merge) Reset() { *m = Merge{} } func (m *Merge) String() string { return proto.CompactTextString(m) } func (*Merge) ProtoMessage() {} func (*Merge) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{1} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{1} } func (m *Merge) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -117,7 +117,7 @@ type ChangeReplicas struct { func (m *ChangeReplicas) Reset() { *m = ChangeReplicas{} } func (*ChangeReplicas) ProtoMessage() {} func (*ChangeReplicas) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{2} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{2} } func (m *ChangeReplicas) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -169,7 +169,7 @@ func (m *ComputeChecksum) Reset() { *m = ComputeChecksum{} } func (m *ComputeChecksum) String() string { return proto.CompactTextString(m) } func (*ComputeChecksum) ProtoMessage() {} func (*ComputeChecksum) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{3} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{3} } func (m *ComputeChecksum) XXX_Unmarshal(b []byte) error { return 
m.Unmarshal(b) @@ -206,7 +206,7 @@ func (m *Compaction) Reset() { *m = Compaction{} } func (m *Compaction) String() string { return proto.CompactTextString(m) } func (*Compaction) ProtoMessage() {} func (*Compaction) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{4} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{4} } func (m *Compaction) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -243,7 +243,7 @@ func (m *SuggestedCompaction) Reset() { *m = SuggestedCompaction{} } func (m *SuggestedCompaction) String() string { return proto.CompactTextString(m) } func (*SuggestedCompaction) ProtoMessage() {} func (*SuggestedCompaction) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{5} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{5} } func (m *SuggestedCompaction) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -305,7 +305,7 @@ func (m *ReplicatedEvalResult) Reset() { *m = ReplicatedEvalResult{} } func (m *ReplicatedEvalResult) String() string { return proto.CompactTextString(m) } func (*ReplicatedEvalResult) ProtoMessage() {} func (*ReplicatedEvalResult) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{6} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{6} } func (m *ReplicatedEvalResult) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -349,7 +349,7 @@ func (m *ReplicatedEvalResult_AddSSTable) Reset() { *m = ReplicatedEvalR func (m *ReplicatedEvalResult_AddSSTable) String() string { return proto.CompactTextString(m) } func (*ReplicatedEvalResult_AddSSTable) ProtoMessage() {} func (*ReplicatedEvalResult_AddSSTable) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{6, 0} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{6, 0} } func (m *ReplicatedEvalResult_AddSSTable) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -386,7 
+386,7 @@ func (m *WriteBatch) Reset() { *m = WriteBatch{} } func (m *WriteBatch) String() string { return proto.CompactTextString(m) } func (*WriteBatch) ProtoMessage() {} func (*WriteBatch) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{7} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{7} } func (m *WriteBatch) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -423,7 +423,7 @@ func (m *LogicalOpLog) Reset() { *m = LogicalOpLog{} } func (m *LogicalOpLog) String() string { return proto.CompactTextString(m) } func (*LogicalOpLog) ProtoMessage() {} func (*LogicalOpLog) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{8} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{8} } func (m *LogicalOpLog) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -505,7 +505,22 @@ type RaftCommand struct { // been added after it, and on removal, the assignment counters must be // updated accordingly. Managing retry of proposals becomes trickier as // well as that uproots whatever ordering was originally envisioned. + // + // This field is set through RaftCommandFooter hackery. MaxLeaseIndex uint64 `protobuf:"varint,4,opt,name=max_lease_index,json=maxLeaseIndex,proto3" json:"max_lease_index,omitempty"` + // The closed timestamp carried by this command. Once a follower is told to + // apply this command, it knows that there will be no further writes at + // timestamps <= closed_timestamp. Note that the command itself might + // represent a write at a lower timestamp, so the closed timestamp can only be + // used after this command is applied. + // + // The field can be zero, which is to be interpreted as no closed timestamp + // update. Some commands (lease requests) implicitly carry a closed timestamp + // in a command-specific way. If the value is not zero, the value is greater + // or equal to that of the previous commands (and all before it). 
+ // + // This field is set through ClosedTimestampFooter hackery. + ClosedTimestamp hlc.Timestamp `protobuf:"bytes,17,opt,name=closed_timestamp,json=closedTimestamp,proto3" json:"closed_timestamp"` // replicated_eval_result is a set of structured information that instructs // replicated state changes to the part of a Range's replicated state machine // that exists outside of RocksDB. @@ -529,7 +544,7 @@ func (m *RaftCommand) Reset() { *m = RaftCommand{} } func (m *RaftCommand) String() string { return proto.CompactTextString(m) } func (*RaftCommand) ProtoMessage() {} func (*RaftCommand) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{9} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{9} } func (m *RaftCommand) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -568,7 +583,7 @@ func (m *RaftCommandFooter) Reset() { *m = RaftCommandFooter{} } func (m *RaftCommandFooter) String() string { return proto.CompactTextString(m) } func (*RaftCommandFooter) ProtoMessage() {} func (*RaftCommandFooter) Descriptor() ([]byte, []int) { - return fileDescriptor_proposer_kv_0c8837b323bf7b92, []int{10} + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{10} } func (m *RaftCommandFooter) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -593,6 +608,42 @@ func (m *RaftCommandFooter) XXX_DiscardUnknown() { var xxx_messageInfo_RaftCommandFooter proto.InternalMessageInfo +// ClosedTimestampFooter is similar to RaftCommandFooter, allowing the proposal +// buffer to fill in the closed_timestamp field after most of the proto has been +// marshaled already. 
+type ClosedTimestampFooter struct { + ClosedTimestamp hlc.Timestamp `protobuf:"bytes,17,opt,name=closed_timestamp,json=closedTimestamp,proto3" json:"closed_timestamp"` +} + +func (m *ClosedTimestampFooter) Reset() { *m = ClosedTimestampFooter{} } +func (m *ClosedTimestampFooter) String() string { return proto.CompactTextString(m) } +func (*ClosedTimestampFooter) ProtoMessage() {} +func (*ClosedTimestampFooter) Descriptor() ([]byte, []int) { + return fileDescriptor_proposer_kv_0b3536bd0bf3d98c, []int{11} +} +func (m *ClosedTimestampFooter) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *ClosedTimestampFooter) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + b = b[:cap(b)] + n, err := m.MarshalTo(b) + if err != nil { + return nil, err + } + return b[:n], nil +} +func (dst *ClosedTimestampFooter) XXX_Merge(src proto.Message) { + xxx_messageInfo_ClosedTimestampFooter.Merge(dst, src) +} +func (m *ClosedTimestampFooter) XXX_Size() int { + return m.Size() +} +func (m *ClosedTimestampFooter) XXX_DiscardUnknown() { + xxx_messageInfo_ClosedTimestampFooter.DiscardUnknown(m) +} + +var xxx_messageInfo_ClosedTimestampFooter proto.InternalMessageInfo + func init() { proto.RegisterType((*Split)(nil), "cockroach.kv.kvserver.storagepb.Split") proto.RegisterType((*Merge)(nil), "cockroach.kv.kvserver.storagepb.Merge") @@ -607,6 +658,7 @@ func init() { proto.RegisterType((*RaftCommand)(nil), "cockroach.kv.kvserver.storagepb.RaftCommand") proto.RegisterMapType((map[string]string)(nil), "cockroach.kv.kvserver.storagepb.RaftCommand.TraceDataEntry") proto.RegisterType((*RaftCommandFooter)(nil), "cockroach.kv.kvserver.storagepb.RaftCommandFooter") + proto.RegisterType((*ClosedTimestampFooter)(nil), "cockroach.kv.kvserver.storagepb.ClosedTimestampFooter") } func (this *Split) Equal(that interface{}) bool { if that == nil { @@ -1314,6 +1366,16 @@ func (m *RaftCommand) MarshalTo(dAtA []byte) (int, error) { i += copy(dAtA[i:], v) } } + dAtA[i] = 0x8a + i++ + 
dAtA[i] = 0x1 + i++ + i = encodeVarintProposerKv(dAtA, i, uint64(m.ClosedTimestamp.Size())) + n21, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n21 return i, nil } @@ -1340,6 +1402,34 @@ func (m *RaftCommandFooter) MarshalTo(dAtA []byte) (int, error) { return i, nil } +func (m *ClosedTimestampFooter) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalTo(dAtA) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *ClosedTimestampFooter) MarshalTo(dAtA []byte) (int, error) { + var i int + _ = i + var l int + _ = l + dAtA[i] = 0x8a + i++ + dAtA[i] = 0x1 + i++ + i = encodeVarintProposerKv(dAtA, i, uint64(m.ClosedTimestamp.Size())) + n22, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n22 + return i, nil +} + func encodeVarintProposerKv(dAtA []byte, offset int, v uint64) int { for v >= 1<<7 { dAtA[offset] = uint8(v&0x7f | 0x80) @@ -1576,6 +1666,8 @@ func (m *RaftCommand) Size() (n int) { n += mapEntrySize + 2 + sovProposerKv(uint64(mapEntrySize)) } } + l = m.ClosedTimestamp.Size() + n += 2 + l + sovProposerKv(uint64(l)) return n } @@ -1591,6 +1683,17 @@ func (m *RaftCommandFooter) Size() (n int) { return n } +func (m *ClosedTimestampFooter) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = m.ClosedTimestamp.Size() + n += 2 + l + sovProposerKv(uint64(l)) + return n +} + func sovProposerKv(x uint64) (n int) { for { n++ @@ -3282,6 +3385,36 @@ func (m *RaftCommand) Unmarshal(dAtA []byte) error { } m.TraceData[mapkey] = mapvalue iNdEx = postIndex + case 17: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ClosedTimestamp", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowProposerKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << 
shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthProposerKv + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.ClosedTimestamp.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipProposerKv(dAtA[iNdEx:]) @@ -3372,6 +3505,86 @@ func (m *RaftCommandFooter) Unmarshal(dAtA []byte) error { } return nil } +func (m *ClosedTimestampFooter) Unmarshal(dAtA []byte) error { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowProposerKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: ClosedTimestampFooter: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: ClosedTimestampFooter: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 17: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ClosedTimestamp", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowProposerKv + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthProposerKv + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.ClosedTimestamp.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipProposerKv(dAtA[iNdEx:]) + if err != nil { + return err + } + if (skippy < 0) || (iNdEx+skippy) < 0 { + return 
ErrInvalidLengthProposerKv + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} func skipProposerKv(dAtA []byte) (n int, err error) { l := len(dAtA) iNdEx := 0 @@ -3478,98 +3691,100 @@ var ( ) func init() { - proto.RegisterFile("kv/kvserver/kvserverpb/proposer_kv.proto", fileDescriptor_proposer_kv_0c8837b323bf7b92) -} - -var fileDescriptor_proposer_kv_0c8837b323bf7b92 = []byte{ - // 1424 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xa4, 0x57, 0x4f, 0x6f, 0xdb, 0x46, - 0x16, 0xb7, 0x2c, 0xc9, 0xa6, 0x9e, 0x6c, 0x89, 0x9e, 0x38, 0x09, 0xd7, 0xbb, 0x2b, 0x19, 0xda, - 0x6c, 0xe0, 0xdd, 0xcd, 0x52, 0x81, 0xbd, 0x0b, 0x14, 0x49, 0x50, 0xc4, 0x92, 0x93, 0xc6, 0x8a, - 0xed, 0x26, 0x23, 0x27, 0x2d, 0xd2, 0x03, 0x31, 0x22, 0x27, 0x14, 0x2b, 0x8a, 0x64, 0x86, 0x23, - 0x25, 0xfe, 0x14, 0x6d, 0x81, 0x1e, 0x7a, 0x6a, 0x73, 0xec, 0xd7, 0xe8, 0x2d, 0x97, 0x02, 0x39, - 0x06, 0x3d, 0x08, 0x8d, 0x73, 0xe9, 0x67, 0xc8, 0xa9, 0x98, 0xe1, 0x50, 0x92, 0x0b, 0xa7, 0x56, - 0xda, 0xdb, 0xf0, 0xcd, 0xbc, 0xdf, 0x7b, 0xf3, 0xfe, 0xfc, 0xde, 0x10, 0x36, 0x7a, 0xc3, 0x7a, - 0x6f, 0x18, 0x53, 0x36, 0xa4, 0x6c, 0xbc, 0x88, 0x3a, 0xf5, 0x88, 0x85, 0x51, 0x18, 0x53, 0x66, - 0xf5, 0x86, 0x66, 0xc4, 0x42, 0x1e, 0xa2, 0xaa, 0x1d, 0xda, 0x3d, 0x16, 0x12, 0xbb, 0x6b, 0xf6, - 0x86, 0x66, 0x7a, 0xd4, 0x8c, 0x79, 0xc8, 0x88, 0x4b, 0xa3, 0xce, 0xda, 0x8a, 0xdc, 0x8c, 0x3a, - 0x75, 0x12, 0x79, 0x89, 0xce, 0x1a, 0x4a, 0x45, 0x0e, 0xe1, 0x44, 0xc9, 0x2e, 0xa4, 0xb2, 0x3e, - 0xe5, 0x64, 0x4a, 0xfe, 0x57, 0x85, 0x54, 0xa7, 0x81, 0xeb, 0x05, 0x54, 0x1c, 0x18, 0xda, 0xb6, - 0xda, 0xfc, 0xdb, 0xa9, 0x9b, 0x5b, 0x6a, 0xb7, 0xf6, 0x8e, 0x4b, 0xc4, 0x9c, 0x70, 0xaa, 0xce, - 0x18, 0x03, 0xee, 0xf9, 0xf5, 0xae, 0x6f, 0xd7, 0xb9, 0xd7, 0xa7, 0x31, 0x27, 0xfd, 0x48, 0xed, - 0xac, 0xba, 0xa1, 0x1b, 0xca, 0x65, 0x5d, 0xac, 0x12, 0x69, 0xed, 0xfb, 0x0c, 0xe4, 0xdb, 
0x91, - 0xef, 0x71, 0xd4, 0x84, 0x45, 0xce, 0x3c, 0xd7, 0xa5, 0xcc, 0xc8, 0xac, 0x67, 0x36, 0x8a, 0x9b, - 0x55, 0x73, 0x12, 0x0a, 0x75, 0x19, 0x53, 0x1e, 0x3d, 0x4c, 0x8e, 0x35, 0xb4, 0x17, 0xa3, 0xea, - 0xdc, 0xcb, 0x51, 0x35, 0x83, 0x53, 0x4d, 0x74, 0x08, 0x05, 0xd6, 0x8d, 0x2d, 0x87, 0xfa, 0x9c, - 0x18, 0xf3, 0x12, 0xe6, 0x9f, 0x53, 0x30, 0xea, 0x7a, 0x66, 0x7a, 0x3d, 0x73, 0xff, 0x61, 0xb3, - 0xd9, 0xe6, 0x84, 0xc7, 0x0d, 0x5d, 0x80, 0x1d, 0x8f, 0xaa, 0x1a, 0xbe, 0xd3, 0xde, 0x11, 0xea, - 0x58, 0x63, 0xdd, 0x58, 0xae, 0xae, 0xe5, 0x7e, 0x79, 0x5e, 0xcd, 0xd4, 0x30, 0xe4, 0xf7, 0x29, - 0x73, 0xe9, 0x6c, 0x9e, 0xca, 0xa3, 0xef, 0xf6, 0x54, 0x61, 0x3a, 0x50, 0x6a, 0x76, 0x49, 0xe0, - 0x52, 0x4c, 0x23, 0xdf, 0xb3, 0x49, 0x8c, 0xf6, 0x7e, 0x0b, 0xbe, 0x71, 0x0a, 0xf8, 0x49, 0x9d, - 0xdf, 0xb3, 0xf2, 0xcd, 0xf3, 0xea, 0x5c, 0xed, 0xf5, 0x3c, 0x94, 0x9b, 0x61, 0x3f, 0x1a, 0x70, - 0xda, 0xec, 0x52, 0xbb, 0x17, 0x0f, 0xfa, 0xe8, 0x73, 0x28, 0xda, 0x6a, 0x6d, 0x79, 0x8e, 0xb4, - 0xb5, 0xd4, 0xd8, 0x15, 0x08, 0x3f, 0x8d, 0xaa, 0x5b, 0xae, 0xc7, 0xbb, 0x83, 0x8e, 0x69, 0x87, - 0xfd, 0xfa, 0xd8, 0xba, 0xd3, 0x99, 0xac, 0xeb, 0x51, 0xcf, 0xad, 0xcb, 0x54, 0x0f, 0x06, 0x9e, - 0x63, 0x3e, 0x78, 0xb0, 0xbb, 0x73, 0x3c, 0xaa, 0x42, 0x8a, 0xbe, 0xbb, 0x83, 0x21, 0x45, 0xdf, - 0x75, 0xd0, 0x3f, 0x60, 0x39, 0x26, 0x43, 0x6a, 0xc5, 0x01, 0x89, 0xe2, 0x6e, 0xc8, 0x65, 0x66, - 0x34, 0xbc, 0x24, 0x84, 0x6d, 0x25, 0x43, 0x5b, 0x90, 0xeb, 0x87, 0x0e, 0x35, 0xb2, 0xeb, 0x99, - 0x8d, 0xd2, 0xa9, 0x21, 0x4d, 0xd1, 0xf7, 0x43, 0x87, 0x62, 0x79, 0x18, 0x55, 0x20, 0xb1, 0x13, - 0x85, 0x5e, 0xc0, 0x8d, 0x9c, 0x84, 0x9d, 0x92, 0x20, 0x03, 0x16, 0x87, 0x94, 0xc5, 0x5e, 0x18, - 0x18, 0xf9, 0xf5, 0xcc, 0xc6, 0x32, 0x4e, 0x3f, 0xd1, 0x1d, 0x28, 0x70, 0xca, 0xfa, 0x5e, 0x40, - 0x38, 0x35, 0x16, 0xd6, 0xb3, 0x1b, 0xc5, 0xcd, 0x4b, 0xa7, 0xd8, 0x54, 0x31, 0xde, 0xa1, 0xb1, - 0xcd, 0xbc, 0x88, 0x87, 0xac, 0x91, 0x13, 0x31, 0xc2, 0x13, 0x65, 0x95, 0xc9, 0x87, 0x00, 0x22, - 0xc4, 0xc4, 0xe6, 0x02, 0x7d, 
0x15, 0xf2, 0x9d, 0x23, 0x4e, 0x63, 0x19, 0xd7, 0x2c, 0x4e, 0x3e, - 0xd0, 0x15, 0x40, 0xf1, 0xc0, 0x75, 0x69, 0xcc, 0xa9, 0x63, 0x11, 0x6e, 0x05, 0x24, 0x08, 0x63, - 0x19, 0x8c, 0x2c, 0xd6, 0xc7, 0x3b, 0xdb, 0xfc, 0x40, 0xc8, 0x15, 0xee, 0xd7, 0xf3, 0x70, 0xae, - 0x9d, 0x6e, 0x4d, 0x59, 0xb8, 0x0f, 0x85, 0x98, 0x13, 0xc6, 0xad, 0x1e, 0x3d, 0x52, 0xd9, 0xfb, - 0xdf, 0xdb, 0x51, 0xf5, 0xea, 0x4c, 0x99, 0x4b, 0x6f, 0x77, 0x97, 0x1e, 0x61, 0x4d, 0xc2, 0xdc, - 0xa5, 0x47, 0x68, 0x1f, 0x16, 0x69, 0xe0, 0x48, 0xc0, 0xf9, 0x3f, 0x01, 0xb8, 0x40, 0x03, 0x47, - 0xc0, 0x3d, 0x00, 0xb0, 0xc7, 0xfe, 0xca, 0xb4, 0x16, 0x37, 0xff, 0x63, 0x9e, 0x41, 0x6f, 0xe6, - 0xe4, 0x8a, 0x53, 0xf5, 0x3c, 0x05, 0xa4, 0xc2, 0xf2, 0x83, 0x06, 0xab, 0x2a, 0x37, 0x9c, 0x3a, - 0xb7, 0x86, 0xc4, 0xc7, 0x34, 0x1e, 0xf8, 0x82, 0x46, 0xf2, 0x92, 0x8f, 0x54, 0xf7, 0xff, 0xf7, - 0x4c, 0x83, 0x0a, 0x45, 0xb0, 0x00, 0xc5, 0x89, 0x2e, 0xba, 0x01, 0xf9, 0x58, 0x30, 0x8d, 0xf2, - 0xfa, 0xf2, 0x99, 0x20, 0x92, 0x97, 0x70, 0xa2, 0x24, 0xb4, 0xfb, 0xa2, 0xfb, 0x65, 0x3d, 0xce, - 0xa2, 0x2d, 0xb9, 0x02, 0x27, 0x4a, 0x68, 0x03, 0x74, 0x2f, 0xb6, 0x7c, 0x4a, 0x62, 0x6a, 0x31, - 0xfa, 0x64, 0x40, 0x63, 0x6e, 0x2c, 0xc8, 0xc2, 0x2e, 0x79, 0xf1, 0x9e, 0x10, 0xe3, 0x44, 0x8a, - 0xb6, 0xa1, 0x30, 0x26, 0x59, 0x43, 0x93, 0xb6, 0xfe, 0x3e, 0x65, 0x4b, 0xb4, 0xa7, 0xd9, 0xf5, - 0x6d, 0xf3, 0x30, 0x3d, 0x34, 0xae, 0xdd, 0x54, 0x80, 0xee, 0x81, 0xee, 0xd0, 0x88, 0x51, 0x19, - 0x45, 0x45, 0x9b, 0xf0, 0x1e, 0xb4, 0x89, 0xcb, 0x13, 0x75, 0xc9, 0x95, 0xe8, 0x53, 0x28, 0xdb, - 0x92, 0x9d, 0x2c, 0xa6, 0xe8, 0xc9, 0x58, 0x92, 0x80, 0xf5, 0xb3, 0x53, 0x7f, 0x82, 0xd5, 0x70, - 0xc9, 0x3e, 0xc9, 0x8c, 0x97, 0xa0, 0xc4, 0xc8, 0x63, 0x6e, 0xf9, 0xa1, 0xab, 0x3c, 0x5d, 0x96, - 0x9d, 0xb3, 0x24, 0xa4, 0x7b, 0xa1, 0x9b, 0xd8, 0x7f, 0x02, 0x45, 0xe2, 0x38, 0x56, 0x1c, 0x73, - 0xd2, 0xf1, 0xa9, 0xb1, 0x22, 0x6d, 0xdf, 0x9c, 0xb5, 0x0a, 0x4e, 0xd4, 0x92, 0xb9, 0xed, 0x38, - 0xed, 0xf6, 0xa1, 0xc0, 0x69, 0x94, 0x04, 0xbd, 0x4d, 0xbe, 0x31, 
0x10, 0xc7, 0x69, 0x27, 0x36, - 0xd0, 0x6d, 0xc8, 0x27, 0xfe, 0x20, 0x69, 0xec, 0xdf, 0x33, 0x45, 0x4e, 0x7a, 0xab, 0x12, 0x92, - 0xa8, 0xa3, 0x2f, 0x32, 0x70, 0x2e, 0x62, 0x74, 0xa8, 0x92, 0x9f, 0xbc, 0x0d, 0x88, 0x6f, 0xac, - 0xce, 0x92, 0xda, 0x9b, 0x6f, 0x47, 0xd5, 0x1b, 0xb3, 0xd3, 0xb6, 0x50, 0x6e, 0xfa, 0xa1, 0xdd, - 0x1b, 0x23, 0xe0, 0x15, 0x61, 0x5b, 0x16, 0xd8, 0x3d, 0x65, 0x19, 0x7d, 0x06, 0xba, 0x9d, 0xcc, - 0x0d, 0x2b, 0xa5, 0x73, 0xe3, 0xbc, 0xf4, 0xe6, 0xea, 0x4c, 0x8d, 0x3c, 0x35, 0x70, 0x70, 0xd9, - 0x3e, 0x29, 0x58, 0xfb, 0x08, 0xa6, 0x02, 0x8a, 0x10, 0xe4, 0xc4, 0x2b, 0x25, 0xa1, 0x32, 0x2c, - 0xd7, 0xa8, 0x0a, 0x79, 0x9b, 0xd9, 0x5b, 0x9b, 0xb2, 0x97, 0x97, 0x1b, 0x85, 0xe3, 0x51, 0x35, - 0xdf, 0xc4, 0xcd, 0xad, 0x4d, 0x9c, 0xc8, 0x13, 0x2e, 0x68, 0xe5, 0xb4, 0x8c, 0x3e, 0xdf, 0xca, - 0x69, 0x79, 0x7d, 0xa1, 0x95, 0xd3, 0x16, 0x75, 0xad, 0x95, 0xd3, 0x0a, 0x3a, 0xb4, 0x72, 0x5a, - 0x49, 0x2f, 0xb7, 0x72, 0x5a, 0x59, 0xd7, 0x5b, 0x39, 0x4d, 0xd7, 0x57, 0x5a, 0x39, 0xed, 0x9c, - 0xbe, 0xda, 0x5a, 0xd0, 0xbe, 0x3a, 0xd0, 0xbf, 0x3d, 0xa8, 0xad, 0x03, 0x7c, 0xc2, 0x3c, 0x4e, - 0x1b, 0x84, 0xdb, 0xdd, 0xd3, 0x1c, 0xa8, 0xdd, 0x87, 0xa5, 0xbd, 0xd0, 0xf5, 0x6c, 0xe2, 0x7f, - 0x1c, 0xed, 0x85, 0x2e, 0xda, 0x86, 0x6c, 0x18, 0x09, 0x52, 0x17, 0xe3, 0xe2, 0x5f, 0x67, 0xe5, - 0x79, 0xac, 0xaa, 0xd2, 0x2c, 0x74, 0x6b, 0x3f, 0xe6, 0xa1, 0x88, 0xc9, 0x63, 0xde, 0x0c, 0xfb, - 0x7d, 0x12, 0x38, 0xe8, 0x32, 0x94, 0xfb, 0xe4, 0x99, 0x4a, 0xb9, 0x17, 0x38, 0xf4, 0x99, 0xa4, - 0x8d, 0x1c, 0x5e, 0xee, 0x93, 0x67, 0x32, 0x1b, 0xbb, 0x42, 0x88, 0x0e, 0xe1, 0x2f, 0x53, 0x9d, - 0x3a, 0x7e, 0x37, 0x4a, 0x3d, 0x39, 0xdb, 0x8a, 0x9b, 0xc6, 0x29, 0xf3, 0x2b, 0x21, 0x8c, 0x8b, - 0x13, 0xd5, 0x7b, 0x4a, 0x53, 0x6e, 0xa0, 0x21, 0x5c, 0x3c, 0x09, 0x65, 0xc5, 0x82, 0x5c, 0x02, - 0x9b, 0x4a, 0xce, 0xc9, 0x36, 0x3e, 0x7c, 0x3b, 0xaa, 0x5e, 0x7b, 0xaf, 0x11, 0x20, 0x81, 0xdb, - 0x0a, 0x05, 0x9f, 0x8f, 0xa6, 0xed, 0xa5, 0x62, 0xf4, 0x04, 0x2e, 0xb0, 0x71, 0xc7, 0x59, 0x74, - 0x48, 
0x7c, 0x8b, 0xc9, 0x9e, 0x93, 0x3d, 0x5d, 0xdc, 0xfc, 0xff, 0x1f, 0x6a, 0x58, 0x15, 0xe7, - 0x55, 0x76, 0xda, 0x60, 0xd8, 0x83, 0xe2, 0x53, 0x91, 0x6d, 0xab, 0x23, 0xd2, 0x6d, 0x94, 0x66, - 0x9c, 0x47, 0x93, 0x0a, 0xc1, 0xf0, 0x74, 0x52, 0x2d, 0x6d, 0x28, 0xf9, 0x49, 0x7a, 0xad, 0x30, - 0x12, 0x94, 0x64, 0x94, 0x67, 0x9c, 0x37, 0xd3, 0x05, 0x85, 0x97, 0xfc, 0xe9, 0xf2, 0x7a, 0x04, - 0xc0, 0x19, 0xb1, 0xa9, 0x25, 0x0b, 0x51, 0x97, 0x55, 0x76, 0xfd, 0xec, 0x48, 0x4c, 0xaa, 0xc9, - 0x3c, 0x14, 0xea, 0x3b, 0x84, 0x93, 0x5b, 0x01, 0x67, 0x47, 0xb8, 0xc0, 0xd3, 0xef, 0xb5, 0x1b, - 0x50, 0x3a, 0xb9, 0x89, 0x74, 0xc8, 0xa6, 0x6f, 0x87, 0x02, 0x16, 0x4b, 0xf1, 0x6a, 0x19, 0x12, - 0x7f, 0x90, 0xcc, 0xce, 0x02, 0x4e, 0x3e, 0xae, 0xcd, 0x7f, 0x20, 0x5a, 0x2c, 0xab, 0xe7, 0xc6, - 0x8d, 0x36, 0xaf, 0x67, 0x93, 0x26, 0xfa, 0xee, 0xa0, 0x76, 0x1d, 0x56, 0xa6, 0x1c, 0xb8, 0x1d, - 0x86, 0x9c, 0xb2, 0x59, 0x8b, 0xba, 0x71, 0xe5, 0xc5, 0xeb, 0xca, 0xdc, 0x8b, 0xe3, 0x4a, 0xe6, - 0xe5, 0x71, 0x25, 0xf3, 0xea, 0xb8, 0x92, 0xf9, 0xf9, 0xb8, 0x92, 0xf9, 0xf2, 0x4d, 0x65, 0xee, - 0xe5, 0x9b, 0xca, 0xdc, 0xab, 0x37, 0x95, 0xb9, 0x47, 0x30, 0xf9, 0xcf, 0xe8, 0x2c, 0xc8, 0x5f, - 0x86, 0xad, 0x5f, 0x03, 0x00, 0x00, 0xff, 0xff, 0xf1, 0xe4, 0x48, 0xb2, 0x4d, 0x0d, 0x00, 0x00, + proto.RegisterFile("kv/kvserver/kvserverpb/proposer_kv.proto", fileDescriptor_proposer_kv_0b3536bd0bf3d98c) +} + +var fileDescriptor_proposer_kv_0b3536bd0bf3d98c = []byte{ + // 1453 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xac, 0x57, 0x4f, 0x73, 0x13, 0xc7, + 0x12, 0xb7, 0x2c, 0xc9, 0x5e, 0xb5, 0x6c, 0x69, 0x3d, 0x18, 0xd8, 0xe7, 0xf7, 0x9e, 0xe4, 0xd2, + 0xe3, 0x51, 0x4e, 0x42, 0x56, 0x94, 0x9d, 0x54, 0xa5, 0x80, 0x4a, 0x61, 0xc9, 0x10, 0x2c, 0x6c, + 0x07, 0x46, 0x86, 0xa4, 0xc8, 0x61, 0x6b, 0xb4, 0x3b, 0xac, 0x36, 0x5a, 0x69, 0x97, 0x9d, 0x91, + 0xc0, 0x9f, 0x22, 0x49, 0x55, 0x0e, 0xb9, 0x24, 0xe1, 0x98, 0xaf, 0x91, 0x1b, 0x47, 0x8e, 0x54, + 0x0e, 0xaa, 0x60, 
0x2e, 0xf9, 0x0c, 0x9c, 0x52, 0x33, 0x3b, 0xab, 0x3f, 0x29, 0x13, 0x8b, 0x24, + 0xb7, 0xd9, 0x9e, 0xe9, 0x5f, 0xf7, 0xf4, 0x9f, 0x5f, 0xcf, 0xc2, 0x46, 0x67, 0x50, 0xed, 0x0c, + 0x18, 0x8d, 0x06, 0x34, 0x1a, 0x2d, 0xc2, 0x56, 0x35, 0x8c, 0x82, 0x30, 0x60, 0x34, 0xb2, 0x3a, + 0x03, 0x33, 0x8c, 0x02, 0x1e, 0xa0, 0xb2, 0x1d, 0xd8, 0x9d, 0x28, 0x20, 0x76, 0xdb, 0xec, 0x0c, + 0xcc, 0xe4, 0xa8, 0xc9, 0x78, 0x10, 0x11, 0x97, 0x86, 0xad, 0xb5, 0x15, 0xb9, 0x19, 0xb6, 0xaa, + 0x24, 0xf4, 0x62, 0x9d, 0x35, 0x94, 0x88, 0x1c, 0xc2, 0x89, 0x92, 0x9d, 0x4b, 0x64, 0x5d, 0xca, + 0xc9, 0x84, 0xfc, 0xdf, 0x0a, 0xa9, 0x4a, 0x7b, 0xae, 0xd7, 0xa3, 0xe2, 0xc0, 0xc0, 0xb6, 0xd5, + 0xe6, 0x7f, 0x4e, 0xdc, 0xdc, 0x52, 0xbb, 0x95, 0x37, 0x5c, 0x82, 0x71, 0xc2, 0xa9, 0x3a, 0x63, + 0xf4, 0xb9, 0xe7, 0x57, 0xdb, 0xbe, 0x5d, 0xe5, 0x5e, 0x97, 0x32, 0x4e, 0xba, 0xa1, 0xda, 0x59, + 0x75, 0x03, 0x37, 0x90, 0xcb, 0xaa, 0x58, 0xc5, 0xd2, 0xca, 0x4f, 0x29, 0xc8, 0x36, 0x43, 0xdf, + 0xe3, 0xa8, 0x0e, 0x8b, 0x3c, 0xf2, 0x5c, 0x97, 0x46, 0x46, 0x6a, 0x3d, 0xb5, 0x91, 0xdf, 0x2c, + 0x9b, 0xe3, 0x50, 0xa8, 0xcb, 0x98, 0xf2, 0xe8, 0x61, 0x7c, 0xac, 0xa6, 0x3d, 0x1b, 0x96, 0xe7, + 0x9e, 0x0f, 0xcb, 0x29, 0x9c, 0x68, 0xa2, 0x43, 0xc8, 0x45, 0x6d, 0x66, 0x39, 0xd4, 0xe7, 0xc4, + 0x98, 0x97, 0x30, 0xff, 0x9f, 0x80, 0x51, 0xd7, 0x33, 0x93, 0xeb, 0x99, 0xfb, 0xf7, 0xeb, 0xf5, + 0x26, 0x27, 0x9c, 0xd5, 0x74, 0x01, 0x76, 0x3c, 0x2c, 0x6b, 0xf8, 0x56, 0x73, 0x47, 0xa8, 0x63, + 0x2d, 0x6a, 0x33, 0xb9, 0xba, 0x92, 0xf9, 0xed, 0x69, 0x39, 0x55, 0xc1, 0x90, 0xdd, 0xa7, 0x91, + 0x4b, 0x67, 0xf3, 0x54, 0x1e, 0x7d, 0xb3, 0xa7, 0x0a, 0xd3, 0x81, 0x42, 0xbd, 0x4d, 0x7a, 0x2e, + 0xc5, 0x34, 0xf4, 0x3d, 0x9b, 0x30, 0xb4, 0xf7, 0x47, 0xf0, 0x8d, 0x13, 0xc0, 0xa7, 0x75, 0xfe, + 0xcc, 0xca, 0x77, 0x4f, 0xcb, 0x73, 0x95, 0x97, 0xf3, 0x50, 0xac, 0x07, 0xdd, 0xb0, 0xcf, 0x69, + 0xbd, 0x4d, 0xed, 0x0e, 0xeb, 0x77, 0xd1, 0x97, 0x90, 0xb7, 0xd5, 0xda, 0xf2, 0x1c, 0x69, 0x6b, + 0xa9, 0xb6, 0x2b, 0x10, 0x7e, 0x19, 0x96, 0xb7, 0x5c, 
0x8f, 0xb7, 0xfb, 0x2d, 0xd3, 0x0e, 0xba, + 0xd5, 0x91, 0x75, 0xa7, 0x35, 0x5e, 0x57, 0xc3, 0x8e, 0x5b, 0x95, 0xa9, 0xee, 0xf7, 0x3d, 0xc7, + 0xbc, 0x77, 0x6f, 0x77, 0xe7, 0x78, 0x58, 0x86, 0x04, 0x7d, 0x77, 0x07, 0x43, 0x82, 0xbe, 0xeb, + 0xa0, 0xff, 0xc1, 0x32, 0x23, 0x03, 0x6a, 0xb1, 0x1e, 0x09, 0x59, 0x3b, 0xe0, 0x32, 0x33, 0x1a, + 0x5e, 0x12, 0xc2, 0xa6, 0x92, 0xa1, 0x2d, 0xc8, 0x74, 0x03, 0x87, 0x1a, 0xe9, 0xf5, 0xd4, 0x46, + 0xe1, 0xc4, 0x90, 0x26, 0xe8, 0xfb, 0x81, 0x43, 0xb1, 0x3c, 0x8c, 0x4a, 0x10, 0xdb, 0x09, 0x03, + 0xaf, 0xc7, 0x8d, 0x8c, 0x84, 0x9d, 0x90, 0x20, 0x03, 0x16, 0x07, 0x34, 0x62, 0x5e, 0xd0, 0x33, + 0xb2, 0xeb, 0xa9, 0x8d, 0x65, 0x9c, 0x7c, 0xa2, 0x5b, 0x90, 0xe3, 0x34, 0xea, 0x7a, 0x3d, 0xc2, + 0xa9, 0xb1, 0xb0, 0x9e, 0xde, 0xc8, 0x6f, 0x5e, 0x38, 0xc1, 0xa6, 0x8a, 0xf1, 0x0e, 0x65, 0x76, + 0xe4, 0x85, 0x3c, 0x88, 0x6a, 0x19, 0x11, 0x23, 0x3c, 0x56, 0x56, 0x99, 0xbc, 0x0f, 0x20, 0x42, + 0x4c, 0x6c, 0x2e, 0xd0, 0x57, 0x21, 0xdb, 0x3a, 0xe2, 0x94, 0xc9, 0xb8, 0xa6, 0x71, 0xfc, 0x81, + 0x2e, 0x01, 0x62, 0x7d, 0xd7, 0xa5, 0x8c, 0x53, 0xc7, 0x22, 0xdc, 0xea, 0x91, 0x5e, 0xc0, 0x64, + 0x30, 0xd2, 0x58, 0x1f, 0xed, 0x6c, 0xf3, 0x03, 0x21, 0x57, 0xb8, 0xdf, 0xce, 0xc3, 0x99, 0x66, + 0xb2, 0x35, 0x61, 0xe1, 0x2e, 0xe4, 0x18, 0x27, 0x11, 0xb7, 0x3a, 0xf4, 0x48, 0x65, 0xef, 0x83, + 0xd7, 0xc3, 0xf2, 0xe5, 0x99, 0x32, 0x97, 0xdc, 0xee, 0x36, 0x3d, 0xc2, 0x9a, 0x84, 0xb9, 0x4d, + 0x8f, 0xd0, 0x3e, 0x2c, 0xd2, 0x9e, 0x23, 0x01, 0xe7, 0xff, 0x06, 0xe0, 0x02, 0xed, 0x39, 0x02, + 0xee, 0x1e, 0x80, 0x3d, 0xf2, 0x57, 0xa6, 0x35, 0xbf, 0xf9, 0x9e, 0x79, 0x0a, 0xbd, 0x99, 0xe3, + 0x2b, 0x4e, 0xd4, 0xf3, 0x04, 0x90, 0x0a, 0xcb, 0xcf, 0x1a, 0xac, 0xaa, 0xdc, 0x70, 0xea, 0xdc, + 0x18, 0x10, 0x1f, 0x53, 0xd6, 0xf7, 0x05, 0x8d, 0x64, 0x25, 0x1f, 0xa9, 0xee, 0x7f, 0xff, 0x54, + 0x83, 0x0a, 0x45, 0xb0, 0x00, 0xc5, 0xb1, 0x2e, 0xba, 0x06, 0x59, 0x26, 0x98, 0x46, 0x79, 0x7d, + 0xf1, 0x54, 0x10, 0xc9, 0x4b, 0x38, 0x56, 0x12, 0xda, 0x5d, 0xd1, 0xfd, 0xb2, 0x1e, 0x67, 
0xd1, + 0x96, 0x5c, 0x81, 0x63, 0x25, 0xb4, 0x01, 0xba, 0xc7, 0x2c, 0x9f, 0x12, 0x46, 0xad, 0x88, 0x3e, + 0xea, 0x53, 0xc6, 0x8d, 0x05, 0x59, 0xd8, 0x05, 0x8f, 0xed, 0x09, 0x31, 0x8e, 0xa5, 0x68, 0x1b, + 0x72, 0x23, 0x92, 0x35, 0x34, 0x69, 0xeb, 0xbf, 0x13, 0xb6, 0x44, 0x7b, 0x9a, 0x6d, 0xdf, 0x36, + 0x0f, 0x93, 0x43, 0xa3, 0xda, 0x4d, 0x04, 0xe8, 0x0e, 0xe8, 0x0e, 0x0d, 0x23, 0x2a, 0xa3, 0xa8, + 0x68, 0x13, 0xde, 0x82, 0x36, 0x71, 0x71, 0xac, 0x2e, 0xb9, 0x12, 0x7d, 0x0e, 0x45, 0x5b, 0xb2, + 0x93, 0x15, 0x29, 0x7a, 0x32, 0x96, 0x24, 0x60, 0xf5, 0xf4, 0xd4, 0x4f, 0xb1, 0x1a, 0x2e, 0xd8, + 0xd3, 0xcc, 0x78, 0x01, 0x0a, 0x11, 0x79, 0xc8, 0x2d, 0x3f, 0x70, 0x95, 0xa7, 0xcb, 0xb2, 0x73, + 0x96, 0x84, 0x74, 0x2f, 0x70, 0x63, 0xfb, 0x8f, 0x20, 0x4f, 0x1c, 0xc7, 0x62, 0x8c, 0x93, 0x96, + 0x4f, 0x8d, 0x15, 0x69, 0xfb, 0xfa, 0xac, 0x55, 0x30, 0x55, 0x4b, 0xe6, 0xb6, 0xe3, 0x34, 0x9b, + 0x87, 0x02, 0xa7, 0x56, 0x10, 0xf4, 0x36, 0xfe, 0xc6, 0x40, 0x1c, 0xa7, 0x19, 0xdb, 0x40, 0x37, + 0x21, 0x1b, 0xfb, 0x83, 0xa4, 0xb1, 0x77, 0x67, 0x8a, 0x9c, 0xf4, 0x56, 0x25, 0x24, 0x56, 0x47, + 0x5f, 0xa5, 0xe0, 0x4c, 0x18, 0xd1, 0x81, 0x4a, 0x7e, 0xfc, 0x36, 0x20, 0xbe, 0xb1, 0x3a, 0x4b, + 0x6a, 0xaf, 0xbf, 0x1e, 0x96, 0xaf, 0xcd, 0x4e, 0xdb, 0x42, 0xb9, 0xee, 0x07, 0x76, 0x67, 0x84, + 0x80, 0x57, 0x84, 0x6d, 0x59, 0x60, 0x77, 0x94, 0x65, 0xf4, 0x05, 0xe8, 0x76, 0x3c, 0x37, 0xac, + 0x84, 0xce, 0x8d, 0xb3, 0xd2, 0x9b, 0xcb, 0x33, 0x35, 0xf2, 0xc4, 0xc0, 0xc1, 0x45, 0x7b, 0x5a, + 0xb0, 0xf6, 0x09, 0x4c, 0x04, 0x14, 0x21, 0xc8, 0x88, 0x57, 0x4a, 0x4c, 0x65, 0x58, 0xae, 0x51, + 0x19, 0xb2, 0x76, 0x64, 0x6f, 0x6d, 0xca, 0x5e, 0x5e, 0xae, 0xe5, 0x8e, 0x87, 0xe5, 0x6c, 0x1d, + 0xd7, 0xb7, 0x36, 0x71, 0x2c, 0x8f, 0xb9, 0xa0, 0x91, 0xd1, 0x52, 0xfa, 0x7c, 0x23, 0xa3, 0x65, + 0xf5, 0x85, 0x46, 0x46, 0x5b, 0xd4, 0xb5, 0x46, 0x46, 0xcb, 0xe9, 0xd0, 0xc8, 0x68, 0x05, 0xbd, + 0xd8, 0xc8, 0x68, 0x45, 0x5d, 0x6f, 0x64, 0x34, 0x5d, 0x5f, 0x69, 0x64, 0xb4, 0x33, 0xfa, 0x6a, + 0x63, 0x41, 0xfb, 0xe6, 0x40, 
0xff, 0xe1, 0xa0, 0xb2, 0x0e, 0xf0, 0x59, 0xe4, 0x71, 0x5a, 0x23, + 0xdc, 0x6e, 0x9f, 0xe4, 0x40, 0xe5, 0x2e, 0x2c, 0xed, 0x05, 0xae, 0x67, 0x13, 0xff, 0xd3, 0x70, + 0x2f, 0x70, 0xd1, 0x36, 0xa4, 0x83, 0x50, 0x90, 0xba, 0x18, 0x17, 0xef, 0x9c, 0x96, 0xe7, 0x91, + 0xaa, 0x4a, 0xb3, 0xd0, 0xad, 0x7c, 0xbf, 0x00, 0x79, 0x4c, 0x1e, 0xf2, 0x7a, 0xd0, 0xed, 0x92, + 0x9e, 0x83, 0x2e, 0x42, 0xb1, 0x4b, 0x9e, 0xa8, 0x94, 0x7b, 0x3d, 0x87, 0x3e, 0x91, 0xb4, 0x91, + 0xc1, 0xcb, 0x5d, 0xf2, 0x44, 0x66, 0x63, 0x57, 0x08, 0xd1, 0x21, 0xfc, 0x6b, 0xa2, 0x53, 0x47, + 0xef, 0x46, 0xa9, 0x27, 0x67, 0x5b, 0x7e, 0xd3, 0x38, 0x61, 0x7e, 0xc5, 0x84, 0x71, 0x7e, 0xac, + 0x7a, 0x47, 0x69, 0xca, 0x0d, 0x34, 0x80, 0xf3, 0xd3, 0x50, 0x16, 0x13, 0xe4, 0xd2, 0xb3, 0xa9, + 0xe4, 0x9c, 0x74, 0xed, 0xe3, 0xd7, 0xc3, 0xf2, 0x95, 0xb7, 0x1a, 0x01, 0x12, 0xb8, 0xa9, 0x50, + 0xf0, 0xd9, 0x70, 0xd2, 0x5e, 0x22, 0x46, 0x8f, 0xe0, 0x5c, 0x34, 0xea, 0x38, 0x8b, 0x0e, 0x88, + 0x6f, 0x45, 0xb2, 0xe7, 0x64, 0x4f, 0xe7, 0x37, 0x3f, 0xfc, 0x4b, 0x0d, 0xab, 0xe2, 0xbc, 0x1a, + 0x9d, 0x34, 0x18, 0xf6, 0x20, 0xff, 0x58, 0x64, 0xdb, 0x6a, 0x89, 0x74, 0x1b, 0x85, 0x19, 0xe7, + 0xd1, 0xb8, 0x42, 0x30, 0x3c, 0x1e, 0x57, 0x4b, 0x13, 0x0a, 0x7e, 0x9c, 0x5e, 0x2b, 0x08, 0x05, + 0x25, 0x19, 0xc5, 0x19, 0xe7, 0xcd, 0x64, 0x41, 0xe1, 0x25, 0x7f, 0xb2, 0xbc, 0x1e, 0x00, 0xf0, + 0x88, 0xd8, 0xd4, 0x92, 0x85, 0xa8, 0xcb, 0x2a, 0xbb, 0x7a, 0x7a, 0x24, 0xc6, 0xd5, 0x64, 0x1e, + 0x0a, 0xf5, 0x1d, 0xc2, 0xc9, 0x8d, 0x1e, 0x8f, 0x8e, 0x70, 0x8e, 0x27, 0xdf, 0xe8, 0x00, 0x74, + 0xdb, 0x0f, 0x18, 0x75, 0xac, 0xf1, 0xcc, 0x58, 0x99, 0x7d, 0x66, 0x14, 0x63, 0xe5, 0x91, 0x78, + 0xed, 0x1a, 0x14, 0xa6, 0x8d, 0x21, 0x1d, 0xd2, 0xc9, 0x5b, 0x24, 0x87, 0xc5, 0x52, 0xbc, 0x82, + 0x06, 0xc4, 0xef, 0xc7, 0xb3, 0x38, 0x87, 0xe3, 0x8f, 0x2b, 0xf3, 0x1f, 0x89, 0x96, 0x4d, 0xeb, + 0x99, 0x51, 0xe3, 0xce, 0xeb, 0xe9, 0xb8, 0x29, 0x7f, 0x3c, 0xa8, 0x5c, 0x85, 0x95, 0x89, 0x0b, + 0xdd, 0x0c, 0x02, 0x4e, 0xa3, 0x59, 0x9b, 0xa4, 0xe2, 0xc2, 0xd9, 
0xfa, 0xb4, 0x9f, 0x0a, 0xe0, + 0x1f, 0xbe, 0x7d, 0xed, 0xd2, 0xb3, 0x97, 0xa5, 0xb9, 0x67, 0xc7, 0xa5, 0xd4, 0xf3, 0xe3, 0x52, + 0xea, 0xc5, 0x71, 0x29, 0xf5, 0xeb, 0x71, 0x29, 0xf5, 0xf5, 0xab, 0xd2, 0xdc, 0xf3, 0x57, 0xa5, + 0xb9, 0x17, 0xaf, 0x4a, 0x73, 0x0f, 0x60, 0xfc, 0x83, 0xd4, 0x5a, 0x90, 0xff, 0x3a, 0x5b, 0xbf, + 0x07, 0x00, 0x00, 0xff, 0xff, 0x38, 0xb4, 0xb7, 0xb4, 0x06, 0x0e, 0x00, 0x00, } diff --git a/pkg/kv/kvserver/kvserverpb/proposer_kv.proto b/pkg/kv/kvserver/kvserverpb/proposer_kv.proto index 3267b2a7b4a8..249b4e45ca58 100644 --- a/pkg/kv/kvserver/kvserverpb/proposer_kv.proto +++ b/pkg/kv/kvserver/kvserverpb/proposer_kv.proto @@ -240,8 +240,24 @@ message RaftCommand { // been added after it, and on removal, the assignment counters must be // updated accordingly. Managing retry of proposals becomes trickier as // well as that uproots whatever ordering was originally envisioned. + // + // This field is set through RaftCommandFooter hackery. uint64 max_lease_index = 4; + // The closed timestamp carried by this command. Once a follower is told to + // apply this command, it knows that there will be no further writes at + // timestamps <= closed_timestamp. Note that the command itself might + // represent a write at a lower timestamp, so the closed timestamp can only be + // used after this command is applied. + // + // The field can be zero, which is to be interpreted as no closed timestamp + // update. Some commands (lease requests) implicitly carry a closed timestamp + // in a command-specific way. If the value is not zero, the value is greater + // or equal to that of the previous commands (and all before it). + // + // This field is set through ClosedTimestampFooter hackery. + util.hlc.Timestamp closed_timestamp = 17 [(gogoproto.nullable) = false]; + reserved 3; // Proposer-evaluated KV mode. 
@@ -277,3 +293,10 @@ message RaftCommand { message RaftCommandFooter { uint64 max_lease_index = 4; } + +// ClosedTimestampFooter is similar to RaftCommandFooter, allowing the proposal +// buffer to fill in the closed_timestamp field after most of the proto has been +// marshaled already. +message ClosedTimestampFooter { + util.hlc.Timestamp closed_timestamp = 17 [(gogoproto.nullable) = false]; +} diff --git a/pkg/kv/kvserver/kvserverpb/state.pb.go b/pkg/kv/kvserver/kvserverpb/state.pb.go index d33ed0c2ccae..b81a0c1f0d0d 100644 --- a/pkg/kv/kvserver/kvserverpb/state.pb.go +++ b/pkg/kv/kvserver/kvserverpb/state.pb.go @@ -102,13 +102,20 @@ type ReplicaState struct { // [1]: migration.Manager // [2]: PurgeOutdatedReplicas Version *roachpb.Version `protobuf:"bytes,12,opt,name=version,proto3" json:"version,omitempty"` + // closed_timestamp is the largest timestamp that is known to have been + // closed. This means that the current leaseholder (if any) and any future + // leaseholder will not evaluate writes at or below this timestamp, and also + // that any in-flight commands that can still apply are writing at higher + // timestamps. Non-leaseholder replicas are free to serve "follower reads" at + // or below this timestamp. 
+ ClosedTimestamp hlc.Timestamp `protobuf:"bytes,13,opt,name=closed_timestamp,json=closedTimestamp,proto3" json:"closed_timestamp"` } func (m *ReplicaState) Reset() { *m = ReplicaState{} } func (m *ReplicaState) String() string { return proto.CompactTextString(m) } func (*ReplicaState) ProtoMessage() {} func (*ReplicaState) Descriptor() ([]byte, []int) { - return fileDescriptor_state_acd314f0f91777f8, []int{0} + return fileDescriptor_state_884b07f3590284d4, []int{0} } func (m *ReplicaState) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -180,7 +187,7 @@ func (m *RangeInfo) Reset() { *m = RangeInfo{} } func (m *RangeInfo) String() string { return proto.CompactTextString(m) } func (*RangeInfo) ProtoMessage() {} func (*RangeInfo) Descriptor() ([]byte, []int) { - return fileDescriptor_state_acd314f0f91777f8, []int{1} + return fileDescriptor_state_884b07f3590284d4, []int{1} } func (m *RangeInfo) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -216,7 +223,7 @@ func (m *RangeInfo_CTEntry) Reset() { *m = RangeInfo_CTEntry{} } func (m *RangeInfo_CTEntry) String() string { return proto.CompactTextString(m) } func (*RangeInfo_CTEntry) ProtoMessage() {} func (*RangeInfo_CTEntry) Descriptor() ([]byte, []int) { - return fileDescriptor_state_acd314f0f91777f8, []int{1, 0} + return fileDescriptor_state_884b07f3590284d4, []int{1, 0} } func (m *RangeInfo_CTEntry) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -252,7 +259,7 @@ func (m *LatchManagerInfo) Reset() { *m = LatchManagerInfo{} } func (m *LatchManagerInfo) String() string { return proto.CompactTextString(m) } func (*LatchManagerInfo) ProtoMessage() {} func (*LatchManagerInfo) Descriptor() ([]byte, []int) { - return fileDescriptor_state_acd314f0f91777f8, []int{2} + return fileDescriptor_state_884b07f3590284d4, []int{2} } func (m *LatchManagerInfo) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -329,6 +336,9 @@ func (this *ReplicaState) Equal(that interface{}) bool { if 
!this.Version.Equal(that1.Version) { return false } + if !this.ClosedTimestamp.Equal(&that1.ClosedTimestamp) { + return false + } return true } func (this *RangeInfo) Equal(that interface{}) bool { @@ -530,6 +540,14 @@ func (m *ReplicaState) MarshalTo(dAtA []byte) (int, error) { } i += n6 } + dAtA[i] = 0x6a + i++ + i = encodeVarintState(dAtA, i, uint64(m.ClosedTimestamp.Size())) + n7, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n7 return i, nil } @@ -551,11 +569,11 @@ func (m *RangeInfo) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintState(dAtA, i, uint64(m.ReplicaState.Size())) - n7, err := m.ReplicaState.MarshalTo(dAtA[i:]) + n8, err := m.ReplicaState.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n7 + i += n8 if m.LastIndex != 0 { dAtA[i] = 0x10 i++ @@ -599,19 +617,19 @@ func (m *RangeInfo) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x5a i++ i = encodeVarintState(dAtA, i, uint64(m.NewestClosedTimestamp.Size())) - n8, err := m.NewestClosedTimestamp.MarshalTo(dAtA[i:]) + n9, err := m.NewestClosedTimestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n8 + i += n9 dAtA[i] = 0x62 i++ i = encodeVarintState(dAtA, i, uint64(m.ActiveClosedTimestamp.Size())) - n9, err := m.ActiveClosedTimestamp.MarshalTo(dAtA[i:]) + n10, err := m.ActiveClosedTimestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n9 + i += n10 if m.RangefeedRegistrations != 0 { dAtA[i] = 0x68 i++ @@ -623,22 +641,22 @@ func (m *RangeInfo) MarshalTo(dAtA []byte) (int, error) { i = encodeVarintState(dAtA, i, uint64(m.ProposalQuotaBaseIndex)) } if len(m.ProposalQuotaReleaseQueue) > 0 { - dAtA11 := make([]byte, len(m.ProposalQuotaReleaseQueue)*10) - var j10 int + dAtA12 := make([]byte, len(m.ProposalQuotaReleaseQueue)*10) + var j11 int for _, num1 := range m.ProposalQuotaReleaseQueue { num := uint64(num1) for num >= 1<<7 { - dAtA11[j10] = uint8(uint64(num)&0x7f | 0x80) + dAtA12[j11] = 
uint8(uint64(num)&0x7f | 0x80) num >>= 7 - j10++ + j11++ } - dAtA11[j10] = uint8(num) - j10++ + dAtA12[j11] = uint8(num) + j11++ } dAtA[i] = 0x7a i++ - i = encodeVarintState(dAtA, i, uint64(j10)) - i += copy(dAtA[i:], dAtA11[:j10]) + i = encodeVarintState(dAtA, i, uint64(j11)) + i += copy(dAtA[i:], dAtA12[:j11]) } if m.TenantID != 0 { dAtA[i] = 0x80 @@ -681,11 +699,11 @@ func (m *RangeInfo_CTEntry) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x12 i++ i = encodeVarintState(dAtA, i, uint64(m.ClosedTimestamp.Size())) - n12, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) + n13, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n12 + i += n13 if m.MLAI != 0 { dAtA[i] = 0x18 i++ @@ -775,6 +793,8 @@ func (m *ReplicaState) Size() (n int) { l = m.Version.Size() n += 1 + l + sovState(uint64(l)) } + l = m.ClosedTimestamp.Size() + n += 1 + l + sovState(uint64(l)) return n } @@ -1167,6 +1187,36 @@ func (m *ReplicaState) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 13: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field ClosedTimestamp", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowState + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthState + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.ClosedTimestamp.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipState(dAtA[iNdEx:]) @@ -1952,76 +2002,76 @@ var ( ) func init() { - proto.RegisterFile("kv/kvserver/kvserverpb/state.proto", fileDescriptor_state_acd314f0f91777f8) + proto.RegisterFile("kv/kvserver/kvserverpb/state.proto", fileDescriptor_state_884b07f3590284d4) } -var 
fileDescriptor_state_acd314f0f91777f8 = []byte{ - // 1061 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x55, 0xcd, 0x6e, 0x1b, 0x37, - 0x10, 0xf6, 0x5a, 0x2b, 0x5b, 0xa2, 0xec, 0x58, 0x61, 0x9d, 0x78, 0xe3, 0xc4, 0x92, 0x21, 0xa0, - 0x85, 0x0b, 0xa4, 0x2b, 0xd4, 0xe9, 0x0f, 0xfa, 0x03, 0x14, 0x96, 0x5c, 0x14, 0x72, 0x6d, 0x23, - 0x66, 0x84, 0x1c, 0xd2, 0xc3, 0x82, 0xda, 0x1d, 0xaf, 0x16, 0x5a, 0x2d, 0x37, 0x24, 0xa5, 0xd8, - 0x79, 0x8a, 0x3e, 0x42, 0x5f, 0xa3, 0x6f, 0x60, 0xf4, 0xe4, 0x63, 0x4e, 0x42, 0x2b, 0x5f, 0x7a, - 0xeb, 0xbd, 0xa7, 0x80, 0xa4, 0x56, 0x96, 0x6c, 0x03, 0xc9, 0x8d, 0xfb, 0x7d, 0xdf, 0x0c, 0xc9, - 0xf9, 0x86, 0xb3, 0xa8, 0xd6, 0x1b, 0xd6, 0x7b, 0x43, 0x01, 0x7c, 0x08, 0x7c, 0xba, 0x48, 0x3b, - 0x75, 0x21, 0xa9, 0x04, 0x37, 0xe5, 0x4c, 0x32, 0x5c, 0xf5, 0x99, 0xdf, 0xe3, 0x8c, 0xfa, 0x5d, - 0xb7, 0x37, 0x74, 0x33, 0x91, 0x2b, 0x24, 0xe3, 0x34, 0x84, 0xb4, 0xb3, 0xf9, 0x78, 0xb2, 0xac, - 0x43, 0x12, 0x46, 0x09, 0xa4, 0x9d, 0x7a, 0x7f, 0xe8, 0xfb, 0x26, 0x7a, 0xf3, 0xb1, 0x8e, 0x4c, - 0x3b, 0xf5, 0x28, 0x91, 0xc0, 0x13, 0x1a, 0x7b, 0x9c, 0x9e, 0xca, 0x09, 0xf9, 0x30, 0x23, 0xfb, - 0x20, 0x69, 0x40, 0x25, 0x9d, 0xe0, 0x38, 0xc3, 0x67, 0x30, 0x67, 0x20, 0xa3, 0xb8, 0xde, 0x8d, - 0xfd, 0xba, 0x8c, 0xfa, 0x20, 0x24, 0xed, 0xa7, 0x13, 0x66, 0x3d, 0x64, 0x21, 0xd3, 0xcb, 0xba, - 0x5a, 0x19, 0xb4, 0xf6, 0xa7, 0x8d, 0x56, 0x08, 0xa4, 0x71, 0xe4, 0xd3, 0x17, 0xea, 0x36, 0xf8, - 0x29, 0xc2, 0x6a, 0x6b, 0x8f, 0xa6, 0x69, 0x1c, 0x41, 0xe0, 0x45, 0x49, 0x00, 0x67, 0x8e, 0xb5, - 0x6d, 0xed, 0xd8, 0xa4, 0xac, 0x98, 0x3d, 0x43, 0xb4, 0x14, 0x8e, 0x5d, 0xf4, 0x49, 0x0c, 0x54, - 0xc0, 0x0d, 0xf9, 0xa2, 0x96, 0xdf, 0xd7, 0xd4, 0x9c, 0xfe, 0x1b, 0x64, 0x07, 0x20, 0x7c, 0x27, - 0xb7, 0x6d, 0xed, 0x94, 0x76, 0x6b, 0xee, 0x75, 0xd1, 0x26, 0x77, 0x71, 0x09, 0x4d, 0x42, 0xd8, - 0x07, 0xe1, 0xf3, 0x28, 0x95, 0x8c, 0x13, 0xad, 0xc7, 0x2e, 0xca, 0xeb, 0x64, 0x8e, 0xad, 0x03, - 0x9d, 0x3b, 0x02, 0x0f, 0x15, 0x4f, 
0x8c, 0x0c, 0x1f, 0xa3, 0x35, 0xc9, 0x07, 0x89, 0x4f, 0x25, - 0x04, 0x9e, 0xb6, 0xc9, 0xc9, 0xeb, 0xc8, 0x4f, 0xef, 0xdc, 0xf2, 0x54, 0xb6, 0x33, 0xb5, 0xae, - 0x02, 0xb9, 0x27, 0xe7, 0xbe, 0xf1, 0x09, 0x5a, 0x09, 0x7d, 0x4f, 0x76, 0x39, 0x88, 0x2e, 0x8b, - 0x03, 0x67, 0x49, 0x27, 0xdb, 0x9a, 0x49, 0xa6, 0xea, 0xee, 0x76, 0x63, 0xdf, 0x6d, 0x67, 0x75, - 0x6f, 0xac, 0x8d, 0x47, 0xd5, 0xd2, 0x2f, 0xcd, 0x76, 0x16, 0x45, 0x4a, 0xa1, 0x3f, 0xfd, 0xc0, - 0x3f, 0xa0, 0xbc, 0x3a, 0x98, 0x70, 0x96, 0x6f, 0x1d, 0x6c, 0xd2, 0x29, 0x6e, 0xd6, 0x29, 0xee, - 0xd1, 0xcb, 0x66, 0x53, 0x1d, 0x44, 0x10, 0x13, 0x83, 0xbf, 0x46, 0x1b, 0x03, 0x11, 0x25, 0xe1, - 0xb4, 0xee, 0xfa, 0x8e, 0x5e, 0x0f, 0xce, 0x9d, 0xd2, 0xb6, 0xb5, 0x53, 0x20, 0xeb, 0x9a, 0x9e, - 0xd4, 0x5e, 0xdf, 0xe1, 0x57, 0x38, 0xc7, 0x5f, 0xa1, 0xe5, 0x21, 0x70, 0x11, 0xb1, 0xc4, 0x59, - 0xd1, 0xbb, 0x6e, 0xde, 0x51, 0x8e, 0x97, 0x46, 0x41, 0x32, 0xe9, 0xf7, 0xf6, 0xbf, 0x7f, 0x54, - 0xad, 0x03, 0xbb, 0x50, 0x28, 0x17, 0x0f, 0xec, 0x42, 0xb1, 0x8c, 0x0e, 0xec, 0x02, 0x2a, 0x97, - 0x6a, 0x7f, 0x15, 0x50, 0x51, 0xdb, 0xd5, 0x4a, 0x4e, 0x19, 0x3e, 0x32, 0xf7, 0x01, 0xdd, 0x2b, - 0xa5, 0xdd, 0x2f, 0xdc, 0x0f, 0x3c, 0x08, 0x77, 0xb6, 0xed, 0x1a, 0x85, 0x8b, 0x51, 0x75, 0xe1, - 0x72, 0x54, 0xb5, 0xcc, 0x0d, 0x01, 0x6f, 0x21, 0x14, 0x53, 0x21, 0xe7, 0x1a, 0xaa, 0xa8, 0x10, - 0xd3, 0x48, 0x55, 0x54, 0x4a, 0x06, 0x7d, 0x2f, 0x85, 0x24, 0x88, 0x92, 0x50, 0xf7, 0x93, 0x4d, - 0x50, 0x32, 0xe8, 0x3f, 0x37, 0x48, 0x26, 0x08, 0x38, 0x4b, 0x53, 0x08, 0xb4, 0xfb, 0x46, 0xb0, - 0x6f, 0x10, 0x5c, 0x43, 0xab, 0xba, 0xd1, 0x63, 0x16, 0x7a, 0x22, 0x7a, 0x0b, 0xda, 0xd3, 0x1c, - 0x29, 0x29, 0xf0, 0x90, 0x85, 0x2f, 0xa2, 0xb7, 0x80, 0x7f, 0x44, 0x9b, 0x34, 0x4d, 0x39, 0x3b, - 0x8b, 0xfa, 0xaa, 0xbc, 0x29, 0x67, 0x29, 0x13, 0x34, 0xf6, 0x5e, 0x0f, 0x98, 0xa4, 0xda, 0xb8, - 0x1c, 0x71, 0x66, 0x14, 0xcf, 0x27, 0x82, 0x13, 0xc5, 0xe3, 0xcf, 0xd0, 0x1a, 0x57, 0xe5, 0xf1, - 0xfa, 0xf4, 0xcc, 0xeb, 0x9c, 0x4b, 0x10, 0x4e, 0x41, 0x87, 0xac, 0x6a, 
0xf8, 0x88, 0x9e, 0x35, - 0x14, 0x88, 0xbf, 0x44, 0x0f, 0xe6, 0x4e, 0xe2, 0x49, 0x3e, 0x10, 0x12, 0x02, 0x07, 0x69, 0x2b, - 0xf1, 0xcc, 0x89, 0xda, 0x86, 0xc1, 0x29, 0xda, 0x48, 0xe0, 0x0d, 0x08, 0xe9, 0xf9, 0x31, 0x13, - 0x10, 0x78, 0xd3, 0xd7, 0xae, 0xfd, 0x2f, 0xed, 0xee, 0x7e, 0xb8, 0xfc, 0x99, 0x73, 0x6e, 0xb3, - 0xfd, 0x73, 0x22, 0xf9, 0x79, 0xc3, 0x56, 0x1e, 0x90, 0x07, 0x26, 0x71, 0x53, 0xe7, 0x9d, 0x36, - 0x33, 0xfe, 0x0d, 0x6d, 0x50, 0x5f, 0x46, 0x43, 0xb8, 0xbd, 0xe3, 0xca, 0xc7, 0x3c, 0x86, 0x49, - 0x72, 0x93, 0xe3, 0x66, 0xf2, 0x6f, 0xd1, 0x86, 0x2e, 0xc9, 0x29, 0x40, 0xe0, 0x71, 0x08, 0x23, - 0x21, 0x39, 0x95, 0x11, 0x4b, 0x84, 0xb3, 0xaa, 0x2b, 0xf6, 0x70, 0x4a, 0x93, 0x59, 0x16, 0x7f, - 0x87, 0x1e, 0xcd, 0x9b, 0xe2, 0x75, 0xd4, 0x34, 0x32, 0x4d, 0x73, 0xcf, 0x84, 0xa6, 0xb3, 0xa6, - 0x34, 0xa8, 0x00, 0xd3, 0x41, 0x3f, 0xa1, 0x27, 0x37, 0x42, 0x39, 0x98, 0x59, 0xf6, 0x7a, 0x00, - 0x03, 0x70, 0xd6, 0xb6, 0x73, 0x3b, 0x39, 0xf2, 0x68, 0x2e, 0x9a, 0x18, 0xc5, 0x89, 0x12, 0xe0, - 0xcf, 0x51, 0x51, 0x42, 0x42, 0x13, 0xe9, 0x45, 0x81, 0x53, 0x56, 0xfd, 0xd5, 0x58, 0x19, 0x8f, - 0xaa, 0x85, 0xb6, 0x06, 0x5b, 0xfb, 0xa4, 0x60, 0xe8, 0x56, 0xa0, 0x9b, 0x99, 0xf9, 0x3d, 0x4f, - 0xd2, 0x4e, 0x0c, 0xce, 0xfd, 0x6d, 0x6b, 0xa7, 0x48, 0x8a, 0x0a, 0x69, 0x2b, 0x60, 0xf3, 0x3f, - 0x0b, 0x2d, 0x4f, 0x4c, 0xc0, 0xaf, 0xd0, 0x72, 0xc2, 0x02, 0x50, 0x39, 0xd5, 0x43, 0xca, 0x37, - 0xf6, 0xc6, 0xa3, 0xea, 0xd2, 0x31, 0x0b, 0xa0, 0xb5, 0xff, 0xff, 0xa8, 0xfa, 0x2c, 0x8c, 0x64, - 0x77, 0xd0, 0x71, 0x7d, 0xd6, 0xaf, 0x4f, 0xeb, 0x1d, 0x74, 0xae, 0xd7, 0xf5, 0xb4, 0x17, 0xd6, - 0xb3, 0xa7, 0x6c, 0xc2, 0xc8, 0x92, 0xca, 0xd8, 0x0a, 0xf0, 0x31, 0x2a, 0xdf, 0x32, 0x6f, 0xf1, - 0xe3, 0xcd, 0x5b, 0xf3, 0x6f, 0xd8, 0xf6, 0x04, 0xd9, 0xfd, 0x98, 0x46, 0xfa, 0xf5, 0xe5, 0x1a, - 0x85, 0xf1, 0xa8, 0x6a, 0x1f, 0x1d, 0xee, 0xb5, 0x88, 0x46, 0xf1, 0x3a, 0xca, 0x43, 0xca, 0xfc, - 0xae, 0x9e, 0xd9, 0x39, 0x62, 0x3e, 0xcc, 0x30, 0x99, 0x8e, 0x14, 0xbb, 0x9c, 0x37, 0x23, 0xa5, - 0x46, 0x50, 
0xf9, 0x90, 0x4a, 0xbf, 0x7b, 0x44, 0x13, 0x1a, 0x02, 0xd7, 0x23, 0x65, 0x0b, 0x21, - 0x0e, 0x34, 0xf0, 0x7c, 0x36, 0x48, 0xa4, 0x2e, 0x47, 0x8e, 0x14, 0x15, 0xd2, 0x54, 0x80, 0x7a, - 0xe2, 0x6f, 0x78, 0x24, 0x61, 0xc2, 0x2f, 0x6a, 0x1e, 0x69, 0x48, 0x0b, 0x1a, 0x4f, 0x2f, 0xfe, - 0xa9, 0x2c, 0x5c, 0x8c, 0x2b, 0xd6, 0xe5, 0xb8, 0x62, 0xbd, 0x1b, 0x57, 0xac, 0xbf, 0xc7, 0x15, - 0xeb, 0xf7, 0xab, 0xca, 0xc2, 0xe5, 0x55, 0x65, 0xe1, 0xdd, 0x55, 0x65, 0xe1, 0x15, 0xba, 0xfe, - 0x9b, 0x77, 0x96, 0xf4, 0x1f, 0xf1, 0xd9, 0xfb, 0x00, 0x00, 0x00, 0xff, 0xff, 0xd2, 0x8a, 0xae, - 0x49, 0xee, 0x07, 0x00, 0x00, +var fileDescriptor_state_884b07f3590284d4 = []byte{ + // 1068 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x9c, 0x56, 0xcf, 0x4f, 0x1b, 0x47, + 0x14, 0x66, 0xe3, 0x35, 0xd8, 0x63, 0x08, 0xce, 0x14, 0xc2, 0x86, 0x04, 0x1b, 0x59, 0x6a, 0x45, + 0xa5, 0x74, 0xad, 0x92, 0xfe, 0x50, 0x7f, 0x48, 0x15, 0x36, 0x55, 0x65, 0x0a, 0x28, 0x4c, 0xac, + 0x1c, 0xd2, 0xc3, 0x6a, 0xbc, 0xfb, 0x58, 0xaf, 0xbc, 0xde, 0xd9, 0xcc, 0x8c, 0x1d, 0xc8, 0x7f, + 0xd0, 0x5b, 0xff, 0x84, 0xfe, 0x39, 0xa8, 0x27, 0x8e, 0x39, 0x59, 0xad, 0xb9, 0xf4, 0xd6, 0x7b, + 0x4f, 0xd5, 0xcc, 0x78, 0x8d, 0x0d, 0x48, 0x89, 0x7a, 0x9b, 0xfd, 0xde, 0xf7, 0xde, 0xcc, 0x7c, + 0xef, 0x9b, 0x67, 0xa3, 0x5a, 0x6f, 0x58, 0xef, 0x0d, 0x05, 0xf0, 0x21, 0xf0, 0xe9, 0x22, 0xed, + 0xd4, 0x85, 0xa4, 0x12, 0xdc, 0x94, 0x33, 0xc9, 0x70, 0xd5, 0x67, 0x7e, 0x8f, 0x33, 0xea, 0x77, + 0xdd, 0xde, 0xd0, 0xcd, 0x48, 0xae, 0x90, 0x8c, 0xd3, 0x10, 0xd2, 0xce, 0xe6, 0xe3, 0xc9, 0xb2, + 0x0e, 0x49, 0x18, 0x25, 0x90, 0x76, 0xea, 0xfd, 0xa1, 0xef, 0x9b, 0xec, 0xcd, 0xc7, 0x3a, 0x33, + 0xed, 0xd4, 0xa3, 0x44, 0x02, 0x4f, 0x68, 0xec, 0x71, 0x7a, 0x2a, 0x27, 0xc1, 0x87, 0x59, 0xb0, + 0x0f, 0x92, 0x06, 0x54, 0xd2, 0x09, 0x8e, 0x33, 0x7c, 0x06, 0x73, 0x06, 0x32, 0x8a, 0xeb, 0xdd, + 0xd8, 0xaf, 0xcb, 0xa8, 0x0f, 0x42, 0xd2, 0x7e, 0x3a, 0x89, 0xac, 0x85, 0x2c, 0x64, 0x7a, 0x59, + 0x57, 0x2b, 
0x83, 0xd6, 0x7e, 0xcd, 0xa3, 0x65, 0x02, 0x69, 0x1c, 0xf9, 0xf4, 0x85, 0xba, 0x0d, + 0x7e, 0x8a, 0xb0, 0xda, 0xda, 0xa3, 0x69, 0x1a, 0x47, 0x10, 0x78, 0x51, 0x12, 0xc0, 0x99, 0x63, + 0x6d, 0x5b, 0x3b, 0x36, 0x29, 0xab, 0xc8, 0x9e, 0x09, 0xb4, 0x14, 0x8e, 0x5d, 0xf4, 0x51, 0x0c, + 0x54, 0xc0, 0x0d, 0xfa, 0x3d, 0x4d, 0x7f, 0xa0, 0x43, 0x73, 0xfc, 0xaf, 0x90, 0x1d, 0x80, 0xf0, + 0x9d, 0xdc, 0xb6, 0xb5, 0x53, 0xda, 0xad, 0xb9, 0xd7, 0xa2, 0x4d, 0xee, 0xe2, 0x12, 0x9a, 0x84, + 0xb0, 0x0f, 0xc2, 0xe7, 0x51, 0x2a, 0x19, 0x27, 0x9a, 0x8f, 0x5d, 0x94, 0xd7, 0xc5, 0x1c, 0x5b, + 0x27, 0x3a, 0x77, 0x24, 0x1e, 0xaa, 0x38, 0x31, 0x34, 0x7c, 0x8c, 0x56, 0x25, 0x1f, 0x24, 0x3e, + 0x95, 0x10, 0x78, 0xba, 0x4d, 0x4e, 0x5e, 0x67, 0x7e, 0x7c, 0xe7, 0x96, 0xa7, 0xb2, 0x9d, 0xb1, + 0xb5, 0x0a, 0xe4, 0xbe, 0x9c, 0xfb, 0xc6, 0x27, 0x68, 0x39, 0xf4, 0x3d, 0xd9, 0xe5, 0x20, 0xba, + 0x2c, 0x0e, 0x9c, 0x45, 0x5d, 0x6c, 0x6b, 0xa6, 0x98, 0xd2, 0xdd, 0xed, 0xc6, 0xbe, 0xdb, 0xce, + 0x74, 0x6f, 0xac, 0x8e, 0x47, 0xd5, 0xd2, 0x4f, 0xcd, 0x76, 0x96, 0x45, 0x4a, 0xa1, 0x3f, 0xfd, + 0xc0, 0xdf, 0xa1, 0xbc, 0x3a, 0x98, 0x70, 0x96, 0x6e, 0x1d, 0x6c, 0xe2, 0x14, 0x37, 0x73, 0x8a, + 0x7b, 0xf4, 0xb2, 0xd9, 0x54, 0x07, 0x11, 0xc4, 0xe4, 0xe0, 0x2f, 0xd1, 0xc6, 0x40, 0x44, 0x49, + 0x38, 0xd5, 0x5d, 0xdf, 0xd1, 0xeb, 0xc1, 0xb9, 0x53, 0xda, 0xb6, 0x76, 0x0a, 0x64, 0x4d, 0x87, + 0x27, 0xda, 0xeb, 0x3b, 0xfc, 0x0c, 0xe7, 0xf8, 0x0b, 0xb4, 0x34, 0x04, 0x2e, 0x22, 0x96, 0x38, + 0xcb, 0x7a, 0xd7, 0xcd, 0x3b, 0xe4, 0x78, 0x69, 0x18, 0x24, 0xa3, 0xe2, 0x63, 0x54, 0xf6, 0x63, + 0x26, 0x20, 0xf0, 0xa6, 0x9e, 0x72, 0x56, 0x3e, 0x44, 0x00, 0xfb, 0x62, 0x54, 0x5d, 0x20, 0xab, + 0x26, 0x79, 0x0a, 0x7f, 0x6b, 0xff, 0xfd, 0x7b, 0xd5, 0x3a, 0xb0, 0x0b, 0x85, 0x72, 0xf1, 0xc0, + 0x2e, 0x14, 0xcb, 0xe8, 0xc0, 0x2e, 0xa0, 0x72, 0xa9, 0xf6, 0x47, 0x01, 0x15, 0x75, 0xfb, 0x5b, + 0xc9, 0x29, 0xc3, 0x47, 0x46, 0x1f, 0xd0, 0xde, 0x2b, 0xed, 0x7e, 0xe6, 0xbe, 0xe7, 0x81, 0xb9, + 0xb3, 0x36, 0x6e, 0x14, 0xd4, 0xd6, 0x97, 0xa3, 
0xaa, 0x65, 0x14, 0x03, 0xbc, 0x85, 0x50, 0x4c, + 0x85, 0x9c, 0x33, 0x68, 0x51, 0x21, 0xc6, 0x98, 0x55, 0x54, 0x4a, 0x06, 0x7d, 0x2f, 0x85, 0x24, + 0x88, 0x92, 0x50, 0xfb, 0xd3, 0x26, 0x28, 0x19, 0xf4, 0x9f, 0x1b, 0x24, 0x23, 0x04, 0x9c, 0xa5, + 0x29, 0x04, 0xda, 0x4d, 0x86, 0xb0, 0x6f, 0x10, 0x5c, 0x43, 0x2b, 0xfa, 0xe1, 0xc4, 0x2c, 0xf4, + 0x44, 0xf4, 0x16, 0xb4, 0x47, 0x72, 0xa4, 0xa4, 0xc0, 0x43, 0x16, 0xbe, 0x88, 0xde, 0x02, 0xfe, + 0x1e, 0x6d, 0xd2, 0x34, 0xe5, 0xec, 0x2c, 0xea, 0xab, 0x76, 0xa5, 0x9c, 0xa5, 0x4c, 0xd0, 0xd8, + 0x7b, 0x3d, 0x60, 0x92, 0x6a, 0x23, 0xe4, 0x88, 0x33, 0xc3, 0x78, 0x3e, 0x21, 0x9c, 0xa8, 0x38, + 0xfe, 0x04, 0xad, 0x72, 0x25, 0x8f, 0xd7, 0xa7, 0x67, 0x5e, 0xe7, 0x5c, 0x82, 0x70, 0x0a, 0x3a, + 0x65, 0x45, 0xc3, 0x47, 0xf4, 0xac, 0xa1, 0x40, 0xfc, 0x39, 0x5a, 0x9f, 0x3b, 0x89, 0x27, 0xf9, + 0x40, 0x48, 0x08, 0x1c, 0xa4, 0xad, 0x81, 0x67, 0x4e, 0xd4, 0x36, 0x11, 0x9c, 0xa2, 0x8d, 0x04, + 0xde, 0x80, 0x90, 0xde, 0xad, 0x4e, 0x97, 0xb4, 0xfc, 0xbb, 0xef, 0x97, 0x3f, 0xeb, 0x9c, 0xdb, + 0x6c, 0xff, 0x98, 0x48, 0x7e, 0x3e, 0x69, 0xff, 0xba, 0x29, 0xdc, 0x9c, 0x37, 0x01, 0xfe, 0x05, + 0x6d, 0x50, 0x5f, 0x46, 0x43, 0xb8, 0xbd, 0xe3, 0xf2, 0x87, 0x7b, 0x6b, 0xdd, 0xd4, 0xb8, 0x59, + 0xfc, 0x6b, 0xb4, 0xa1, 0x25, 0x39, 0x05, 0x08, 0x3c, 0x0e, 0x61, 0x24, 0x24, 0xa7, 0x32, 0x62, + 0x89, 0xd0, 0xc6, 0xcd, 0x91, 0x87, 0xd3, 0x30, 0x99, 0x8d, 0xe2, 0x6f, 0xd0, 0xa3, 0xf9, 0xa6, + 0x78, 0x1d, 0x35, 0xdd, 0x8c, 0x69, 0xee, 0x9b, 0xd4, 0x74, 0xb6, 0x29, 0x0d, 0x2a, 0xc0, 0x38, + 0xe8, 0x07, 0xf4, 0xe4, 0x46, 0x2a, 0x07, 0x33, 0x1b, 0x5f, 0x0f, 0x60, 0x00, 0xce, 0xea, 0x76, + 0x6e, 0x27, 0x47, 0x1e, 0xcd, 0x65, 0x13, 0xc3, 0x38, 0x51, 0x04, 0xfc, 0x29, 0x2a, 0x4a, 0x48, + 0x68, 0x22, 0xbd, 0x28, 0x70, 0xca, 0xca, 0x5f, 0x8d, 0xe5, 0xf1, 0xa8, 0x5a, 0x68, 0x6b, 0xb0, + 0xb5, 0x4f, 0x0a, 0x26, 0xdc, 0x0a, 0xb4, 0x99, 0x99, 0xdf, 0xf3, 0x24, 0xed, 0xc4, 0xe0, 0x3c, + 0xd8, 0xb6, 0x76, 0x8a, 0xa4, 0xa8, 0x90, 0xb6, 0x02, 0x36, 0xff, 0xb1, 0xd0, 0xd2, 
0xa4, 0x09, + 0xf8, 0x15, 0x5a, 0x4a, 0x58, 0x00, 0xaa, 0xa6, 0x7a, 0x48, 0xf9, 0xc6, 0xde, 0x78, 0x54, 0x5d, + 0x3c, 0x66, 0x01, 0xb4, 0xf6, 0xff, 0x1d, 0x55, 0x9f, 0x85, 0x91, 0xec, 0x0e, 0x3a, 0xae, 0xcf, + 0xfa, 0xf5, 0xa9, 0xde, 0x41, 0xe7, 0x7a, 0x5d, 0x4f, 0x7b, 0x61, 0x3d, 0x1b, 0x0d, 0x26, 0x8d, + 0x2c, 0xaa, 0x8a, 0xad, 0xe0, 0xce, 0xc1, 0x70, 0xef, 0xff, 0x0f, 0x06, 0xfc, 0x04, 0xd9, 0xfd, + 0x98, 0x46, 0xfa, 0xf5, 0xe5, 0x1a, 0x85, 0xf1, 0xa8, 0x6a, 0x1f, 0x1d, 0xee, 0xb5, 0x88, 0x46, + 0xf1, 0x1a, 0xca, 0x43, 0xca, 0xfc, 0xae, 0xfe, 0x0d, 0xc8, 0x11, 0xf3, 0x61, 0x86, 0xc9, 0x74, + 0xa4, 0xd8, 0xe5, 0xbc, 0x19, 0x29, 0x35, 0x82, 0xca, 0x87, 0x54, 0xfa, 0xdd, 0x23, 0x9a, 0xd0, + 0x10, 0xb8, 0x1e, 0x29, 0x5b, 0x08, 0x71, 0xa0, 0x81, 0xe7, 0xb3, 0x41, 0x22, 0xb5, 0x1c, 0x39, + 0x52, 0x54, 0x48, 0x53, 0x01, 0xea, 0x89, 0xbf, 0xe1, 0x91, 0x84, 0x49, 0xfc, 0x9e, 0x8e, 0x23, + 0x0d, 0x69, 0x42, 0xe3, 0xe9, 0xc5, 0x5f, 0x95, 0x85, 0x8b, 0x71, 0xc5, 0xba, 0x1c, 0x57, 0xac, + 0x77, 0xe3, 0x8a, 0xf5, 0xe7, 0xb8, 0x62, 0xfd, 0x76, 0x55, 0x59, 0xb8, 0xbc, 0xaa, 0x2c, 0xbc, + 0xbb, 0xaa, 0x2c, 0xbc, 0x42, 0xd7, 0xff, 0x0e, 0x3a, 0x8b, 0xfa, 0x17, 0xf6, 0xd9, 0x7f, 0x01, + 0x00, 0x00, 0xff, 0xff, 0x23, 0xeb, 0x99, 0xc3, 0x3e, 0x08, 0x00, 0x00, } diff --git a/pkg/kv/kvserver/kvserverpb/state.proto b/pkg/kv/kvserver/kvserverpb/state.proto index f8288300aeac..45e07446177e 100644 --- a/pkg/kv/kvserver/kvserverpb/state.proto +++ b/pkg/kv/kvserver/kvserverpb/state.proto @@ -100,6 +100,14 @@ message ReplicaState { // [2]: PurgeOutdatedReplicas roachpb.Version version = 12; + // closed_timestamp is the largest timestamp that is known to have been + // closed. This means that the current leaseholder (if any) and any future + // leaseholder will not evaluate writes at or below this timestamp, and also + // that any in-flight commands that can still apply are writing at higher + // timestamps. 
Non-leaseholder replicas are free to serve "follower reads" at + // or below this timestamp. + util.hlc.Timestamp closed_timestamp = 13 [(gogoproto.nullable) = false]; + reserved 8, 9, 10; } diff --git a/pkg/kv/kvserver/replica.go b/pkg/kv/kvserver/replica.go index 8014cb82bfeb..5af4d8f06856 100644 --- a/pkg/kv/kvserver/replica.go +++ b/pkg/kv/kvserver/replica.go @@ -360,6 +360,10 @@ type Replica struct { // consumed, commands are proposed through Raft and moved to the // proposals map. // + // The propBuf is the one closing timestamps, so evaluating writes must be + // registered with the propBuf through TrackEvaluatingRequest before their + // write timestamp is decided. + // // Access to proposalBuf must occur *without* holding the mutex. // Instead, the buffer internally holds a reference to mu and will use // it appropriately. diff --git a/pkg/kv/kvserver/replica_application_result.go b/pkg/kv/kvserver/replica_application_result.go index a2761aa41bbd..9477b8cc0cec 100644 --- a/pkg/kv/kvserver/replica_application_result.go +++ b/pkg/kv/kvserver/replica_application_result.go @@ -205,6 +205,16 @@ func (r *Replica) tryReproposeWithNewLeaseIndex( minTS, untrack := r.store.cfg.ClosedTimestamp.Tracker.Track(ctx) defer untrack(ctx, 0, 0, 0) // covers all error paths below + + // We need to track the request again in order to protect its timestamp until + // it gets reproposed. + // TODO(andrei): Only track if the request consults the ts cache. Some + // requests (e.g. EndTxn) don't care about closed timestamps. + minTS2, tok := r.mu.proposalBuf.TrackEvaluatingRequest(ctx, p.Request.WriteTimestamp()) + defer tok.DoneIfNotMoved(ctx) + minTS.Forward(minTS2) + + // NB: p.Request.Timestamp reflects the action of ba.SetActiveTimestamp. // The IsIntentWrite condition matches the similar logic for caring // about the closed timestamp cache in applyTimestampCache(). 
if p.Request.IsIntentWrite() && p.Request.WriteTimestamp().LessEq(minTS) { @@ -222,7 +232,7 @@ func (r *Replica) tryReproposeWithNewLeaseIndex( // Some tests check for this log message in the trace. log.VEventf(ctx, 2, "retry: proposalIllegalLeaseIndex") - maxLeaseIndex, pErr := r.propose(ctx, p) + maxLeaseIndex, pErr := r.propose(ctx, p, tok.Move(ctx)) if pErr != nil { return pErr } diff --git a/pkg/kv/kvserver/replica_application_state_machine.go b/pkg/kv/kvserver/replica_application_state_machine.go index cfd7cef7a35b..7793ca520fdd 100644 --- a/pkg/kv/kvserver/replica_application_state_machine.go +++ b/pkg/kv/kvserver/replica_application_state_machine.go @@ -376,8 +376,10 @@ type replicaAppBatch struct { // replicaState other than Stats are overwritten completely rather than // updated in-place. stats enginepb.MVCCStats - // maxTS is the maximum clock timestamp that any command that was staged in - // this batch was evaluated at. + // maxTS is the maximum clock timestamp that this command carries. Timestamps + // come from the writes that are part of this command, and also from the + // closed timestamp carried by this command. Synthetic timestamps are not + // registered here. maxTS hlc.ClockTimestamp // migrateToAppliedStateKey tracks whether any command in the batch // triggered a migration to the replica applied state key. 
If so, this @@ -428,7 +430,8 @@ func (b *replicaAppBatch) Stage(cmdI apply.Command) (apply.CheckedCommand, error return nil, makeNonDeterministicFailure("applied index jumped from %d to %d", applied, idx) } if log.V(4) { - log.Infof(ctx, "processing command %x: maxLeaseIndex=%d", cmd.idKey, cmd.raftCmd.MaxLeaseIndex) + log.Infof(ctx, "processing command %x: raftIndex=%d maxLeaseIndex=%d closedts=%s", + cmd.idKey, cmd.ent.Index, cmd.raftCmd.MaxLeaseIndex, cmd.raftCmd.ClosedTimestamp) } // Determine whether the command should be applied to the replicated state @@ -442,7 +445,20 @@ func (b *replicaAppBatch) Stage(cmdI apply.Command) (apply.CheckedCommand, error cmd.raftCmd.ReplicatedEvalResult = kvserverpb.ReplicatedEvalResult{} cmd.raftCmd.WriteBatch = nil cmd.raftCmd.LogicalOpLog = nil + cmd.raftCmd.ClosedTimestamp.Reset() } else { + // Assert that we're not writing under the closed timestamp. We can only do + // these checks on IsIntentWrite requests, since others (for example, + // EndTxn) can operate below the closed timestamp. In turn, this means that + // we can only assert on the leaseholder, as only that replica has + // cmd.proposal.Request filled in. + if cmd.IsLocal() && cmd.proposal.Request.IsIntentWrite() { + wts := cmd.proposal.Request.WriteTimestamp() + if wts.LessEq(b.state.ClosedTimestamp) { + return nil, makeNonDeterministicFailure("writing at %s below closed ts: %s (%s)", + wts, b.state.ClosedTimestamp.String(), cmd.proposal.Request.String()) + } + } log.Event(ctx, "applying command") } @@ -623,7 +639,7 @@ func (b *replicaAppBatch) runPreApplyTriggersAfterStagingWriteBatch( // // Alternatively if we discover that the RHS has already been removed // from this store, clean up its data. 
- splitPreApply(ctx, b.batch, res.Split.SplitTrigger, b.r) + splitPreApply(ctx, b.batch, res.Split.SplitTrigger, b.r, cmd.raftCmd.ClosedTimestamp) // The rangefeed processor will no longer be provided logical ops for // its entire range, so it needs to be shut down and all registrations @@ -807,6 +823,18 @@ func (b *replicaAppBatch) stageTrivialReplicatedEvalResult( if leaseAppliedIndex := cmd.leaseIndex; leaseAppliedIndex != 0 { b.state.LeaseAppliedIndex = leaseAppliedIndex } + if cts := cmd.raftCmd.ClosedTimestamp; !cts.IsEmpty() { + if cts.Less(b.state.ClosedTimestamp) { + log.Fatalf(ctx, + "closed timestamp regressing from %s to %s when applying command %x", + b.state.ClosedTimestamp, cts, cmd.idKey) + } + b.state.ClosedTimestamp = cts + if clockTS, ok := cts.TryToClockTimestamp(); ok { + b.maxTS.Forward(clockTS) + } + } + res := cmd.replicatedResult() // Special-cased MVCC stats handling to exploit commutativity of stats delta @@ -860,10 +888,11 @@ func (b *replicaAppBatch) ApplyToStateMachine(ctx context.Context) error { b.batch.Close() b.batch = nil - // Update the replica's applied indexes and mvcc stats. + // Update the replica's applied indexes, mvcc stats and closed timestamp. r.mu.Lock() r.mu.state.RaftAppliedIndex = b.state.RaftAppliedIndex r.mu.state.LeaseAppliedIndex = b.state.LeaseAppliedIndex + closedTimestampUpdated := r.mu.state.ClosedTimestamp.Forward(b.state.ClosedTimestamp) prevStats := *r.mu.state.Stats *r.mu.state.Stats = *b.state.Stats @@ -879,6 +908,13 @@ func (b *replicaAppBatch) ApplyToStateMachine(ctx context.Context) error { needsTruncationByLogSize := r.needsRaftLogTruncationLocked() tenantID := r.mu.tenantID r.mu.Unlock() + if closedTimestampUpdated { + // TODO(andrei): Pass in the new closed timestamp to + // r.handleClosedTimestampUpdateRaftMuLocked directly after the old closed + // ts tracker goes away. Until then we can't do it; we have to let the + // method consult r.maxClosed(). 
+ r.handleClosedTimestampUpdateRaftMuLocked(ctx) + } // Record the stats delta in the StoreMetrics. deltaStats := *b.state.Stats @@ -926,7 +962,8 @@ func (b *replicaAppBatch) addAppliedStateKeyToBatch(ctx context.Context) error { // Set the range applied state, which includes the last applied raft and // lease index along with the mvcc stats, all in one key. if err := loader.SetRangeAppliedState( - ctx, b.batch, b.state.RaftAppliedIndex, b.state.LeaseAppliedIndex, b.state.Stats, + ctx, b.batch, b.state.RaftAppliedIndex, b.state.LeaseAppliedIndex, + b.state.Stats, &b.state.ClosedTimestamp, ); err != nil { return wrapWithNonDeterministicFailure(err, "unable to set range applied state") } diff --git a/pkg/kv/kvserver/replica_follower_read.go b/pkg/kv/kvserver/replica_follower_read.go index ddfea460cc21..a842acf4b5e9 100644 --- a/pkg/kv/kvserver/replica_follower_read.go +++ b/pkg/kv/kvserver/replica_follower_read.go @@ -122,13 +122,19 @@ func (r *Replica) maxClosedRLocked(ctx context.Context) (_ hlc.Timestamp, ok boo lai := r.mu.state.LeaseAppliedIndex lease := *r.mu.state.Lease initialMaxClosed := r.mu.initialMaxClosed + replicaStateClosed := r.mu.state.ClosedTimestamp if lease.Expiration != nil { return hlc.Timestamp{}, false } + // Look at the legacy closed timestamp propagation mechanism. maxClosed := r.store.cfg.ClosedTimestamp.Provider.MaxClosed( lease.Replica.NodeID, r.RangeID, ctpb.Epoch(lease.Epoch), ctpb.LAI(lai)) maxClosed.Forward(lease.Start.ToTimestamp()) maxClosed.Forward(initialMaxClosed) + + // Look at the "new" closed timestamp propagation mechanism. 
+ maxClosed.Forward(replicaStateClosed) + return maxClosed, true } diff --git a/pkg/kv/kvserver/replica_init.go b/pkg/kv/kvserver/replica_init.go index de73dd090f99..c64027597c26 100644 --- a/pkg/kv/kvserver/replica_init.go +++ b/pkg/kv/kvserver/replica_init.go @@ -18,6 +18,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/abortspan" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/tracker" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/split" @@ -95,8 +96,9 @@ func newUnloadedReplica( }) r.mu.proposals = map[kvserverbase.CmdIDKey]*ProposalData{} r.mu.checksums = map[uuid.UUID]ReplicaChecksum{} - r.mu.proposalBuf.Init((*replicaProposer)(r)) + r.mu.proposalBuf.Init((*replicaProposer)(r), tracker.NewLockfreeTracker(), r.Clock(), r.ClusterSettings()) r.mu.proposalBuf.testing.allowLeaseProposalWhenNotLeader = store.cfg.TestingKnobs.AllowLeaseRequestProposalsWhenNotLeader + r.mu.proposalBuf.testing.dontCloseTimestamps = store.cfg.TestingKnobs.DontCloseTimestamps if leaseHistoryMaxEntries > 0 { r.leaseHistory = newLeaseHistory() diff --git a/pkg/kv/kvserver/replica_proposal.go b/pkg/kv/kvserver/replica_proposal.go index d4af67968b54..5ccc84b1025f 100644 --- a/pkg/kv/kvserver/replica_proposal.go +++ b/pkg/kv/kvserver/replica_proposal.go @@ -114,6 +114,15 @@ type ProposalData struct { // here; this could be replaced with isLease and isChangeReplicas // booleans. Request *roachpb.BatchRequest + + // leaseStatus represents the lease under which the Request was evaluated and + // under which this proposal is being made. For lease requests, this is the + // previous lease that the requester was aware of. + leaseStatus kvserverpb.LeaseStatus + + // tok identifies the request to the propBuf. Once the proposal is made, the + // token will be used to stop tracking this request. 
+ tok TrackedRequestToken } // finishApplication is called when a command application has finished. The @@ -327,6 +336,10 @@ const ( func (r *Replica) leasePostApplyLocked( ctx context.Context, prevLease *roachpb.Lease, newLease *roachpb.Lease, jumpOpt leaseJumpOption, ) { + // Note that we actually install the lease further down in this method. + // Everything we do before then doesn't need to worry about requests being + // evaluated under the new lease. + // Sanity check to make sure that the lease sequence is moving in the right // direction. if s1, s2 := prevLease.Sequence, newLease.Sequence; s1 != 0 { @@ -412,6 +425,10 @@ func (r *Replica) leasePostApplyLocked( // to not matter). r.concMgr.OnRangeLeaseUpdated(newLease.Sequence, iAmTheLeaseHolder) + // Inform the propBuf about the new lease so that it can initialize its closed + // timestamp tracking. + r.mu.proposalBuf.OnLeaseChangeLocked(iAmTheLeaseHolder, r.mu.state.ClosedTimestamp) + // Ordering is critical here. We only install the new lease after we've // checked for an in-progress merge and updated the timestamp cache. If the // ordering were reversed, it would be possible for requests to see the new @@ -833,6 +850,7 @@ func (r *Replica) requestToProposal( ctx context.Context, idKey kvserverbase.CmdIDKey, ba *roachpb.BatchRequest, + st kvserverpb.LeaseStatus, lul hlc.Timestamp, latchSpans *spanset.SpanSet, ) (*ProposalData, *roachpb.Error) { @@ -840,11 +858,12 @@ func (r *Replica) requestToProposal( // Fill out the results even if pErr != nil; we'll return the error below. 
proposal := &ProposalData{ - ctx: ctx, - idKey: idKey, - doneCh: make(chan proposalResult, 1), - Local: &res.Local, - Request: ba, + ctx: ctx, + idKey: idKey, + doneCh: make(chan proposalResult, 1), + Local: &res.Local, + Request: ba, + leaseStatus: st, } if needConsensus { diff --git a/pkg/kv/kvserver/replica_proposal_buf.go b/pkg/kv/kvserver/replica_proposal_buf.go index f5b31c1db79a..f991c35f41c9 100644 --- a/pkg/kv/kvserver/replica_proposal_buf.go +++ b/pkg/kv/kvserver/replica_proposal_buf.go @@ -15,7 +15,13 @@ import ( "sync" "sync/atomic" + "github.com/cockroachdb/cockroach/pkg/clusterversion" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/tracker" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/errors" @@ -105,8 +111,8 @@ func (c *propBufCnt) read() propBufCntRes { return propBufCntRes(atomic.LoadUint64((*uint64)(c))) } -// propBuf is a multi-producer, single-consumer buffer for Raft proposals. The -// buffer supports concurrent insertion of proposals. +// propBuf is a multi-producer, single-consumer buffer for Raft proposals on a +// range. The buffer supports concurrent insertion of proposals. // // The proposal buffer also handles the assignment of maximum lease indexes for // commands. Picking the maximum lease index for commands is done atomically @@ -116,6 +122,11 @@ func (c *propBufCnt) read() propBufCntRes { // get out of sync then some commands would necessarily be rejected beneath Raft // during application (see checkForcedErr). // +// The proposal buffer also is in charge of advancing the respective range's +// closed timestamp by assigning closed timestamp to proposals. 
For this +// purpose, new requests starting evaluation need to synchronize with the +// proposal buffer (see TrackEvaluatingRequest). +// // Proposals enter the buffer via Insert() or ReinsertLocked(). They are moved // into Raft via FlushLockedWithRaftGroup() when the buffer fills up, or during // the next handleRaftReady iteration, whichever happens earlier. This @@ -127,13 +138,35 @@ func (c *propBufCnt) read() propBufCntRes { // initialization. Methods called "...Locked" and "...RLocked" expect the // corresponding locker() and rlocker() to be held. type propBuf struct { - p proposer - full sync.Cond + p proposer + clock *hlc.Clock + settings *cluster.Settings + // evalTracker tracks currently-evaluating requests, making sure that + // proposals coming out of the propBuf don't carry closed timestamps below + // currently-evaluating requests. + evalTracker tracker.Tracker + full sync.Cond liBase uint64 cnt propBufCnt arr propBufArray + // assignedClosedTimestamp is the largest "closed timestamp" - i.e. the largest + // timestamp that was communicated to other replicas as closed, representing a + // promise that this leaseholder will not evaluate writes below this timestamp + // any more. + // + // Note that this field is not used by the local replica (or by anybody) + // directly to decide whether follower reads can be served. See + // ReplicaState.closed_timestamp. + // + // This field can be read under the proposer's read lock, and written to under + // the write lock. + assignedClosedTimestamp hlc.Timestamp + + // A buffer used to avoid allocations. + tmpClosedTimestampFooter kvserverpb.ClosedTimestampFooter + testing struct { // leaseIndexFilter can be used by tests to override the max lease index // assigned to a proposal by returning a non-zero lease index. @@ -151,6 +184,8 @@ type propBuf struct { // heartbeats and then expect other replicas to take the lease without // worrying about Raft).
allowLeaseProposalWhenNotLeader bool + // dontCloseTimestamps inhibits the closing of timestamps. + dontCloseTimestamps bool } } @@ -178,6 +213,12 @@ type proposer interface { destroyed() destroyStatus leaseAppliedIndex() uint64 enqueueUpdateCheck() + closeTimestampPolicy() roachpb.RangeClosedTimestampPolicy + // raftTransportClosedTimestampEnabled returns whether the range has switched + // to the Raft-based closed timestamp transport. + // TODO(andrei): This shouldn't be needed any more in 21.2, once the Raft + // transport is unconditionally enabled. + raftTransportClosedTimestampEnabled() bool // The following require the proposer to hold an exclusive lock. withGroupLocked(func(proposerRaft) error) error registerProposalLocked(*ProposalData) @@ -205,9 +246,14 @@ type proposerRaft interface { } // Init initializes the proposal buffer and binds it to the provided proposer. -func (b *propBuf) Init(p proposer) { +func (b *propBuf) Init( + p proposer, tracker tracker.Tracker, clock *hlc.Clock, settings *cluster.Settings, +) { b.p = p b.full.L = p.rlocker() + b.clock = clock + b.evalTracker = tracker + b.settings = settings b.liBase = p.leaseAppliedIndex() } @@ -225,12 +271,20 @@ func (b *propBuf) LastAssignedLeaseIndexRLocked() uint64 { // proposer's Raft group. The method accepts the Raft command as part of the // ProposalData struct, along with a partial encoding of the command in the // provided byte slice. It is expected that the byte slice contains marshaled -// information for all of the command's fields except for its max lease index, -// which is assigned by the method when the command is sequenced in the buffer. -// It is also expected that the byte slice has sufficient capacity to marshal -// the maximum lease index field into it. After adding the proposal to the +// information for all of the command's fields except for MaxLeaseIndex, and +// ClosedTimestamp. MaxLeaseIndex is assigned here, when the command is +// sequenced in the buffer. 
ClosedTimestamp will be assigned later, when the +// buffer is flushed. It is also expected that the byte slice has sufficient +// capacity to marshal these fields into it. After adding the proposal to the // buffer, the assigned max lease index is returned. -func (b *propBuf) Insert(ctx context.Context, p *ProposalData, data []byte) (uint64, error) { +// +// Insert takes ownership of the supplied token; the caller should tok.Move() it +// into this method. It will be used to untrack the request once it comes out of the +// proposal buffer. +func (b *propBuf) Insert( + ctx context.Context, p *ProposalData, data []byte, tok TrackedRequestToken, +) (uint64, error) { + defer tok.DoneIfNotMoved(ctx) // Request a new max lease applied index for any request that isn't itself // a lease request. Lease requests don't need unique max lease index values // because their max lease indexes are ignored. See checkForcedErr. @@ -252,6 +306,10 @@ func (b *propBuf) Insert(ctx context.Context, p *ProposalData, data []byte) (uin } // Assign the command's maximum lease index. + // TODO(andrei): Move this to Flush in 21.2, to mirror the assignment of the + // closed timestamp. For now it's needed here because Insert needs to return + // the MLAI for the benefit of the "old" closed timestamp tracker. When moving + // to flush, make sure to not reassign it on reproposals. p.command.MaxLeaseIndex = b.liBase + res.leaseIndexOffset() if filter := b.testing.leaseIndexFilter; filter != nil { if override, err := filter(p); err != nil { @@ -277,7 +335,9 @@ func (b *propBuf) Insert(ctx context.Context, p *ProposalData, data []byte) (uin return 0, err } - // Insert the proposal into the buffer's array. + // Insert the proposal into the buffer's array. The buffer now takes ownership + // of the token. + p.tok = tok.Move(ctx) b.insertIntoArray(p, res.arrayIndex()) // Return the maximum lease index that the proposal's command was given. 
@@ -458,6 +518,8 @@ func (b *propBuf) FlushLockedWithRaftGroup( } } + closedTSTarget := b.computeClosedTimestampTarget() + // Remember the first error that we see when proposing the batch. We don't // immediately return this error because we want to finish clearing out the // buffer and registering each of the proposals with the proposer, but we @@ -516,6 +578,11 @@ func (b *propBuf) FlushLockedWithRaftGroup( } } + // Exit the tracker. + reproposal := !p.tok.stillTracked() + if !reproposal { + p.tok.doneLocked(ctx) + } // Raft processing bookkeeping. b.p.registerProposalLocked(p) @@ -538,6 +605,20 @@ func (b *propBuf) FlushLockedWithRaftGroup( continue } + // Figure out what closed timestamp this command will carry. + // + // If this is a reproposal, we don't reassign the closed timestamp. We + // could, in principle, but we'd have to make a copy of the encoded command + // as to not modify the copy that's already stored in the local replica's + // raft entry cache. + if !reproposal { + err := b.assignClosedTimestampToProposalLocked(ctx, p, closedTSTarget) + if err != nil { + firstErr = err + continue + } + } + // Coordinate proposing the command to etcd/raft. if crt := p.command.ReplicatedEvalResult.ChangeReplicas; crt != nil { // Flush any previously batched (non-conf change) proposals to @@ -596,6 +677,121 @@ func (b *propBuf) FlushLockedWithRaftGroup( return used, proposeBatch(raftGroup, b.p.replicaID(), ents) } +// computeClosedTimestampTarget computes the timestamp we'd like to close for +// our range. Note that we might not be able to ultimately close this timestamp +// if there's requests in flight. 
+func (b *propBuf) computeClosedTimestampTarget() hlc.Timestamp { + now := b.clock.Now().WallTime + closedTSPolicy := b.p.closeTimestampPolicy() + var closedTSTarget hlc.Timestamp + switch closedTSPolicy { + case roachpb.LAG_BY_CLUSTER_SETTING, roachpb.LEAD_FOR_GLOBAL_READS: + targetDuration := closedts.TargetDuration.Get(&b.settings.SV) + closedTSTarget = hlc.Timestamp{WallTime: now - targetDuration.Nanoseconds()} + // TODO(andrei,nvanbenschoten): Resolve all the issues preventing us from closing + // timestamps in the future (which, in turn, forces future-time writes on + // global ranges), and enable the proper logic below. + //case roachpb.LEAD_FOR_GLOBAL_READS: + // closedTSTarget = hlc.Timestamp{ + // WallTime: now + 2*b.clock.MaxOffset().Nanoseconds(), + // Synthetic: true, + // } + } + return closedTSTarget +} + +// assignClosedTimestampToProposalLocked assigns a closed timestamp to be carried by +// an outgoing proposal. +// +// This shouldn't be called for reproposals. +func (b *propBuf) assignClosedTimestampToProposalLocked( + ctx context.Context, p *ProposalData, closedTSTarget hlc.Timestamp, +) error { + if b.testing.dontCloseTimestamps { + return nil + } + // If the Raft transport is not enabled yet, bail. If the range has already + // started publishing closed timestamps using Raft, then it doesn't matter + // whether this node found out about the version bump yet. + if !b.p.raftTransportClosedTimestampEnabled() && + !b.settings.Version.IsActive(ctx, clusterversion.ClosedTimestampsRaftTransport) { + return nil + } + + // For lease requests, we make a distinction between lease extensions and + // brand new leases. Brand new leases carry a closed timestamp equal to the + // lease start time. Lease extensions don't get a closed timestamp. This is + // because they're proposed without a MLAI, and so two lease extensions might + // commute and both apply which would result in a closed timestamp regression. 
+ // The command application side doesn't bother protecting against such + // regressions. + // Lease transfers behave like regular proposals. Note that transfers + // carry a summary of the timestamp cache, so the new leaseholder will be + // aware of all the reads performed by the previous leaseholder. + isBrandNewLeaseRequest := false + if p.Request.IsLeaseRequest() { + // We read the lease from the ReplicatedEvalResult, not from leaseReq, because the + // former is more up to date, having been modified by the evaluation. + newLease := p.command.ReplicatedEvalResult.State.Lease + oldLease := p.leaseStatus.Lease + leaseExtension := newLease.Sequence == oldLease.Sequence + if leaseExtension { + return nil + } + isBrandNewLeaseRequest = true + // For brand new leases, we close the lease start time. Since this proposing + // replica is not the leaseholder, the previous target is meaningless. + closedTSTarget = newLease.Start.ToTimestamp() + } + if !isBrandNewLeaseRequest { + lb := b.evalTracker.LowerBound(ctx) + if !lb.IsEmpty() { + // If the tracker told us that requests are currently evaluating at + // timestamps >= lb, then we can close up to lb.Prev(). We use FloorPrev() + // to get rid of the logical ticks; we try to not publish closed ts with + // logical ticks when there's no good reason for them. + closedTSTarget.Backward(lb.FloorPrev()) + } + // We can't close timestamps above the current lease's expiration(*). This is + // in order to keep the monotonic property of closed timestamps carried by + // commands, which makes for straight-forward closed timestamp management on + // the command application side: if we allowed requests to close timestamps + // above the lease's expiration, then a future LeaseRequest proposed by + // another node might carry a lower closed timestamp (i.e. the lease start + // time). 
+ // (*) If we've previously closed a higher timestamp under a previous lease + // with a higher expiration, then requests will keep carrying that closed + // timestamp; we won't regress the closed timestamp. + // + // HACK(andrei): We declare the lease expiration to be synthetic by fiat, + // because it frequently is synthetic even though currently it's not marked + // as such. See the TODO in Timestamp.Add() about the work remaining to + // properly mark these timestamps as synthetic. We need to make sure it's + // synthetic here so that the results of Backward() can be synthetic. + leaseExpiration := p.leaseStatus.Expiration().WithSynthetic(true) + closedTSTarget.Backward(leaseExpiration) + } + + // We're about to close closedTSTarget. The propBuf needs to remember that in + // order for incoming requests to be bumped above it (through + // TrackEvaluatingRequest). + b.forwardClosedTimestampLocked(closedTSTarget) + // Fill in the closed ts in the proposal. + f := &b.tmpClosedTimestampFooter + f.ClosedTimestamp = b.assignedClosedTimestamp + footerLen := f.Size() + if log.ExpensiveLogEnabled(ctx, 4) { + log.VEventf(ctx, 4, "attaching closed timestamp %s to proposal %x", b.assignedClosedTimestamp, p.idKey) + } + + preLen := len(p.encodedCommand) + // Here we rely on p.encodedCommand to have been allocated with enough + // capacity for this footer. + p.encodedCommand = p.encodedCommand[:preLen+footerLen] + _, err := protoutil.MarshalTo(f, p.encodedCommand[preLen:]) + return err +} + func (b *propBuf) forwardLeaseIndexBase(v uint64) { if b.liBase < v { b.liBase = v @@ -636,6 +832,109 @@ func (b *propBuf) FlushLockedWithoutProposing(ctx context.Context) { } } +// OnLeaseChangeLocked is called when a new lease is applied to this range. +// closedTS is the range's closed timestamp after the new lease was applied. The +// closed timestamp tracked by the propBuf is updated accordingly.
+func (b *propBuf) OnLeaseChangeLocked(leaseOwned bool, closedTS hlc.Timestamp) { + if leaseOwned { + b.forwardClosedTimestampLocked(closedTS) + } else { + // Zero out to avoid any confusion. + b.assignedClosedTimestamp = hlc.Timestamp{} + } +} + +// forwardClosedTimestampLocked forwards the closed timestamp tracked by the propBuf. +func (b *propBuf) forwardClosedTimestampLocked(closedTS hlc.Timestamp) { + b.assignedClosedTimestamp.Forward(closedTS) +} + +// EvaluatingRequestsCount returns the count of requests currently tracked by +// the propBuf. +func (b *propBuf) EvaluatingRequestsCount() int { + b.p.rlocker().Lock() + defer b.p.rlocker().Unlock() + return b.evalTracker.Count() +} + +// TrackedRequestToken represents the result of propBuf.TrackEvaluatingRequest: +// a token to be later used for untracking the respective request. +// +// This token tries to make it easy to pass responsibility for untracking. The +// intended pattern is: +// tok := propBuf.TrackEvaluatingRequest() +// defer tok.DoneIfNotMoved() +// fn(tok.Move()) +type TrackedRequestToken struct { + done bool + tok tracker.RemovalToken + b *propBuf +} + +// DoneIfNotMoved untracks the request if Move had not been called on the token +// previously. If Move had been called, this is a no-op. +// +// Note that if this ends up actually destroying the token (i.e. if Move() had +// not been called previously) this takes r.mu, so it's pretty expensive. On +// happy paths, the token is expected to have been Move()d, and a batch of +// tokens are expected to be destroyed at once by the propBuf (which calls +// doneLocked).
+func (t *TrackedRequestToken) DoneIfNotMoved(ctx context.Context) { + if t.done { + return + } + t.b.p.locker().Lock() + t.doneLocked(ctx) + t.b.p.locker().Unlock() +} + +func (t *TrackedRequestToken) doneLocked(ctx context.Context) { + if t.done { + log.Fatalf(ctx, "duplicate Done() call") + } + t.done = true + t.b.evalTracker.Untrack(ctx, t.tok) +} + +// stillTracked returns true if no Done* method has been called. +func (t *TrackedRequestToken) stillTracked() bool { + return !t.done +} + +// Move returns a new token which can untrack the request. The original token is +// neutered; calling DoneIfNotMoved on it becomes a no-op. +func (t *TrackedRequestToken) Move(ctx context.Context) TrackedRequestToken { + if t.done { + log.Fatalf(ctx, "attempting to Move() after Done() call") + } + cpy := *t + t.done = true + return cpy +} + +// TrackEvaluatingRequest atomically starts tracking an evaluating request and +// returns the minimum timestamp at which this request can write. The tracked +// request is identified by its tentative write timestamp. After calling this, +// the caller must bump the write timestamp to at least the returned minTS. +// +// The returned token must be used to eventually remove this request from the +// tracked set by calling tok.DoneIfNotMoved(); the removal will allow timestamps above +// its write timestamp to be closed. If the evaluation results in a proposal, +// the token will make it back to this propBuf through Insert; in this case it +// will be the propBuf itself that ultimately stops tracking the request once +// the proposal is flushed from the buffer.
+func (b *propBuf) TrackEvaluatingRequest( + ctx context.Context, wts hlc.Timestamp, +) (minTS hlc.Timestamp, _ TrackedRequestToken) { + b.p.rlocker().Lock() + defer b.p.rlocker().Unlock() + + minTS = b.assignedClosedTimestamp.Next() + wts.Forward(minTS) + tok := b.evalTracker.Track(ctx, wts) + return minTS, TrackedRequestToken{tok: tok, b: b} +} + const propBufArrayMinSize = 4 const propBufArrayMaxSize = 256 const propBufArrayShrinkDelay = 16 @@ -702,6 +1001,8 @@ func (a *propBufArray) adjustSize(used int) { // replicaProposer implements the proposer interface. type replicaProposer Replica +var _ proposer = &replicaProposer{} + func (rp *replicaProposer) locker() sync.Locker { return &rp.mu.RWMutex } @@ -726,6 +1027,14 @@ func (rp *replicaProposer) enqueueUpdateCheck() { rp.store.enqueueRaftUpdateCheck(rp.RangeID) } +func (rp *replicaProposer) closeTimestampPolicy() roachpb.RangeClosedTimestampPolicy { + return (*Replica)(rp).closedTimestampPolicyRLocked() +} + +func (rp *replicaProposer) raftTransportClosedTimestampEnabled() bool { + return !(*Replica)(rp).mu.state.ClosedTimestamp.IsEmpty() +} + func (rp *replicaProposer) withGroupLocked(fn func(raftGroup proposerRaft) error) error { // Pass true for mayCampaignOnWake because we're about to propose a command. 
return (*Replica)(rp).withRaftGroupLocked(true, func(raftGroup *raft.RawNode) (bool, error) { diff --git a/pkg/kv/kvserver/replica_proposal_buf_test.go b/pkg/kv/kvserver/replica_proposal_buf_test.go index 116009a2acb9..07b5b159bd15 100644 --- a/pkg/kv/kvserver/replica_proposal_buf_test.go +++ b/pkg/kv/kvserver/replica_proposal_buf_test.go @@ -17,10 +17,16 @@ import ( "testing" "time" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/closedts/tracker" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverbase" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/kvserverpb" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/errors" "github.com/stretchr/testify/assert" @@ -53,16 +59,31 @@ type testProposer struct { // If leaderReplicaInDescriptor is set, this specifies what type of replica it // is. Some types of replicas are not eligible to get a lease. leaderReplicaType roachpb.ReplicaType + rangePolicy roachpb.RangeClosedTimestampPolicy } +var _ proposer = &testProposer{} + type testProposerRaft struct { status raft.BasicStatus + // lastProps are the command that the propBuf flushed last. + lastProps []kvserverpb.RaftCommand } -var _ proposerRaft = testProposerRaft{} +var _ proposerRaft = &testProposerRaft{} -func (t testProposerRaft) Step(raftpb.Message) error { - // TODO(andrei, nvanbenschoten): Capture the message and test against it. +func (t *testProposerRaft) Step(msg raftpb.Message) error { + if msg.Type != raftpb.MsgProp { + return nil + } + // Decode and save all the commands. 
+ t.lastProps = make([]kvserverpb.RaftCommand, len(msg.Entries)) + for i, e := range msg.Entries { + _ /* idKey */, encodedCommand := DecodeRaftCommand(e.Data) + if err := protoutil.Unmarshal(encodedCommand, &t.lastProps[i]); err != nil { + return err + } + } return nil } @@ -99,6 +120,14 @@ func (t *testProposer) enqueueUpdateCheck() { t.enqueued++ } +func (t *testProposer) closeTimestampPolicy() roachpb.RangeClosedTimestampPolicy { + return t.rangePolicy +} + +func (t *testProposer) raftTransportClosedTimestampEnabled() bool { + return true +} + func (t *testProposer) withGroupLocked(fn func(proposerRaft) error) error { // Note that t.raftGroup can be nil, which FlushLockedWithRaftGroup supports. return fn(t.raftGroup) @@ -150,18 +179,55 @@ func (t *testProposer) rejectProposalWithRedirectLocked( t.onRejectProposalWithRedirectLocked(prop, redirectTo) } -func newPropData(leaseReq bool) (*ProposalData, []byte) { +// proposalCreator holds on to a lease and creates proposals using it. +type proposalCreator struct { + lease kvserverpb.LeaseStatus +} + +func (pc proposalCreator) newPutProposal() (*ProposalData, []byte) { var ba roachpb.BatchRequest - if leaseReq { - ba.Add(&roachpb.RequestLeaseRequest{}) - } else { - ba.Add(&roachpb.PutRequest{}) + ba.Add(&roachpb.PutRequest{}) + return pc.newProposal(ba) +} + +func (pc proposalCreator) newLeaseProposal(lease roachpb.Lease) (*ProposalData, []byte) { + var ba roachpb.BatchRequest + ba.Add(&roachpb.RequestLeaseRequest{Lease: lease}) + return pc.newProposal(ba) +} + +func (pc proposalCreator) newProposal(ba roachpb.BatchRequest) (*ProposalData, []byte) { + var lease *roachpb.Lease + r, ok := ba.GetArg(roachpb.RequestLease) + if ok { + lease = &r.(*roachpb.RequestLeaseRequest).Lease + } + p := &ProposalData{ + ctx: context.Background(), + idKey: kvserverbase.CmdIDKey("test-cmd"), + command: &kvserverpb.RaftCommand{ + ReplicatedEvalResult: kvserverpb.ReplicatedEvalResult{ + State: &kvserverpb.ReplicaState{Lease: lease}, + 
}, + }, + Request: &ba, + leaseStatus: pc.lease, + } + return p, pc.encodeProposal(p) +} + +func (pc proposalCreator) encodeProposal(p *ProposalData) []byte { + cmdLen := p.command.Size() + needed := raftCommandPrefixLen + cmdLen + + kvserverpb.MaxRaftCommandFooterSize() + + kvserverpb.MaxClosedTimestampFooterSize() + data := make([]byte, raftCommandPrefixLen, needed) + encodeRaftCommandPrefix(data, raftVersionStandard, p.idKey) + data = data[:raftCommandPrefixLen+p.command.Size()] + if _, err := protoutil.MarshalTo(p.command, data[raftCommandPrefixLen:]); err != nil { + panic(err) } - return &ProposalData{ - ctx: context.Background(), - command: &kvserverpb.RaftCommand{}, - Request: &ba, - }, make([]byte, 0, kvserverpb.MaxRaftCommandFooterSize()) + return data } // TestProposalBuffer tests the basic behavior of the Raft proposal buffer. @@ -172,14 +238,23 @@ func TestProposalBuffer(t *testing.T) { var p testProposer var b propBuf - b.Init(&p) + var pc proposalCreator + clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) + b.Init(&p, tracker.NewLockfreeTracker(), clock, cluster.MakeTestingClusterSettings()) // Insert propBufArrayMinSize proposals. The buffer should not be flushed. num := propBufArrayMinSize for i := 0; i < num; i++ { leaseReq := i == 3 - pd, data := newPropData(leaseReq) - mlai, err := b.Insert(ctx, pd, data) + var pd *ProposalData + var data []byte + if leaseReq { + pd, data = pc.newLeaseProposal(roachpb.Lease{}) + } else { + pd, data = pc.newPutProposal() + } + _, tok := b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + mlai, err := b.Insert(ctx, pd, data, tok) require.Nil(t, err) if leaseReq { expMlai := uint64(i) @@ -196,12 +271,14 @@ func TestProposalBuffer(t *testing.T) { require.Equal(t, 1, p.enqueued) require.Equal(t, 0, p.registered) } + require.Equal(t, num, b.evalTracker.Count()) // Insert another proposal. This causes the buffer to flush. Doing so // results in a lease applied index being skipped, which is harmless. 
// Remember that the lease request above did not receive a lease index. - pd, data := newPropData(false) - mlai, err := b.Insert(ctx, pd, data) + pd, data := pc.newPutProposal() + _, tok := b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + mlai, err := b.Insert(ctx, pd, data, tok) require.Nil(t, err) expMlai := uint64(num + 1) require.Equal(t, expMlai, mlai) @@ -212,6 +289,7 @@ func TestProposalBuffer(t *testing.T) { require.Equal(t, num, p.registered) require.Equal(t, uint64(num), b.liBase) require.Equal(t, 2*propBufArrayMinSize, b.arr.len()) + require.Equal(t, 1, b.evalTracker.Count()) // Increase the proposer's applied lease index and flush. The buffer's // lease index offset should jump up. @@ -224,7 +302,8 @@ func TestProposalBuffer(t *testing.T) { // Insert one more proposal. The lease applied index should adjust to // the increase accordingly. - mlai, err = b.Insert(ctx, pd, data) + _, tok = b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + mlai, err = b.Insert(ctx, pd, data, tok) require.Nil(t, err) expMlai = p.lai + 1 require.Equal(t, expMlai, mlai) @@ -252,7 +331,9 @@ func TestProposalBufferConcurrentWithDestroy(t *testing.T) { var p testProposer var b propBuf - b.Init(&p) + var pc proposalCreator + clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) + b.Init(&p, tracker.NewLockfreeTracker(), clock, cluster.MakeTestingClusterSettings()) mlais := make(map[uint64]struct{}) dsErr := errors.New("destroyed") @@ -263,8 +344,9 @@ func TestProposalBufferConcurrentWithDestroy(t *testing.T) { for i := 0; i < concurrency; i++ { g.Go(func() error { for { - pd, data := newPropData(false) - mlai, err := b.Insert(ctx, pd, data) + pd, data := pc.newPutProposal() + _, tok := b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + mlai, err := b.Insert(ctx, pd, data, tok) if err != nil { if errors.Is(err, dsErr) { return nil @@ -323,12 +405,16 @@ func TestProposalBufferRegistersAllOnProposalError(t *testing.T) { var p testProposer var b propBuf - b.Init(&p) + var pc 
proposalCreator + clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) + b.Init(&p, tracker.NewLockfreeTracker(), clock, cluster.MakeTestingClusterSettings()) num := propBufArrayMinSize + toks := make([]TrackedRequestToken, num) for i := 0; i < num; i++ { - pd, data := newPropData(false) - _, err := b.Insert(ctx, pd, data) + pd, data := pc.newPutProposal() + _, toks[i] = b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + _, err := b.Insert(ctx, pd, data, toks[i]) require.Nil(t, err) } require.Equal(t, num, b.Len()) @@ -346,6 +432,7 @@ func TestProposalBufferRegistersAllOnProposalError(t *testing.T) { err := b.flushLocked(ctx) require.Equal(t, propErr, err) require.Equal(t, num, p.registered) + require.Zero(t, b.evalTracker.Count()) } // TestProposalBufferRegistrationWithInsertionErrors tests that if during @@ -358,12 +445,22 @@ func TestProposalBufferRegistrationWithInsertionErrors(t *testing.T) { var p testProposer var b propBuf - b.Init(&p) + var pc proposalCreator + clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) + b.Init(&p, tracker.NewLockfreeTracker(), clock, cluster.MakeTestingClusterSettings()) num := propBufArrayMinSize / 2 + toks1 := make([]TrackedRequestToken, num) for i := 0; i < num; i++ { - pd, data := newPropData(i%2 == 0) - _, err := b.Insert(ctx, pd, data) + var pd *ProposalData + var data []byte + if i%2 == 0 { + pd, data = pc.newLeaseProposal(roachpb.Lease{}) + } else { + pd, data = pc.newPutProposal() + } + _, toks1[i] = b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + _, err := b.Insert(ctx, pd, data, toks1[i]) require.Nil(t, err) } @@ -372,9 +469,17 @@ func TestProposalBufferRegistrationWithInsertionErrors(t *testing.T) { return 0, insertErr } + toks2 := make([]TrackedRequestToken, num) for i := 0; i < num; i++ { - pd, data := newPropData(i%2 == 0) - _, err := b.Insert(ctx, pd, data) + var pd *ProposalData + var data []byte + if i%2 == 0 { + pd, data = pc.newLeaseProposal(roachpb.Lease{}) + } else { + pd, data = pc.newPutProposal() + } + 
_, toks2[i] = b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + _, err := b.Insert(ctx, pd, data, toks2[i]) require.Equal(t, insertErr, err) } require.Equal(t, 2*num, b.Len()) @@ -383,6 +488,7 @@ func TestProposalBufferRegistrationWithInsertionErrors(t *testing.T) { require.Equal(t, 0, b.Len()) require.Equal(t, num, p.registered) + require.Zero(t, b.evalTracker.Count()) } // TestPropBufCnt tests the basic behavior of the counter maintained by the @@ -494,6 +600,7 @@ func TestProposalBufferRejectLeaseAcqOnFollower(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { var p testProposer + var pc proposalCreator // p.replicaID() is hardcoded; it'd better be hardcoded to what this test // expects. require.Equal(t, self, uint64(p.replicaID())) @@ -519,7 +626,7 @@ func TestProposalBufferRejectLeaseAcqOnFollower(t *testing.T) { Lead: tc.leader, }, } - r := testProposerRaft{ + r := &testProposerRaft{ status: raftStatus, } p.raftGroup = r @@ -527,10 +634,12 @@ func TestProposalBufferRejectLeaseAcqOnFollower(t *testing.T) { p.leaderReplicaType = tc.leaderRepType var b propBuf - b.Init(&p) + clock := hlc.NewClock(hlc.UnixNano, time.Nanosecond) + b.Init(&p, tracker.NewLockfreeTracker(), clock, cluster.MakeTestingClusterSettings()) - pd, data := newPropData(true /* leaseReq */) - _, err := b.Insert(ctx, pd, data) + pd, data := pc.newLeaseProposal(roachpb.Lease{}) + _, tok := b.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + _, err := b.Insert(ctx, pd, data, tok) require.NoError(t, err) require.NoError(t, b.flushLocked(ctx)) if tc.expRejection { @@ -541,3 +650,273 @@ func TestProposalBufferRejectLeaseAcqOnFollower(t *testing.T) { }) } } + +func TestProposalBufferComputeClosedTimestampTarget(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + const nowNanos = 100 + const maxOffsetNanos = 20 + manualClock := hlc.NewManualClock(nowNanos) + clock := hlc.NewClock(manualClock.UnixNano, maxOffsetNanos) + + const lagTargetNanos = 10 + st := 
cluster.MakeTestingClusterSettings() + closedts.TargetDuration.Override(&st.SV, lagTargetNanos) + + for _, tc := range []struct { + rangePolicy roachpb.RangeClosedTimestampPolicy + expClosedTSTarget hlc.Timestamp + }{ + { + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + expClosedTSTarget: hlc.Timestamp{WallTime: nowNanos - lagTargetNanos}, + }, + { + rangePolicy: roachpb.LEAD_FOR_GLOBAL_READS, + expClosedTSTarget: hlc.Timestamp{WallTime: nowNanos - lagTargetNanos}, + // TODO(andrei, nvanbenschoten): What we should be expecting here is the following, once + // the propBuf starts properly implementing this timestamp closing policy: + // expClosedTSTarget: hlc.Timestamp{WallTime: nowNanos + 2*maxOffsetNanos, Synthetic: true}, + }, + } { + t.Run(tc.rangePolicy.String(), func(t *testing.T) { + var p testProposer + p.rangePolicy = tc.rangePolicy + var b propBuf + b.Init(&p, tracker.NewLockfreeTracker(), clock, st) + require.Equal(t, tc.expClosedTSTarget, b.computeClosedTimestampTarget()) + }) + } +} + +// Test that the propBuf properly assigns closed timestamps to proposals being +// flushed out of it. Each subtest proposes one command and checks for the +// expected closed timestamp being written to the proposal by the propBuf. 
+func TestProposalBufferClosedTimestamp(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + ctx := context.Background() + + mc := hlc.NewManualClock((1613588135 * time.Second).Nanoseconds()) + clock := hlc.NewClock(mc.UnixNano, time.Nanosecond) + st := cluster.MakeTestingClusterSettings() + closedts.TargetDuration.Override(&st.SV, time.Second) + now := clock.Now() + newLeaseStart := now.MustToClockTimestamp() + nowMinusClosedLag := hlc.Timestamp{ + WallTime: mc.UnixNano() - closedts.TargetDuration.Get(&st.SV).Nanoseconds(), + } + nowMinusTwiceClosedLag := hlc.Timestamp{ + WallTime: mc.UnixNano() - 2*closedts.TargetDuration.Get(&st.SV).Nanoseconds(), + } + expiredLeaseTimestamp := hlc.Timestamp{WallTime: mc.UnixNano() - 1000} + someClosedTS := hlc.Timestamp{WallTime: mc.UnixNano() - 2000} + + type reqType int + checkClosedTS := func(t *testing.T, r *testProposerRaft, exp hlc.Timestamp) { + require.Len(t, r.lastProps, 1) + require.Equal(t, exp, r.lastProps[0].ClosedTimestamp) + } + + // The lease that the proposals are made under. + curLease := roachpb.Lease{ + Epoch: 0, // Expiration-based lease. + Sequence: 1, + Start: hlc.ClockTimestamp{}, + // Expiration is filled by each test. + Expiration: nil, + } + + const ( + regularWrite reqType = iota + // newLease means that the request is a lease acquisition (new lease or + // lease extension). + newLease + leaseTransfer + ) + + for _, tc := range []struct { + name string + + reqType reqType + // The lower bound of all currently-evaluating requests. We can't close this + // or above. + trackerLowerBound hlc.Timestamp + // The expiration of the current lease. The closed timestamp of most + // proposal is upper-bounded by this, which matters for + // LEAD_FOR_GLOBAL_READS ranges (on other ranges the propBuf would never + // like to close a timestamp above the current lease expiration because it + // wouldn't be processing commands if the lease is expired). 
+ leaseExp hlc.Timestamp + rangePolicy roachpb.RangeClosedTimestampPolicy + // The highest closed timestamp that the propBuf has previously attached to + // a proposal. The propBuf should never propose a new closedTS below this. + prevClosedTimestamp hlc.Timestamp + + // lease is used when reqType = newLease. This will be the lease being + // proposed. + lease roachpb.Lease + + expClosed hlc.Timestamp + }{ + { + name: "basic", + reqType: regularWrite, + trackerLowerBound: hlc.Timestamp{}, + leaseExp: hlc.MaxTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + prevClosedTimestamp: hlc.Timestamp{}, + expClosed: nowMinusClosedLag, + }, + { + // The request tracker will prevent us from closing below its lower bound. + name: "not closing below evaluating requests", + reqType: regularWrite, + trackerLowerBound: nowMinusTwiceClosedLag, + leaseExp: hlc.MaxTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + prevClosedTimestamp: hlc.Timestamp{}, + expClosed: nowMinusTwiceClosedLag.FloorPrev(), + }, + { + // Like the basic test, except that we can't close timestamp below what + // we've already closed previously. + name: "no regression", + reqType: regularWrite, + trackerLowerBound: hlc.Timestamp{}, + leaseExp: hlc.MaxTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + prevClosedTimestamp: someClosedTS, + expClosed: someClosedTS, + }, + { + name: "brand new lease", + reqType: newLease, + lease: roachpb.Lease{ + // Higher sequence => this is a brand new lease, not an extension. + Sequence: curLease.Sequence + 1, + Start: newLeaseStart, + }, + trackerLowerBound: hlc.Timestamp{}, + // The current lease can be expired; we won't backtrack the closed + // timestamp to this expiration. + leaseExp: expiredLeaseTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + expClosed: newLeaseStart.ToTimestamp(), + }, + { + name: "lease extension", + reqType: newLease, + lease: roachpb.Lease{ + // Same sequence => this is a lease extension. 
+ Sequence: curLease.Sequence, + Start: newLeaseStart, + }, + trackerLowerBound: hlc.Timestamp{}, + // The current lease can be expired; we won't backtrack the closed + // timestamp to this expiration. + leaseExp: expiredLeaseTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + // Lease extensions don't carry closed timestamps because they don't get + // MLAIs, and so they can be reordered. + expClosed: hlc.Timestamp{}, + }, + { + // Lease transfers behave just like regular writes. The lease start time + // doesn't matter. + name: "lease transfer", + reqType: leaseTransfer, + lease: roachpb.Lease{ + Sequence: curLease.Sequence + 1, + Start: newLeaseStart, + }, + trackerLowerBound: hlc.Timestamp{}, + leaseExp: hlc.MaxTimestamp, + rangePolicy: roachpb.LAG_BY_CLUSTER_SETTING, + expClosed: nowMinusClosedLag, + }, + { + // With the LEAD_FOR_GLOBAL_READS policy, we're expecting to close + // timestamps in the future. + // TODO(andrei,nvanbenschoten): The global policy is not actually hooked + // up at the moment, so this test expects a past timestamp to be closed. + // Once it is hooked up, we should also add another test that checks that + // timestamps above the current lease expiration are not closed. 
+ name: "global range", + reqType: regularWrite, + trackerLowerBound: hlc.Timestamp{}, + leaseExp: hlc.MaxTimestamp, + rangePolicy: roachpb.LEAD_FOR_GLOBAL_READS, + prevClosedTimestamp: hlc.Timestamp{}, + expClosed: nowMinusClosedLag, + }, + } { + t.Run(tc.name, func(t *testing.T) { + r := &testProposerRaft{} + p := testProposer{ + lai: 10, + raftGroup: r, + } + tracker := mockTracker{ + lowerBound: tc.trackerLowerBound, + } + pc := proposalCreator{lease: kvserverpb.LeaseStatus{Lease: curLease}} + pc.lease.Lease.Expiration = &tc.leaseExp + + var b propBuf + b.Init(&p, tracker, clock, st) + b.forwardClosedTimestampLocked(tc.prevClosedTimestamp) + + var pd *ProposalData + var data []byte + switch tc.reqType { + case regularWrite: + pd, data = pc.newPutProposal() + case newLease: + pd, data = pc.newLeaseProposal(tc.lease) + case leaseTransfer: + var ba roachpb.BatchRequest + ba.Add(&roachpb.TransferLeaseRequest{ + Lease: roachpb.Lease{ + Start: now.MustToClockTimestamp(), + Sequence: pc.lease.Lease.Sequence + 1, + }, + PrevLease: pc.lease.Lease, + }) + pd, data = pc.newProposal(ba) + default: + t.Fatalf("unknown req type %d", tc.reqType) + } + tok := TrackedRequestToken{ + done: false, + tok: nil, + b: &b, + } + _, err := b.Insert(ctx, pd, data, tok) + require.NoError(t, err) + require.NoError(t, b.flushLocked(ctx)) + checkClosedTS(t, r, tc.expClosed) + }) + } +} + +type mockTracker struct { + lowerBound hlc.Timestamp +} + +func (t mockTracker) Track(ctx context.Context, ts hlc.Timestamp) tracker.RemovalToken { + panic("unimplemented") +} + +func (t mockTracker) Untrack(context.Context, tracker.RemovalToken) {} + +func (t mockTracker) LowerBound(context.Context) hlc.Timestamp { + return t.lowerBound +} + +func (t mockTracker) Count() int { + panic("unimplemented") +} + +var _ tracker.Tracker = mockTracker{} diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go index ca0b62679e25..cadc2be7e46b 100644 --- a/pkg/kv/kvserver/replica_raft.go +++ 
b/pkg/kv/kvserver/replica_raft.go @@ -58,6 +58,10 @@ func makeIDKey() kvserverbase.CmdIDKey { // caller should relinquish all ownership of it. If it does return an error, the // caller retains full ownership over the guard. // +// evalAndPropose takes ownership of the supplied token; the caller should +// tok.Move() it into this method. It will be used to untrack the request once +// it comes out of the proposal buffer. +// // Return values: // - a channel which receives a response or error upon application // - a closure used to attempt to abandon the command. When called, it unbinds @@ -73,9 +77,11 @@ func (r *Replica) evalAndPropose( g *concurrency.Guard, st kvserverpb.LeaseStatus, lul hlc.Timestamp, + tok TrackedRequestToken, ) (chan proposalResult, func(), int64, *roachpb.Error) { + defer tok.DoneIfNotMoved(ctx) idKey := makeIDKey() - proposal, pErr := r.requestToProposal(ctx, idKey, ba, lul, g.LatchSpans()) + proposal, pErr := r.requestToProposal(ctx, idKey, ba, st, lul, g.LatchSpans()) log.Event(proposal.ctx, "evaluated request") // If the request hit a server-side concurrency retry error, immediately @@ -196,7 +202,7 @@ func (r *Replica) evalAndPropose( } } - maxLeaseIndex, pErr := r.propose(ctx, proposal) + maxLeaseIndex, pErr := r.propose(ctx, proposal, tok.Move(ctx)) if pErr != nil { return nil, nil, 0, pErr } @@ -230,7 +236,14 @@ func (r *Replica) evalAndPropose( // the method returns, all access to the command must be performed while holding // Replica.mu and Replica.raftMu. If a non-nil error is returned the // MaxLeaseIndex is not updated. -func (r *Replica) propose(ctx context.Context, p *ProposalData) (index int64, pErr *roachpb.Error) { +// +// propose takes ownership of the supplied token; the caller should tok.Move() +// it into this method. It will be used to untrack the request once it comes out +// of the proposal buffer. 
+func (r *Replica) propose( + ctx context.Context, p *ProposalData, tok TrackedRequestToken, +) (index int64, pErr *roachpb.Error) { + defer tok.DoneIfNotMoved(ctx) // If an error occurs reset the command's MaxLeaseIndex to its initial value. // Failure to propose will propagate to the client. An invariant of this @@ -300,8 +313,12 @@ func (r *Replica) propose(ctx context.Context, p *ProposalData) (index int64, pE preLen = raftCommandPrefixLen } cmdLen := p.command.Size() - cap := preLen + cmdLen + kvserverpb.MaxRaftCommandFooterSize() - data := make([]byte, preLen, cap) + // Allocate the data slice with enough capacity to eventually hold the two + // "footers" that are filled later. + needed := preLen + cmdLen + + kvserverpb.MaxRaftCommandFooterSize() + + kvserverpb.MaxClosedTimestampFooterSize() + data := make([]byte, preLen, needed) // Encode prefix with command ID, if necessary. if prefix { encodeRaftCommandPrefix(data, version, p.idKey) @@ -341,7 +358,7 @@ func (r *Replica) propose(ctx context.Context, p *ProposalData) (index int64, pE // // NB: we must not hold r.mu while using the proposal buffer, see comment // on the field. - maxLeaseIndex, err := r.mu.proposalBuf.Insert(ctx, p, data) + maxLeaseIndex, err := r.mu.proposalBuf.Insert(ctx, p, data, tok.Move(ctx)) if err != nil { return 0, roachpb.NewError(err) } @@ -973,6 +990,9 @@ const ( // waiting on. // mu must be held. // +// Note that reproposals don't need to worry about checking the closed timestamp +// before reproposing, since they're reusing the original LAI. 
+// // refreshAtDelta only applies for reasonTicks and specifies how old (in ticks) // a command must be for it to be inspected; the usual value is the number of // ticks of an election timeout (affect only proposals that have had ample time diff --git a/pkg/kv/kvserver/replica_rangefeed_test.go b/pkg/kv/kvserver/replica_rangefeed_test.go index 4105f97d4374..de435bd8712b 100644 --- a/pkg/kv/kvserver/replica_rangefeed_test.go +++ b/pkg/kv/kvserver/replica_rangefeed_test.go @@ -167,6 +167,7 @@ func TestReplicaRangefeed(t *testing.T) { } events = stream.Events() + // Filter out checkpoints. Those are not deterministic; they can come at any time. var filteredEvents []*roachpb.RangeFeedEvent for _, e := range events { if e.Checkpoint != nil { diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index caa40fde4334..ac468760c49f 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -198,6 +198,9 @@ func (tc *testContext) Clock() *hlc.Clock { func (tc *testContext) Start(t testing.TB, stopper *stop.Stopper) { tc.manualClock = hlc.NewManualClock(123) cfg := TestStoreConfig(hlc.NewClock(tc.manualClock.UnixNano, time.Nanosecond)) + // testContext tests like to move the manual clock around and assume that they can write at past + // timestamps. + cfg.TestingKnobs.DontCloseTimestamps = true tc.StartWithStoreConfig(t, stopper, cfg) } @@ -617,7 +620,8 @@ func sendLeaseRequest(r *Replica, l *roachpb.Lease) error { ba.Timestamp = r.store.Clock().Now() ba.Add(&roachpb.RequestLeaseRequest{Lease: *l}) st := r.CurrentLeaseStatus(ctx) - ch, _, _, pErr := r.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := r.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, _, pErr := r.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr == nil { // Next if the command was committed, wait for the range to apply it. 
// TODO(bdarnell): refactor this to a more conventional error-handling pattern. @@ -1418,7 +1422,8 @@ func TestReplicaLeaseRejectUnknownRaftNodeID(t *testing.T) { ba := roachpb.BatchRequest{} ba.Timestamp = tc.repl.store.Clock().Now() ba.Add(&roachpb.RequestLeaseRequest{Lease: *lease}) - ch, _, _, pErr := tc.repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := tc.repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, _, pErr := tc.repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr == nil { // Next if the command was committed, wait for the range to apply it. // TODO(bdarnell): refactor to a more conventional error-handling pattern. @@ -7894,7 +7899,8 @@ func TestReplicaCancelRaftCommandProgress(t *testing.T) { }, }) st := repl.CurrentLeaseStatus(ctx) - ch, _, idx, err := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, idx, err := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if err != nil { t.Fatal(err) } @@ -7963,8 +7969,9 @@ func TestReplicaBurstPendingCommandsAndRepropose(t *testing.T) { Key: roachpb.Key(fmt.Sprintf("k%d", i)), }, }) + _, tok := tc.repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) st := tc.repl.CurrentLeaseStatus(ctx) - ch, _, idx, err := tc.repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + ch, _, idx, err := tc.repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if err != nil { t.Fatal(err) } @@ -8082,8 +8089,8 @@ func TestReplicaRefreshPendingCommandsTicks(t *testing.T) { var ba roachpb.BatchRequest ba.Timestamp = tc.Clock().Now() ba.Add(&roachpb.PutRequest{RequestHeader: roachpb.RequestHeader{Key: roachpb.Key(id)}}) - lease, _ := r.GetLease() - cmd, pErr := r.requestToProposal(ctx, kvserverbase.CmdIDKey(id), &ba, hlc.Timestamp{}, &allSpans) + st := 
r.CurrentLeaseStatus(ctx) + cmd, pErr := r.requestToProposal(ctx, kvserverbase.CmdIDKey(id), &ba, st, hlc.Timestamp{}, &allSpans) if pErr != nil { t.Fatal(pErr) } @@ -8092,8 +8099,9 @@ func TestReplicaRefreshPendingCommandsTicks(t *testing.T) { dropProposals.m[cmd] = struct{}{} // silently drop proposals dropProposals.Unlock() - cmd.command.ProposerLeaseSequence = lease.Sequence - if _, pErr := r.propose(ctx, cmd); pErr != nil { + cmd.command.ProposerLeaseSequence = st.Lease.Sequence + _, tok := r.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + if _, pErr := r.propose(ctx, cmd, tok); pErr != nil { t.Error(pErr) } r.mu.Lock() @@ -8204,7 +8212,7 @@ func TestReplicaRefreshMultiple(t *testing.T) { incCmdID = makeIDKey() atomic.StoreInt32(&filterActive, 1) - proposal, pErr := repl.requestToProposal(ctx, incCmdID, &ba, hlc.Timestamp{}, &allSpans) + proposal, pErr := repl.requestToProposal(ctx, incCmdID, &ba, repl.CurrentLeaseStatus(ctx), hlc.Timestamp{}, &allSpans) if pErr != nil { t.Fatal(pErr) } @@ -8234,7 +8242,8 @@ func TestReplicaRefreshMultiple(t *testing.T) { // that it will generate a retry when it fails. Then call refreshProposals // twice to repropose it and put it in the logs twice more. proposal.command.ProposerLeaseSequence = repl.mu.state.Lease.Sequence - if _, pErr := repl.propose(ctx, proposal); pErr != nil { + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + if _, pErr := repl.propose(ctx, proposal, tok); pErr != nil { t.Fatal(pErr) } repl.mu.Lock() @@ -8244,6 +8253,7 @@ func TestReplicaRefreshMultiple(t *testing.T) { repl.refreshProposalsLocked(ctx, 0 /* refreshAtDelta */, reasonNewLeader) repl.refreshProposalsLocked(ctx, 0 /* refreshAtDelta */, reasonNewLeader) repl.mu.Unlock() + require.Zero(t, tc.repl.mu.proposalBuf.EvaluatingRequestsCount()) // Wait for our proposal to apply. The two refreshed proposals above // will fail due to their illegal lease index. 
Then they'll generate @@ -9238,6 +9248,7 @@ func TestCommandTooLarge(t *testing.T) { func TestErrorInRaftApplicationClearsIntents(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) + ctx := context.Background() var storeKnobs StoreTestingKnobs var filterActive int32 @@ -9257,7 +9268,7 @@ func TestErrorInRaftApplicationClearsIntents(t *testing.T) { defer s.Stopper().Stop(context.Background()) splitKey := roachpb.Key("b") - if err := kvDB.AdminSplit(context.Background(), splitKey, hlc.MaxTimestamp /* expirationTime */); err != nil { + if err := kvDB.AdminSplit(ctx, splitKey, hlc.MaxTimestamp /* expirationTime */); err != nil { t.Fatal(err) } @@ -9296,7 +9307,8 @@ func TestErrorInRaftApplicationClearsIntents(t *testing.T) { exLease, _ := repl.GetLease() st := kvserverpb.LeaseStatus{Lease: exLease, State: kvserverpb.LeaseState_VALID} - ch, _, _, pErr := repl.evalAndPropose(context.Background(), &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, _, pErr := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr != nil { t.Fatal(pErr) } @@ -9343,7 +9355,8 @@ func TestProposeWithAsyncConsensus(t *testing.T) { atomic.StoreInt32(&filterActive, 1) st := tc.repl.CurrentLeaseStatus(ctx) - ch, _, _, pErr := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, _, pErr := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr != nil { t.Fatal(pErr) } @@ -9407,7 +9420,8 @@ func TestApplyPaginatedCommittedEntries(t *testing.T) { atomic.StoreInt32(&filterActive, 1) st := repl.CurrentLeaseStatus(ctx) - _, _, _, pErr := repl.evalAndPropose(ctx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + _, _, _, pErr := repl.evalAndPropose(ctx, &ba, 
allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr != nil { t.Fatal(pErr) } @@ -9425,7 +9439,8 @@ func TestApplyPaginatedCommittedEntries(t *testing.T) { ba2.Timestamp = tc.Clock().Now() var pErr *roachpb.Error - ch, _, _, pErr = repl.evalAndPropose(ctx, &ba2, allSpansGuard(), st, hlc.Timestamp{}) + _, tok := repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) + ch, _, _, pErr = repl.evalAndPropose(ctx, &ba2, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr != nil { t.Fatal(pErr) } @@ -12537,9 +12552,10 @@ func TestProposalNotAcknowledgedOrReproposedAfterApplication(t *testing.T) { // Hold the RaftLock to ensure that after evalAndPropose our proposal is in // the proposal map. Entries are only removed from that map underneath raft. tc.repl.RaftLock() + _, tok := tc.repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) sp := cfg.AmbientCtx.Tracer.StartSpan("replica send", tracing.WithForceRealSpan()) tracedCtx := tracing.ContextWithSpan(ctx, sp) - ch, _, _, pErr := tc.repl.evalAndPropose(tracedCtx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + ch, _, _, pErr := tc.repl.evalAndPropose(tracedCtx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok) if pErr != nil { t.Fatal(pErr) } @@ -12625,13 +12641,14 @@ func TestLaterReproposalsDoNotReuseContext(t *testing.T) { Value: roachpb.MakeValueFromBytes([]byte("val")), }) + _, tok := tc.repl.mu.proposalBuf.TrackEvaluatingRequest(ctx, hlc.MinTimestamp) // Hold the RaftLock to encourage the reproposals to occur in the same batch. tc.repl.RaftLock() tracedCtx, sp := tracer.StartSpanCtx(ctx, "replica send", tracing.WithForceRealSpan()) // Go out of our way to enable recording so that expensive logging is enabled // for this context. 
sp.SetVerbose(true) - ch, _, _, pErr := tc.repl.evalAndPropose(tracedCtx, &ba, allSpansGuard(), st, hlc.Timestamp{}) + ch, _, _, pErr := tc.repl.evalAndPropose(tracedCtx, &ba, allSpansGuard(), st, hlc.Timestamp{}, tok.Move(ctx)) if pErr != nil { t.Fatal(pErr) } @@ -12800,7 +12817,7 @@ func TestContainsEstimatesClampProposal(t *testing.T) { ba.Timestamp = tc.Clock().Now() req := putArgs(roachpb.Key("some-key"), []byte("some-value")) ba.Add(&req) - proposal, err := tc.repl.requestToProposal(ctx, cmdIDKey, &ba, hlc.Timestamp{}, &allSpans) + proposal, err := tc.repl.requestToProposal(ctx, cmdIDKey, &ba, tc.repl.CurrentLeaseStatus(ctx), hlc.Timestamp{}, &allSpans) if err != nil { t.Error(err) } diff --git a/pkg/kv/kvserver/replica_write.go b/pkg/kv/kvserver/replica_write.go index b02db0ae3af0..fab0d7372cde 100644 --- a/pkg/kv/kvserver/replica_write.go +++ b/pkg/kv/kvserver/replica_write.go @@ -88,6 +88,25 @@ func (r *Replica) executeWriteBatch( minTS, untrack := r.store.cfg.ClosedTimestamp.Tracker.Track(ctx) defer untrack(ctx, 0, 0, 0) // covers all error returns below + // Start tracking this request. The act of tracking also gives us a closed + // timestamp, which we must ensure to evaluate above of. We're going to pass + // in minTS to applyTimestampCache(), which bumps us accordingly if necessary. + // We need to start tracking this request before we know the final write + // timestamp at which this request will evaluate because we need to atomically + // read the closed timestamp and start to be tracked. + // TODO(andrei): The timestamp cache might bump us above the timestamp at + // which we're registering with the proposalBuf. In that case, this request + // will be tracked at an unnecessarily low timestamp. We could invent an + // interface through which to communicate the updated timestamp to the + // proposalBuf. 
+ minTS2, tok := r.mu.proposalBuf.TrackEvaluatingRequest(ctx, ba.WriteTimestamp()) + defer tok.DoneIfNotMoved(ctx) + minTS.Forward(minTS2) + + if !ba.IsSingleSkipLeaseCheckRequest() && st.Expiration().Less(minTS) { + log.Fatalf(ctx, "closed timestamp above lease expiration (%s vs %s): %s", minTS, st.Expiration(), ba) + } + // Examine the timestamp cache for preceding commands which require this // command to move its timestamp forward. Or, in the case of a transactional // write, the txn timestamp and possible write-too-old bool. @@ -120,7 +139,7 @@ func (r *Replica) executeWriteBatch( // If the command is proposed to Raft, ownership of and responsibility for // the concurrency guard will be assumed by Raft, so provide the guard to // evalAndPropose. - ch, abandon, maxLeaseIndex, pErr := r.evalAndPropose(ctx, ba, g, st, localUncertaintyLimit) + ch, abandon, maxLeaseIndex, pErr := r.evalAndPropose(ctx, ba, g, st, localUncertaintyLimit, tok.Move(ctx)) if pErr != nil { if maxLeaseIndex != 0 { log.Fatalf( diff --git a/pkg/kv/kvserver/stateloader/stateloader.go b/pkg/kv/kvserver/stateloader/stateloader.go index 9a1a4d5be558..a6610a807d39 100644 --- a/pkg/kv/kvserver/stateloader/stateloader.go +++ b/pkg/kv/kvserver/stateloader/stateloader.go @@ -83,6 +83,9 @@ func (rsl StateLoader) Load( ms := as.RangeStats.ToStats() s.Stats = &ms + if as.ClosedTimestamp != nil { + s.ClosedTimestamp = *as.ClosedTimestamp + } } else { if s.RaftAppliedIndex, s.LeaseAppliedIndex, err = rsl.LoadAppliedIndex(ctx, reader); err != nil { return kvserverpb.ReplicaState{}, err @@ -167,8 +170,8 @@ func (rsl StateLoader) Save( } } if state.UsingAppliedStateKey { - rai, lai := state.RaftAppliedIndex, state.LeaseAppliedIndex - if err := rsl.SetRangeAppliedState(ctx, readWriter, rai, lai, ms); err != nil { + rai, lai, ct := state.RaftAppliedIndex, state.LeaseAppliedIndex, &state.ClosedTimestamp + if err := rsl.SetRangeAppliedState(ctx, readWriter, rai, lai, ms, ct); err != nil { return 
enginepb.MVCCStats{}, err } } else { @@ -294,17 +297,26 @@ func (rsl StateLoader) LoadMVCCStats( // The applied indices and the stats used to be stored separately in different // keys. We now deem those keys to be "legacy" because they have been replaced // by the range applied state key. +// +// TODO(andrei): closedTimestamp is a pointer to avoid an allocation when +// putting it in RangeAppliedState. RangeAppliedState.ClosedTimestamp is made +// non-nullable (see comments on the field), this argument should be taken by +// value. func (rsl StateLoader) SetRangeAppliedState( ctx context.Context, readWriter storage.ReadWriter, appliedIndex, leaseAppliedIndex uint64, newMS *enginepb.MVCCStats, + closedTimestamp *hlc.Timestamp, ) error { as := enginepb.RangeAppliedState{ RaftAppliedIndex: appliedIndex, LeaseAppliedIndex: leaseAppliedIndex, RangeStats: newMS.ToPersistentStats(), } + if closedTimestamp != nil && !closedTimestamp.IsEmpty() { + as.ClosedTimestamp = closedTimestamp + } // The RangeAppliedStateKey is not included in stats. This is also reflected // in C.MVCCComputeStats and ComputeStatsForRange. ms := (*enginepb.MVCCStats)(nil) @@ -477,12 +489,26 @@ func (rsl StateLoader) SetMVCCStats( if as, err := rsl.LoadRangeAppliedState(ctx, readWriter); err != nil { return err } else if as != nil { - return rsl.SetRangeAppliedState(ctx, readWriter, as.RaftAppliedIndex, as.LeaseAppliedIndex, newMS) + return rsl.SetRangeAppliedState( + ctx, readWriter, as.RaftAppliedIndex, as.LeaseAppliedIndex, newMS, as.ClosedTimestamp) } return rsl.writeLegacyMVCCStatsInternal(ctx, readWriter, newMS) } +// SetClosedTimestamp overwrites the closed timestamp. 
+func (rsl StateLoader) SetClosedTimestamp( + ctx context.Context, readWriter storage.ReadWriter, closedTS hlc.Timestamp, +) error { + as, err := rsl.LoadRangeAppliedState(ctx, readWriter) + if err != nil { + return err + } + return rsl.SetRangeAppliedState( + ctx, readWriter, as.RaftAppliedIndex, as.LeaseAppliedIndex, + as.RangeStats.ToStatsPtr(), &closedTS) +} + // SetLegacyRaftTruncatedState overwrites the truncated state. func (rsl StateLoader) SetLegacyRaftTruncatedState( ctx context.Context, diff --git a/pkg/kv/kvserver/store_split.go b/pkg/kv/kvserver/store_split.go index 83692feba9f1..7f8dccbfa3c4 100644 --- a/pkg/kv/kvserver/store_split.go +++ b/pkg/kv/kvserver/store_split.go @@ -18,6 +18,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" + "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/errors" "go.etcd.io/etcd/raft/v3" @@ -28,7 +29,12 @@ import ( // changes to the given ReadWriter will be written atomically with the // split commit. func splitPreApply( - ctx context.Context, readWriter storage.ReadWriter, split roachpb.SplitTrigger, r *Replica, + ctx context.Context, + readWriter storage.ReadWriter, + split roachpb.SplitTrigger, + r *Replica, + // The closed timestamp used to initialize the RHS. + closedTS hlc.Timestamp, ) { // Sanity check that the store is in the split. // @@ -116,6 +122,11 @@ func splitPreApply( log.Fatalf(ctx, "%v", err) } + // Persist the closed timestamp. + if err := rsl.SetClosedTimestamp(ctx, readWriter, closedTS); err != nil { + log.Fatalf(ctx, "%s", err) + } + // The initialMaxClosed is assigned to the RHS replica to ensure that // follower reads do not regress following the split. 
After the split occurs // there will be no information in the closedts subsystem about the newly diff --git a/pkg/kv/kvserver/testing_knobs.go b/pkg/kv/kvserver/testing_knobs.go index 96ad6bff8468..aaba2ea7c185 100644 --- a/pkg/kv/kvserver/testing_knobs.go +++ b/pkg/kv/kvserver/testing_knobs.go @@ -239,6 +239,9 @@ type StoreTestingKnobs struct { // heartbeats and then expect other replicas to take the lease without // worrying about Raft). AllowLeaseRequestProposalsWhenNotLeader bool + // DontCloseTimestamps inhibits the propBuf's closing of timestamps. All Raft + // commands will carry an empty closed timestamp. + DontCloseTimestamps bool // AllowDangerousReplicationChanges disables safeguards // in execChangeReplicasTxn that prevent moving // to a configuration that cannot make progress. diff --git a/pkg/roachpb/batch.go b/pkg/roachpb/batch.go index ed1bf4d6a188..02e7c4ed51fe 100644 --- a/pkg/roachpb/batch.go +++ b/pkg/roachpb/batch.go @@ -56,7 +56,7 @@ func (ba *BatchRequest) SetActiveTimestamp(nowFn func() hlc.Timestamp) error { // provisional commit timestamp evolves. // // Note that writes will be performed at the provisional commit timestamp, - // txn.Timestamp, regardless of the batch timestamp. + // txn.WriteTimestamp, regardless of the batch timestamp. ba.Timestamp = txn.ReadTimestamp } else { // When not transactional, allow empty timestamp and use nowFn instead @@ -116,6 +116,16 @@ func (ba *BatchRequest) IsLeaseRequest() bool { return ok } +// IsLeaseTransferRequest returns whether the batch consists of a single +// TransferLease request. +func (ba *BatchRequest) IsLeaseTransferRequest() bool { + if !ba.IsSingleRequest() { + return false + } + _, ok := ba.GetArg(TransferLease) + return ok +} + // IsAdmin returns true iff the BatchRequest contains an admin request. 
func (ba *BatchRequest) IsAdmin() bool { return ba.hasFlag(isAdmin) @@ -262,6 +272,16 @@ func (ba *BatchRequest) IsSingleAddSSTableRequest() bool { return false } +// IsSingleRequestLeaseRequest returns true iff the batch contains a single +// request, and that request is a RequestLeaseRequest. +func (ba *BatchRequest) IsSingleRequestLeaseRequest() bool { + if ba.IsSingleRequest() { + _, ok := ba.Requests[0].GetInner().(*RequestLeaseRequest) + return ok + } + return false +} + // IsCompleteTransaction determines whether a batch contains every write in a // transactions. func (ba *BatchRequest) IsCompleteTransaction() bool { diff --git a/pkg/storage/enginepb/mvcc3.go b/pkg/storage/enginepb/mvcc3.go index b03c00ca9bce..1cb6924095d5 100644 --- a/pkg/storage/enginepb/mvcc3.go +++ b/pkg/storage/enginepb/mvcc3.go @@ -30,6 +30,11 @@ func (ms *MVCCPersistentStats) ToStats() MVCCStats { return MVCCStats(*ms) } +// ToStatsPtr casts the receiver to a *MVCCStats without copying; the result aliases ms. +func (ms *MVCCPersistentStats) ToStatsPtr() *MVCCStats { + return (*MVCCStats)(ms) +} + // SafeValue implements the redact.SafeValue interface. 
func (ms *MVCCStats) SafeValue() {} diff --git a/pkg/storage/enginepb/mvcc3.pb.go b/pkg/storage/enginepb/mvcc3.pb.go index 8d61e5cafc58..def873b63b77 100644 --- a/pkg/storage/enginepb/mvcc3.pb.go +++ b/pkg/storage/enginepb/mvcc3.pb.go @@ -129,7 +129,7 @@ type TxnMeta struct { func (m *TxnMeta) Reset() { *m = TxnMeta{} } func (*TxnMeta) ProtoMessage() {} func (*TxnMeta) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{0} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{0} } func (m *TxnMeta) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -165,7 +165,7 @@ func (m *IgnoredSeqNumRange) Reset() { *m = IgnoredSeqNumRange{} } func (m *IgnoredSeqNumRange) String() string { return proto.CompactTextString(m) } func (*IgnoredSeqNumRange) ProtoMessage() {} func (*IgnoredSeqNumRange) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{1} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{1} } func (m *IgnoredSeqNumRange) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -216,7 +216,7 @@ func (m *MVCCStatsDelta) Reset() { *m = MVCCStatsDelta{} } func (m *MVCCStatsDelta) String() string { return proto.CompactTextString(m) } func (*MVCCStatsDelta) ProtoMessage() {} func (*MVCCStatsDelta) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{2} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{2} } func (m *MVCCStatsDelta) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -268,7 +268,7 @@ func (m *MVCCPersistentStats) Reset() { *m = MVCCPersistentStats{} } func (m *MVCCPersistentStats) String() string { return proto.CompactTextString(m) } func (*MVCCPersistentStats) ProtoMessage() {} func (*MVCCPersistentStats) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{3} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{3} } func (m *MVCCPersistentStats) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -307,13 
+307,25 @@ type RangeAppliedState struct { // range_stats is the set of mvcc stats that accounts for the current value // of the Raft state machine. RangeStats MVCCPersistentStats `protobuf:"bytes,3,opt,name=range_stats,json=rangeStats,proto3" json:"range_stats"` + // closed_timestamp is the largest timestamp that is known to have been closed + // as of this lease applied index. This means that the current leaseholder (if + // any) and any future leaseholder will not evaluate writes at or below this + // timestamp, and also that any in-flight commands that can still apply are + // writing at higher timestamps. Non-leaseholder replicas are free to serve + // "follower reads" at or below this timestamp. + // + // TODO(andrei): Make this field not-nullable in 21.2, once all the ranges + // have a closed timestamp applied to their state (this might need a + // migration). In 21.1 we cannot write empty timestamp to disk because that + // looks like an inconsistency to the consistency-checker. 
+ ClosedTimestamp *hlc.Timestamp `protobuf:"bytes,4,opt,name=closed_timestamp,json=closedTimestamp,proto3" json:"closed_timestamp,omitempty"` } func (m *RangeAppliedState) Reset() { *m = RangeAppliedState{} } func (m *RangeAppliedState) String() string { return proto.CompactTextString(m) } func (*RangeAppliedState) ProtoMessage() {} func (*RangeAppliedState) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{4} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{4} } func (m *RangeAppliedState) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -351,7 +363,7 @@ func (m *MVCCWriteValueOp) Reset() { *m = MVCCWriteValueOp{} } func (m *MVCCWriteValueOp) String() string { return proto.CompactTextString(m) } func (*MVCCWriteValueOp) ProtoMessage() {} func (*MVCCWriteValueOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{5} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{5} } func (m *MVCCWriteValueOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -389,7 +401,7 @@ func (m *MVCCWriteIntentOp) Reset() { *m = MVCCWriteIntentOp{} } func (m *MVCCWriteIntentOp) String() string { return proto.CompactTextString(m) } func (*MVCCWriteIntentOp) ProtoMessage() {} func (*MVCCWriteIntentOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{6} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{6} } func (m *MVCCWriteIntentOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -425,7 +437,7 @@ func (m *MVCCUpdateIntentOp) Reset() { *m = MVCCUpdateIntentOp{} } func (m *MVCCUpdateIntentOp) String() string { return proto.CompactTextString(m) } func (*MVCCUpdateIntentOp) ProtoMessage() {} func (*MVCCUpdateIntentOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{7} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{7} } func (m *MVCCUpdateIntentOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -464,7 
+476,7 @@ func (m *MVCCCommitIntentOp) Reset() { *m = MVCCCommitIntentOp{} } func (m *MVCCCommitIntentOp) String() string { return proto.CompactTextString(m) } func (*MVCCCommitIntentOp) ProtoMessage() {} func (*MVCCCommitIntentOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{8} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{8} } func (m *MVCCCommitIntentOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -504,7 +516,7 @@ func (m *MVCCAbortIntentOp) Reset() { *m = MVCCAbortIntentOp{} } func (m *MVCCAbortIntentOp) String() string { return proto.CompactTextString(m) } func (*MVCCAbortIntentOp) ProtoMessage() {} func (*MVCCAbortIntentOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{9} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{9} } func (m *MVCCAbortIntentOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -540,7 +552,7 @@ func (m *MVCCAbortTxnOp) Reset() { *m = MVCCAbortTxnOp{} } func (m *MVCCAbortTxnOp) String() string { return proto.CompactTextString(m) } func (*MVCCAbortTxnOp) ProtoMessage() {} func (*MVCCAbortTxnOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{10} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{10} } func (m *MVCCAbortTxnOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -579,7 +591,7 @@ func (m *MVCCLogicalOp) Reset() { *m = MVCCLogicalOp{} } func (m *MVCCLogicalOp) String() string { return proto.CompactTextString(m) } func (*MVCCLogicalOp) ProtoMessage() {} func (*MVCCLogicalOp) Descriptor() ([]byte, []int) { - return fileDescriptor_mvcc3_91ae80d2fc45e9ad, []int{11} + return fileDescriptor_mvcc3_ed774bd3b5c68109, []int{11} } func (m *MVCCLogicalOp) XXX_Unmarshal(b []byte) error { return m.Unmarshal(b) @@ -811,6 +823,9 @@ func (this *RangeAppliedState) Equal(that interface{}) bool { if !this.RangeStats.Equal(&that1.RangeStats) { return false } + if 
!this.ClosedTimestamp.Equal(that1.ClosedTimestamp) { + return false + } return true } func (m *TxnMeta) Marshal() (dAtA []byte, err error) { @@ -1143,6 +1158,16 @@ func (m *RangeAppliedState) MarshalTo(dAtA []byte) (int, error) { return 0, err } i += n4 + if m.ClosedTimestamp != nil { + dAtA[i] = 0x22 + i++ + i = encodeVarintMvcc3(dAtA, i, uint64(m.ClosedTimestamp.Size())) + n5, err := m.ClosedTimestamp.MarshalTo(dAtA[i:]) + if err != nil { + return 0, err + } + i += n5 + } return i, nil } @@ -1170,11 +1195,11 @@ func (m *MVCCWriteValueOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x12 i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.Timestamp.Size())) - n5, err := m.Timestamp.MarshalTo(dAtA[i:]) + n6, err := m.Timestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n5 + i += n6 if len(m.Value) > 0 { dAtA[i] = 0x1a i++ @@ -1208,11 +1233,11 @@ func (m *MVCCWriteIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnID.Size())) - n6, err := m.TxnID.MarshalTo(dAtA[i:]) + n7, err := m.TxnID.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n6 + i += n7 if len(m.TxnKey) > 0 { dAtA[i] = 0x12 i++ @@ -1222,19 +1247,19 @@ func (m *MVCCWriteIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x1a i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.Timestamp.Size())) - n7, err := m.Timestamp.MarshalTo(dAtA[i:]) + n8, err := m.Timestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n7 + i += n8 dAtA[i] = 0x22 i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnMinTimestamp.Size())) - n8, err := m.TxnMinTimestamp.MarshalTo(dAtA[i:]) + n9, err := m.TxnMinTimestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n8 + i += n9 return i, nil } @@ -1256,19 +1281,19 @@ func (m *MVCCUpdateIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnID.Size())) - n9, err := m.TxnID.MarshalTo(dAtA[i:]) + n10, err := m.TxnID.MarshalTo(dAtA[i:]) if 
err != nil { return 0, err } - i += n9 + i += n10 dAtA[i] = 0x12 i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.Timestamp.Size())) - n10, err := m.Timestamp.MarshalTo(dAtA[i:]) + n11, err := m.Timestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n10 + i += n11 return i, nil } @@ -1290,11 +1315,11 @@ func (m *MVCCCommitIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnID.Size())) - n11, err := m.TxnID.MarshalTo(dAtA[i:]) + n12, err := m.TxnID.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n11 + i += n12 if len(m.Key) > 0 { dAtA[i] = 0x12 i++ @@ -1304,11 +1329,11 @@ func (m *MVCCCommitIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0x1a i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.Timestamp.Size())) - n12, err := m.Timestamp.MarshalTo(dAtA[i:]) + n13, err := m.Timestamp.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n12 + i += n13 if len(m.Value) > 0 { dAtA[i] = 0x22 i++ @@ -1342,11 +1367,11 @@ func (m *MVCCAbortIntentOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnID.Size())) - n13, err := m.TxnID.MarshalTo(dAtA[i:]) + n14, err := m.TxnID.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n13 + i += n14 return i, nil } @@ -1368,11 +1393,11 @@ func (m *MVCCAbortTxnOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.TxnID.Size())) - n14, err := m.TxnID.MarshalTo(dAtA[i:]) + n15, err := m.TxnID.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n14 + i += n15 return i, nil } @@ -1395,61 +1420,61 @@ func (m *MVCCLogicalOp) MarshalTo(dAtA []byte) (int, error) { dAtA[i] = 0xa i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.WriteValue.Size())) - n15, err := m.WriteValue.MarshalTo(dAtA[i:]) + n16, err := m.WriteValue.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n15 + i += n16 } if m.WriteIntent != nil { dAtA[i] = 0x12 i++ i = 
encodeVarintMvcc3(dAtA, i, uint64(m.WriteIntent.Size())) - n16, err := m.WriteIntent.MarshalTo(dAtA[i:]) + n17, err := m.WriteIntent.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n16 + i += n17 } if m.UpdateIntent != nil { dAtA[i] = 0x1a i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.UpdateIntent.Size())) - n17, err := m.UpdateIntent.MarshalTo(dAtA[i:]) + n18, err := m.UpdateIntent.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n17 + i += n18 } if m.CommitIntent != nil { dAtA[i] = 0x22 i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.CommitIntent.Size())) - n18, err := m.CommitIntent.MarshalTo(dAtA[i:]) + n19, err := m.CommitIntent.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n18 + i += n19 } if m.AbortIntent != nil { dAtA[i] = 0x2a i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.AbortIntent.Size())) - n19, err := m.AbortIntent.MarshalTo(dAtA[i:]) + n20, err := m.AbortIntent.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n19 + i += n20 } if m.AbortTxn != nil { dAtA[i] = 0x32 i++ i = encodeVarintMvcc3(dAtA, i, uint64(m.AbortTxn.Size())) - n20, err := m.AbortTxn.MarshalTo(dAtA[i:]) + n21, err := m.AbortTxn.MarshalTo(dAtA[i:]) if err != nil { return 0, err } - i += n20 + i += n21 } return i, nil } @@ -1585,6 +1610,9 @@ func NewPopulatedRangeAppliedState(r randyMvcc3, easy bool) *RangeAppliedState { this.LeaseAppliedIndex = uint64(uint64(r.Uint32())) v5 := NewPopulatedMVCCPersistentStats(r, easy) this.RangeStats = *v5 + if r.Intn(10) != 0 { + this.ClosedTimestamp = hlc.NewPopulatedTimestamp(r, easy) + } if !easy && r.Intn(10) != 0 { } return this @@ -1833,6 +1861,10 @@ func (m *RangeAppliedState) Size() (n int) { } l = m.RangeStats.Size() n += 1 + l + sovMvcc3(uint64(l)) + if m.ClosedTimestamp != nil { + l = m.ClosedTimestamp.Size() + n += 1 + l + sovMvcc3(uint64(l)) + } return n } @@ -3116,6 +3148,39 @@ func (m *RangeAppliedState) Unmarshal(dAtA []byte) error { return err } iNdEx = postIndex + case 4: + if wireType != 2 { + 
return fmt.Errorf("proto: wrong wireType = %d for field ClosedTimestamp", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowMvcc3 + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthMvcc3 + } + postIndex := iNdEx + msglen + if postIndex > l { + return io.ErrUnexpectedEOF + } + if m.ClosedTimestamp == nil { + m.ClosedTimestamp = &hlc.Timestamp{} + } + if err := m.ClosedTimestamp.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipMvcc3(dAtA[iNdEx:]) @@ -4308,86 +4373,87 @@ var ( ) func init() { - proto.RegisterFile("storage/enginepb/mvcc3.proto", fileDescriptor_mvcc3_91ae80d2fc45e9ad) + proto.RegisterFile("storage/enginepb/mvcc3.proto", fileDescriptor_mvcc3_ed774bd3b5c68109) } -var fileDescriptor_mvcc3_91ae80d2fc45e9ad = []byte{ - // 1219 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcc, 0x57, 0x41, 0x6f, 0x1b, 0x45, - 0x14, 0xf6, 0x7a, 0xd7, 0xc9, 0xfa, 0xd9, 0x49, 0xec, 0x69, 0x05, 0x56, 0x69, 0xed, 0xe0, 0x03, - 0x8a, 0x4a, 0xbb, 0x46, 0x2d, 0xa7, 0xdc, 0xec, 0xa4, 0x2a, 0x2e, 0x4d, 0x53, 0x36, 0x6e, 0x91, - 0x40, 0x62, 0x35, 0x5e, 0x0f, 0x9b, 0x55, 0xd6, 0xb3, 0xdb, 0xdd, 0xb1, 0x6b, 0xff, 0x0b, 0x2e, - 0x48, 0x1c, 0x40, 0xca, 0x8f, 0xe0, 0xc0, 0x4f, 0xc8, 0xb1, 0x12, 0x1c, 0x2a, 0x24, 0x2c, 0x70, - 0x2f, 0xfc, 0x86, 0xf6, 0x82, 0x66, 0x66, 0xbd, 0xb6, 0x03, 0x71, 0x0c, 0x81, 0x88, 0xdb, 0xcc, - 0xfb, 0xde, 0xfb, 0xde, 0x9b, 0xe7, 0x6f, 0xe7, 0x8d, 0xe1, 0x7a, 0xc4, 0xfc, 0x10, 0x3b, 0xa4, - 0x46, 0xa8, 0xe3, 0x52, 0x12, 0xb4, 0x6b, 0xdd, 0xbe, 0x6d, 0xdf, 0x35, 0x82, 0xd0, 0x67, 0x3e, - 0xba, 0x66, 0xfb, 0xf6, 0x51, 0xe8, 0x63, 0xfb, 0xd0, 0x88, 0xfd, 0x8c, 0x89, 0xdf, 0xb5, 0x52, - 0x8f, 0xb9, 0x5e, 0xed, 0xd0, 0xb3, 0x6b, 
0xcc, 0xed, 0x92, 0x88, 0xe1, 0x6e, 0x20, 0xa3, 0xae, - 0x5d, 0x75, 0x7c, 0xc7, 0x17, 0xcb, 0x1a, 0x5f, 0x49, 0x6b, 0xf5, 0x6b, 0x15, 0x56, 0x5b, 0x03, - 0xba, 0x47, 0x18, 0x46, 0x9f, 0x40, 0xda, 0xed, 0x94, 0x94, 0x4d, 0x65, 0x2b, 0xdf, 0xa8, 0x9f, - 0x8c, 0x2a, 0xa9, 0x9f, 0x47, 0x95, 0xbb, 0x8e, 0xcb, 0x0e, 0x7b, 0x6d, 0xc3, 0xf6, 0xbb, 0xb5, - 0x24, 0x6d, 0xa7, 0x3d, 0x5d, 0xd7, 0x82, 0x23, 0xa7, 0x26, 0x92, 0xf6, 0x7a, 0x6e, 0xc7, 0x78, - 0xf2, 0xa4, 0xb9, 0x3b, 0x1e, 0x55, 0xd2, 0xcd, 0x5d, 0x33, 0xed, 0x76, 0x50, 0x01, 0xd4, 0x23, - 0x32, 0x2c, 0xa9, 0x9c, 0xd3, 0xe4, 0x4b, 0x54, 0x85, 0x0c, 0x09, 0x7c, 0xfb, 0xb0, 0xa4, 0x6d, - 0x2a, 0x5b, 0x99, 0x46, 0xfe, 0xf5, 0xa8, 0xa2, 0xb7, 0x06, 0xf4, 0x1e, 0xb7, 0x99, 0x12, 0x42, - 0x0f, 0x61, 0xe3, 0x79, 0xe8, 0x32, 0x62, 0x25, 0x67, 0x28, 0x65, 0x36, 0x95, 0xad, 0xdc, 0x9d, - 0x1b, 0xc6, 0xf4, 0xe8, 0x3c, 0xa7, 0x71, 0xe8, 0xd9, 0x46, 0x6b, 0xe2, 0xd4, 0xd0, 0x78, 0xd1, - 0xe6, 0xba, 0x88, 0x4d, 0xac, 0xe8, 0x7d, 0xd0, 0x83, 0xd0, 0xf5, 0x43, 0x97, 0x0d, 0x4b, 0x2b, - 0x22, 0xe9, 0xc6, 0xeb, 0x51, 0x25, 0xd7, 0x1a, 0xd0, 0xc7, 0xb1, 0xd9, 0x4c, 0x1c, 0xd0, 0x7b, - 0xa0, 0x47, 0xe4, 0x59, 0x8f, 0x50, 0x9b, 0x94, 0x56, 0x85, 0x33, 0xbc, 0x1e, 0x55, 0x56, 0x5a, - 0x03, 0x7a, 0x40, 0x9e, 0x99, 0x09, 0x86, 0x3e, 0x82, 0xb5, 0xae, 0x4b, 0x67, 0x0a, 0xcc, 0x2e, - 0x5f, 0x60, 0xbe, 0xeb, 0xd2, 0xc4, 0xb6, 0xad, 0x7f, 0x73, 0x5c, 0x49, 0xfd, 0x70, 0x5c, 0x51, - 0x1e, 0x68, 0x7a, 0xba, 0xa0, 0x3e, 0xd0, 0x74, 0xbd, 0x90, 0xad, 0x7e, 0x01, 0xa8, 0xe9, 0x50, - 0x3f, 0x24, 0x9d, 0x03, 0xf2, 0xec, 0x51, 0xaf, 0x6b, 0x62, 0xea, 0x10, 0xb4, 0x09, 0x99, 0x88, - 0xe1, 0x90, 0x89, 0x1f, 0x69, 0xbe, 0x34, 0x09, 0xa0, 0xeb, 0xa0, 0x12, 0xda, 0x29, 0xa5, 0xff, - 0x84, 0x73, 0xf3, 0xb6, 0xce, 0xf3, 0xfc, 0x7e, 0x5c, 0x51, 0xaa, 0x3f, 0x69, 0xb0, 0xbe, 0xf7, - 0x74, 0x67, 0xe7, 0x80, 0x61, 0x16, 0xed, 0x12, 0x8f, 0x61, 0x74, 0x13, 0x8a, 0x1e, 0x8e, 0x98, - 0xd5, 0x0b, 0x3a, 0x98, 0x11, 0x8b, 0x62, 0xea, 0x47, 0x22, 0x51, 0xc1, 0xdc, 
0xe0, 0xc0, 0x13, - 0x61, 0x7f, 0xc4, 0xcd, 0xe8, 0x06, 0x80, 0x4b, 0x19, 0xa1, 0xcc, 0xc2, 0x0e, 0x11, 0xd9, 0x0a, - 0x66, 0x56, 0x5a, 0xea, 0x0e, 0x41, 0x1f, 0x40, 0xde, 0xb1, 0xad, 0xf6, 0x90, 0x91, 0x48, 0x38, - 0xf0, 0xdf, 0xbf, 0xd0, 0x58, 0x1f, 0x8f, 0x2a, 0x70, 0x7f, 0xa7, 0xc1, 0xcd, 0x75, 0x87, 0x98, - 0xe0, 0xd8, 0x93, 0x35, 0x27, 0xf4, 0xdc, 0x3e, 0x91, 0x31, 0x42, 0x1b, 0xc8, 0xcc, 0x72, 0x8b, - 0xf0, 0x48, 0x60, 0xdb, 0xef, 0x51, 0x26, 0xc4, 0x10, 0xc3, 0x3b, 0xdc, 0x80, 0xde, 0x81, 0xec, - 0x11, 0x19, 0xc6, 0xc1, 0x2b, 0x02, 0xd5, 0x8f, 0xc8, 0x50, 0xc6, 0xc6, 0xa0, 0x0c, 0x5d, 0x4d, - 0xc0, 0x24, 0xb2, 0x8f, 0xbd, 0x38, 0x52, 0x97, 0x60, 0x1f, 0x7b, 0x49, 0x24, 0x07, 0x65, 0x64, - 0x36, 0x01, 0x65, 0xe4, 0xbb, 0x90, 0x8f, 0x5b, 0x20, 0x83, 0x41, 0xe0, 0x39, 0x69, 0x93, 0xf1, - 0x53, 0x17, 0x49, 0x91, 0x9b, 0x75, 0x49, 0xf2, 0x47, 0xc3, 0x28, 0xa6, 0xc8, 0xcb, 0x14, 0xd1, - 0x30, 0x4a, 0xf2, 0x73, 0x50, 0x06, 0xaf, 0x25, 0xa0, 0x8c, 0xbc, 0x0d, 0xc8, 0xf6, 0x29, 0xc3, - 0x2e, 0x8d, 0x2c, 0x12, 0x31, 0xb7, 0x8b, 0x39, 0xc5, 0xfa, 0xa6, 0xb2, 0xa5, 0x9a, 0xc5, 0x09, - 0x72, 0x6f, 0x02, 0xa0, 0x2d, 0x28, 0xe0, 0xb6, 0x1f, 0x32, 0x2b, 0x0a, 0x30, 0x8d, 0xf3, 0x6d, - 0x08, 0xca, 0x75, 0x61, 0x3f, 0x08, 0x30, 0x95, 0x59, 0x3f, 0x84, 0xb7, 0x22, 0x12, 0xe0, 0x10, - 0x33, 0xd2, 0xb1, 0xe6, 0xea, 0x2f, 0x08, 0xff, 0xab, 0x09, 0xda, 0x9c, 0x1e, 0x64, 0x5b, 0x13, - 0xb2, 0xfa, 0x45, 0x83, 0x2b, 0x5c, 0x56, 0x8f, 0x49, 0x18, 0xb9, 0x11, 0x47, 0x85, 0xc0, 0xfe, - 0x6f, 0xda, 0x52, 0x17, 0x6b, 0x4b, 0x5d, 0xa8, 0x2d, 0x75, 0x91, 0xb6, 0xd4, 0x45, 0xda, 0x52, - 0x17, 0x69, 0x4b, 0x3d, 0x47, 0x5b, 0xea, 0xf9, 0xda, 0x52, 0xcf, 0xd1, 0x96, 0xba, 0x48, 0x5b, - 0xea, 0xbf, 0xaf, 0x2d, 0xf5, 0x6f, 0x6a, 0x4b, 0x3d, 0x43, 0x5b, 0xd3, 0x6b, 0xeb, 0x47, 0x05, - 0x8a, 0xe2, 0x2a, 0xac, 0x07, 0x81, 0xe7, 0x92, 0x0e, 0x57, 0x17, 0x41, 0xb7, 0x00, 0x85, 0xf8, - 0x4b, 0x66, 0x61, 0x69, 0xb4, 0x5c, 0xda, 0x21, 0x03, 0x21, 0x2f, 0xcd, 0x2c, 0x70, 0x24, 0xf6, - 0x6e, 0x72, 0x3b, 
0x32, 0xe0, 0x8a, 0x47, 0x70, 0x44, 0x4e, 0xb9, 0xa7, 0x85, 0x7b, 0x51, 0x40, - 0x73, 0xfe, 0x4f, 0x21, 0x17, 0xf2, 0x94, 0x56, 0xc4, 0xa5, 0x2c, 0xf4, 0x96, 0xbb, 0x53, 0x33, - 0xce, 0x1e, 0xc2, 0xc6, 0x5f, 0x7c, 0x01, 0xf1, 0xd5, 0x0f, 0x82, 0x49, 0x58, 0x66, 0x4e, 0xf5, - 0xad, 0x02, 0x05, 0x1e, 0xf3, 0x29, 0x1f, 0x5c, 0x4f, 0xb1, 0xd7, 0x23, 0xfb, 0xc1, 0x64, 0x74, - 0x2a, 0xd3, 0xd1, 0x59, 0x87, 0xec, 0x74, 0xde, 0xa4, 0x97, 0x9f, 0x37, 0xd3, 0x28, 0x74, 0x15, - 0x32, 0x7d, 0xce, 0x1f, 0x4f, 0x64, 0xb9, 0xe1, 0x5f, 0x40, 0x10, 0x92, 0xbe, 0x25, 0x21, 0x4d, - 0x40, 0x59, 0x6e, 0x11, 0xb5, 0x54, 0xbf, 0x4b, 0x43, 0x31, 0x29, 0x4f, 0xfe, 0x2e, 0xfb, 0x01, - 0xfa, 0x1c, 0x56, 0xd8, 0x80, 0x5a, 0xc9, 0x8b, 0x61, 0xf7, 0x62, 0x2f, 0x86, 0x4c, 0x6b, 0x40, - 0x9b, 0xbb, 0x66, 0x86, 0x0d, 0x68, 0xb3, 0x83, 0xde, 0x86, 0x55, 0x4e, 0xce, 0x1b, 0x90, 0x16, - 0xe5, 0xf0, 0x5c, 0x1f, 0x9f, 0xee, 0x81, 0xfa, 0x8f, 0x7a, 0xb0, 0x0f, 0x45, 0xce, 0x3d, 0x3f, - 0xbe, 0xb5, 0xe5, 0xa9, 0x36, 0xd8, 0x80, 0xee, 0xcd, 0x4c, 0xf0, 0xea, 0xf7, 0x0a, 0x20, 0xde, - 0x1f, 0x79, 0x89, 0x5d, 0x4e, 0x83, 0x2e, 0xae, 0x85, 0xea, 0x9b, 0xb8, 0xec, 0x1d, 0xbf, 0xdb, - 0x75, 0xd9, 0xe5, 0x94, 0x1d, 0x8b, 0x3a, 0x7d, 0x86, 0xa8, 0xd5, 0x8b, 0x89, 0x5a, 0x3b, 0x5b, - 0xd4, 0x99, 0xd3, 0xa2, 0x0e, 0xa4, 0xa6, 0xeb, 0xfc, 0x7e, 0xba, 0x94, 0xb3, 0x57, 0xbb, 0xf2, - 0xc5, 0x25, 0x32, 0xb6, 0x06, 0xf4, 0xbf, 0x4e, 0xf7, 0x46, 0x85, 0x35, 0x9e, 0xef, 0xa1, 0xef, - 0xb8, 0x36, 0xf6, 0xf6, 0x03, 0xb4, 0x07, 0x39, 0xf9, 0xac, 0x96, 0x2d, 0x51, 0x44, 0xb3, 0x6f, - 0x9d, 0x77, 0x91, 0xcd, 0x5e, 0x4a, 0x26, 0x3c, 0x4f, 0x76, 0xe8, 0x31, 0xe4, 0x25, 0x9d, 0xbc, - 0xc7, 0x63, 0x15, 0xde, 0x5e, 0x8a, 0x6f, 0xd2, 0x71, 0x53, 0x56, 0x24, 0xb7, 0xe8, 0x00, 0xd6, - 0xe2, 0x07, 0x42, 0x4c, 0x29, 0xf5, 0x60, 0x9c, 0x47, 0x39, 0xff, 0xe1, 0x99, 0xf9, 0xde, 0xcc, - 0x9e, 0x93, 0xda, 0x42, 0xe1, 0x13, 0x52, 0x6d, 0x39, 0xd2, 0xf9, 0xcf, 0xc2, 0xcc, 0xdb, 0x33, - 0x7b, 0x7e, 0x76, 0x39, 0xf1, 0x62, 0xce, 0xcc, 0x72, 
0x67, 0x9f, 0x53, 0x9b, 0x99, 0xc3, 0xd3, - 0x2d, 0xba, 0x0f, 0x59, 0xc9, 0xc8, 0x06, 0x54, 0x3c, 0x33, 0x72, 0x77, 0x6e, 0x2e, 0x45, 0x27, - 0xa4, 0x64, 0xea, 0x38, 0x5e, 0x6f, 0x6b, 0x27, 0xc7, 0x15, 0xa5, 0x71, 0xf3, 0xe4, 0xb7, 0x72, - 0xea, 0x64, 0x5c, 0x56, 0x5e, 0x8c, 0xcb, 0xca, 0xcb, 0x71, 0x59, 0xf9, 0x75, 0x5c, 0x56, 0xbe, - 0x7a, 0x55, 0x4e, 0xbd, 0x78, 0x55, 0x4e, 0xbd, 0x7c, 0x55, 0x4e, 0x7d, 0xa6, 0x4f, 0xa8, 0xda, - 0x2b, 0xe2, 0xaf, 0xe0, 0xdd, 0x3f, 0x02, 0x00, 0x00, 0xff, 0xff, 0x83, 0xbe, 0x76, 0x2a, 0x76, - 0x0e, 0x00, 0x00, +var fileDescriptor_mvcc3_ed774bd3b5c68109 = []byte{ + // 1236 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xcc, 0x57, 0x41, 0x6f, 0xe3, 0x44, + 0x14, 0x8e, 0x63, 0xa7, 0x75, 0x26, 0x69, 0x93, 0xce, 0xae, 0x20, 0x5a, 0x76, 0x93, 0x92, 0x03, + 0xaa, 0x96, 0x5d, 0x07, 0xb5, 0x9c, 0x7a, 0x4b, 0xda, 0xd5, 0x6e, 0x96, 0xed, 0xb6, 0xb8, 0x69, + 0x91, 0x40, 0xc2, 0x9a, 0xd8, 0x83, 0x6b, 0xd5, 0x19, 0xbb, 0xf6, 0x24, 0xeb, 0xfc, 0x0b, 0x2e, + 0x48, 0x1c, 0x40, 0xea, 0x8f, 0xe0, 0xc0, 0x4f, 0xe8, 0x71, 0x0f, 0x1c, 0x56, 0x48, 0x44, 0x90, + 0x5e, 0xf8, 0x0d, 0xdd, 0x0b, 0x9a, 0x19, 0xc7, 0x49, 0x0a, 0x4d, 0x03, 0x85, 0x8a, 0xdb, 0xcc, + 0xfb, 0xde, 0xfb, 0xde, 0x9b, 0x97, 0xcf, 0xf3, 0x26, 0xe0, 0x7e, 0x48, 0xbd, 0x00, 0xd9, 0xb8, + 0x86, 0x89, 0xed, 0x10, 0xec, 0xb7, 0x6b, 0x9d, 0x9e, 0x69, 0x6e, 0x68, 0x7e, 0xe0, 0x51, 0x0f, + 0xde, 0x33, 0x3d, 0xf3, 0x38, 0xf0, 0x90, 0x79, 0xa4, 0xc5, 0x7e, 0xda, 0xc8, 0xef, 0x5e, 0xa9, + 0x4b, 0x1d, 0xb7, 0x76, 0xe4, 0x9a, 0x35, 0xea, 0x74, 0x70, 0x48, 0x51, 0xc7, 0x17, 0x51, 0xf7, + 0xee, 0xda, 0x9e, 0xed, 0xf1, 0x65, 0x8d, 0xad, 0x84, 0xb5, 0xfa, 0x8d, 0x0c, 0x16, 0x5b, 0x11, + 0xd9, 0xc1, 0x14, 0xc1, 0x4f, 0x41, 0xda, 0xb1, 0x4a, 0xd2, 0xaa, 0xb4, 0x96, 0x6f, 0xd4, 0xcf, + 0x06, 0x95, 0xd4, 0xcf, 0x83, 0xca, 0x86, 0xed, 0xd0, 0xa3, 0x6e, 0x5b, 0x33, 0xbd, 0x4e, 0x2d, + 0x49, 0x6b, 0xb5, 0xc7, 0xeb, 0x9a, 0x7f, 0x6c, 0xd7, 0x78, 0xd2, 
0x6e, 0xd7, 0xb1, 0xb4, 0x83, + 0x83, 0xe6, 0xf6, 0x70, 0x50, 0x49, 0x37, 0xb7, 0xf5, 0xb4, 0x63, 0xc1, 0x22, 0x90, 0x8f, 0x71, + 0xbf, 0x24, 0x33, 0x4e, 0x9d, 0x2d, 0x61, 0x15, 0x64, 0xb0, 0xef, 0x99, 0x47, 0x25, 0x65, 0x55, + 0x5a, 0xcb, 0x34, 0xf2, 0x17, 0x83, 0x8a, 0xda, 0x8a, 0xc8, 0x13, 0x66, 0xd3, 0x05, 0x04, 0x5f, + 0x80, 0xc2, 0xab, 0xc0, 0xa1, 0xd8, 0x48, 0xce, 0x50, 0xca, 0xac, 0x4a, 0x6b, 0xb9, 0xf5, 0x07, + 0xda, 0xf8, 0xe8, 0x2c, 0xa7, 0x76, 0xe4, 0x9a, 0x5a, 0x6b, 0xe4, 0xd4, 0x50, 0x58, 0xd1, 0xfa, + 0x32, 0x8f, 0x4d, 0xac, 0xf0, 0x43, 0xa0, 0xfa, 0x81, 0xe3, 0x05, 0x0e, 0xed, 0x97, 0x16, 0x78, + 0xd2, 0xc2, 0xc5, 0xa0, 0x92, 0x6b, 0x45, 0x64, 0x2f, 0x36, 0xeb, 0x89, 0x03, 0xfc, 0x00, 0xa8, + 0x21, 0x3e, 0xe9, 0x62, 0x62, 0xe2, 0xd2, 0x22, 0x77, 0x06, 0x17, 0x83, 0xca, 0x42, 0x2b, 0x22, + 0xfb, 0xf8, 0x44, 0x4f, 0x30, 0xf8, 0x0c, 0x2c, 0x75, 0x1c, 0x32, 0x51, 0x60, 0x76, 0xfe, 0x02, + 0xf3, 0x1d, 0x87, 0x24, 0xb6, 0x4d, 0xf5, 0xdb, 0xd3, 0x4a, 0xea, 0xc7, 0xd3, 0x8a, 0xf4, 0x5c, + 0x51, 0xd3, 0x45, 0xf9, 0xb9, 0xa2, 0xaa, 0xc5, 0x6c, 0xf5, 0x4b, 0x00, 0x9b, 0x36, 0xf1, 0x02, + 0x6c, 0xed, 0xe3, 0x93, 0x97, 0xdd, 0x8e, 0x8e, 0x88, 0x8d, 0xe1, 0x2a, 0xc8, 0x84, 0x14, 0x05, + 0x94, 0xff, 0x48, 0xd3, 0xa5, 0x09, 0x00, 0xde, 0x07, 0x32, 0x26, 0x56, 0x29, 0xfd, 0x27, 0x9c, + 0x99, 0x37, 0x55, 0x96, 0xe7, 0xf7, 0xd3, 0x8a, 0x54, 0xfd, 0x49, 0x01, 0xcb, 0x3b, 0x87, 0x5b, + 0x5b, 0xfb, 0x14, 0xd1, 0x70, 0x1b, 0xbb, 0x14, 0xc1, 0x87, 0x60, 0xc5, 0x45, 0x21, 0x35, 0xba, + 0xbe, 0x85, 0x28, 0x36, 0x08, 0x22, 0x5e, 0xc8, 0x13, 0x15, 0xf5, 0x02, 0x03, 0x0e, 0xb8, 0xfd, + 0x25, 0x33, 0xc3, 0x07, 0x00, 0x38, 0x84, 0x62, 0x42, 0x0d, 0x64, 0x63, 0x9e, 0xad, 0xa8, 0x67, + 0x85, 0xa5, 0x6e, 0x63, 0xf8, 0x11, 0xc8, 0xdb, 0xa6, 0xd1, 0xee, 0x53, 0x1c, 0x72, 0x07, 0xf6, + 0xfb, 0x17, 0x1b, 0xcb, 0xc3, 0x41, 0x05, 0x3c, 0xdd, 0x6a, 0x30, 0x73, 0xdd, 0xc6, 0x3a, 0xb0, + 0xcd, 0xd1, 0x9a, 0x11, 0xba, 0x4e, 0x0f, 0x8b, 0x18, 0xae, 0x0d, 0xa8, 0x67, 0x99, 0x85, 0x7b, + 0x24, 
0xb0, 0xe9, 0x75, 0x09, 0xe5, 0x62, 0x88, 0xe1, 0x2d, 0x66, 0x80, 0xef, 0x81, 0xec, 0x31, + 0xee, 0xc7, 0xc1, 0x0b, 0x1c, 0x55, 0x8f, 0x71, 0x5f, 0xc4, 0xc6, 0xa0, 0x08, 0x5d, 0x4c, 0xc0, + 0x24, 0xb2, 0x87, 0xdc, 0x38, 0x52, 0x15, 0x60, 0x0f, 0xb9, 0x49, 0x24, 0x03, 0x45, 0x64, 0x36, + 0x01, 0x45, 0xe4, 0xfb, 0x20, 0x1f, 0xb7, 0x40, 0x04, 0x03, 0x8e, 0xe7, 0x84, 0x4d, 0xc4, 0x8f, + 0x5d, 0x04, 0x45, 0x6e, 0xd2, 0x25, 0xc9, 0x1f, 0xf6, 0xc3, 0x98, 0x22, 0x2f, 0x52, 0x84, 0xfd, + 0x30, 0xc9, 0xcf, 0x40, 0x11, 0xbc, 0x94, 0x80, 0x22, 0xf2, 0x31, 0x80, 0xa6, 0x47, 0x28, 0x72, + 0x48, 0x68, 0xe0, 0x90, 0x3a, 0x1d, 0xc4, 0x28, 0x96, 0x57, 0xa5, 0x35, 0x59, 0x5f, 0x19, 0x21, + 0x4f, 0x46, 0x00, 0x5c, 0x03, 0x45, 0xd4, 0xf6, 0x02, 0x6a, 0x84, 0x3e, 0x22, 0x71, 0xbe, 0x02, + 0xa7, 0x5c, 0xe6, 0xf6, 0x7d, 0x1f, 0x11, 0x91, 0xf5, 0x63, 0xf0, 0x4e, 0x88, 0x7d, 0x14, 0x20, + 0x8a, 0x2d, 0x63, 0xaa, 0xfe, 0x22, 0xf7, 0xbf, 0x9b, 0xa0, 0xcd, 0xf1, 0x41, 0x36, 0x15, 0x2e, + 0xab, 0x5f, 0x14, 0x70, 0x87, 0xc9, 0x6a, 0x0f, 0x07, 0xa1, 0x13, 0x32, 0x94, 0x0b, 0xec, 0xff, + 0xa6, 0x2d, 0x79, 0xb6, 0xb6, 0xe4, 0x99, 0xda, 0x92, 0x67, 0x69, 0x4b, 0x9e, 0xa5, 0x2d, 0x79, + 0x96, 0xb6, 0xe4, 0x6b, 0xb4, 0x25, 0x5f, 0xaf, 0x2d, 0xf9, 0x1a, 0x6d, 0xc9, 0xb3, 0xb4, 0x25, + 0xff, 0xfb, 0xda, 0x92, 0xff, 0xa6, 0xb6, 0xe4, 0x2b, 0xb4, 0x35, 0xbe, 0xb6, 0xbe, 0x4f, 0x83, + 0x15, 0x7e, 0x15, 0xd6, 0x7d, 0xdf, 0x75, 0xb0, 0xc5, 0xd4, 0x85, 0xe1, 0x23, 0x00, 0x03, 0xf4, + 0x15, 0x35, 0x90, 0x30, 0x1a, 0x0e, 0xb1, 0x70, 0xc4, 0xe5, 0xa5, 0xe8, 0x45, 0x86, 0xc4, 0xde, + 0x4d, 0x66, 0x87, 0x1a, 0xb8, 0xe3, 0x62, 0x14, 0xe2, 0x4b, 0xee, 0x69, 0xee, 0xbe, 0xc2, 0xa1, + 0x29, 0xff, 0x43, 0x90, 0x0b, 0x58, 0x4a, 0x23, 0x64, 0x52, 0xe6, 0x7a, 0xcb, 0xad, 0xd7, 0xb4, + 0xab, 0x87, 0xb0, 0xf6, 0x17, 0x5f, 0x40, 0x7c, 0xf5, 0x03, 0xce, 0x24, 0xbe, 0x89, 0x67, 0xa0, + 0x68, 0xba, 0x5e, 0x88, 0xad, 0x89, 0x29, 0xa2, 0xcc, 0x31, 0x45, 0xf4, 0x82, 0x08, 0x9b, 0x18, + 0x21, 0x49, 0x7f, 0xbe, 0x93, 0x40, 0x91, 
0x65, 0xff, 0x8c, 0x8d, 0xc0, 0x43, 0xe4, 0x76, 0xf1, + 0xae, 0x3f, 0x1a, 0xc2, 0xd2, 0x78, 0x08, 0xd7, 0x41, 0x76, 0x9c, 0x33, 0x3d, 0xff, 0xe4, 0x1a, + 0x47, 0xc1, 0xbb, 0x20, 0xd3, 0x63, 0xfc, 0xf1, 0x6c, 0x17, 0x1b, 0xf6, 0x2d, 0xf9, 0x01, 0xee, + 0x19, 0x02, 0x52, 0x38, 0x94, 0x65, 0x16, 0x5e, 0x0b, 0xff, 0xf9, 0x92, 0xf2, 0xc4, 0x2f, 0xbc, + 0xeb, 0xc3, 0x2f, 0xc0, 0x02, 0x8d, 0x88, 0x91, 0xbc, 0x3d, 0xb6, 0x6f, 0xf6, 0xf6, 0xc8, 0xb4, + 0x22, 0xd2, 0xdc, 0xd6, 0x33, 0x34, 0x22, 0x4d, 0x0b, 0xbe, 0x0b, 0x16, 0x19, 0x39, 0x6b, 0x40, + 0x9a, 0x97, 0xc3, 0x72, 0x7d, 0x72, 0xb9, 0x07, 0xf2, 0x3f, 0xea, 0xc1, 0x2e, 0x58, 0x61, 0xdc, + 0xd3, 0x0f, 0x01, 0x65, 0x7e, 0xaa, 0x02, 0x8d, 0xc8, 0xce, 0xc4, 0x5b, 0xa0, 0xfa, 0x83, 0x04, + 0x20, 0xeb, 0x8f, 0xb8, 0x0e, 0x6f, 0xa7, 0x41, 0x37, 0xd7, 0x42, 0xf5, 0x6d, 0x5c, 0xf6, 0x96, + 0xd7, 0xe9, 0x38, 0xf4, 0x76, 0xca, 0x8e, 0x45, 0x9d, 0xbe, 0x42, 0xd4, 0xf2, 0xcd, 0x44, 0xad, + 0x5c, 0x2d, 0xea, 0xcc, 0x65, 0x51, 0xfb, 0x42, 0xd3, 0x75, 0x76, 0xd3, 0xdd, 0xca, 0xd9, 0xab, + 0x1d, 0xf1, 0x76, 0xe3, 0x19, 0x5b, 0x11, 0xf9, 0xaf, 0xd3, 0xbd, 0x95, 0xc1, 0x12, 0xcb, 0xf7, + 0xc2, 0xb3, 0x1d, 0x13, 0xb9, 0xbb, 0x3e, 0xdc, 0x01, 0x39, 0xf1, 0x40, 0x17, 0x2d, 0x91, 0x78, + 0xb3, 0x1f, 0x5d, 0x77, 0x25, 0x4e, 0x5e, 0x4a, 0x3a, 0x78, 0x95, 0xec, 0xe0, 0x1e, 0xc8, 0x0b, + 0x3a, 0x31, 0x11, 0x62, 0x15, 0x3e, 0x9e, 0x8b, 0x6f, 0xd4, 0x71, 0x5d, 0x54, 0x24, 0xb6, 0x70, + 0x1f, 0x2c, 0xc5, 0x4f, 0x8d, 0x98, 0x52, 0xe8, 0x41, 0xbb, 0x8e, 0x72, 0xfa, 0xc3, 0xd3, 0xf3, + 0xdd, 0x89, 0x3d, 0x23, 0x35, 0xb9, 0xc2, 0x47, 0xa4, 0xca, 0x7c, 0xa4, 0xd3, 0x9f, 0x85, 0x9e, + 0x37, 0x27, 0xf6, 0xec, 0xec, 0x62, 0x76, 0xc6, 0x9c, 0x99, 0xf9, 0xce, 0x3e, 0xa5, 0x36, 0x3d, + 0x87, 0xc6, 0x5b, 0xf8, 0x14, 0x64, 0x05, 0x23, 0x8d, 0x08, 0x7f, 0xb0, 0xe4, 0xd6, 0x1f, 0xce, + 0x45, 0xc7, 0xa5, 0xa4, 0xab, 0x28, 0x5e, 0x6f, 0x2a, 0x67, 0xa7, 0x15, 0xa9, 0xf1, 0xf0, 0xec, + 0xb7, 0x72, 0xea, 0x6c, 0x58, 0x96, 0x5e, 0x0f, 0xcb, 0xd2, 0x9b, 0x61, 0x59, 
0xfa, 0x75, 0x58, + 0x96, 0xbe, 0x3e, 0x2f, 0xa7, 0x5e, 0x9f, 0x97, 0x53, 0x6f, 0xce, 0xcb, 0xa9, 0xcf, 0xd5, 0x11, + 0x55, 0x7b, 0x81, 0xff, 0xa9, 0xdc, 0xf8, 0x23, 0x00, 0x00, 0xff, 0xff, 0x46, 0x22, 0xed, 0xb0, + 0xc0, 0x0e, 0x00, 0x00, } diff --git a/pkg/storage/enginepb/mvcc3.proto b/pkg/storage/enginepb/mvcc3.proto index a283b4088bed..66181e1e064f 100644 --- a/pkg/storage/enginepb/mvcc3.proto +++ b/pkg/storage/enginepb/mvcc3.proto @@ -203,6 +203,19 @@ message RangeAppliedState { // range_stats is the set of mvcc stats that accounts for the current value // of the Raft state machine. MVCCPersistentStats range_stats = 3 [(gogoproto.nullable) = false]; + + // closed_timestamp is the largest timestamp that is known to have been closed + // as of this lease applied index. This means that the current leaseholder (if + // any) and any future leaseholder will not evaluate writes at or below this + // timestamp, and also that any in-flight commands that can still apply are + // writing at higher timestamps. Non-leaseholder replicas are free to serve + // "follower reads" at or below this timestamp. + // + // TODO(andrei): Make this field not-nullable in 21.2, once all the ranges + // have a closed timestamp applied to their state (this might need a + // migration). In 21.1 we cannot write empty timestamp to disk because that + // looks like an inconsistency to the consistency-checker. + util.hlc.Timestamp closed_timestamp = 4; } // MVCCWriteValueOp corresponds to a value being written outside of a diff --git a/pkg/util/hlc/timestamp.go b/pkg/util/hlc/timestamp.go index 8a92f35190f2..af127a9c2457 100644 --- a/pkg/util/hlc/timestamp.go +++ b/pkg/util/hlc/timestamp.go @@ -196,6 +196,10 @@ func (t Timestamp) Add(wallTime int64, logical int32) Timestamp { // // Adding a positive value to a Timestamp adds the Synthetic flag. 
// s.Synthetic = true // } + // + // When addressing this TODO, remove the hack in + // propBuf.assignClosedTimestampToProposal that manually marks lease + // expirations as synthetic. return s } @@ -360,6 +364,15 @@ func (t Timestamp) UnsafeToClockTimestamp() ClockTimestamp { return ClockTimestamp(t) } +// MustToClockTimestamp casts a Timestamp to a ClockTimestamp. Panics if the +// timestamp is synthetic. See TryToClockTimestamp if you don't want to panic. +func (t Timestamp) MustToClockTimestamp() ClockTimestamp { + if t.Synthetic { + panic(fmt.Sprintf("can't convert synthetic timestamp to ClockTimestamp: %s", t)) + } + return ClockTimestamp(t) +} + // ToTimestamp upcasts a ClockTimestamp into a Timestamp. func (t ClockTimestamp) ToTimestamp() Timestamp { if t.Synthetic {