Commit ae8e630
Merge #51523
51523: kvserver: improve logging for follower reads r=andreimatei a=andreimatei

Release note: None

Co-authored-by: Andrei Matei <andrei@cockroachlabs.com>
craig[bot] and andreimatei committed Jul 24, 2020
2 parents d185eac + 733bd70 commit ae8e630
Showing 1 changed file with 46 additions and 38 deletions.
pkg/kv/kvserver/replica_follower_read.go
@@ -39,61 +39,69 @@ var FollowerReadsEnabled = settings.RegisterPublicBoolSetting(
 func (r *Replica) canServeFollowerRead(
     ctx context.Context, ba *roachpb.BatchRequest, pErr *roachpb.Error,
 ) *roachpb.Error {
-    canServeFollowerRead := false
-    if lErr, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError); ok &&
+    lErr, ok := pErr.GetDetail().(*roachpb.NotLeaseHolderError)
+    eligible := ok &&
         lErr.LeaseHolder != nil && lErr.Lease.Type() == roachpb.LeaseEpoch &&
         (!ba.IsLocking() && ba.IsAllTransactional()) && // followerreadsccl.batchCanBeEvaluatedOnFollower
         (ba.Txn == nil || !ba.Txn.IsLocking()) && // followerreadsccl.txnCanPerformFollowerRead
-        FollowerReadsEnabled.Get(&r.store.cfg.Settings.SV) {
+        FollowerReadsEnabled.Get(&r.store.cfg.Settings.SV)
+
+    if !eligible {
+        // We couldn't do anything with the error, propagate it.
+        return pErr
+    }
 
-        // There's no known reason that a non-VOTER_FULL replica couldn't serve follower
-        // reads (or RangeFeed), but as of the time of writing, these are expected
-        // to be short-lived, so it's not worth working out the edge-cases. Revisit if
-        // we add long-lived learners or feel that incoming/outgoing voters also need
-        // to be able to serve follower reads.
-        repDesc, err := r.GetReplicaDescriptor()
-        if err != nil {
-            return roachpb.NewError(err)
-        }
-        if typ := repDesc.GetType(); typ != roachpb.VOTER_FULL {
-            log.Eventf(ctx, "%s replicas cannot serve follower reads", typ)
-            return pErr
-        }
+    // There's no known reason that a non-VOTER_FULL replica couldn't serve follower
+    // reads (or RangeFeed), but as of the time of writing, these are expected
+    // to be short-lived, so it's not worth working out the edge-cases. Revisit if
+    // we add long-lived learners or feel that incoming/outgoing voters also need
+    // to be able to serve follower reads.
+    repDesc, err := r.GetReplicaDescriptor()
+    if err != nil {
+        return roachpb.NewError(err)
+    }
+    if typ := repDesc.GetType(); typ != roachpb.VOTER_FULL {
+        log.Eventf(ctx, "%s replicas cannot serve follower reads", typ)
+        return pErr
+    }
 
-        ts := ba.Timestamp
-        if ba.Txn != nil {
-            ts.Forward(ba.Txn.MaxTimestamp)
-        }
+    ts := ba.Timestamp
+    if ba.Txn != nil {
+        ts.Forward(ba.Txn.MaxTimestamp)
+    }
 
-        maxClosed, _ := r.maxClosed(ctx)
-        canServeFollowerRead = ts.LessEq(maxClosed)
-        if !canServeFollowerRead {
-            // We can't actually serve the read based on the closed timestamp.
-            // Signal the clients that we want an update so that future requests can succeed.
-            r.store.cfg.ClosedTimestamp.Clients.Request(lErr.LeaseHolder.NodeID, r.RangeID)
+    maxClosed, _ := r.maxClosed(ctx)
+    canServeFollowerRead := ts.LessEq(maxClosed)
+    tsDiff := ts.GoTime().Sub(maxClosed.GoTime())
+    if !canServeFollowerRead {
+        maxTsStr := "n/a"
+        if ba.Txn != nil {
+            maxTsStr = ba.Txn.MaxTimestamp.String()
+        }
+
+        // We can't actually serve the read based on the closed timestamp.
+        // Signal the clients that we want an update so that future requests can succeed.
+        r.store.cfg.ClosedTimestamp.Clients.Request(lErr.LeaseHolder.NodeID, r.RangeID)
+        log.Eventf(ctx, "can't serve follower read; closed timestamp too low by: %s; maxClosed: %s ts: %s maxTS: %s",
+            tsDiff, maxClosed, ba.Timestamp, maxTsStr)
 
-            if false {
-                // NB: this can't go behind V(x) because the log message created by the
-                // storage might be gigantic in real clusters, and we don't want to trip it
-                // using logspy.
-                log.Warningf(ctx, "can't serve follower read for %s at epo %d, storage is %s",
-                    ba.Timestamp, lErr.Lease.Epoch,
-                    r.store.cfg.ClosedTimestamp.Storage.(*ctstorage.MultiStorage).StringForNodes(lErr.LeaseHolder.NodeID),
-                )
-            }
-        }
-    }
+        if false {
+            // NB: this can't go behind V(x) because the log message created by the
+            // storage might be gigantic in real clusters, and we don't want to trip it
+            // using logspy.
+            log.Warningf(ctx, "can't serve follower read for %s at epo %d, storage is %s",
+                ba.Timestamp, lErr.Lease.Epoch,
+                r.store.cfg.ClosedTimestamp.Storage.(*ctstorage.MultiStorage).StringForNodes(lErr.LeaseHolder.NodeID),
+            )
+        }
+    }
 
     if !canServeFollowerRead {
         // We couldn't do anything with the error, propagate it.
         return pErr
     }
 
     // This replica can serve this read!
     //
     // TODO(tschottdorf): once a read for a timestamp T has been served, the replica may
     // serve reads for that and smaller timestamps forever.
-    log.Event(ctx, "serving via follower read")
+    log.Eventf(ctx, "serving via follower read; query timestamp below closed timestamp by %s", -tsDiff)
     r.store.metrics.FollowerReadsCount.Inc(1)
     return nil
 }
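
The behavioral change is the pair of trace events keyed off tsDiff = ts - maxClosed: a rejected read now logs how far the closed timestamp lags the query timestamp, and a served read logs the margin by which the query timestamp sits below it. The sketch below illustrates that sign convention in isolation; it is my illustration rather than CockroachDB code (time.Time stands in for hlc.Timestamp, and describeFollowerRead is a hypothetical helper):

package main

import (
    "fmt"
    "time"
)

// describeFollowerRead mirrors the commit's sign convention:
// tsDiff = queryTS - maxClosed. When the closed timestamp has not caught up
// to the query timestamp, tsDiff is positive and is reported as the lag;
// otherwise the read can be served and -tsDiff (also positive) is the margin.
func describeFollowerRead(queryTS, maxClosed time.Time) string {
    tsDiff := queryTS.Sub(maxClosed)
    canServe := !queryTS.After(maxClosed) // stand-in for ts.LessEq(maxClosed)
    if !canServe {
        return fmt.Sprintf("can't serve follower read; closed timestamp too low by: %s", tsDiff)
    }
    return fmt.Sprintf("serving via follower read; query timestamp below closed timestamp by %s", -tsDiff)
}

func main() {
    now := time.Now()
    // Closed timestamp lags the query timestamp by 3s: the read is rejected.
    fmt.Println(describeFollowerRead(now, now.Add(-3*time.Second)))
    // Closed timestamp leads the query timestamp by 6s: the read is served.
    fmt.Println(describeFollowerRead(now.Add(-10*time.Second), now.Add(-4*time.Second)))
}

Tracking one signed duration and negating it in the serving message keeps both log lines positive and directly comparable, which is presumably why the commit logs -tsDiff rather than recomputing the difference.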