From c1c681adc3583d53a47c80b8c18e61a9e3d389c3 Mon Sep 17 00:00:00 2001 From: Ankur Gargi Date: Tue, 3 Nov 2020 22:01:36 -0500 Subject: [PATCH] server: Added config parameter experimental-warning-apply-duration --- server/embed/config.go | 9 +++++++-- server/embed/etcd.go | 1 + server/etcdmain/config.go | 1 + server/etcdmain/help.go | 2 ++ server/etcdserver/apply.go | 5 ++--- server/etcdserver/apply_v2.go | 2 +- server/etcdserver/config.go | 2 ++ server/etcdserver/util.go | 19 ++++++++++--------- server/etcdserver/v3_server.go | 4 ++-- 9 files changed, 28 insertions(+), 17 deletions(-) diff --git a/server/embed/config.go b/server/embed/config.go index a77931d5afd..ed11c18ab1e 100644 --- a/server/embed/config.go +++ b/server/embed/config.go @@ -52,6 +52,7 @@ const ( DefaultMaxSnapshots = 5 DefaultMaxWALs = 5 DefaultMaxTxnOps = uint(128) + DefaultWarningApplyDuration = 100 * time.Millisecond DefaultMaxRequestBytes = 1.5 * 1024 * 1024 DefaultGRPCKeepAliveMinTime = 5 * time.Second DefaultGRPCKeepAliveInterval = 2 * time.Hour @@ -289,6 +290,9 @@ type Config struct { ExperimentalEnableLeaseCheckpoint bool `json:"experimental-enable-lease-checkpoint"` ExperimentalCompactionBatchLimit int `json:"experimental-compaction-batch-limit"` ExperimentalWatchProgressNotifyInterval time.Duration `json:"experimental-watch-progress-notify-interval"` + // ExperimentalWarningApplyDuration is the time duration after which a warning is generated if applying request + // takes more time than this value. + ExperimentalWarningApplyDuration time.Duration `json:"experimental-warning-apply-duration"` // ForceNewCluster starts a new cluster even if previously started; unsafe. ForceNewCluster bool `json:"force-new-cluster"` @@ -382,8 +386,9 @@ func NewConfig() *Config { SnapshotCount: etcdserver.DefaultSnapshotCount, SnapshotCatchUpEntries: etcdserver.DefaultSnapshotCatchUpEntries, - MaxTxnOps: DefaultMaxTxnOps, - MaxRequestBytes: DefaultMaxRequestBytes, + MaxTxnOps: DefaultMaxTxnOps, + MaxRequestBytes: DefaultMaxRequestBytes, + ExperimentalWarningApplyDuration: DefaultWarningApplyDuration, GRPCKeepAliveMinTime: DefaultGRPCKeepAliveMinTime, GRPCKeepAliveInterval: DefaultGRPCKeepAliveInterval, diff --git a/server/embed/etcd.go b/server/embed/etcd.go index 79bfccbe880..0b896f9b5d2 100644 --- a/server/embed/etcd.go +++ b/server/embed/etcd.go @@ -202,6 +202,7 @@ func StartEtcd(inCfg *Config) (e *Etcd, err error) { CompactionBatchLimit: cfg.ExperimentalCompactionBatchLimit, WatchProgressNotifyInterval: cfg.ExperimentalWatchProgressNotifyInterval, DowngradeCheckTime: cfg.ExperimentalDowngradeCheckTime, + WarningApplyDuration: cfg.ExperimentalWarningApplyDuration, } print(e.cfg.logger, *cfg, srvcfg, memberInitialized) if e.Server, err = etcdserver.NewServer(srvcfg); err != nil { diff --git a/server/etcdmain/config.go b/server/etcdmain/config.go index 67b37deebfc..ef7dcc2831f 100644 --- a/server/etcdmain/config.go +++ b/server/etcdmain/config.go @@ -254,6 +254,7 @@ func newConfig() *config { fs.IntVar(&cfg.ec.ExperimentalCompactionBatchLimit, "experimental-compaction-batch-limit", cfg.ec.ExperimentalCompactionBatchLimit, "Sets the maximum revisions deleted in each compaction batch.") fs.DurationVar(&cfg.ec.ExperimentalWatchProgressNotifyInterval, "experimental-watch-progress-notify-interval", cfg.ec.ExperimentalWatchProgressNotifyInterval, "Duration of periodic watch progress notifications.") fs.DurationVar(&cfg.ec.ExperimentalDowngradeCheckTime, "experimental-downgrade-check-time", cfg.ec.ExperimentalDowngradeCheckTime, "Duration of time between two downgrade status check.") + fs.DurationVar(&cfg.ec.ExperimentalWarningApplyDuration, "experimental-warning-apply-duration", cfg.ec.ExperimentalWarningApplyDuration, "Time duration after which a warning is generated if request takes more time.") // unsafe fs.BoolVar(&cfg.ec.UnsafeNoFsync, "unsafe-no-fsync", false, "Disables fsync, unsafe, will cause data loss.") diff --git a/server/etcdmain/help.go b/server/etcdmain/help.go index 81682573ec2..0834881ab6c 100644 --- a/server/etcdmain/help.go +++ b/server/etcdmain/help.go @@ -214,6 +214,8 @@ Experimental feature: Skip verification of SAN field in client certificate for peer connections. --experimental-watch-progress-notify-interval '10m' Duration of periodical watch progress notification. + --experimental-warning-apply-duration '100ms' + Warning is generated if requests take more than this duration. Unsafe feature: --force-new-cluster 'false' diff --git a/server/etcdserver/apply.go b/server/etcdserver/apply.go index 352e13dea3b..30a12ba2458 100644 --- a/server/etcdserver/apply.go +++ b/server/etcdserver/apply.go @@ -39,8 +39,7 @@ import ( ) const ( - warnApplyDuration = 100 * time.Millisecond - v3Version = "v3" + v3Version = "v3" ) type applyResult struct { @@ -137,7 +136,7 @@ func (a *applierV3backend) Apply(r *pb.InternalRaftRequest) *applyResult { defer func(start time.Time) { success := ar.err == nil || ar.err == mvcc.ErrCompacted applySec.WithLabelValues(v3Version, op, strconv.FormatBool(success)).Observe(time.Since(start).Seconds()) - warnOfExpensiveRequest(a.s.getLogger(), start, &pb.InternalRaftStringer{Request: r}, ar.resp, ar.err) + warnOfExpensiveRequest(a.s.getLogger(), a.s.Cfg.WarningApplyDuration, start, &pb.InternalRaftStringer{Request: r}, ar.resp, ar.err) if !success { warnOfFailedRequest(a.s.getLogger(), start, &pb.InternalRaftStringer{Request: r}, ar.resp, ar.err) } diff --git a/server/etcdserver/apply_v2.go b/server/etcdserver/apply_v2.go index 386f325ca93..73c735d650f 100644 --- a/server/etcdserver/apply_v2.go +++ b/server/etcdserver/apply_v2.go @@ -120,7 +120,7 @@ func (s *EtcdServer) applyV2Request(r *RequestV2) (resp Response) { defer func(start time.Time) { success := resp.Err == nil applySec.WithLabelValues(v2Version, r.Method, strconv.FormatBool(success)).Observe(time.Since(start).Seconds()) - warnOfExpensiveRequest(s.getLogger(), start, stringer, nil, nil) + warnOfExpensiveRequest(s.getLogger(), s.Cfg.WarningApplyDuration, start, stringer, nil, nil) }(time.Now()) switch r.Method { diff --git a/server/etcdserver/config.go b/server/etcdserver/config.go index e5322829f52..49fe04005d4 100644 --- a/server/etcdserver/config.go +++ b/server/etcdserver/config.go @@ -119,6 +119,8 @@ type ServerConfig struct { // MaxRequestBytes is the maximum request size to send over raft. MaxRequestBytes uint + WarningApplyDuration time.Duration + StrictReconfigCheck bool // ClientCertAuthEnabled is true when cert has been signed by the client CA. diff --git a/server/etcdserver/util.go b/server/etcdserver/util.go index c82c90abdc1..8c81dac9093 100644 --- a/server/etcdserver/util.go +++ b/server/etcdserver/util.go @@ -103,12 +103,12 @@ func (nc *notifier) notify(err error) { close(nc.c) } -func warnOfExpensiveRequest(lg *zap.Logger, now time.Time, reqStringer fmt.Stringer, respMsg proto.Message, err error) { +func warnOfExpensiveRequest(lg *zap.Logger, warningApplyDuration time.Duration, now time.Time, reqStringer fmt.Stringer, respMsg proto.Message, err error) { var resp string if !isNil(respMsg) { resp = fmt.Sprintf("size:%d", proto.Size(respMsg)) } - warnOfExpensiveGenericRequest(lg, now, reqStringer, "", resp, err) + warnOfExpensiveGenericRequest(lg, warningApplyDuration, now, reqStringer, "", resp, err) } func warnOfFailedRequest(lg *zap.Logger, now time.Time, reqStringer fmt.Stringer, respMsg proto.Message, err error) { @@ -126,7 +126,7 @@ func warnOfFailedRequest(lg *zap.Logger, now time.Time, reqStringer fmt.Stringer ) } -func warnOfExpensiveReadOnlyTxnRequest(lg *zap.Logger, now time.Time, r *pb.TxnRequest, txnResponse *pb.TxnResponse, err error) { +func warnOfExpensiveReadOnlyTxnRequest(lg *zap.Logger, warningApplyDuration time.Duration, now time.Time, r *pb.TxnRequest, txnResponse *pb.TxnResponse, err error) { reqStringer := pb.NewLoggableTxnRequest(r) var resp string if !isNil(txnResponse) { @@ -141,24 +141,25 @@ func warnOfExpensiveReadOnlyTxnRequest(lg *zap.Logger, now time.Time, r *pb.TxnR } resp = fmt.Sprintf("responses:<%s> size:%d", strings.Join(resps, " "), proto.Size(txnResponse)) } - warnOfExpensiveGenericRequest(lg, now, reqStringer, "read-only txn ", resp, err) + warnOfExpensiveGenericRequest(lg, warningApplyDuration, now, reqStringer, "read-only txn ", resp, err) } -func warnOfExpensiveReadOnlyRangeRequest(lg *zap.Logger, now time.Time, reqStringer fmt.Stringer, rangeResponse *pb.RangeResponse, err error) { +func warnOfExpensiveReadOnlyRangeRequest(lg *zap.Logger, warningApplyDuration time.Duration, now time.Time, reqStringer fmt.Stringer, rangeResponse *pb.RangeResponse, err error) { var resp string if !isNil(rangeResponse) { resp = fmt.Sprintf("range_response_count:%d size:%d", len(rangeResponse.Kvs), proto.Size(rangeResponse)) } - warnOfExpensiveGenericRequest(lg, now, reqStringer, "read-only range ", resp, err) + warnOfExpensiveGenericRequest(lg, warningApplyDuration, now, reqStringer, "read-only range ", resp, err) } -func warnOfExpensiveGenericRequest(lg *zap.Logger, now time.Time, reqStringer fmt.Stringer, prefix string, resp string, err error) { +func warnOfExpensiveGenericRequest(lg *zap.Logger, warningApplyDuration time.Duration, now time.Time, reqStringer fmt.Stringer, prefix string, resp string, err error) { d := time.Since(now) - if d > warnApplyDuration { + + if d > warningApplyDuration { lg.Warn( "apply request took too long", zap.Duration("took", d), - zap.Duration("expected-duration", warnApplyDuration), + zap.Duration("expected-duration", warningApplyDuration), zap.String("prefix", prefix), zap.String("request", reqStringer.String()), zap.String("response", resp), diff --git a/server/etcdserver/v3_server.go b/server/etcdserver/v3_server.go index a7797eb4711..4130d8e081f 100644 --- a/server/etcdserver/v3_server.go +++ b/server/etcdserver/v3_server.go @@ -102,7 +102,7 @@ func (s *EtcdServer) Range(ctx context.Context, r *pb.RangeRequest) (*pb.RangeRe var resp *pb.RangeResponse var err error defer func(start time.Time) { - warnOfExpensiveReadOnlyRangeRequest(s.getLogger(), start, r, resp, err) + warnOfExpensiveReadOnlyRangeRequest(s.getLogger(), s.Cfg.WarningApplyDuration, start, r, resp, err) if resp != nil { trace.AddField( traceutil.Field{Key: "response_count", Value: len(resp.Kvs)}, @@ -169,7 +169,7 @@ func (s *EtcdServer) Txn(ctx context.Context, r *pb.TxnRequest) (*pb.TxnResponse } defer func(start time.Time) { - warnOfExpensiveReadOnlyTxnRequest(s.getLogger(), start, r, resp, err) + warnOfExpensiveReadOnlyTxnRequest(s.getLogger(), s.Cfg.WarningApplyDuration, start, r, resp, err) trace.LogIfLong(traceThreshold) }(time.Now())