Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhance health check endpoint to support serializable request #13399

Merged
merged 1 commit into from
Nov 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG-3.6.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ See [code changes](https://github.com/etcd-io/etcd/compare/v3.5.0...v3.6.0).
- Add [`etcd --log-format`](https://github.com/etcd-io/etcd/pull/13339) flag to support log format.
- Fix [non mutating requests pass through quotaKVServer when NOSPACE](https://github.com/etcd-io/etcd/pull/13435)
- Fix [exclude the same alarm type activated by multiple peers](https://github.com/etcd-io/etcd/pull/13467).
- Fix [Provide a better liveness probe for when etcd runs as a Kubernetes pod](https://github.com/etcd-io/etcd/pull/13399)

### tools/benchmark

Expand Down
31 changes: 21 additions & 10 deletions server/etcdserver/api/etcdhttp/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,16 @@ const (
// HandleMetricsHealth registers metrics and health handlers.
func HandleMetricsHealth(lg *zap.Logger, mux *http.ServeMux, srv etcdserver.ServerV2) {
mux.Handle(PathMetrics, promhttp.Handler())
mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet) Health { return checkV2Health(lg, srv, excludedAlarms) }))
mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health { return checkV2Health(lg, srv, excludedAlarms) }))
}

// HandleMetricsHealthForV3 registers metrics and health handlers. it checks health by using v3 range request
// and its corresponding timeout.
func HandleMetricsHealthForV3(lg *zap.Logger, mux *http.ServeMux, srv *etcdserver.EtcdServer) {
mux.Handle(PathMetrics, promhttp.Handler())
mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet) Health { return checkV3Health(lg, srv, excludedAlarms) }))
mux.Handle(PathHealth, NewHealthHandler(lg, func(excludedAlarms AlarmSet, serializable bool) Health {
return checkV3Health(lg, srv, excludedAlarms, serializable)
}))
}

// HandlePrometheus registers prometheus handler on '/metrics'.
Expand All @@ -56,7 +58,7 @@ func HandlePrometheus(mux *http.ServeMux) {
}

// NewHealthHandler handles '/health' requests.
func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet) Health) http.HandlerFunc {
func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet, Serializable bool) Health) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodGet {
w.Header().Set("Allow", http.MethodGet)
Expand All @@ -65,7 +67,12 @@ func NewHealthHandler(lg *zap.Logger, hfunc func(excludedAlarms AlarmSet) Health
return
}
excludedAlarms := getExcludedAlarms(r)
h := hfunc(excludedAlarms)
// Passing the query parameter "serializable=true" ensures that the
// health of the local etcd is checked vs the health of the cluster.
// This is useful for probes attempting to validate the liveness of
// the etcd process vs readiness of the cluster to serve requests.
serializableFlag := getSerializableFlag(r)
h := hfunc(excludedAlarms, serializableFlag)
defer func() {
if h.Health == "true" {
healthSuccess.Inc()
Expand Down Expand Up @@ -128,9 +135,13 @@ func getExcludedAlarms(r *http.Request) (alarms AlarmSet) {
return alarms
}

func getSerializableFlag(r *http.Request) bool {
return r.URL.Query().Get("serializable") == "true"
}

// TODO: etcdserver.ErrNoLeader in health API

func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet) Health {
func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet, serializable bool) Health {
h := Health{}
h.Health = "true"
as := srv.Alarms()
Expand All @@ -156,7 +167,7 @@ func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSe
}
}

if uint64(srv.Leader()) == raft.None {
if !serializable && (uint64(srv.Leader()) == raft.None) {
h.Health = "false"
h.Reason = "RAFT NO LEADER"
lg.Warn("serving /health false; no leader")
Expand All @@ -166,7 +177,7 @@ func checkHealth(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSe
}

func checkV2Health(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms AlarmSet) (h Health) {
if h = checkHealth(lg, srv, excludedAlarms); h.Health != "true" {
if h = checkHealth(lg, srv, excludedAlarms, false); h.Health != "true" {
return
}
ctx, cancel := context.WithTimeout(context.Background(), time.Second)
Expand All @@ -182,12 +193,12 @@ func checkV2Health(lg *zap.Logger, srv etcdserver.ServerV2, excludedAlarms Alarm
return
}

func checkV3Health(lg *zap.Logger, srv *etcdserver.EtcdServer, excludedAlarms AlarmSet) (h Health) {
if h = checkHealth(lg, srv, excludedAlarms); h.Health != "true" {
func checkV3Health(lg *zap.Logger, srv *etcdserver.EtcdServer, excludedAlarms AlarmSet, serializable bool) (h Health) {
if h = checkHealth(lg, srv, excludedAlarms, serializable); h.Health != "true" {
return
}
ctx, cancel := context.WithTimeout(context.Background(), srv.Cfg.ReqTimeout())
_, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1})
_, err := srv.Range(ctx, &etcdserverpb.RangeRequest{KeysOnly: true, Limit: 1, Serializable: serializable})
cancel()
if err != nil && err != auth.ErrUserEmpty && err != auth.ErrPermissionDenied {
h.Health = "false"
Expand Down
4 changes: 2 additions & 2 deletions server/proxy/grpcproxy/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@ func HandleHealth(lg *zap.Logger, mux *http.ServeMux, c *clientv3.Client) {
if lg == nil {
lg = zap.NewNop()
}
mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet) etcdhttp.Health { return checkHealth(c) }))
mux.Handle(etcdhttp.PathHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet, serializable bool) etcdhttp.Health { return checkHealth(c) }))
}

// HandleProxyHealth registers health handler on '/proxy/health'.
func HandleProxyHealth(lg *zap.Logger, mux *http.ServeMux, c *clientv3.Client) {
if lg == nil {
lg = zap.NewNop()
}
mux.Handle(etcdhttp.PathProxyHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet) etcdhttp.Health { return checkProxyHealth(c) }))
mux.Handle(etcdhttp.PathProxyHealth, etcdhttp.NewHealthHandler(lg, func(excludedAlarms etcdhttp.AlarmSet, serializable bool) etcdhttp.Health { return checkProxyHealth(c) }))
}

func checkHealth(c *clientv3.Client) etcdhttp.Health {
Expand Down