Skip to content

Commit

Permalink
Implement /v1/agent/health/service/<service id> endpoint
Browse files Browse the repository at this point in the history
This endpoint aggregates all checks related to <service id> on the agent
and returns an appropriate HTTP status code plus a string describing the
worst check.

This allows service status to be cleanly exposed to other components, hiding
the complexity of multiple checks.
This is especially useful when using consul to feed a load balancer which
would delegate health checking to the consul agent.

Exposing this endpoint on the agent is necessary to avoid a hit on
consul servers and avoid decreasing resiliency (this endpoint will work
even if there is no consul leader in the cluster).

Fix hashicorp#2488, relates to hashicorp#802

Change-Id: Ib340c62bbbba46fd4256ed31474d8ffb1762d4df
Signed-off-by: Grégoire Seux <g.seux@criteo.com>
  • Loading branch information
kamaradclimber committed Dec 5, 2017
1 parent 112c060 commit a588e73
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 0 deletions.
51 changes: 51 additions & 0 deletions agent/agent_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,57 @@ func (s *HTTPServer) AgentCheckUpdate(resp http.ResponseWriter, req *http.Reques
return nil, nil
}

// AgentHealthService reports the aggregated health of one service instance
// known to this agent.
//
// The service id is whatever follows /v1/agent/health/service/ in the request
// path. All checks in the agent state whose ServiceID equals that id are
// collected and reduced with AggregatedStatus; the resulting status is written
// as the response body and mapped onto the HTTP status code:
//
//	passing  -> 200 OK
//	warning  -> 429 Too Many Requests
//	other    -> 503 Service Unavailable
//
// An empty service id, or an id for which no check is found, yields
// 400 Bad Request with an explanatory body. Any method other than GET is
// rejected with a MethodNotAllowedError. The handler always writes the
// response itself, so on the non-error paths it returns (nil, nil).
func (s *HTTPServer) AgentHealthService(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
	if req.Method != "GET" {
		return nil, MethodNotAllowedError{req.Method, []string{"GET"}}
	}

	// The lookup key is the service id rather than the service name, because
	// several instances of the same service may be registered on this host.
	serviceID := strings.TrimPrefix(req.URL.Path, "/v1/agent/health/service/")
	if serviceID == "" {
		resp.WriteHeader(http.StatusBadRequest)
		fmt.Fprint(resp, "Missing service id")
		return nil, nil
	}

	// TODO: should we filter using acls like in AgentChecks method?
	serviceChecks := make(api.HealthChecks, 0)
	for _, check := range s.agent.state.Checks() {
		if check.ServiceID != serviceID {
			continue
		}
		// TODO: harmonize struct.HealthCheck and api.HealthCheck (or at least extract conversion function)
		serviceChecks = append(serviceChecks, &api.HealthCheck{
			Node:        check.Node,
			CheckID:     string(check.CheckID),
			Name:        check.Name,
			Status:      check.Status,
			Notes:       check.Notes,
			Output:      check.Output,
			ServiceID:   check.ServiceID,
			ServiceName: check.ServiceName,
			ServiceTags: check.ServiceTags,
		})
	}

	// No matching check at all is reported as an invalid service id.
	if len(serviceChecks) == 0 {
		resp.WriteHeader(http.StatusBadRequest)
		fmt.Fprintf(resp, "Invalid serviceID %s", serviceID)
		return nil, nil
	}

	// Anything that is neither passing nor warning is reported as unavailable.
	status := serviceChecks.AggregatedStatus()
	code := http.StatusServiceUnavailable
	switch status {
	case api.HealthPassing:
		code = http.StatusOK
	case api.HealthWarning:
		code = http.StatusTooManyRequests
	}
	resp.WriteHeader(code)
	fmt.Fprint(resp, status)
	return nil, nil
}

func (s *HTTPServer) AgentRegisterService(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
var args structs.ServiceDefinition
// Fixup the type decode of TTL or Interval if a check if provided.
Expand Down
157 changes: 157 additions & 0 deletions agent/agent_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,163 @@ func TestAgent_Checks(t *testing.T) {
}
}

// TestAgent_Health_Service registers three services with checks on a test
// agent and verifies the HTTP status code and body that AgentHealthService
// returns for a passing, a warning and a critical service, as well as for a
// service id that has no checks.
func TestAgent_Health_Service(t *testing.T) {
	t.Parallel()
	a := NewTestAgent(t.Name(), "")
	defer a.Shutdown()

	// Register the services the checks below are attached to.
	services := []*structs.NodeService{
		{ID: "mysql", Service: "mysql"},
		{ID: "mysql2", Service: "mysql2"},
		{ID: "mysql3", Service: "mysql3"},
	}
	for _, service := range services {
		if err := a.AddService(service, nil, false, ""); err != nil {
			t.Fatalf("err: %v", err)
		}
	}

	// Checks are added in this exact order.
	// NOTE(review): each pair below shares a CheckID; if the agent state keys
	// checks by CheckID, the second of each pair presumably replaces the
	// first, leaving one check per service — confirm, and consider unique ids
	// if the intent is to exercise aggregation over several checks.
	node := a.Config.NodeName
	checks := []*structs.HealthCheck{
		{Node: node, CheckID: "mysql", Name: "mysql", ServiceID: "mysql", Status: api.HealthPassing},
		{Node: node, CheckID: "mysql", Name: "mysql", ServiceID: "mysql", Status: api.HealthPassing},
		{Node: node, CheckID: "mysql2", Name: "mysql2", ServiceID: "mysql2", Status: api.HealthPassing},
		{Node: node, CheckID: "mysql2", Name: "mysql2", ServiceID: "mysql2", Status: api.HealthWarning},
		{Node: node, CheckID: "mysql3", Name: "mysql3", ServiceID: "mysql3", Status: api.HealthMaint},
		{Node: node, CheckID: "mysql3", Name: "mysql3", ServiceID: "mysql3", Status: api.HealthCritical},
	}
	for _, chk := range checks {
		if err := a.state.AddCheck(chk, ""); err != nil {
			t.Fatalf("Err: %v", err)
		}
	}

	// One subtest per expected outcome of the endpoint.
	cases := []struct {
		name string
		path string
		code int
		body string
	}{
		{"passing checks", "/v1/agent/health/service/mysql", 200, "passing"},
		{"warning checks", "/v1/agent/health/service/mysql2", 429, "warning"},
		{"critical checks", "/v1/agent/health/service/mysql3", 503, "critical"},
		{"unknown serviceid", "/v1/agent/health/service/mysql1", 400, "Invalid serviceID mysql1"},
	}
	for _, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			req, _ := http.NewRequest("GET", tc.path, nil)
			resp := httptest.NewRecorder()
			if _, err := a.srv.AgentHealthService(resp, req); err != nil {
				t.Fatalf("Err: %v", err)
			}
			if resp.Code != tc.code {
				t.Fatalf("returned bad status: %d. Body: %q", resp.Code, resp.Body.String())
			}
			if got, want := resp.Body.String(), tc.body; got != want {
				t.Fatalf("got body %q want %q", got, want)
			}
		})
	}
}

func TestAgent_Checks_ACLFilter(t *testing.T) {
t.Parallel()
a := NewTestAgent(t.Name(), TestACLConfig())
Expand Down
1 change: 1 addition & 0 deletions agent/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ func (s *HTTPServer) handler(enableDebug bool) http.Handler {
handleFuncMetrics("/v1/agent/join/", s.wrap(s.AgentJoin))
handleFuncMetrics("/v1/agent/leave", s.wrap(s.AgentLeave))
handleFuncMetrics("/v1/agent/force-leave/", s.wrap(s.AgentForceLeave))
handleFuncMetrics("/v1/agent/health/service/", s.wrap(s.AgentHealthService))
handleFuncMetrics("/v1/agent/check/register", s.wrap(s.AgentRegisterCheck))
handleFuncMetrics("/v1/agent/check/deregister/", s.wrap(s.AgentDeregisterCheck))
handleFuncMetrics("/v1/agent/check/pass/", s.wrap(s.AgentCheckPass))
Expand Down

0 comments on commit a588e73

Please sign in to comment.