diff --git a/command/agent/agent.go b/command/agent/agent.go
index d5ef05af4c35..9454dba4c0ea 100644
--- a/command/agent/agent.go
+++ b/command/agent/agent.go
@@ -29,6 +29,10 @@ const (
 		"If Consul was not shut down properly, the socket file may " +
 		"be left behind. If the path looks correct, remove the file " +
 		"and try again."
+
+	// IDs (per-service prefix and node-wide ID) of the faux maintenance mode checks
+	serviceMaintCheckPrefix = "_service_maintenance"
+	nodeMaintCheckID        = "_node_maintenance"
 )
 
 /*
@@ -995,3 +999,86 @@ func (a *Agent) unloadChecks() error {
 
 	return nil
 }
+
+// serviceMaintCheckID returns the ID of a given service's maintenance check
+func serviceMaintCheckID(serviceID string) string {
+	return fmt.Sprintf("%s:%s", serviceMaintCheckPrefix, serviceID)
+}
+
+// EnableServiceMaintenance will register a false health check against the given
+// service ID with critical status. This will exclude the service from queries.
+func (a *Agent) EnableServiceMaintenance(serviceID string) error {
+	service, ok := a.state.Services()[serviceID]
+	if !ok {
+		return fmt.Errorf("No service registered with ID %q", serviceID)
+	}
+
+	// Check if maintenance mode is not already enabled
+	checkID := serviceMaintCheckID(serviceID)
+	if _, ok := a.state.Checks()[checkID]; ok {
+		return nil
+	}
+
+	// Create and register the critical health check
+	check := &structs.HealthCheck{
+		Node:        a.config.NodeName,
+		CheckID:     checkID,
+		Name:        "Service Maintenance Mode",
+		Notes:       "Maintenance mode is enabled for this service",
+		ServiceID:   service.ID,
+		ServiceName: service.Service,
+		Status:      structs.HealthCritical,
+	}
+	a.AddCheck(check, nil, true)
+	a.logger.Printf("[INFO] agent: service %q entered maintenance mode", serviceID)
+
+	return nil
+}
+
+// DisableServiceMaintenance will deregister the fake maintenance mode check
+// if the service has been marked as in maintenance.
+func (a *Agent) DisableServiceMaintenance(serviceID string) error {
+	if _, ok := a.state.Services()[serviceID]; !ok {
+		return fmt.Errorf("No service registered with ID %q", serviceID)
+	}
+
+	// Check if maintenance mode is enabled
+	checkID := serviceMaintCheckID(serviceID)
+	if _, ok := a.state.Checks()[checkID]; !ok {
+		return nil
+	}
+
+	// Deregister the maintenance check
+	a.RemoveCheck(checkID, true)
+	a.logger.Printf("[INFO] agent: service %q left maintenance mode", serviceID)
+
+	return nil
+}
+
+// EnableNodeMaintenance places a node into maintenance mode.
+func (a *Agent) EnableNodeMaintenance() {
+	// Ensure node maintenance is not already enabled
+	if _, ok := a.state.Checks()[nodeMaintCheckID]; ok {
+		return
+	}
+
+	// Create and register the node maintenance check
+	check := &structs.HealthCheck{
+		Node:    a.config.NodeName,
+		CheckID: nodeMaintCheckID,
+		Name:    "Node Maintenance Mode",
+		Notes:   "Maintenance mode is enabled for this node",
+		Status:  structs.HealthCritical,
+	}
+	a.AddCheck(check, nil, true)
+	a.logger.Printf("[INFO] agent: node entered maintenance mode")
+}
+
+// DisableNodeMaintenance removes a node from maintenance mode
+func (a *Agent) DisableNodeMaintenance() {
+	if _, ok := a.state.Checks()[nodeMaintCheckID]; !ok {
+		return
+	}
+	a.RemoveCheck(nodeMaintCheckID, true)
+	a.logger.Printf("[INFO] agent: node left maintenance mode")
+}
diff --git a/command/agent/agent_endpoint.go b/command/agent/agent_endpoint.go
index 4ee70343fd97..63fa0d21f1c4 100644
--- a/command/agent/agent_endpoint.go
+++ b/command/agent/agent_endpoint.go
@@ -176,3 +176,98 @@ func (s *HTTPServer) AgentDeregisterService(resp http.ResponseWriter, req *http.
 	serviceID := strings.TrimPrefix(req.URL.Path, "/v1/agent/service/deregister/")
 	return nil, s.agent.RemoveService(serviceID, true)
 }
+
+// AgentServiceMaintenance toggles maintenance mode for the service whose ID
+// follows /v1/agent/service/maintenance/ in the request path. Only PUT is
+// accepted (405 otherwise); the ?enable query parameter must be "true" or
+// "false" (400 otherwise, as for an empty service ID). An error from the agent
+// is reported as a 404 carrying the error text.
+func (s *HTTPServer) AgentServiceMaintenance(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
+	// Only PUT supported
+	if req.Method != "PUT" {
+		resp.WriteHeader(405)
+		return nil, nil
+	}
+
+	// Ensure we have a service ID
+	serviceID := strings.TrimPrefix(req.URL.Path, "/v1/agent/service/maintenance/")
+	if serviceID == "" {
+		resp.WriteHeader(400)
+		resp.Write([]byte("Missing service ID"))
+		return nil, nil
+	}
+
+	// Ensure we have some action
+	params := req.URL.Query()
+	if _, ok := params["enable"]; !ok {
+		resp.WriteHeader(400)
+		resp.Write([]byte("Missing value for enable"))
+		return nil, nil
+	}
+
+	var enable bool
+	raw := params.Get("enable")
+	switch raw {
+	case "true":
+		enable = true
+	case "false":
+		enable = false
+	default:
+		resp.WriteHeader(400)
+		resp.Write([]byte(fmt.Sprintf("Invalid value for enable: %q", raw)))
+		return nil, nil
+	}
+
+	// Like the 400/405 paths above, a failure is written to resp right here,
+	// so nil is returned in place of the error once the 404 has been sent.
+	var err error
+	if enable {
+		err = s.agent.EnableServiceMaintenance(serviceID)
+	} else {
+		err = s.agent.DisableServiceMaintenance(serviceID)
+	}
+	if err != nil {
+		resp.WriteHeader(404)
+		resp.Write([]byte(err.Error()))
+	}
+	return nil, nil
+}
+
+// AgentNodeMaintenance toggles maintenance mode for the local node. Only PUT is
+// accepted (405 otherwise) and the ?enable query parameter must be "true" or
+// "false" (400 otherwise).
+func (s *HTTPServer) AgentNodeMaintenance(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
+	// Only PUT supported
+	if req.Method != "PUT" {
+		resp.WriteHeader(405)
+		return nil, nil
+	}
+
+	// Ensure we have some action
+	params := req.URL.Query()
+	if _, ok := params["enable"]; !ok {
+		resp.WriteHeader(400)
+		resp.Write([]byte("Missing value for enable"))
+		return nil, nil
+	}
+
+	var enable bool
+	raw := params.Get("enable")
+	switch raw {
+	case "true":
+		enable = true
+	case "false":
+		enable = false
+	default:
+		resp.WriteHeader(400)
+		resp.Write([]byte(fmt.Sprintf("Invalid value for enable: %q", raw)))
+		return nil, nil
+	}
+
+	if enable {
+		s.agent.EnableNodeMaintenance()
+	} else {
+		s.agent.DisableNodeMaintenance()
+	}
+	return nil, nil
+}
diff --git a/command/agent/agent_endpoint_test.go b/command/agent/agent_endpoint_test.go
index 1b266347e767..eba6c28e79cd 100644
--- a/command/agent/agent_endpoint_test.go
+++ b/command/agent/agent_endpoint_test.go
@@ -7,6 +7,7 @@ import (
 	"github.com/hashicorp/consul/testutil"
 	"github.com/hashicorp/serf/serf"
 	"net/http"
+	"net/http/httptest"
 	"os"
 	"testing"
 	"time"
@@ -492,3 +493,193 @@ func TestHTTPAgentDeregisterService(t *testing.T) {
 		t.Fatalf("have test check")
 	}
 }
+
+func TestHTTPAgent_ServiceMaintenanceEndpoint_BadRequest(t *testing.T) {
+	dir, srv := makeHTTPServer(t)
+	defer os.RemoveAll(dir)
+	defer srv.Shutdown()
+	defer srv.agent.Shutdown()
+
+	// Fails on non-PUT
+	req, _ := http.NewRequest("GET", "/v1/agent/service/maintenance/test?enable=true", nil)
+	resp := httptest.NewRecorder()
+	if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if resp.Code != 405 {
+		t.Fatalf("expected 405, got %d", resp.Code)
+	}
+
+	// Fails when no enable flag provided
+	req, _ = http.NewRequest("PUT", "/v1/agent/service/maintenance/test", nil)
+	resp = httptest.NewRecorder()
+	if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if resp.Code != 400 {
+		t.Fatalf("expected 400, got %d", resp.Code)
+	}
+
+	// Fails when no service ID provided
+	req, _ = http.NewRequest("PUT", "/v1/agent/service/maintenance/?enable=true", nil)
+	resp = httptest.NewRecorder()
+	if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if resp.Code != 400 {
+		t.Fatalf("expected 400, got %d", resp.Code)
+	}
+
+	// Fails when bad service ID provided; the handler reports it as a 404 itself
+	req, _ = http.NewRequest("PUT", "/v1/agent/service/maintenance/_nope_?enable=true", nil)
+	resp = httptest.NewRecorder()
+	if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if resp.Code != 404 {
+		t.Fatalf("expected 404, got %d", resp.Code)
+	}
+}
+
+func TestHTTPAgent_EnableServiceMaintenance(t *testing.T) {
+	dir, srv := makeHTTPServer(t)
+	defer os.RemoveAll(dir)
+	defer srv.Shutdown()
+	defer srv.agent.Shutdown()
+
+	// Register the service
+	service := &structs.NodeService{
+		ID:      "test",
+		Service: "test",
+	}
+	if err := srv.agent.AddService(service, nil, false); err != nil {
+		t.Fatalf("err: %v", err)
+	}
+
+	// Force the service into maintenance mode
+	req, _ := http.NewRequest("PUT", "/v1/agent/service/maintenance/test?enable=true", nil)
+	resp := httptest.NewRecorder()
+	if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if resp.Code != 200 {
+		t.Fatalf("expected 200, got %d", resp.Code)
+	}
+
+	// Ensure the maintenance check was registered
+	checkID := serviceMaintCheckID("test")
+	if _, ok := srv.agent.state.Checks()[checkID]; !ok {
+		t.Fatalf("should have registered maintenance check")
+	}
+}
+
+func TestHTTPAgent_DisableServiceMaintenance(t *testing.T) {
+	dir, srv := makeHTTPServer(t)
+	defer os.RemoveAll(dir)
+	defer srv.Shutdown()
+	defer srv.agent.Shutdown()
+
+	// Register the service
+	service := &structs.NodeService{
+		ID:      "test",
+		Service: "test",
+	}
+	if err := srv.agent.AddService(service, nil, false); err != nil {
+		t.Fatalf("err: %v", err)
+	}
+
+	// Force the service into maintenance mode
+	if err := srv.agent.EnableServiceMaintenance("test"); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Leave maintenance mode
+	req, _ := http.NewRequest("PUT", "/v1/agent/service/maintenance/test?enable=false", nil)
+	resp := httptest.NewRecorder()
+	if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if resp.Code != 200 {
+		t.Fatalf("expected 200, got %d", resp.Code)
+	}
+
+	// Ensure the maintenance check was removed
+	checkID := serviceMaintCheckID("test")
+	if _, ok := srv.agent.state.Checks()[checkID]; ok {
+		t.Fatalf("should have removed maintenance check")
+	}
+}
+
+func TestHTTPAgent_NodeMaintenanceEndpoint_BadRequest(t *testing.T) {
+	dir, srv := makeHTTPServer(t)
+	defer os.RemoveAll(dir)
+	defer srv.Shutdown()
+	defer srv.agent.Shutdown()
+
+	// Fails on non-PUT
+	req, _ := http.NewRequest("GET", "/v1/agent/self/maintenance?enable=true", nil)
+	resp := httptest.NewRecorder()
+	if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if resp.Code != 405 {
+		t.Fatalf("expected 405, got %d", resp.Code)
+	}
+
+	// Fails when no enable flag provided
+	req, _ = http.NewRequest("PUT", "/v1/agent/self/maintenance", nil)
+	resp = httptest.NewRecorder()
+	if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if resp.Code != 400 {
+		t.Fatalf("expected 400, got %d", resp.Code)
+	}
+}
+
+func TestHTTPAgent_EnableNodeMaintenance(t *testing.T) {
+	dir, srv := makeHTTPServer(t)
+	defer os.RemoveAll(dir)
+	defer srv.Shutdown()
+	defer srv.agent.Shutdown()
+
+	// Force the node into maintenance mode
+	req, _ := http.NewRequest("PUT", "/v1/agent/self/maintenance?enable=true", nil)
+	resp := httptest.NewRecorder()
+	if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if resp.Code != 200 {
+		t.Fatalf("expected 200, got %d", resp.Code)
+	}
+
+	// Ensure the maintenance check was registered
+	if _, ok := srv.agent.state.Checks()[nodeMaintCheckID]; !ok {
+		t.Fatalf("should have registered maintenance check")
+	}
+}
+
+func TestHTTPAgent_DisableNodeMaintenance(t *testing.T) {
+	dir, srv := makeHTTPServer(t)
+	defer os.RemoveAll(dir)
+	defer srv.Shutdown()
+	defer srv.agent.Shutdown()
+
+	// Force the node into maintenance mode
+	srv.agent.EnableNodeMaintenance()
+
+	// Leave maintenance mode
+	req, _ := http.NewRequest("PUT", "/v1/agent/self/maintenance?enable=false", nil)
+	resp := httptest.NewRecorder()
+	if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+	if resp.Code != 200 {
+		t.Fatalf("expected 200, got %d", resp.Code)
+	}
+
+	// Ensure the maintenance check was removed
+	if _, ok := srv.agent.state.Checks()[nodeMaintCheckID]; ok {
+		t.Fatalf("should have removed maintenance check")
+	}
+}
diff --git a/command/agent/agent_test.go b/command/agent/agent_test.go
index 0add36702c70..37e2ebc32318 100644
--- a/command/agent/agent_test.go
+++ b/command/agent/agent_test.go
@@ -781,3 +781,66 @@ func TestAgent_unloadServices(t *testing.T) {
 		t.Fatalf("consul service should not be removed")
 	}
 }
+
+func TestAgent_ServiceMaintenanceMode(t *testing.T) {
+	config := nextConfig()
+	dir, agent := makeAgent(t, config)
+	defer os.RemoveAll(dir)
+	defer agent.Shutdown()
+
+	svc := &structs.NodeService{
+		ID:      "redis",
+		Service: "redis",
+		Tags:    []string{"foo"},
+		Port:    8000,
+	}
+
+	// Register the service
+	if err := agent.AddService(svc, nil, false); err != nil {
+		t.Fatalf("err: %v", err)
+	}
+
+	// Enter maintenance mode for the service
+	if err := agent.EnableServiceMaintenance("redis"); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Make sure the critical health check was added
+	checkID := serviceMaintCheckID("redis")
+	if _, ok := agent.state.Checks()[checkID]; !ok {
+		t.Fatalf("should have registered critical maintenance check")
+	}
+
+	// Leave maintenance mode
+	if err := agent.DisableServiceMaintenance("redis"); err != nil {
+		t.Fatalf("err: %s", err)
+	}
+
+	// Ensure the check was deregistered
+	if _, ok := agent.state.Checks()[checkID]; ok {
+		t.Fatalf("should have deregistered maintenance check")
+	}
+}
+
+func TestAgent_NodeMaintenanceMode(t *testing.T) {
+	config := nextConfig()
+	dir, agent := makeAgent(t, config)
+	defer os.RemoveAll(dir)
+	defer agent.Shutdown()
+
+	// Enter maintenance mode for the node
+	agent.EnableNodeMaintenance()
+
+	// Make sure the critical health check was added
+	if _, ok := agent.state.Checks()[nodeMaintCheckID]; !ok {
+		t.Fatalf("should have registered critical node check")
+	}
+
+	// Leave maintenance mode
+	agent.DisableNodeMaintenance()
+
+	// Ensure the check was deregistered
+	if _, ok := agent.state.Checks()[nodeMaintCheckID]; ok {
+		t.Fatalf("should have deregistered critical node check")
+	}
+}
diff --git a/command/agent/http.go b/command/agent/http.go
index 708fe5b17413..23e4163048b0 100644
--- a/command/agent/http.go
+++ b/command/agent/http.go
@@ -181,6 +181,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
 	s.mux.HandleFunc("/v1/health/service/", s.wrap(s.HealthServiceNodes))
 
 	s.mux.HandleFunc("/v1/agent/self", s.wrap(s.AgentSelf))
+	s.mux.HandleFunc("/v1/agent/self/maintenance", s.wrap(s.AgentNodeMaintenance))
 	s.mux.HandleFunc("/v1/agent/services", s.wrap(s.AgentServices))
 	s.mux.HandleFunc("/v1/agent/checks", s.wrap(s.AgentChecks))
 	s.mux.HandleFunc("/v1/agent/members", s.wrap(s.AgentMembers))
@@ -195,6 +196,7 @@ func (s *HTTPServer) registerHandlers(enableDebug bool) {
 
 	s.mux.HandleFunc("/v1/agent/service/register", s.wrap(s.AgentRegisterService))
 	s.mux.HandleFunc("/v1/agent/service/deregister/", s.wrap(s.AgentDeregisterService))
+	s.mux.HandleFunc("/v1/agent/service/maintenance/", s.wrap(s.AgentServiceMaintenance))
 
 	s.mux.HandleFunc("/v1/event/fire/", s.wrap(s.EventFire))
 	s.mux.HandleFunc("/v1/event/list", s.wrap(s.EventList))
diff --git a/website/source/docs/agent/http.html.markdown b/website/source/docs/agent/http.html.markdown
index 5c620d0d6baa..cdb4f9db3417 100644
--- a/website/source/docs/agent/http.html.markdown
+++ b/website/source/docs/agent/http.html.markdown
@@ -238,6 +238,7 @@ The following endpoints are supported:
 * [`/v1/agent/services`](#agent_services) : Returns the services local agent is managing
 * [`/v1/agent/members`](#agent_members) : Returns the members as seen by the local serf agent
 * [`/v1/agent/self`](#agent_self) : Returns the local node configuration
+* 
[`/v1/agent/self/maintenance`](#agent_self_maintenance) : Node maintenance mode
 * [`/v1/agent/join/<address>`](#agent_join) : Trigger local agent to join a node
 * [`/v1/agent/force-leave/<node>`](#agent_force_leave)>: Force remove node
 * [`/v1/agent/check/register`](#agent_check_register) : Registers a new local check
@@ -247,6 +248,7 @@ The following endpoints are supported:
 * [`/v1/agent/check/fail/<checkID>`](#agent_check_fail) : Mark a local test as critical
 * [`/v1/agent/service/register`](#agent_service_register) : Registers a new local service
 * [`/v1/agent/service/deregister/<serviceID>`](#agent_service_deregister) : Deregister a local service
+* [`/v1/agent/service/maintenance/<serviceID>`](#agent_service_maintenance) : Service maintenance mode
 
 ### /v1/agent/checks
 
@@ -401,6 +403,18 @@ It returns a JSON body like this:
 }
 ```
 
+### /v1/agent/self/maintenance
+
+This endpoint is hit with a PUT and places the agent into "maintenance mode".
+During maintenance mode, the node will be marked as unavailable, and will not be
+present in DNS or API queries. This API call is idempotent. Maintenance mode is
+persistent and will be automatically restored on agent restart.
+
+The `?enable` flag is required, and its value must be `true` (to enter
+maintenance mode), or `false` (to resume normal operation).
+
+The return code is 200 on success.
+
 ### /v1/agent/join/\<address\>
 
 This endpoint is hit with a GET and is used to instruct the agent to attempt to
@@ -548,6 +562,19 @@ check, that is also deregistered.
 
 The return code is 200 on success.
 
+### /v1/agent/service/maintenance/\<serviceID\>
+
+This endpoint is hit with a PUT and places a given service into
+"maintenance mode". During maintenance mode, the service will be marked as
+unavailable, and will not be present in DNS or API queries. This API call is
+idempotent. Maintenance mode is persistent and will be automatically restored
+on agent restart.
+
+The `?enable` flag is required, and its value must be `true` (to enter
+maintenance mode), or `false` (to resume normal operation).
+
+The return code is 200 on success.
+ ## Catalog The Catalog is the endpoint used to register and deregister nodes,