Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support node and service maintenance mode #606

Merged
merged 10 commits into from
Jan 16, 2015
87 changes: 87 additions & 0 deletions command/agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ const (
"If Consul was not shut down properly, the socket file may " +
"be left behind. If the path looks correct, remove the file " +
"and try again."

// The ID of the faux health checks for maintenance mode
serviceMaintCheckPrefix = "_service_maintenance"
nodeMaintCheckID = "_node_maintenenace"
)

/*
Expand Down Expand Up @@ -995,3 +999,86 @@ func (a *Agent) unloadChecks() error {

return nil
}

// serviceMaintCheckID returns the ID of a given service's maintenance check
func serviceMaintCheckID(serviceID string) string {
return fmt.Sprintf("%s:%s", serviceMaintCheckPrefix, serviceID)
}

// EnableServiceMaintenance will register a false health check against the given
// service ID with critical status. This will exclude the service from queries.
func (a *Agent) EnableServiceMaintenance(serviceID string) error {
service, ok := a.state.Services()[serviceID]
if !ok {
return fmt.Errorf("No service registered with ID %q", serviceID)
}

// Check if maintenance mode is not already enabled
checkID := serviceMaintCheckID(serviceID)
if _, ok := a.state.Checks()[checkID]; ok {
return nil
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we log something here for information sake?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I missed that. Thanks!

}

// Create and register the critical health check
check := &structs.HealthCheck{
Node: a.config.NodeName,
CheckID: checkID,
Name: "Service Maintenance Mode",
Notes: "Maintenance mode is enabled for this service",
ServiceID: service.ID,
ServiceName: service.Service,
Status: structs.HealthCritical,
}
a.AddCheck(check, nil, true)
a.logger.Printf("[INFO] agent: service %q entered maintenance mode", serviceID)

return nil
}

// DisableServiceMaintenance will deregister the fake maintenance mode check
// if the service has been marked as in maintenance.
func (a *Agent) DisableServiceMaintenance(serviceID string) error {
if _, ok := a.state.Services()[serviceID]; !ok {
return fmt.Errorf("No service registered with ID %q", serviceID)
}

// Check if maintenance mode is enabled
checkID := serviceMaintCheckID(serviceID)
if _, ok := a.state.Checks()[checkID]; !ok {
return nil
}

// Deregister the maintenance check
a.RemoveCheck(checkID, true)
a.logger.Printf("[INFO] agent: service %q left maintenance mode", serviceID)

return nil
}

// EnableNodeMaintenance places a node into maintenance mode.
func (a *Agent) EnableNodeMaintenance() {
// Ensure node maintenance is not already enabled
if _, ok := a.state.Checks()[nodeMaintCheckID]; ok {
return
}

// Create and register the node maintenance check
check := &structs.HealthCheck{
Node: a.config.NodeName,
CheckID: nodeMaintCheckID,
Name: "Node Maintenance Mode",
Notes: "Maintenance mode is enabled for this node",
Status: structs.HealthCritical,
}
a.AddCheck(check, nil, true)
a.logger.Printf("[INFO] agent: node entered maintenance mode")
}

// DisableNodeMaintenance removes a node from maintenance mode
func (a *Agent) DisableNodeMaintenance() {
if _, ok := a.state.Checks()[nodeMaintCheckID]; !ok {
return
}
a.RemoveCheck(nodeMaintCheckID, true)
a.logger.Printf("[INFO] agent: node left maintenance mode")
}
87 changes: 87 additions & 0 deletions command/agent/agent_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,90 @@ func (s *HTTPServer) AgentDeregisterService(resp http.ResponseWriter, req *http.
serviceID := strings.TrimPrefix(req.URL.Path, "/v1/agent/service/deregister/")
return nil, s.agent.RemoveService(serviceID, true)
}

func (s *HTTPServer) AgentServiceMaintenance(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
// Only PUT supported
if req.Method != "PUT" {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sooooo if we want to be pedantic, the API method here should technically be PATCH, not PUT. PATCH is used to update a partial resource, but PUT is used to update the entire resource.

resp.WriteHeader(405)
return nil, nil
}

// Ensure we have a service ID
serviceID := strings.TrimPrefix(req.URL.Path, "/v1/agent/service/maintenance/")
if serviceID == "" {
resp.WriteHeader(400)
resp.Write([]byte("Missing service ID"))
return nil, nil
}

// Ensure we have some action
params := req.URL.Query()
if _, ok := params["enable"]; !ok {
resp.WriteHeader(400)
resp.Write([]byte("Missing value for enable"))
return nil, nil
}

var enable bool
raw := params.Get("enable")
switch raw {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should use strconv.ParseBool to be a bit more flexible to things like case.

case "true":
enable = true
case "false":
enable = false
default:
resp.WriteHeader(400)
resp.Write([]byte(fmt.Sprintf("Invalid value for enable: %q", raw)))
return nil, nil
}

var err error
if enable {
if err = s.agent.EnableServiceMaintenance(serviceID); err != nil {
resp.WriteHeader(404)
resp.Write([]byte(err.Error()))
}
} else {
if err = s.agent.DisableServiceMaintenance(serviceID); err != nil {
resp.WriteHeader(404)
resp.Write([]byte(err.Error()))
}
}
return nil, err
}

func (s *HTTPServer) AgentNodeMaintenance(resp http.ResponseWriter, req *http.Request) (interface{}, error) {
// Only PUT supported
if req.Method != "PUT" {
resp.WriteHeader(405)
return nil, nil
}

// Ensure we have some action
params := req.URL.Query()
if _, ok := params["enable"]; !ok {
resp.WriteHeader(400)
resp.Write([]byte("Missing value for enable"))
return nil, nil
}

var enable bool
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same strconv.ParseBool here

raw := params.Get("enable")
switch raw {
case "true":
enable = true
case "false":
enable = false
default:
resp.WriteHeader(400)
resp.Write([]byte(fmt.Sprintf("Invalid value for enable: %q", raw)))
return nil, nil
}

if enable {
s.agent.EnableNodeMaintenance()
} else {
s.agent.DisableNodeMaintenance()
}
return nil, nil
}
191 changes: 191 additions & 0 deletions command/agent/agent_endpoint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"github.com/hashicorp/consul/testutil"
"github.com/hashicorp/serf/serf"
"net/http"
"net/http/httptest"
"os"
"testing"
"time"
Expand Down Expand Up @@ -492,3 +493,193 @@ func TestHTTPAgentDeregisterService(t *testing.T) {
t.Fatalf("have test check")
}
}

func TestHTTPAgent_ServiceMaintenanceEndpoint_BadRequest(t *testing.T) {
dir, srv := makeHTTPServer(t)
defer os.RemoveAll(dir)
defer srv.Shutdown()
defer srv.agent.Shutdown()

// Fails on non-PUT
req, _ := http.NewRequest("GET", "/v1/agent/service/maintenance/test?enable=true", nil)
resp := httptest.NewRecorder()
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
}
if resp.Code != 405 {
t.Fatalf("expected 405, got %d", resp.Code)
}

// Fails when no enable flag provided
req, _ = http.NewRequest("PUT", "/v1/agent/service/maintenance/test", nil)
resp = httptest.NewRecorder()
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
}
if resp.Code != 400 {
t.Fatalf("expected 400, got %d", resp.Code)
}

// Fails when no service ID provided
req, _ = http.NewRequest("PUT", "/v1/agent/service/maintenance/?enable=true", nil)
resp = httptest.NewRecorder()
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
}
if resp.Code != 400 {
t.Fatalf("expected 400, got %d", resp.Code)
}

// Fails when bad service ID provided
req, _ = http.NewRequest("PUT", "/v1/agent/service/maintenance/_nope_?enable=true", nil)
resp = httptest.NewRecorder()
if _, err := srv.AgentServiceMaintenance(resp, req); err == nil {
t.Fatalf("should have errored")
}
if resp.Code != 404 {
t.Fatalf("expected 404, got %d", resp.Code)
}
}

func TestHTTPAgent_EnableServiceMaintenance(t *testing.T) {
dir, srv := makeHTTPServer(t)
defer os.RemoveAll(dir)
defer srv.Shutdown()
defer srv.agent.Shutdown()

// Register the service
service := &structs.NodeService{
ID: "test",
Service: "test",
}
if err := srv.agent.AddService(service, nil, false); err != nil {
t.Fatalf("err: %v", err)
}

// Force the service into maintenance mode
req, _ := http.NewRequest("PUT", "/v1/agent/service/maintenance/test?enable=true", nil)
resp := httptest.NewRecorder()
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
}
if resp.Code != 200 {
t.Fatalf("expected 200, got %d", resp.Code)
}

// Ensure the maintenance check was registered
checkID := serviceMaintCheckID("test")
if _, ok := srv.agent.state.Checks()[checkID]; !ok {
t.Fatalf("should have registered maintenance check")
}
}

func TestHTTPAgent_DisableServiceMaintenance(t *testing.T) {
dir, srv := makeHTTPServer(t)
defer os.RemoveAll(dir)
defer srv.Shutdown()
defer srv.agent.Shutdown()

// Register the service
service := &structs.NodeService{
ID: "test",
Service: "test",
}
if err := srv.agent.AddService(service, nil, false); err != nil {
t.Fatalf("err: %v", err)
}

// Force the service into maintenance mode
if err := srv.agent.EnableServiceMaintenance("test"); err != nil {
t.Fatalf("err: %s", err)
}

// Leave maintenance mode
req, _ := http.NewRequest("PUT", "/v1/agent/service/maintenance/test?enable=false", nil)
resp := httptest.NewRecorder()
if _, err := srv.AgentServiceMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
}
if resp.Code != 200 {
t.Fatalf("expected 200, got %d", resp.Code)
}

// Ensure the maintenance check was removed
checkID := serviceMaintCheckID("test")
if _, ok := srv.agent.state.Checks()[checkID]; ok {
t.Fatalf("should have removed maintenance check")
}
}

func TestHTTPAgent_NodeMaintenanceEndpoint_BadRequest(t *testing.T) {
dir, srv := makeHTTPServer(t)
defer os.RemoveAll(dir)
defer srv.Shutdown()
defer srv.agent.Shutdown()

// Fails on non-PUT
req, _ := http.NewRequest("GET", "/v1/agent/self/maintenance?enable=true", nil)
resp := httptest.NewRecorder()
if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
}
if resp.Code != 405 {
t.Fatalf("expected 405, got %d", resp.Code)
}

// Fails when no enable flag provided
req, _ = http.NewRequest("PUT", "/v1/agent/self/maintenance", nil)
resp = httptest.NewRecorder()
if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
}
if resp.Code != 400 {
t.Fatalf("expected 400, got %d", resp.Code)
}
}

func TestHTTPAgent_EnableNodeMaintenance(t *testing.T) {
dir, srv := makeHTTPServer(t)
defer os.RemoveAll(dir)
defer srv.Shutdown()
defer srv.agent.Shutdown()

// Force the node into maintenance mode
req, _ := http.NewRequest("PUT", "/v1/agent/self/maintenance?enable=true", nil)
resp := httptest.NewRecorder()
if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
}
if resp.Code != 200 {
t.Fatalf("expected 200, got %d", resp.Code)
}

// Ensure the maintenance check was registered
if _, ok := srv.agent.state.Checks()[nodeMaintCheckID]; !ok {
t.Fatalf("should have registered maintenance check")
}
}

func TestHTTPAgent_DisableNodeMaintenance(t *testing.T) {
dir, srv := makeHTTPServer(t)
defer os.RemoveAll(dir)
defer srv.Shutdown()
defer srv.agent.Shutdown()

// Force the node into maintenance mode
srv.agent.EnableNodeMaintenance()

// Leave maintenance mode
req, _ := http.NewRequest("PUT", "/v1/agent/self/maintenance?enable=false", nil)
resp := httptest.NewRecorder()
if _, err := srv.AgentNodeMaintenance(resp, req); err != nil {
t.Fatalf("err: %s", err)
}
if resp.Code != 200 {
t.Fatalf("expected 200, got %d", resp.Code)
}

// Ensure the maintenance check was removed
if _, ok := srv.agent.state.Checks()[nodeMaintCheckID]; ok {
t.Fatalf("should have removed maintenance check")
}
}
Loading