Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes accidental state store updates from output-side fixups. #3867

Merged
merged 3 commits into from
Feb 7, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions agent/agent_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,11 @@ func (s *HTTPServer) AgentServices(resp http.ResponseWriter, req *http.Request)
}

// Use empty list instead of nil
for _, s := range services {
for id, s := range services {
if s.Tags == nil {
s.Tags = make([]string, 0)
clone := *s
clone.Tags = make([]string, 0)
services[id] = &clone
}
}

Expand All @@ -170,10 +172,11 @@ func (s *HTTPServer) AgentChecks(resp http.ResponseWriter, req *http.Request) (i
}

// Use empty list instead of nil
// checks needs to be a deep copy for this not be racy
for _, c := range checks {
for id, c := range checks {
if c.ServiceTags == nil {
c.ServiceTags = make([]string, 0)
clone := *c
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what issue you are referring to with this comment.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You fixed it in c2a59f1, around line 150.

clone.ServiceTags = make([]string, 0)
checks[id] = &clone
}
}

Expand Down
52 changes: 42 additions & 10 deletions agent/agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -547,26 +547,42 @@ func TestAgent_RemoveServiceRemovesAllChecks(t *testing.T) {
}
}

// TestAgent_ServiceWithTagChurn is designed to detect a class of issues where
// we would have unnecessary catalog churn for services with tags. See issues
// #3259, #3642, and #3845.
func TestAgent_ServiceWithTagChurn(t *testing.T) {
// TestAgent_IndexChurn is designed to detect a class of issues where
// we would have unnecessary catalog churn from anti-entropy. See issues
// #3259, #3642, #3845, and #3866.
func TestAgent_IndexChurn(t *testing.T) {
t.Parallel()

t.Run("no tags", func(t *testing.T) {
verifyIndexChurn(t, nil)
})

t.Run("with tags", func(t *testing.T) {
verifyIndexChurn(t, []string{"foo", "bar"})
})
}

// verifyIndexChurn registers some things and runs anti-entropy a bunch of times
// in a row to make sure there are no index bumps.
func verifyIndexChurn(t *testing.T, tags []string) {
t.Helper()

a := NewTestAgent(t.Name(), "")
defer a.Shutdown()

svc := &structs.NodeService{
ID: "redis",
Service: "redis",
Port: 8000,
Tags: []string{"has", "tags"},
Tags: tags,
}
if err := a.AddService(svc, nil, true, ""); err != nil {
t.Fatalf("err: %v", err)
}

chk := &structs.HealthCheck{
CheckID: "redis-check",
Name: "Service-level check",
ServiceID: "redis",
Status: api.HealthCritical,
}
Expand All @@ -577,18 +593,34 @@ func TestAgent_ServiceWithTagChurn(t *testing.T) {
t.Fatalf("err: %v", err)
}

chk = &structs.HealthCheck{
CheckID: "node-check",
Name: "Node-level check",
Status: api.HealthCritical,
}
chkt = &structs.CheckType{
TTL: time.Hour,
}
if err := a.AddCheck(chk, chkt, true, ""); err != nil {
t.Fatalf("err: %v", err)
}

if err := a.sync.State.SyncFull(); err != nil {
t.Fatalf("err: %v", err)
}

args := &structs.ServiceSpecificRequest{
Datacenter: "dc1",
ServiceName: "redis",
}
var before structs.IndexedHealthChecks
if err := a.RPC("Health.ServiceChecks", args, &before); err != nil {
var before structs.IndexedCheckServiceNodes
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did the test have to change from calling Health.ServiceChecks to Health.ServiceNodes to illustrate this problem?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This problem wasn't uncovered by this test since you actually have to hit specific HTTP API endpoints to corrupt the data, but I did add a node-level check as part of trying to chase this down, so I changed this RPC endpoint to one that returns the service's check + the node-level checks to make this test more robust. It was when I could see this test passing but run this in an integrated fashion and watching /v1/health/service/ and see churn that I knew something super weird was happening!

if err := a.RPC("Health.ServiceNodes", args, &before); err != nil {
t.Fatalf("err: %v", err)
}
if got, want := len(before.HealthChecks), 1; got != want {
if got, want := len(before.Nodes), 1; got != want {
t.Fatalf("got %d want %d", got, want)
}
if got, want := len(before.Nodes[0].Checks), 3; /* incl. serfHealth */ got != want {
t.Fatalf("got %d want %d", got, want)
}

Expand All @@ -598,8 +630,8 @@ func TestAgent_ServiceWithTagChurn(t *testing.T) {
}
}

var after structs.IndexedHealthChecks
if err := a.RPC("Health.ServiceChecks", args, &after); err != nil {
var after structs.IndexedCheckServiceNodes
if err := a.RPC("Health.ServiceNodes", args, &after); err != nil {
t.Fatalf("err: %v", err)
}
verify.Values(t, "", after, before)
Expand Down
15 changes: 13 additions & 2 deletions agent/catalog_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,11 @@ func (s *HTTPServer) CatalogServiceNodes(resp http.ResponseWriter, req *http.Req
if out.ServiceNodes == nil {
out.ServiceNodes = make(structs.ServiceNodes, 0)
}
for _, s := range out.ServiceNodes {
for i, s := range out.ServiceNodes {
if s.ServiceTags == nil {
s.ServiceTags = make([]string, 0)
clone := *s
clone.ServiceTags = make([]string, 0)
out.ServiceNodes[i] = &clone
}
}
metrics.IncrCounterWithLabels([]string{"client", "api", "success", "catalog_service_nodes"}, 1,
Expand Down Expand Up @@ -244,6 +246,15 @@ func (s *HTTPServer) CatalogNodeServices(resp http.ResponseWriter, req *http.Req
s.agent.TranslateAddresses(args.Datacenter, out.NodeServices.Node)
}

// TODO: The NodeServices object in IndexedNodeServices is a pointer to
// something that's created for each request by the state store way down
// in https://github.com/hashicorp/consul/blob/v1.0.4/agent/consul/state/catalog.go#L953-L963.
// Since this isn't a pointer to a real state store object, it's safe to
// modify out.NodeServices.Services in the loop below without making a
// copy here. Same for the Tags in each service entry, since that was
// created by .ToNodeService() which made a copy. This is safe as-is but
// this whole business is tricky and subtle. See #3867 for more context.

// Use empty list instead of nil
if out.NodeServices != nil {
for _, s := range out.NodeServices.Services {
Expand Down
31 changes: 19 additions & 12 deletions agent/health_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,11 @@ func (s *HTTPServer) HealthChecksInState(resp http.ResponseWriter, req *http.Req
if out.HealthChecks == nil {
out.HealthChecks = make(structs.HealthChecks, 0)
}
for _, c := range out.HealthChecks {
for i, c := range out.HealthChecks {
if c.ServiceTags == nil {
c.ServiceTags = make([]string, 0)
clone := *c
clone.ServiceTags = make([]string, 0)
out.HealthChecks[i] = &clone
}
}
return out.HealthChecks, nil
Expand Down Expand Up @@ -80,9 +82,11 @@ func (s *HTTPServer) HealthNodeChecks(resp http.ResponseWriter, req *http.Reques
if out.HealthChecks == nil {
out.HealthChecks = make(structs.HealthChecks, 0)
}
for _, c := range out.HealthChecks {
for i, c := range out.HealthChecks {
if c.ServiceTags == nil {
c.ServiceTags = make([]string, 0)
clone := *c
clone.ServiceTags = make([]string, 0)
out.HealthChecks[i] = &clone
}
}
return out.HealthChecks, nil
Expand Down Expand Up @@ -120,9 +124,11 @@ func (s *HTTPServer) HealthServiceChecks(resp http.ResponseWriter, req *http.Req
if out.HealthChecks == nil {
out.HealthChecks = make(structs.HealthChecks, 0)
}
for _, c := range out.HealthChecks {
for i, c := range out.HealthChecks {
if c.ServiceTags == nil {
c.ServiceTags = make([]string, 0)
clone := *c
clone.ServiceTags = make([]string, 0)
out.HealthChecks[i] = &clone
}
}
return out.HealthChecks, nil
Expand Down Expand Up @@ -194,19 +200,20 @@ func (s *HTTPServer) HealthServiceNodes(resp http.ResponseWriter, req *http.Requ
out.Nodes = make(structs.CheckServiceNodes, 0)
}
for i := range out.Nodes {
// TODO (slackpad) It's lame that this isn't a slice of pointers
// but it's not a well-scoped change to fix this. We should
// change this at the next opportunity.
if out.Nodes[i].Checks == nil {
out.Nodes[i].Checks = make(structs.HealthChecks, 0)
}
for _, c := range out.Nodes[i].Checks {
for j, c := range out.Nodes[i].Checks {
if c.ServiceTags == nil {
c.ServiceTags = make([]string, 0)
clone := *c
clone.ServiceTags = make([]string, 0)
out.Nodes[i].Checks[j] = &clone
}
}
if out.Nodes[i].Service != nil && out.Nodes[i].Service.Tags == nil {
out.Nodes[i].Service.Tags = make([]string, 0)
clone := *out.Nodes[i].Service
clone.Tags = make([]string, 0)
out.Nodes[i].Service = &clone
}
}
return out.Nodes, nil
Expand Down
2 changes: 2 additions & 0 deletions agent/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,8 @@ type IndexedServiceNodes struct {
}

type IndexedNodeServices struct {
// TODO: This should not be a pointer, see comments in
// agent/catalog_endpoint.go.
NodeServices *NodeServices
QueryMeta
}
Expand Down