Skip to content

Commit

Permalink
handle case of same service ID on different nodes
Browse files Browse the repository at this point in the history
When the same service ID was registered on 2 nodes, only 1 of the
services would be deregistered (removed from being a service in good
standing), leaving the other service with the duplicate ID registered.

This change includes the node's name in the ID used to track reaped
services, to make sure services have a unique ID across nodes.
  • Loading branch information
eikenb committed May 31, 2022
1 parent f5533fd commit 33dfa54
Showing 1 changed file with 13 additions and 9 deletions.
22 changes: 13 additions & 9 deletions check.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,33 +445,37 @@ func (c *CheckRunner) reapServicesInternal() {
c.Lock()
defer c.Unlock()

reaped := make(map[string]bool)
type uniqueID struct {
node, service string
}

reaped := make(map[uniqueID]bool)
for checkID, criticalTime := range c.checksCritical {
check := c.checks[checkID]
serviceID := check.ServiceID

ID := uniqueID{node: check.Node, service: check.ServiceID}
// There's nothing to do if there's no service.
if serviceID == "" {
if ID.service == "" {
continue
}

// There might be multiple checks for one service, so
// we don't need to reap multiple times.
if reaped[serviceID] {
if reaped[ID] {
continue
}

timeout := check.Definition.DeregisterCriticalServiceAfterDuration
if timeout > 0 && timeout < time.Since(criticalTime) {
c.client.Catalog().Deregister(&api.CatalogDeregistration{
Node: check.Node,
ServiceID: serviceID,
Node: ID.node,
ServiceID: ID.service,
}, nil)
c.logger.Info("agent has been critical for too long, deregistered service", "checkID", checkID,
"serviceID", serviceID,
"nodeID", ID.node,
"serviceID", ID.service,
"duration", time.Since(criticalTime),
"timeout", timeout)
reaped[serviceID] = true
reaped[ID] = true
}
}
}
Expand Down

0 comments on commit 33dfa54

Please sign in to comment.