-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
mute consul debug messages #567
Changes from 4 commits
6af01d2
deea0d6
b3ed3e4
e1d2d58
42b4a34
c9277e2
83c4650
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,6 +72,7 @@ type ConsulService struct { | |
client consulApi | ||
logger *log.Logger | ||
shutdownCh chan struct{} | ||
node *structs.Node | ||
|
||
trackedTasks map[string]*trackedTask | ||
serviceStates map[string]string | ||
|
@@ -80,7 +81,7 @@ type ConsulService struct { | |
|
||
// A factory method to create new consul service | ||
func NewConsulService(logger *log.Logger, consulAddr string, token string, | ||
auth string, enableSSL bool, verifySSL bool) (*ConsulService, error) { | ||
auth string, enableSSL bool, verifySSL bool, node *structs.Node) (*ConsulService, error) { | ||
var err error | ||
var c *consul.Client | ||
cfg := consul.DefaultConfig() | ||
|
@@ -122,6 +123,7 @@ func NewConsulService(logger *log.Logger, consulAddr string, token string, | |
consulService := ConsulService{ | ||
client: &consulApiClient{client: c}, | ||
logger: logger, | ||
node: node, | ||
trackedTasks: make(map[string]*trackedTask), | ||
serviceStates: make(map[string]string), | ||
shutdownCh: make(chan struct{}), | ||
|
@@ -161,7 +163,7 @@ func (c *ConsulService) Deregister(task *structs.Task, allocID string) error { | |
} | ||
c.logger.Printf("[INFO] consul: deregistering service %v with consul", service.Name) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we want to mask this output also? If not, I think I misunderstood why the others are being skipped. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cbednarski This is being printed only once and not continuously, so I think this is fine, and it indicates to the user that we have taken out the service and its corresponding checks. If the Consul agent is not available at that time, the service and checks would be taken out once the agent comes back up, but we won't print the message continuously as we keep retrying during the sync loop. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should move this after the if so that there is only one debug message: either a successful deregister or an error deregistering. The same goes for registering. |
||
if err := c.deregisterService(service.Id); err != nil { | ||
c.logger.Printf("[DEBUG] consul: error in deregistering service %v from consul", service.Name) | ||
c.printLogMessage("[DEBUG] consul: error in deregistering service %v from consul", service.Name) | ||
mErr.Errors = append(mErr.Errors, err) | ||
} | ||
} | ||
|
@@ -207,14 +209,14 @@ func (c *ConsulService) performSync() { | |
// Add new services which Consul agent isn't aware of | ||
knownServices[service.Id] = struct{}{} | ||
if _, ok := consulServices[service.Id]; !ok { | ||
c.logger.Printf("[INFO] consul: registering service %s with consul.", service.Name) | ||
c.printLogMessage("[INFO] consul: registering service %s with consul.", service.Name) | ||
c.registerService(service, trackedTask.task, trackedTask.allocID) | ||
continue | ||
} | ||
|
||
// If a service has changed, re-register it with Consul agent | ||
if service.Hash() != c.serviceStates[service.Id] { | ||
c.logger.Printf("[INFO] consul: reregistering service %s with consul.", service.Name) | ||
c.printLogMessage("[INFO] consul: reregistering service %s with consul.", service.Name) | ||
c.registerService(service, trackedTask.task, trackedTask.allocID) | ||
continue | ||
} | ||
|
@@ -242,7 +244,7 @@ func (c *ConsulService) performSync() { | |
for _, consulService := range consulServices { | ||
if _, ok := knownServices[consulService.ID]; !ok { | ||
delete(c.serviceStates, consulService.ID) | ||
c.logger.Printf("[INFO] consul: deregistering service %v with consul", consulService.Service) | ||
c.printLogMessage("[INFO] consul: deregistering service %v with consul", consulService.Service) | ||
c.deregisterService(consulService.ID) | ||
} | ||
} | ||
|
@@ -273,13 +275,13 @@ func (c *ConsulService) registerService(service *structs.Service, task *structs. | |
} | ||
|
||
if err := c.client.ServiceRegister(asr); err != nil { | ||
c.logger.Printf("[DEBUG] consul: error while registering service %v with consul: %v", service.Name, err) | ||
c.printLogMessage("[DEBUG] consul: error while registering service %v with consul: %v", service.Name, err) | ||
mErr.Errors = append(mErr.Errors, err) | ||
} | ||
for _, check := range service.Checks { | ||
cr := c.makeCheck(service, check, host, port) | ||
if err := c.registerCheck(cr); err != nil { | ||
c.logger.Printf("[DEBUG] consul: error while registerting check %v with consul: %v", check.Name, err) | ||
c.printLogMessage("[DEBUG] consul: error while registerting check %v with consul: %v", check.Name, err) | ||
mErr.Errors = append(mErr.Errors, err) | ||
} | ||
|
||
|
@@ -289,13 +291,13 @@ func (c *ConsulService) registerService(service *structs.Service, task *structs. | |
|
||
// registerCheck registers a check with Consul | ||
func (c *ConsulService) registerCheck(check *consul.AgentCheckRegistration) error { | ||
c.logger.Printf("[INFO] consul: registering Check with ID: %v for service: %v", check.ID, check.ServiceID) | ||
c.printLogMessage("[INFO] consul: registering check with ID: %v for service: %v", check.ID, check.ServiceID) | ||
return c.client.CheckRegister(check) | ||
} | ||
|
||
// deregisterCheck de-registers a check with a specific ID from Consul | ||
func (c *ConsulService) deregisterCheck(checkID string) error { | ||
c.logger.Printf("[INFO] consul: removing check with ID: %v", checkID) | ||
c.printLogMessage("[INFO] consul: removing check with ID: %v", checkID) | ||
return c.client.CheckDeregister(checkID) | ||
} | ||
|
||
|
@@ -336,3 +338,9 @@ func (c *ConsulService) makeCheck(service *structs.Service, check *structs.Servi | |
} | ||
return cr | ||
} | ||
|
||
func (c *ConsulService) printLogMessage(message string, v ...interface{}) { | ||
if _, ok := c.node.Attributes["consul.version"]; ok { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems weird that we're suppressing log messages when Consul is not present, but we're still running through all of the register / deregister logic. I feel like we should have shortcut or no-op'd much earlier. Can we skip over the consul logic completely if Consul is not running? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @cbednarski If a user has defined a service block in his/her Task, then even if the Consul agent is not running we start tracking the service and try to keep syncing it with Consul. The current logic allows consul agent to be unavailable but when it comes back up again, it syncs all the service and check definitions. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I still don't follow. There are two scenarios:
If we have to have the service registered permanently when the task starts just in case consul shows up some day, it seems like we should still be able to do this check much earlier and skip all of this additional complexity with respect to filtering the logs and retrying something that's never going to succeed. For example in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think if we just eat all the logs it solves this problem. Do you agree, @cbednarski? Because then it just becomes us building the internal state such that if Consul comes back we can do the diff and register/deregister the necessary services, and we'd never tell the user we are taking an action against Consul when we aren't. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The problem with suppressing the logs is Nomad is actually doing a lot of work that suddenly becomes invisible, but we probably shouldn't be doing the work in the first place. Hiding the logs is a smell. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree with @cbednarski on this one. I think we should skip the entire register loop if Consul was fingerprinted as missing. It doesn't make much sense otherwise to loop over every task, service, and check, and attempt a REST call we know will fail anyways. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ryanuber We discussed this more internally. The fingerprinter currently updates every 15s; we would continue to do precise registration and de-registration whenever a new service comes up or goes away. 
We would not sync with Consul every 5s if we can't get the list of services and checks. |
||
c.logger.Printf(message, v) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,15 +12,21 @@ import ( | |
"github.com/hashicorp/nomad/nomad/structs" | ||
) | ||
|
||
const ( | ||
consulAvailable = "consulavailable" | ||
consulUnavailable = "consulunavailable" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we just change these to |
||
) | ||
|
||
// ConsulFingerprint is used to fingerprint the architecture | ||
type ConsulFingerprint struct { | ||
logger *log.Logger | ||
client *consul.Client | ||
logger *log.Logger | ||
client *consul.Client | ||
lastState string | ||
} | ||
|
||
// NewConsulFingerprint is used to create an OS fingerprint | ||
func NewConsulFingerprint(logger *log.Logger) Fingerprint { | ||
f := &ConsulFingerprint{logger: logger} | ||
f := &ConsulFingerprint{logger: logger, lastState: consulUnavailable} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
return f | ||
} | ||
|
||
|
@@ -55,6 +61,13 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod | |
if err != nil { | ||
// Clear any attributes set by a previous fingerprint. | ||
f.clearConsulAttributes(node) | ||
|
||
// Print a message indicating that the Consul Agent is not available | ||
// anymore | ||
if f.lastState == consulAvailable { | ||
f.logger.Printf("[INFO] fingerprint.consul: consul agent is unavailable") | ||
} | ||
f.lastState = consulUnavailable | ||
return false, nil | ||
} | ||
|
||
|
@@ -68,6 +81,12 @@ func (f *ConsulFingerprint) Fingerprint(config *client.Config, node *structs.Nod | |
node.Attributes["consul.datacenter"], | ||
node.Attributes["consul.name"]) | ||
|
||
// If the Consul Agent was previously unavailable print a message to | ||
// indicate the Agent is available now | ||
if f.lastState == consulUnavailable { | ||
f.logger.Printf("[INFO] fingerprt.consul: consul agent is available") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
f.lastState = consulAvailable | ||
return true, nil | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://github.com/hashicorp/nomad/pull/567/files#diff-2a1fc746b3609034abb284128e9a6ed4R146
Put a debug log there too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@dadgar registerService already has a debug message in it.