-
Notifications
You must be signed in to change notification settings - Fork 4.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enhanced Heartbeat Telemetry #8621
Changes from 3 commits
1cf1a5d
d85d186
ac5cbe4
0e20f0a
036d373
113ed51
027f045
e3f9d62
998d27a
a15ef68
fab8be9
d9995d5
7a62efb
090e920
339547c
919214d
e7374a4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,10 +99,10 @@ func createMockJob(name string, cfg *common.Config) ([]Job, error) { | |
} | ||
|
||
func mockPluginBuilder() pluginBuilder { | ||
return pluginBuilder{"test", ActiveMonitor, func(s string, config *common.Config) ([]Job, error) { | ||
return pluginBuilder{"test", ActiveMonitor, func(s string, config *common.Config) ([]Job, int, error) { | ||
c := common.Config{} | ||
j, err := createMockJob("test", &c) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this breaks the tests because there is no monitor type. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is now fixed since we only log instead of panicking in this case. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is now further improved by our dynamic creation of metrics, so this scenario is no longer possible. |
||
return j, err | ||
return j, 1, err | ||
}} | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,8 +30,38 @@ import ( | |
"github.com/elastic/beats/libbeat/beat" | ||
"github.com/elastic/beats/libbeat/common" | ||
"github.com/elastic/beats/libbeat/logp" | ||
"github.com/elastic/beats/libbeat/monitoring" | ||
) | ||
|
||
// teleRegistry is the "heartbeat" registry obtained from monitoring.Default. | ||
// teleStats registers its counters and per-protocol sub-registries on it. | ||
var teleRegistry *monitoring.Registry = monitoring.Default.NewRegistry("heartbeat") | ||
// NOTE(review): httpRegistry is not referenced in the visible code, and teleStats | ||
// creates its own "http" sub-registry — confirm whether this is still needed. | ||
var httpRegistry *monitoring.Registry = teleRegistry.NewRegistry("heartbeat.http") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @urso I initially thought I could nest by writing: var teleRegistry *monitoring.Registry = monitoring.Default.NewRegistry("heartbeat")
var httpRegistry *monitoring.Registry = teleRegistry.NewRegistry("http") but discovered that despite calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do you need the full path? Where did you see things going wrong? When calling
Why do you create a new global registry? Is this supposed to be metrics or for phoning-home data? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see now that these lines are actually no-ops: we never use these registries, but define them again further down. I was confused. In that subsequent block I do define them correctly without the full paths. This is what happens when you write code late in the day! Apologies. |
||
// NOTE(review): tcpRegistry and icmpRegistry are not referenced in the visible | ||
// code; teleStats creates its own "tcp" and "icmp" sub-registries on | ||
// teleRegistry — confirm whether these two declarations are still needed. | ||
var tcpRegistry *monitoring.Registry = teleRegistry.NewRegistry("heartbeat.tcp") | ||
var icmpRegistry *monitoring.Registry = teleRegistry.NewRegistry("heartbeat.icmp") | ||
|
||
// protocolStats groups the two telemetry counters kept for a single protocol. | ||
type protocolStats struct { | ||
// monitors is incremented in Monitor.Start and decremented in Monitor.Stop. | ||
monitors *monitoring.Int | ||
// endpoints is raised by the monitor's endpoint count in Monitor.Start and | ||
// lowered by it in Monitor.Stop. | ||
endpoints *monitoring.Int | ||
} | ||
|
||
// newProtocolStats calls monitoring.NewInt on reg for the names "monitors" and | ||
// "endpoints" and returns the two results bundled as a protocolStats. | ||
func newProtocolStats(reg *monitoring.Registry) protocolStats { | ||
return protocolStats{ | ||
// Unkeyed literal: the values must stay in struct field order (monitors, | ||
// then endpoints); both fields share a type, so a swap would still compile. | ||
monitoring.NewInt(reg, "monitors"), | ||
monitoring.NewInt(reg, "endpoints"), | ||
} | ||
} | ||
|
||
// teleStats holds the package-level telemetry counters: an overall monitor | ||
// count plus per-protocol stats keyed by name. Monitor.Start and Monitor.Stop | ||
// look entries up by m.name and log an error for names missing from the map. | ||
var teleStats = struct { | ||
// monitors is the counter named "monitors" registered on teleRegistry. | ||
monitors *monitoring.Int | ||
// protocols maps a protocol name to its counters. | ||
protocols map[string]protocolStats | ||
}{ | ||
monitors: monitoring.NewInt(teleRegistry, "monitors"), | ||
protocols: map[string]protocolStats{ | ||
// Each protocol gets its own sub-registry created from teleRegistry. | ||
"http": newProtocolStats(teleRegistry.NewRegistry("http")), | ||
"tcp": newProtocolStats(teleRegistry.NewRegistry("tcp")), | ||
"icmp": newProtocolStats(teleRegistry.NewRegistry("icmp")), | ||
}, | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer for new code not to rely on globals. |
||
|
||
// Monitor represents a configured recurring monitoring task loaded from a config file. Starting it | ||
// will cause it to run with the given scheduler until Stop() is called. | ||
type Monitor struct { | ||
|
@@ -42,6 +72,8 @@ type Monitor struct { | |
scheduler *scheduler.Scheduler | ||
jobTasks []*task | ||
enabled bool | ||
// endpoints is a count of endpoints this monitor measures. | ||
endpoints int | ||
// internalsMtx is used to synchronize access to critical | ||
// internal datastructures | ||
internalsMtx sync.Mutex | ||
|
@@ -97,7 +129,8 @@ func newMonitor( | |
config: config, | ||
} | ||
|
||
jobs, err := monitorPlugin.create(config) | ||
jobs, endpoints, err := monitorPlugin.create(config) | ||
m.endpoints = endpoints | ||
if err != nil { | ||
return nil, fmt.Errorf("job err %v", err) | ||
} | ||
|
@@ -181,7 +214,8 @@ func (m *Monitor) makeWatchTasks(monitorPlugin pluginBuilder) error { | |
return | ||
} | ||
|
||
watchJobs, err := monitorPlugin.create(merged) | ||
watchJobs, endpoints, err := monitorPlugin.create(merged) | ||
m.endpoints = endpoints | ||
if err != nil { | ||
logp.Err("Could not create job from watch file: %v", err) | ||
} | ||
|
@@ -227,6 +261,15 @@ func (m *Monitor) Start() { | |
for _, t := range m.watchPollTasks { | ||
t.Start() | ||
} | ||
|
||
teleStats.monitors.Inc() | ||
|
||
if stats, ok := teleStats.protocols[m.name]; !ok { | ||
logp.Err("Unknown protocol for monitor stats: %s", m.name) | ||
} else { | ||
stats.monitors.Inc() | ||
stats.endpoints.Add(int64(m.endpoints)) | ||
} | ||
} | ||
|
||
// Stop stops the Monitor's execution in its configured scheduler. | ||
|
@@ -242,4 +285,13 @@ func (m *Monitor) Stop() { | |
for _, t := range m.watchPollTasks { | ||
t.Stop() | ||
} | ||
|
||
teleStats.monitors.Dec() | ||
|
||
if stats, ok := teleStats.protocols[m.name]; !ok { | ||
logp.Err("Unknown protocol for monitor stats: %s", m.name) | ||
} else { | ||
stats.monitors.Dec() | ||
stats.endpoints.Sub(int64(m.endpoints)) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,7 +48,7 @@ func init() { | |
|
||
// PluginBuilder is the signature of functions used to build active | ||
// monitors | ||
type PluginBuilder func(string, *common.Config) ([]Job, error) | ||
type PluginBuilder func(string, *common.Config) (jobs []Job, endpoints int, err error) | ||
|
||
// Type represents whether a plugin is active or passive. | ||
type Type uint8 | ||
|
@@ -129,7 +129,7 @@ func (r *pluginsReg) monitorNames() []string { | |
return names | ||
} | ||
|
||
func (e *pluginBuilder) create(cfg *common.Config) ([]Job, error) { | ||
func (e *pluginBuilder) create(cfg *common.Config) (jobs []Job, endpoints int, err error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. explicit names because |
||
return e.builder(e.name, cfg) | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was unclear before. This is really a map of
scheme(str) -> []host(str)