-
Notifications
You must be signed in to change notification settings - Fork 32
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
495 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
## Agent configuration | ||
|
||
This module uses the [collectd/mongodb](https://docs.signalfx.com/en/latest/integrations/agent/monitors/collectd-mongodb.html) monitor. | ||
|
||
```yaml | ||
- type: collectd/mongodb | ||
host: &mongoHost localhost | ||
port: &mongoPort 27017 | ||
username: user | ||
password: pass | ||
databases: | ||
- admin | ||
# Uncomment only if the MongoDB server is not on the same host as the SignalFx agent | ||
#disableHostDimensions: true | ||
extraDimensions: | ||
# Uncomment only if you enabled `disableHostDimensions` or for "serverless" mode. | ||
#host: *mongoHost | ||
# You should not have to change lines below | ||
extraMetrics: | ||
- gauge.connections.available | ||
- counter.asserts.regular | ||
- counter.asserts.warning | ||
# Only required if agent <= 5.5.5: | ||
- gauge.repl.max_lag | ||
- gauge.repl.active_nodes | ||
- gauge.repl.is_primary_node | ||
``` | ||
## Notes | ||
* The primary and secondary detectors require the monitor to be configured on all members | ||
of the replica set because they explicitly aggregate by replica set | ||
(`cluster` by default) to work. Change the default value of the corresponding | ||
`aggregation_function` variable if necessary. | ||
|
||
* The heartbeat detector is aggregated by replica set (`cluster`) by | ||
default to avoid an alert for each single member disappearance. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../common/locals.tf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../common/modules.tf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../common/variables.tf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
../../common/versions.tf |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
# Heartbeat detector: publishes a Critical alert when the
# `gauge.connections.available` signal has not reported for
# `var.heartbeat_timeframe`.
resource "signalfx_detector" "heartbeat" {
  # Named "MongoDB ..." like every other detector in this module (the previous
  # "Webcheck heartbeat" label was a leftover from another module).
  name      = format("%s %s", local.detector_name_prefix, "MongoDB heartbeat")
  max_delay = 900

  # SignalFlow program: the filter comes from the shared filter-tags module and
  # the aggregation suffix is injected verbatim from the variable.
  program_text = <<-EOF
    from signalfx.detectors.not_reporting import not_reporting
    signal = data('gauge.connections.available', filter=${module.filter-tags.filter_custom})${var.heartbeat_aggregation_function}.publish('signal')
    not_reporting.detector(stream=signal, resource_identifier=None, duration='${var.heartbeat_timeframe}').publish('CRIT')
EOF

  rule {
    description  = "has not reported in ${var.heartbeat_timeframe}"
    severity     = "Critical"
    detect_label = "CRIT"
    # The detector-specific switch wins over the module-wide one when it is set.
    disabled = coalesce(var.heartbeat_disabled, var.detectors_disabled)
    # Falls back to the module-wide critical recipients when no
    # detector-specific "critical" list is provided.
    notifications         = coalescelist(lookup(var.heartbeat_notifications, "critical", []), var.notifications.critical)
    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}
|
||
# Page-faults detector: publishes a Warning alert when
# `counter.extra_info.page_faults` exceeds `var.page_faults_threshold_warning`.
resource "signalfx_detector" "page_faults" {
  name = "${local.detector_name_prefix} MongoDB page faults"

  # Aggregation and transformation suffixes are appended verbatim from variables.
  program_text = <<-EOF
    signal = data('counter.extra_info.page_faults', filter=${module.filter-tags.filter_custom})${var.page_faults_aggregation_function}${var.page_faults_transformation_function}.publish('signal')
    detect(when(signal > ${var.page_faults_threshold_warning})).publish('WARN')
EOF

  rule {
    detect_label = "WARN"
    severity     = "Warning"
    description  = "is too high > ${var.page_faults_threshold_warning}"

    # Detector-specific settings take precedence over the module-wide defaults.
    disabled      = coalesce(var.page_faults_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.page_faults_notifications, "warning", []), var.notifications.warning)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}
|
||
# Connection-usage detector: computes current / (current + available) as a
# percentage and publishes Critical and Major alerts on two thresholds.
resource "signalfx_detector" "max_connections" {
  name = "${local.detector_name_prefix} MongoDB number of connections over max capacity"

  # A = connections in use, B = connections still available.
  # MAJOR only fires while the signal is at or below the critical threshold,
  # so the two labels never fire for the same value.
  program_text = <<-EOF
    A = data('gauge.connections.current', filter=${module.filter-tags.filter_custom})${var.max_connections_aggregation_function}${var.max_connections_transformation_function}
    B = data('gauge.connections.available', filter=${module.filter-tags.filter_custom})${var.max_connections_aggregation_function}${var.max_connections_transformation_function}
    signal = (A/(A+B)).scale(100).publish('signal')
    detect(when(signal > ${var.max_connections_threshold_critical})).publish('CRIT')
    detect(when(signal > ${var.max_connections_threshold_major}) and when(signal <= ${var.max_connections_threshold_critical})).publish('MAJOR')
EOF

  # Critical rule.
  rule {
    detect_label = "CRIT"
    severity     = "Critical"
    description  = "is too high > ${var.max_connections_threshold_critical}"

    # Precedence: per-severity switch, then per-detector switch, then module-wide.
    disabled      = coalesce(var.max_connections_disabled_critical, var.max_connections_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.max_connections_notifications, "critical", []), var.notifications.critical)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }

  # Major rule.
  rule {
    detect_label = "MAJOR"
    severity     = "Major"
    description  = "is too high > ${var.max_connections_threshold_major}"

    # Precedence: per-severity switch, then per-detector switch, then module-wide.
    disabled      = coalesce(var.max_connections_disabled_major, var.max_connections_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.max_connections_notifications, "major", []), var.notifications.major)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}
|
||
# Asserts detector: sums the regular and warning assert counters and publishes
# a Minor alert when the total exceeds `var.asserts_threshold_minor`.
resource "signalfx_detector" "asserts" {
  name = "${local.detector_name_prefix} MongoDB asserts (warning and regular) errors"

  # A = regular asserts, B = warning asserts; both share the same
  # aggregation/transformation suffixes.
  program_text = <<-EOF
    A = data('counter.asserts.regular', filter=${module.filter-tags.filter_custom})${var.asserts_aggregation_function}${var.asserts_transformation_function}
    B = data('counter.asserts.warning', filter=${module.filter-tags.filter_custom})${var.asserts_aggregation_function}${var.asserts_transformation_function}
    signal = (A+B).publish('signal')
    detect(when(signal > ${var.asserts_threshold_minor})).publish('MINOR')
EOF

  rule {
    detect_label = "MINOR"
    severity     = "Minor"
    description  = "is too high > ${var.asserts_threshold_minor}"

    # Detector-specific settings take precedence over the module-wide defaults.
    disabled      = coalesce(var.asserts_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.asserts_notifications, "minor", []), var.notifications.minor)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}
|
||
# Primary detector: watches `gauge.repl.is_primary_node` and publishes a
# Critical alert based on `var.primary_threshold_critical`.
#
# NOTE(review): the rule description says the primary "is missing", yet the
# detect condition fires when the signal is ABOVE the threshold. Confirm the
# comparison direction against the default of `primary_threshold_critical`
# and the configured aggregation before relying on this detector.
resource "signalfx_detector" "primary" {
  name = "${local.detector_name_prefix} MongoDB primary in replicaset"

  program_text = <<-EOF
    signal = data('gauge.repl.is_primary_node', filter=${module.filter-tags.filter_custom})${var.primary_aggregation_function}${var.primary_transformation_function}.publish('signal')
    detect(when(signal > ${var.primary_threshold_critical})).publish('CRIT')
EOF

  rule {
    detect_label = "CRIT"
    severity     = "Critical"
    description  = "is missing"

    # Detector-specific settings take precedence over the module-wide defaults.
    disabled      = coalesce(var.primary_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.primary_notifications, "critical", []), var.notifications.critical)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}
|
||
# Secondary detector: subtracts the primary-node gauge from the active-nodes
# gauge and publishes a Critical alert when the result drops below
# `var.secondary_threshold_critical`.
resource "signalfx_detector" "secondary" {
  name = "${local.detector_name_prefix} MongoDB secondary members count in replicaset"

  # A = active nodes, B = primary-node flag; A - B is the published signal.
  program_text = <<-EOF
    A = data('gauge.repl.active_nodes', filter=${module.filter-tags.filter_custom})${var.secondary_aggregation_function}${var.secondary_transformation_function}
    B = data('gauge.repl.is_primary_node', filter=${module.filter-tags.filter_custom})${var.secondary_aggregation_function}${var.secondary_transformation_function}
    signal = (A-B).publish('signal')
    detect(when(signal < ${var.secondary_threshold_critical})).publish('CRIT')
EOF

  rule {
    detect_label = "CRIT"
    severity     = "Critical"
    description  = "is too low < ${var.secondary_threshold_critical}"

    # Detector-specific settings take precedence over the module-wide defaults.
    disabled      = coalesce(var.secondary_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.secondary_notifications, "critical", []), var.notifications.critical)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}
|
||
# Replication-lag detector: publishes Critical and Major alerts when
# `gauge.repl.max_lag` exceeds the corresponding thresholds.
resource "signalfx_detector" "replication_lag" {
  name = "${local.detector_name_prefix} MongoDB replication lag"

  # MAJOR only fires while the signal is at or below the critical threshold,
  # so the two labels never fire for the same value.
  program_text = <<-EOF
    signal = data('gauge.repl.max_lag', filter=${module.filter-tags.filter_custom})${var.replication_lag_aggregation_function}${var.replication_lag_transformation_function}.publish('signal')
    detect(when(signal > ${var.replication_lag_threshold_critical})).publish('CRIT')
    detect(when(signal > ${var.replication_lag_threshold_major}) and when(signal <= ${var.replication_lag_threshold_critical})).publish('MAJOR')
EOF

  # Critical rule.
  rule {
    detect_label = "CRIT"
    severity     = "Critical"
    description  = "is too high > ${var.replication_lag_threshold_critical}"

    # Precedence: per-severity switch, then per-detector switch, then module-wide.
    disabled      = coalesce(var.replication_lag_disabled_critical, var.replication_lag_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.replication_lag_notifications, "critical", []), var.notifications.critical)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }

  # Major rule.
  rule {
    detect_label = "MAJOR"
    severity     = "Major"
    description  = "is too high > ${var.replication_lag_threshold_major}"

    # Precedence: per-severity switch, then per-detector switch, then module-wide.
    disabled      = coalesce(var.replication_lag_disabled_major, var.replication_lag_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.replication_lag_notifications, "major", []), var.notifications.major)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Each output below exposes the whole `signalfx_detector` resource of the same
# name so that callers of this module can reference its attributes.

output "asserts" {
  value       = signalfx_detector.asserts
  description = "Detector resource for asserts"
}

output "heartbeat" {
  value       = signalfx_detector.heartbeat
  description = "Detector resource for heartbeat"
}

output "max_connections" {
  value       = signalfx_detector.max_connections
  description = "Detector resource for max_connections"
}

output "page_faults" {
  value       = signalfx_detector.page_faults
  description = "Detector resource for page_faults"
}

output "primary" {
  value       = signalfx_detector.primary
  description = "Detector resource for primary"
}

output "replication_lag" {
  value       = signalfx_detector.replication_lag
  description = "Detector resource for replication_lag"
}

output "secondary" {
  value       = signalfx_detector.secondary
  description = "Detector resource for secondary"
}
|
Oops, something went wrong.