Skip to content

Commit

Permalink
#120 add mongo detectors
Browse files Browse the repository at this point in the history
  • Loading branch information
xp-1000 committed Nov 18, 2020
1 parent 7bc30ba commit 25ccccb
Show file tree
Hide file tree
Showing 8 changed files with 495 additions and 0 deletions.
37 changes: 37 additions & 0 deletions modules/database-mongo/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
## Agent configuration

This module uses [collectd/mongodb](https://docs.signalfx.com/en/latest/integrations/agent/monitors/collectd-mongodb.html) monitor.

```yaml
- type: collectd/mongodb
host: &mongoHost localhost
port: &mongoPort 27017
username: user
password: pass
databases:
- admin
# Uncomment only if the MongoDB server is not on the same host as the SignalFx agent
#disableHostDimensions: true
extraDimensions:
# Uncomment only if you enabled `disableHostDimensions` or for "serverless" mode.
#host: *mongoHost
# You should not have to change lines below
extraMetrics:
- gauge.connections.available
- counter.asserts.regular
- counter.asserts.warning
# Only required if agent <= 5.5.5:
- gauge.repl.max_lag
- gauge.repl.active_nodes
- gauge.repl.is_primary_node
```
## Notes
* Primary and secondary detectors require the monitor to be configured on all
members of the replica set because they explicitly aggregate by replicaset
(`cluster` by default) to work. Change the default value of the corresponding
`aggregation_function` variable if necessary.

* The heartbeat detector is aggregated by replicaset (`cluster`) by
default to avoid an alert for each single member disappearance.
1 change: 1 addition & 0 deletions modules/database-mongo/common-locals.tf
1 change: 1 addition & 0 deletions modules/database-mongo/common-modules.tf
1 change: 1 addition & 0 deletions modules/database-mongo/common-variables.tf
1 change: 1 addition & 0 deletions modules/database-mongo/common-versions.tf
163 changes: 163 additions & 0 deletions modules/database-mongo/detectors-mongo.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
# Heartbeat detector: raises a Critical alert when the
# `gauge.connections.available` signal (filtered and aggregated as configured)
# has not reported for `var.heartbeat_timeframe`.
resource "signalfx_detector" "heartbeat" {
  # Name fixed: this module monitors MongoDB, not Webcheck.
  name      = format("%s %s", local.detector_name_prefix, "MongoDB heartbeat")
  max_delay = 900

  # `<<-` strips the common leading indentation, so the SignalFlow program is
  # emitted without leading whitespace.
  program_text = <<-EOF
    from signalfx.detectors.not_reporting import not_reporting
    signal = data('gauge.connections.available', filter=${module.filter-tags.filter_custom})${var.heartbeat_aggregation_function}.publish('signal')
    not_reporting.detector(stream=signal, resource_identifier=None, duration='${var.heartbeat_timeframe}').publish('CRIT')
EOF

  rule {
    description = "has not reported in ${var.heartbeat_timeframe}"
    severity    = "Critical"
    # Must match the label published by the program above.
    detect_label = "CRIT"
    # Detector-specific switch wins over the module-wide one.
    disabled = coalesce(var.heartbeat_disabled, var.detectors_disabled)
    # Detector-specific recipients, falling back to the module-wide list.
    notifications         = coalescelist(lookup(var.heartbeat_notifications, "critical", []), var.notifications.critical)
    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

# Page faults detector: raises a Warning when the
# `counter.extra_info.page_faults` signal exceeds
# `var.page_faults_threshold_warning`.
resource "signalfx_detector" "page_faults" {
  name = "${local.detector_name_prefix} MongoDB page faults"

  program_text = <<-EOF
    signal = data('counter.extra_info.page_faults', filter=${module.filter-tags.filter_custom})${var.page_faults_aggregation_function}${var.page_faults_transformation_function}.publish('signal')
    detect(when(signal > ${var.page_faults_threshold_warning})).publish('WARN')
EOF

  rule {
    # Bound to the 'WARN' stream published by the program above.
    detect_label = "WARN"
    severity     = "Warning"
    description  = "is too high > ${var.page_faults_threshold_warning}"

    # Detector-specific settings take precedence over module-wide defaults.
    disabled      = coalesce(var.page_faults_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.page_faults_notifications, "warning", []), var.notifications.warning)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

# Connection saturation detector: computes used connections as a percentage,
# current / (current + available) * 100, and raises Critical above the
# critical threshold, or Major when between the major and critical thresholds.
resource "signalfx_detector" "max_connections" {
  name = "${local.detector_name_prefix} MongoDB number of connections over max capacity"

  program_text = <<-EOF
    A = data('gauge.connections.current', filter=${module.filter-tags.filter_custom})${var.max_connections_aggregation_function}${var.max_connections_transformation_function}
    B = data('gauge.connections.available', filter=${module.filter-tags.filter_custom})${var.max_connections_aggregation_function}${var.max_connections_transformation_function}
    signal = (A/(A+B)).scale(100).publish('signal')
    detect(when(signal > ${var.max_connections_threshold_critical})).publish('CRIT')
    detect(when(signal > ${var.max_connections_threshold_major}) and when(signal <= ${var.max_connections_threshold_critical})).publish('MAJOR')
EOF

  # Critical rule, bound to the 'CRIT' stream.
  rule {
    detect_label = "CRIT"
    severity     = "Critical"
    description  = "is too high > ${var.max_connections_threshold_critical}"

    # Precedence: per-severity switch, then per-detector, then module-wide.
    disabled      = coalesce(var.max_connections_disabled_critical, var.max_connections_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.max_connections_notifications, "critical", []), var.notifications.critical)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }

  # Major rule, bound to the 'MAJOR' stream. The program's upper bound keeps
  # it from firing together with the Critical rule.
  rule {
    detect_label = "MAJOR"
    severity     = "Major"
    description  = "is too high > ${var.max_connections_threshold_major}"

    disabled      = coalesce(var.max_connections_disabled_major, var.max_connections_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.max_connections_notifications, "major", []), var.notifications.major)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

# Asserts detector: sums the regular and warning assert counters and raises a
# Minor alert when the total exceeds `var.asserts_threshold_minor`.
resource "signalfx_detector" "asserts" {
  name = "${local.detector_name_prefix} MongoDB asserts (warning and regular) errors"

  program_text = <<-EOF
    A = data('counter.asserts.regular', filter=${module.filter-tags.filter_custom})${var.asserts_aggregation_function}${var.asserts_transformation_function}
    B = data('counter.asserts.warning', filter=${module.filter-tags.filter_custom})${var.asserts_aggregation_function}${var.asserts_transformation_function}
    signal = (A+B).publish('signal')
    detect(when(signal > ${var.asserts_threshold_minor})).publish('MINOR')
EOF

  rule {
    # Bound to the 'MINOR' stream published by the program above.
    detect_label = "MINOR"
    severity     = "Minor"
    description  = "is too high > ${var.asserts_threshold_minor}"

    # Detector-specific settings take precedence over module-wide defaults.
    disabled      = coalesce(var.asserts_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.asserts_notifications, "minor", []), var.notifications.minor)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

# Primary detector: raises a Critical alert when the aggregated
# `gauge.repl.is_primary_node` signal falls below
# `var.primary_threshold_critical`, i.e. the primary "is missing".
# The aggregation comes from `var.primary_aggregation_function`.
resource "signalfx_detector" "primary" {
  name = format("%s %s", local.detector_name_prefix, "MongoDB primary in replicaset")

  # Comparison fixed from `>` to `<`: the rule reports a missing primary, so
  # the alert must fire when the signal is BELOW the threshold (same direction
  # as the secondary detector), not above it.
  # NOTE(review): confirm the default of `primary_threshold_critical` suits a
  # strict `<` comparison (e.g. 1).
  program_text = <<-EOF
    signal = data('gauge.repl.is_primary_node', filter=${module.filter-tags.filter_custom})${var.primary_aggregation_function}${var.primary_transformation_function}.publish('signal')
    detect(when(signal < ${var.primary_threshold_critical})).publish('CRIT')
EOF

  rule {
    description = "is missing"
    severity    = "Critical"
    # Must match the label published by the program above.
    detect_label = "CRIT"
    # Detector-specific switch wins over the module-wide one.
    disabled = coalesce(var.primary_disabled, var.detectors_disabled)
    # Detector-specific recipients, falling back to the module-wide list.
    notifications         = coalescelist(lookup(var.primary_notifications, "critical", []), var.notifications.critical)
    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

# Secondary detector: subtracts the primary-node signal from the active-node
# signal and raises a Critical alert when the result drops below
# `var.secondary_threshold_critical`.
resource "signalfx_detector" "secondary" {
  name = "${local.detector_name_prefix} MongoDB secondary members count in replicaset"

  program_text = <<-EOF
    A = data('gauge.repl.active_nodes', filter=${module.filter-tags.filter_custom})${var.secondary_aggregation_function}${var.secondary_transformation_function}
    B = data('gauge.repl.is_primary_node', filter=${module.filter-tags.filter_custom})${var.secondary_aggregation_function}${var.secondary_transformation_function}
    signal = (A-B).publish('signal')
    detect(when(signal < ${var.secondary_threshold_critical})).publish('CRIT')
EOF

  rule {
    # Bound to the 'CRIT' stream published by the program above.
    detect_label = "CRIT"
    severity     = "Critical"
    description  = "is too low < ${var.secondary_threshold_critical}"

    # Detector-specific settings take precedence over module-wide defaults.
    disabled      = coalesce(var.secondary_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.secondary_notifications, "critical", []), var.notifications.critical)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

# Replication lag detector: watches `gauge.repl.max_lag` and raises Critical
# above the critical threshold, or Major when between the major and critical
# thresholds.
resource "signalfx_detector" "replication_lag" {
  name = "${local.detector_name_prefix} MongoDB replication lag"

  program_text = <<-EOF
    signal = data('gauge.repl.max_lag', filter=${module.filter-tags.filter_custom})${var.replication_lag_aggregation_function}${var.replication_lag_transformation_function}.publish('signal')
    detect(when(signal > ${var.replication_lag_threshold_critical})).publish('CRIT')
    detect(when(signal > ${var.replication_lag_threshold_major}) and when(signal <= ${var.replication_lag_threshold_critical})).publish('MAJOR')
EOF

  # Critical rule, bound to the 'CRIT' stream.
  rule {
    detect_label = "CRIT"
    severity     = "Critical"
    description  = "is too high > ${var.replication_lag_threshold_critical}"

    # Precedence: per-severity switch, then per-detector, then module-wide.
    disabled      = coalesce(var.replication_lag_disabled_critical, var.replication_lag_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.replication_lag_notifications, "critical", []), var.notifications.critical)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }

  # Major rule, bound to the 'MAJOR' stream. The program's upper bound keeps
  # it from firing together with the Critical rule.
  rule {
    detect_label = "MAJOR"
    severity     = "Major"
    description  = "is too high > ${var.replication_lag_threshold_major}"

    disabled      = coalesce(var.replication_lag_disabled_major, var.replication_lag_disabled, var.detectors_disabled)
    notifications = coalescelist(lookup(var.replication_lag_notifications, "major", []), var.notifications.major)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

35 changes: 35 additions & 0 deletions modules/database-mongo/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Each output exposes the whole detector resource of the same name so that
# callers of this module can reference any of its attributes.

output "asserts" {
  value       = signalfx_detector.asserts
  description = "Detector resource for asserts"
}

output "heartbeat" {
  value       = signalfx_detector.heartbeat
  description = "Detector resource for heartbeat"
}

output "max_connections" {
  value       = signalfx_detector.max_connections
  description = "Detector resource for max_connections"
}

output "page_faults" {
  value       = signalfx_detector.page_faults
  description = "Detector resource for page_faults"
}

output "primary" {
  value       = signalfx_detector.primary
  description = "Detector resource for primary"
}

output "replication_lag" {
  value       = signalfx_detector.replication_lag
  description = "Detector resource for replication_lag"
}

output "secondary" {
  value       = signalfx_detector.secondary
  description = "Detector resource for secondary"
}

Loading

0 comments on commit 25ccccb

Please sign in to comment.