Skip to content

Commit

Permalink
rework genericjmx detectors
Browse files Browse the repository at this point in the history
  • Loading branch information
xp-1000 committed Oct 27, 2020
1 parent dcfdae2 commit 7fb8e30
Show file tree
Hide file tree
Showing 12 changed files with 223 additions and 763 deletions.
45 changes: 45 additions & 0 deletions middleware/genericjmx/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# MIDDLEWARE JMX SignalFx detectors

## How to use this module

```hcl
module "signalfx-detectors-middleware-jmx" {
source = "github.com/claranet/terraform-signalfx-detectors.git//middleware/genericjmx?ref={revision}"
environment = var.environment
notifications = var.notifications
}
```

## Purpose

Creates SignalFx detectors with the following checks:
- JMX GC old generation usage
- JMX memory heap usage

## Notes

This module uses the [GenericJMX](https://docs.signalfx.com/en/latest/integrations/agent/monitors/collectd-genericjmx.html)
monitor to fetch common Java runtime metrics for any JVM-based application.

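You must [enable JMX Remote](https://docs.oracle.com/javadb/10.10.1.2/adminguide/radminjmxenabledisable.html) on your Java
application. Depending on your application, you should add parameters like the following (this example disables
authentication and SSL, so only use it as-is when the JMX port is not reachable from untrusted hosts):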

```
-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.port=5000 -Dcom.sun.management.jmxremote.local.only=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Djava.rmi.server.hostname=127.0.0.1
```

If a native GenericJMX-based monitor exists for your specific application, such as the one for
[Cassandra](https://docs.signalfx.com/en/latest/integrations/agent/monitors/collectd-cassandra.html),
you should configure that dedicated monitor; it will automatically collect the metrics required by this module.

Otherwise, if no dedicated monitor is available for your application, or you simply do not want to collect
application-specific metrics, you have to configure the GenericJMX monitor directly:

```
- type: collectd/genericjmx
  host: 127.0.0.1
  port: 5000
```

Keep in mind that you can easily add application-specific metrics by defining the `mBeanDefinitions` parameter.
1 change: 1 addition & 0 deletions middleware/genericjmx/common-locals.tf
1 change: 1 addition & 0 deletions middleware/genericjmx/common-variables.tf
62 changes: 62 additions & 0 deletions middleware/genericjmx/detectors-genericjmx.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Detector on JVM heap usage, expressed as a percentage:
#   signal = jmx_memory.used / jmx_memory.max * 100
# Both series are restricted to plugin_instance "memory-heap" and to the
# module-wide custom tag filter, then passed through the configurable
# aggregation and transformation functions.
#
# Two mutually exclusive conditions are published:
#   CRIT  : signal >  critical threshold
#   MAJOR : major threshold < signal <= critical threshold
# The MAJOR upper bound is inclusive so that a value exactly equal to the
# critical threshold still raises MAJOR instead of falling between both rules.
resource "signalfx_detector" "jmx_memory_heap" {
  name = format("%s %s", local.detector_name_prefix, "JMX memory heap usage")

  program_text = <<-EOF
    A = data('jmx_memory.used', filter=filter('plugin_instance', 'memory-heap') and ${module.filter-tags.filter_custom})${var.memory_heap_aggregation_function}${var.memory_heap_transformation_function}
    B = data('jmx_memory.max', filter=filter('plugin_instance', 'memory-heap') and ${module.filter-tags.filter_custom})${var.memory_heap_aggregation_function}${var.memory_heap_transformation_function}
    signal = (A/B).scale(100).publish('signal')
    detect(when(signal > ${var.memory_heap_threshold_critical})).publish('CRIT')
    detect(when(signal > ${var.memory_heap_threshold_major}) and when(signal <= ${var.memory_heap_threshold_critical})).publish('MAJOR')
  EOF

  rule {
    description   = "is too high > ${var.memory_heap_threshold_major}"
    severity      = "Major"
    detect_label  = "MAJOR"
    # First non-null wins: rule-level switch, then detector-level, then module-level.
    disabled      = coalesce(var.memory_heap_disabled_major, var.memory_heap_disabled, var.detectors_disabled)
    # Detector-specific recipients take precedence when non-empty, else the module-wide list.
    notifications = coalescelist(lookup(var.memory_heap_notifications, "major", []), var.notifications.major)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }

  rule {
    description   = "is too high > ${var.memory_heap_threshold_critical}"
    severity      = "Critical"
    detect_label  = "CRIT"
    # First non-null wins: rule-level switch, then detector-level, then module-level.
    disabled      = coalesce(var.memory_heap_disabled_critical, var.memory_heap_disabled, var.detectors_disabled)
    # Detector-specific recipients take precedence when non-empty, else the module-wide list.
    notifications = coalescelist(lookup(var.memory_heap_notifications, "critical", []), var.notifications.critical)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

# Detector on old-generation memory pool usage, expressed as a percentage:
#   signal = jmx_memory.used / jmx_memory.max * 100
# Both series are restricted to plugin_instance "memory_pool-G1 Old Gen" and
# to the module-wide custom tag filter, then passed through the configurable
# aggregation and transformation functions.
# NOTE(review): the filter value names the G1 pool only; JVMs running another
# collector presumably report a different pool name and would not be matched
# - confirm whether that is intended.
#
# Two mutually exclusive conditions are published:
#   CRIT  : signal >  critical threshold
#   MAJOR : major threshold < signal <= critical threshold
# The MAJOR upper bound is inclusive so that a value exactly equal to the
# critical threshold still raises MAJOR instead of falling between both rules.
resource "signalfx_detector" "jmx_old_gen" {
  name = format("%s %s", local.detector_name_prefix, "JMX GC old generation usage")

  program_text = <<-EOF
    A = data('jmx_memory.used', filter=filter('plugin_instance', 'memory_pool-G1 Old Gen') and ${module.filter-tags.filter_custom})${var.gc_old_gen_aggregation_function}${var.gc_old_gen_transformation_function}
    B = data('jmx_memory.max', filter=filter('plugin_instance', 'memory_pool-G1 Old Gen') and ${module.filter-tags.filter_custom})${var.gc_old_gen_aggregation_function}${var.gc_old_gen_transformation_function}
    signal = (A/B).scale(100).publish('signal')
    detect(when(signal > ${var.gc_old_gen_threshold_critical})).publish('CRIT')
    detect(when(signal > ${var.gc_old_gen_threshold_major}) and when(signal <= ${var.gc_old_gen_threshold_critical})).publish('MAJOR')
  EOF

  rule {
    description   = "is too high > ${var.gc_old_gen_threshold_major}"
    severity      = "Major"
    detect_label  = "MAJOR"
    # First non-null wins: rule-level switch, then detector-level, then module-level.
    disabled      = coalesce(var.gc_old_gen_disabled_major, var.gc_old_gen_disabled, var.detectors_disabled)
    # Detector-specific recipients take precedence when non-empty, else the module-wide list.
    notifications = coalescelist(lookup(var.gc_old_gen_notifications, "major", []), var.notifications.major)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }

  rule {
    description   = "is too high > ${var.gc_old_gen_threshold_critical}"
    severity      = "Critical"
    detect_label  = "CRIT"
    # First non-null wins: rule-level switch, then detector-level, then module-level.
    disabled      = coalesce(var.gc_old_gen_disabled_critical, var.gc_old_gen_disabled, var.detectors_disabled)
    # Detector-specific recipients take precedence when non-empty, else the module-wide list.
    notifications = coalescelist(lookup(var.gc_old_gen_notifications, "critical", []), var.notifications.critical)

    parameterized_subject = local.rule_subject
    parameterized_body    = local.rule_body
  }
}

File renamed without changes.
10 changes: 10 additions & 0 deletions middleware/genericjmx/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Exposes the whole heap-usage detector resource to callers of this module.
output "jmx_memory_heap" {
  value       = signalfx_detector.jmx_memory_heap
  description = "Detector resource for jmx_memory_heap"
}

# Exposes the whole old-generation detector resource to callers of this module.
output "jmx_old_gen" {
  value       = signalfx_detector.jmx_old_gen
  description = "Detector resource for jmx_old_gen"
}

102 changes: 102 additions & 0 deletions middleware/genericjmx/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Module specific

# memory_heap detector

# Switches for the memory_heap detector rules. A null value means "not set",
# letting the detector fall back to the next, broader setting.
variable "memory_heap_disabled" {
  description = "Disable all alerting rules for memory_heap detector"
  type        = bool
  default     = null
}

variable "memory_heap_disabled_critical" {
  description = "Disable critical alerting rule for memory_heap detector"
  type        = bool
  default     = null
}

variable "memory_heap_disabled_major" {
  description = "Disable major alerting rule for memory_heap detector"
  type        = bool
  default     = null
}

# Map keyed by severity ("major", "critical"); a missing or empty entry makes
# the detector use the module-wide notifications for that severity.
variable "memory_heap_notifications" {
  description = "Notification recipients list per severity overridden for memory_heap detector"
  type        = map(list(string))
  default     = {}
}

# Appended verbatim to each data() stream, immediately before the
# transformation function, so it must not end with a trailing dot.
variable "memory_heap_aggregation_function" {
  description = "Aggregation function and group by for memory_heap detector (e.g. \".mean(by=['host'])\")"
  type        = string
  default     = ""
}

# Appended verbatim after the aggregation function on each data() stream.
variable "memory_heap_transformation_function" {
  description = "Transformation function for memory_heap detector (e.g. \".mean(over='5m')\")"
  type        = string
  default     = ".min(over='5m')"
}

# Thresholds are compared against a percentage (used / max * 100).
variable "memory_heap_threshold_major" {
  description = "Major threshold for memory_heap detector"
  type        = number
  default     = 80
}

variable "memory_heap_threshold_critical" {
  description = "Critical threshold for memory_heap detector"
  type        = number
  default     = 90
}

# gc_old_gen detector

# Switches for the gc_old_gen detector rules. A null value means "not set",
# letting the detector fall back to the next, broader setting.
variable "gc_old_gen_disabled" {
  description = "Disable all alerting rules for gc_old_gen detector"
  type        = bool
  default     = null
}

variable "gc_old_gen_disabled_critical" {
  description = "Disable critical alerting rule for gc_old_gen detector"
  type        = bool
  default     = null
}

variable "gc_old_gen_disabled_major" {
  description = "Disable major alerting rule for gc_old_gen detector"
  type        = bool
  default     = null
}

# Map keyed by severity ("major", "critical"); a missing or empty entry makes
# the detector use the module-wide notifications for that severity.
variable "gc_old_gen_notifications" {
  description = "Notification recipients list per severity overridden for gc_old_gen detector"
  type        = map(list(string))
  default     = {}
}

# Appended verbatim to each data() stream, immediately before the
# transformation function, so it must not end with a trailing dot.
variable "gc_old_gen_aggregation_function" {
  description = "Aggregation function and group by for gc_old_gen detector (e.g. \".mean(by=['host'])\")"
  type        = string
  default     = ""
}

# Appended verbatim after the aggregation function on each data() stream.
variable "gc_old_gen_transformation_function" {
  description = "Transformation function for gc_old_gen detector (e.g. \".mean(over='5m')\")"
  type        = string
  default     = ".min(over='5m')"
}

# Thresholds are compared against a percentage (used / max * 100).
variable "gc_old_gen_threshold_major" {
  description = "Major threshold for gc_old_gen detector"
  type        = number
  default     = 80
}

variable "gc_old_gen_threshold_critical" {
  description = "Critical threshold for gc_old_gen detector"
  type        = number
  default     = 90
}

Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
terraform {
required_providers {
signalfx = {
source = "terraform-providers/signalfx"
source = "splunk-terraform/signalfx"
version = ">= 4.26.4"
}
}
required_version = ">= 0.12.24"
required_version = ">= 0.12.26"
}
39 changes: 0 additions & 39 deletions middleware/jmx/README.md

This file was deleted.

Loading

0 comments on commit 7fb8e30

Please sign in to comment.