From bdb2914927f7654e8fb9dc9748de63714e44d298 Mon Sep 17 00:00:00 2001 From: Maxime de Roucy Date: Mon, 22 Jan 2024 19:02:46 +0100 Subject: [PATCH] elasticsearch: remove blank + add extrapolation + add lasting_at_least --- .../conf/01-cluster-status.yaml | 2 ++ .../conf/02-cluster_initializing_shards.yaml | 2 +- .../conf/05-cluster_pending_tasks.yaml | 4 ++-- .../conf/07-cluster_file_descriptor.yaml | 6 ++++-- .../conf/08-cluster_JVM_heap_memory.yaml | 4 ++-- .../conf/09-cluster_JVM_memory_young_usage.yaml | 8 +++++--- .../conf/10-cluster_JVM_memory_old_usage.yaml | 8 +++++--- ...r_old-generation_garbage_collections_latency.yaml | 6 +++--- ...young-generation_garbage_collections_latency.yaml | 6 +++--- .../conf/13-cluster_indexing_latency.yaml | 6 +++--- .../conf/14-cluster_flush_latency.yaml | 6 +++--- .../conf/15-cluster_search_latency.yaml | 6 +++--- .../conf/16-cluster_fetch_latency.yaml | 6 +++--- .../17-cluster_fielddata_cache_evictions_rate.yaml | 4 ++-- .../conf/18-cluster_time_in_queue_change.yaml | 6 +++--- modules/smart-agent_elasticsearch/detectors-gen.tf | 12 ++++++------ modules/smart-agent_elasticsearch/variables-gen.tf | 4 ++-- modules/smart-agent_redis/README.md | 1 + 18 files changed, 53 insertions(+), 44 deletions(-) diff --git a/modules/smart-agent_elasticsearch/conf/01-cluster-status.yaml b/modules/smart-agent_elasticsearch/conf/01-cluster-status.yaml index 25976153d..ecdbcc7f3 100644 --- a/modules/smart-agent_elasticsearch/conf/01-cluster-status.yaml +++ b/modules/smart-agent_elasticsearch/conf/01-cluster-status.yaml @@ -11,8 +11,10 @@ rules: comparator: ">=" description: "is red" lasting_duration: '5m' + lasting_at_least: 0.5 major: threshold: 1 comparator: ">=" description: "is yellow" lasting_duration: '5m' + lasting_at_least: 0.5 diff --git a/modules/smart-agent_elasticsearch/conf/02-cluster_initializing_shards.yaml b/modules/smart-agent_elasticsearch/conf/02-cluster_initializing_shards.yaml index 2c6bce1fc..1ae7437ad 100644 --- a/modules/smart-agent_elasticsearch/conf/02-cluster_initializing_shards.yaml +++ b/modules/smart-agent_elasticsearch/conf/02-cluster_initializing_shards.yaml @@ -17,4 +17,4 @@ rules: comparator: ">" dependency: critical description: "is too high" - lasting_duration: '15m' + lasting_duration: '15m' diff --git a/modules/smart-agent_elasticsearch/conf/05-cluster_pending_tasks.yaml b/modules/smart-agent_elasticsearch/conf/05-cluster_pending_tasks.yaml index 63aab518b..b3a184d6c 100644 --- a/modules/smart-agent_elasticsearch/conf/05-cluster_pending_tasks.yaml +++ b/modules/smart-agent_elasticsearch/conf/05-cluster_pending_tasks.yaml @@ -11,10 +11,10 @@ rules: threshold: 5 comparator: ">=" description: "are too high" - lasting_duration: '15m' + lasting_duration: '15m' major: threshold: 0 comparator: ">" dependency: critical description: "are too high" - lasting_duration: '15m' \ No newline at end of file + lasting_duration: '15m' diff --git a/modules/smart-agent_elasticsearch/conf/07-cluster_file_descriptor.yaml b/modules/smart-agent_elasticsearch/conf/07-cluster_file_descriptor.yaml index 5b55150b4..df61e444f 100644 --- a/modules/smart-agent_elasticsearch/conf/07-cluster_file_descriptor.yaml +++ b/modules/smart-agent_elasticsearch/conf/07-cluster_file_descriptor.yaml @@ -6,9 +6,11 @@ signals: A: metric: "elasticsearch.process.open_file_descriptors" rollup: average + extrapolation: zero B: metric: "elasticsearch.process.max_file_descriptors" rollup: average + extrapolation: last_value signal: formula: "(A/B).scale(100)" rules: @@ -16,10 +18,10 @@ rules: threshold: 95 comparator: ">=" description: "is too high" - lasting_duration: '15m' + lasting_duration: '15m' major: threshold: 90 comparator: ">" dependency: critical description: "is too high" - lasting_duration: '15m' \ No newline at end of file + lasting_duration: '15m' diff --git a/modules/smart-agent_elasticsearch/conf/08-cluster_JVM_heap_memory.yaml b/modules/smart-agent_elasticsearch/conf/08-cluster_JVM_heap_memory.yaml index fb589bbef..3bcc58534 100644 --- a/modules/smart-agent_elasticsearch/conf/08-cluster_JVM_heap_memory.yaml +++ b/modules/smart-agent_elasticsearch/conf/08-cluster_JVM_heap_memory.yaml @@ -11,10 +11,10 @@ rules: threshold: 90 comparator: ">=" description: "is too high" - lasting_duration: '5m' + lasting_duration: '5m' major: threshold: 80 comparator: ">" dependency: critical description: "is too high" - lasting_duration: '5m' \ No newline at end of file + lasting_duration: '5m' diff --git a/modules/smart-agent_elasticsearch/conf/09-cluster_JVM_memory_young_usage.yaml b/modules/smart-agent_elasticsearch/conf/09-cluster_JVM_memory_young_usage.yaml index ee42abe48..3b08f8a96 100644 --- a/modules/smart-agent_elasticsearch/conf/09-cluster_JVM_memory_young_usage.yaml +++ b/modules/smart-agent_elasticsearch/conf/09-cluster_JVM_memory_young_usage.yaml @@ -6,9 +6,11 @@ signals: A: metric: "elasticsearch.jvm.mem.pools.young.used_in_bytes" rollup: average + extrapolation: zero B: metric: "elasticsearch.jvm.mem.pools.young.max_in_bytes" rollup: average + extrapolation: last_value signal: formula: "(A/B).fill(0).scale(100)" rules: @@ -16,10 +18,10 @@ rules: threshold: 90 comparator: ">=" description: "is too high" - lasting_duration: '10m' + lasting_duration: '10m' minor: threshold: 80 comparator: ">" - description: "is too high" + description: "is too high" dependency: major - lasting_duration: '10m' \ No newline at end of file + lasting_duration: '10m' diff --git a/modules/smart-agent_elasticsearch/conf/10-cluster_JVM_memory_old_usage.yaml b/modules/smart-agent_elasticsearch/conf/10-cluster_JVM_memory_old_usage.yaml index b00693737..32b2b5f63 100644 --- a/modules/smart-agent_elasticsearch/conf/10-cluster_JVM_memory_old_usage.yaml +++ b/modules/smart-agent_elasticsearch/conf/10-cluster_JVM_memory_old_usage.yaml @@ -6,9 +6,11 @@ signals: A: metric: "elasticsearch.jvm.mem.pools.old.used_in_bytes" rollup: average + extrapolation: zero B: metric: "elasticsearch.jvm.mem.pools.old.max_in_bytes" rollup: average + extrapolation: last_value signal: formula: "(A/B).fill(0).scale(100)" rules: @@ -16,10 +18,10 @@ rules: threshold: 90 comparator: ">=" description: "is too high" - lasting_duration: '10m' + lasting_duration: '10m' minor: threshold: 80 comparator: ">" - description: "is too high" + description: "is too high" dependency: major - lasting_duration: '10m' \ No newline at end of file + lasting_duration: '10m' diff --git a/modules/smart-agent_elasticsearch/conf/11-cluster_old-generation_garbage_collections_latency.yaml b/modules/smart-agent_elasticsearch/conf/11-cluster_old-generation_garbage_collections_latency.yaml index 63be00353..f93774ef3 100644 --- a/modules/smart-agent_elasticsearch/conf/11-cluster_old-generation_garbage_collections_latency.yaml +++ b/modules/smart-agent_elasticsearch/conf/11-cluster_old-generation_garbage_collections_latency.yaml @@ -18,10 +18,10 @@ rules: threshold: 300 comparator: ">=" description: "is too high" - lasting_duration: '15m' + lasting_duration: '15m' minor: threshold: 200 comparator: ">" - description: "is too high" + description: "is too high" dependency: major - lasting_duration: '15m' \ No newline at end of file + lasting_duration: '15m' diff --git a/modules/smart-agent_elasticsearch/conf/12-cluster_young-generation_garbage_collections_latency.yaml b/modules/smart-agent_elasticsearch/conf/12-cluster_young-generation_garbage_collections_latency.yaml index 6fa6dfa99..0ddc4e0ec 100644 --- a/modules/smart-agent_elasticsearch/conf/12-cluster_young-generation_garbage_collections_latency.yaml +++ b/modules/smart-agent_elasticsearch/conf/12-cluster_young-generation_garbage_collections_latency.yaml @@ -18,10 +18,10 @@ rules: threshold: 40 comparator: ">=" description: "is too high" - lasting_duration: '15m' + lasting_duration: '15m' minor: threshold: 20 comparator: ">" - description: "is too high" + description: "is too high" dependency: major - lasting_duration: '15m' \ No newline at end of file + lasting_duration: '15m' diff --git a/modules/smart-agent_elasticsearch/conf/13-cluster_indexing_latency.yaml b/modules/smart-agent_elasticsearch/conf/13-cluster_indexing_latency.yaml index f881d43cb..1af7327d1 100644 --- a/modules/smart-agent_elasticsearch/conf/13-cluster_indexing_latency.yaml +++ b/modules/smart-agent_elasticsearch/conf/13-cluster_indexing_latency.yaml @@ -18,10 +18,10 @@ rules: threshold: 30 comparator: ">=" description: "is too high" - lasting_duration: '1h' + lasting_duration: '1h' minor: threshold: 15 comparator: ">" - description: "is too high" + description: "is too high" dependency: major - lasting_duration: '1h' \ No newline at end of file + lasting_duration: '1h' diff --git a/modules/smart-agent_elasticsearch/conf/14-cluster_flush_latency.yaml b/modules/smart-agent_elasticsearch/conf/14-cluster_flush_latency.yaml index 3dc7b354b..ba4f0a309 100644 --- a/modules/smart-agent_elasticsearch/conf/14-cluster_flush_latency.yaml +++ b/modules/smart-agent_elasticsearch/conf/14-cluster_flush_latency.yaml @@ -18,10 +18,10 @@ rules: threshold: 150 comparator: ">=" description: "is too high" - lasting_duration: '15m' + lasting_duration: '15m' minor: threshold: 100 comparator: ">" - description: "is too high" + description: "is too high" dependency: major - lasting_duration: '15m' \ No newline at end of file + lasting_duration: '15m' diff --git a/modules/smart-agent_elasticsearch/conf/15-cluster_search_latency.yaml b/modules/smart-agent_elasticsearch/conf/15-cluster_search_latency.yaml index cb6872764..a7ad8d6bd 100644 --- a/modules/smart-agent_elasticsearch/conf/15-cluster_search_latency.yaml +++ b/modules/smart-agent_elasticsearch/conf/15-cluster_search_latency.yaml @@ -18,10 +18,10 @@ rules: threshold: 20 comparator: ">=" description: "is too high" - lasting_duration: '30m' + lasting_duration: '30m' minor: threshold: 10 comparator: ">" - description: "is too high" + description: "is too high" dependency: major - lasting_duration: '30m' \ No newline at end of file + lasting_duration: '30m' diff --git a/modules/smart-agent_elasticsearch/conf/16-cluster_fetch_latency.yaml b/modules/smart-agent_elasticsearch/conf/16-cluster_fetch_latency.yaml index 6dd286303..fba452716 100644 --- a/modules/smart-agent_elasticsearch/conf/16-cluster_fetch_latency.yaml +++ b/modules/smart-agent_elasticsearch/conf/16-cluster_fetch_latency.yaml @@ -18,10 +18,10 @@ rules: threshold: 20 comparator: ">=" description: "is too high" - lasting_duration: '15m' + lasting_duration: '15m' minor: threshold: 10 comparator: ">" - description: "is too high" + description: "is too high" dependency: major - lasting_duration: '15m' \ No newline at end of file + lasting_duration: '15m' diff --git a/modules/smart-agent_elasticsearch/conf/17-cluster_fielddata_cache_evictions_rate.yaml b/modules/smart-agent_elasticsearch/conf/17-cluster_fielddata_cache_evictions_rate.yaml index ba8bd48b7..38144d614 100644 --- a/modules/smart-agent_elasticsearch/conf/17-cluster_fielddata_cache_evictions_rate.yaml +++ b/modules/smart-agent_elasticsearch/conf/17-cluster_fielddata_cache_evictions_rate.yaml @@ -8,7 +8,7 @@ signals: extrapolation: zero rollup: delta signal: - formula: A.rateofchange() + formula: A.rateofchange() rules: major: threshold: 120 @@ -20,4 +20,4 @@ rules: comparator: ">" description: "is too high" dependency: major - lasting_duration: '15m' \ No newline at end of file + lasting_duration: '15m' diff --git a/modules/smart-agent_elasticsearch/conf/18-cluster_time_in_queue_change.yaml b/modules/smart-agent_elasticsearch/conf/18-cluster_time_in_queue_change.yaml index d8701b683..fdfb62087 100644 --- a/modules/smart-agent_elasticsearch/conf/18-cluster_time_in_queue_change.yaml +++ b/modules/smart-agent_elasticsearch/conf/18-cluster_time_in_queue_change.yaml @@ -7,16 +7,16 @@ signals: metric: "elasticsearch.cluster.task-max-wait-time" rollup: average signal: - formula: A.rateofchange() + formula: A.rateofchange() rules: major: threshold: 200 comparator: ">=" description: "is too high" - lasting_duration: '15m' + lasting_duration: '15m' minor: threshold: 100 comparator: ">" description: "is too high" dependency: major - lasting_duration: '15m' \ No newline at end of file + lasting_duration: '15m' diff --git a/modules/smart-agent_elasticsearch/detectors-gen.tf b/modules/smart-agent_elasticsearch/detectors-gen.tf index b0fbd51ce..769a913eb 100644 --- a/modules/smart-agent_elasticsearch/detectors-gen.tf +++ b/modules/smart-agent_elasticsearch/detectors-gen.tf @@ -256,8 +256,8 @@ resource "signalfx_detector" "file_descriptors" { program_text = <<-EOF base_filtering = filter('node_name', '*') and filter('plugin', 'elasticsearch') - A = data('elasticsearch.process.open_file_descriptors', filter=base_filtering and ${module.filtering.signalflow}, rollup='average')${var.file_descriptors_aggregation_function}${var.file_descriptors_transformation_function} - B = data('elasticsearch.process.max_file_descriptors', filter=base_filtering and ${module.filtering.signalflow}, rollup='average')${var.file_descriptors_aggregation_function}${var.file_descriptors_transformation_function} + A = data('elasticsearch.process.open_file_descriptors', filter=base_filtering and ${module.filtering.signalflow}, rollup='average', extrapolation='zero')${var.file_descriptors_aggregation_function}${var.file_descriptors_transformation_function} + B = data('elasticsearch.process.max_file_descriptors', filter=base_filtering and ${module.filtering.signalflow}, rollup='average', extrapolation='last_value')${var.file_descriptors_aggregation_function}${var.file_descriptors_transformation_function} signal = (A/B).scale(100).publish('signal') detect(when(signal >= ${var.file_descriptors_threshold_critical}, lasting=%{if var.file_descriptors_lasting_duration_critical == null}None%{else}'${var.file_descriptors_lasting_duration_critical}'%{endif}, at_least=${var.file_descriptors_at_least_percentage_critical})).publish('CRIT') detect(when(signal > ${var.file_descriptors_threshold_major}, lasting=%{if var.file_descriptors_lasting_duration_major == null}None%{else}'${var.file_descriptors_lasting_duration_major}'%{endif}, at_least=${var.file_descriptors_at_least_percentage_major}) and (not when(signal >= ${var.file_descriptors_threshold_critical}, lasting=%{if var.file_descriptors_lasting_duration_critical == null}None%{else}'${var.file_descriptors_lasting_duration_critical}'%{endif}, at_least=${var.file_descriptors_at_least_percentage_critical}))).publish('MAJOR') @@ -340,8 +340,8 @@ resource "signalfx_detector" "jvm_memory_young_usage" { program_text = <<-EOF base_filtering = filter('node_name', '*') and filter('plugin', 'elasticsearch') - A = data('elasticsearch.jvm.mem.pools.young.used_in_bytes', filter=base_filtering and ${module.filtering.signalflow}, rollup='average')${var.jvm_memory_young_usage_aggregation_function}${var.jvm_memory_young_usage_transformation_function} - B = data('elasticsearch.jvm.mem.pools.young.max_in_bytes', filter=base_filtering and ${module.filtering.signalflow}, rollup='average')${var.jvm_memory_young_usage_aggregation_function}${var.jvm_memory_young_usage_transformation_function} + A = data('elasticsearch.jvm.mem.pools.young.used_in_bytes', filter=base_filtering and ${module.filtering.signalflow}, rollup='average', extrapolation='zero')${var.jvm_memory_young_usage_aggregation_function}${var.jvm_memory_young_usage_transformation_function} + B = data('elasticsearch.jvm.mem.pools.young.max_in_bytes', filter=base_filtering and ${module.filtering.signalflow}, rollup='average', extrapolation='last_value')${var.jvm_memory_young_usage_aggregation_function}${var.jvm_memory_young_usage_transformation_function} signal = (A/B).fill(0).scale(100).publish('signal') detect(when(signal >= ${var.jvm_memory_young_usage_threshold_major}, lasting=%{if var.jvm_memory_young_usage_lasting_duration_major == null}None%{else}'${var.jvm_memory_young_usage_lasting_duration_major}'%{endif}, at_least=${var.jvm_memory_young_usage_at_least_percentage_major})).publish('MAJOR') detect(when(signal > ${var.jvm_memory_young_usage_threshold_minor}, lasting=%{if var.jvm_memory_young_usage_lasting_duration_minor == null}None%{else}'${var.jvm_memory_young_usage_lasting_duration_minor}'%{endif}, at_least=${var.jvm_memory_young_usage_at_least_percentage_minor}) and (not when(signal >= ${var.jvm_memory_young_usage_threshold_major}, lasting=%{if var.jvm_memory_young_usage_lasting_duration_major == null}None%{else}'${var.jvm_memory_young_usage_lasting_duration_major}'%{endif}, at_least=${var.jvm_memory_young_usage_at_least_percentage_major}))).publish('MINOR') @@ -383,8 +383,8 @@ resource "signalfx_detector" "jvm_memory_old_usage" { program_text = <<-EOF base_filtering = filter('node_name', '*') and filter('plugin', 'elasticsearch') - A = data('elasticsearch.jvm.mem.pools.old.used_in_bytes', filter=base_filtering and ${module.filtering.signalflow}, rollup='average')${var.jvm_memory_old_usage_aggregation_function}${var.jvm_memory_old_usage_transformation_function} - B = data('elasticsearch.jvm.mem.pools.old.max_in_bytes', filter=base_filtering and ${module.filtering.signalflow}, rollup='average')${var.jvm_memory_old_usage_aggregation_function}${var.jvm_memory_old_usage_transformation_function} + A = data('elasticsearch.jvm.mem.pools.old.used_in_bytes', filter=base_filtering and ${module.filtering.signalflow}, rollup='average', extrapolation='zero')${var.jvm_memory_old_usage_aggregation_function}${var.jvm_memory_old_usage_transformation_function} + B = data('elasticsearch.jvm.mem.pools.old.max_in_bytes', filter=base_filtering and ${module.filtering.signalflow}, rollup='average', extrapolation='last_value')${var.jvm_memory_old_usage_aggregation_function}${var.jvm_memory_old_usage_transformation_function} signal = (A/B).fill(0).scale(100).publish('signal') detect(when(signal >= ${var.jvm_memory_old_usage_threshold_major}, lasting=%{if var.jvm_memory_old_usage_lasting_duration_major == null}None%{else}'${var.jvm_memory_old_usage_lasting_duration_major}'%{endif}, at_least=${var.jvm_memory_old_usage_at_least_percentage_major})).publish('MAJOR') detect(when(signal > ${var.jvm_memory_old_usage_threshold_minor}, lasting=%{if var.jvm_memory_old_usage_lasting_duration_minor == null}None%{else}'${var.jvm_memory_old_usage_lasting_duration_minor}'%{endif}, at_least=${var.jvm_memory_old_usage_at_least_percentage_minor}) and (not when(signal >= ${var.jvm_memory_old_usage_threshold_major}, lasting=%{if var.jvm_memory_old_usage_lasting_duration_major == null}None%{else}'${var.jvm_memory_old_usage_lasting_duration_major}'%{endif}, at_least=${var.jvm_memory_old_usage_at_least_percentage_major}))).publish('MINOR') diff --git a/modules/smart-agent_elasticsearch/variables-gen.tf b/modules/smart-agent_elasticsearch/variables-gen.tf index 7d1383245..43f1abdbb 100644 --- a/modules/smart-agent_elasticsearch/variables-gen.tf +++ b/modules/smart-agent_elasticsearch/variables-gen.tf @@ -113,7 +113,7 @@ variable "cluster_status_lasting_duration_critical" { variable "cluster_status_at_least_percentage_critical" { description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" type = number - default = 1 + default = 0.5 } variable "cluster_status_threshold_major" { description = "Major threshold for cluster_status detector" @@ -130,7 +130,7 @@ variable "cluster_status_lasting_duration_major" { variable "cluster_status_at_least_percentage_major" { description = "Percentage of lasting that conditions must be true before raising alert (>= 0.0 and <= 1.0)" type = number - default = 1 + default = 0.5 } # cluster_initializing_shards detector diff --git a/modules/smart-agent_redis/README.md b/modules/smart-agent_redis/README.md index 333ada608..b71f9ec63 100644 --- a/modules/smart-agent_redis/README.md +++ b/modules/smart-agent_redis/README.md @@ -147,6 +147,7 @@ parameter to the corresponding monitor configuration: - '!${var.use_otel_receiver ? "redis.keys.evicted" : "counter.evicted_keys"}' - '!${var.use_otel_receiver ? "redis.keys.expired" : "counter.expired_keys"}' - '!${var.use_otel_receiver ? "redis.keyspace.hits" : "derive.keyspace_hits"}' + - '!${var.use_otel_receiver ? "redis.keyspace.misses" : "derive.keyspace_misses"}' - '!${var.use_otel_receiver ? "redis.memory.rss" : "bytes.used_memory_rss"}' - '!${var.use_otel_receiver ? "redis.memory.used" : "bytes.used_memory"}'