Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unique cloudwatch alarms naming #48

Merged
merged 2 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# Opensearch

## Requirements

| Name | Version |
Expand All @@ -17,7 +15,7 @@

| Name | Source | Version |
|------|--------|---------|
| <a name="module_cloudwatch_alarms"></a> [cloudwatch\_alarms](#module\_cloudwatch\_alarms) | terraform-aws-modules/cloudwatch/aws//wrappers/metric-alarm | ~> 4.5.0 |
| <a name="module_cloudwatch_alarms"></a> [cloudwatch\_alarms](#module\_cloudwatch\_alarms) | terraform-aws-modules/cloudwatch/aws//wrappers/metric-alarm | ~> 5.4.0 |

## Resources

Expand Down
36 changes: 19 additions & 17 deletions alarms.tf
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ locals {
default_alarms = {
# cluster status
cluster_status_red = {
alarm_name = "cluster_status_red"
alarm_name = "${aws_opensearch_domain.this.domain_name}_cluster_status_red"
alarm_description = "${aws_opensearch_domain.this.domain_name} has entered red status. One or more primary shards and their replicas are not allocated to a node"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -24,7 +24,7 @@ locals {
}

cluster_status_yellow = {
alarm_name = "cluster_status_yellow"
alarm_name = "${aws_opensearch_domain.this.domain_name}_cluster_status_yellow"
alarm_description = "${aws_opensearch_domain.this.domain_name} has entered yellow status. One or more indexes do not have a replica shard. "

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -44,7 +44,7 @@ locals {
}

cluster_writes_blocked = {
alarm_name = "cluster_writes_blocked"
alarm_name = "${aws_opensearch_domain.this.domain_name}_cluster_writes_blocked"
alarm_description = "${aws_opensearch_domain.this.domain_name} is blocking write requests"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand Down Expand Up @@ -85,7 +85,7 @@ locals {

# cpu utilization
data_high_cpu_utilization = {
alarm_name = "data_high_cpu_util"
alarm_name = "${aws_opensearch_domain.this.domain_name}_data_high_cpu_util"
alarm_description = "high cpu utilization on aos data nodes"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -106,7 +106,7 @@ locals {
}

master_high_cpu_utilization = {
alarm_name = "master_high_cpu_util"
alarm_name = "${aws_opensearch_domain.this.domain_name}_master_high_cpu_util"
alarm_description = "high cpu utilization on aos master nodes"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -128,7 +128,7 @@ locals {

warm_high_cpu_utilization = {
create = var.warm_instance_count != null
alarm_name = "warm_high_cpu_util"
alarm_name = "${aws_opensearch_domain.this.domain_name}_warm_high_cpu_util"
alarm_description = "high cpu utilization on aos warm nodes"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -151,7 +151,7 @@ locals {

# jvm pressure
data_high_jvm_pressure = {
alarm_name = "data_high_jvm_pressure"
alarm_name = "${aws_opensearch_domain.this.domain_name}_data_high_jvm_pressure"
alarm_description = "high jvm_pressure on aos data nodes"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -172,7 +172,7 @@ locals {
}

data_high_oldjvm_pressure = {
alarm_name = "data_high_oldgenjvm_pressure"
alarm_name = "${aws_opensearch_domain.this.domain_name}_data_high_oldgenjvm_pressure"
alarm_description = "high old gen jvm pressure on aos data nodes"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -193,7 +193,7 @@ locals {
}

master_high_jvm_pressure = {
alarm_name = "master_high_jvm_pressure"
alarm_name = "${aws_opensearch_domain.this.domain_name}_master_high_jvm_pressure"
alarm_description = "high jvm_pressure on aos master nodes"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -214,7 +214,7 @@ locals {
}

master_high_oldjvm_pressure = {
alarm_name = "master_high_oldgenjvm_pressure"
alarm_name = "${aws_opensearch_domain.this.domain_name}_master_high_oldgenjvm_pressure"
alarm_description = "high old gen jvm pressure on aos master nodes"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -237,7 +237,8 @@ locals {

# kms
aos_key_error = {
alarm_name = "aos_key_error"
create = var.encrypt_kms_key_id != null
alarm_name = "${aws_opensearch_domain.this.domain_name}_aos_key_error"
alarm_description = "the AWS KMS encryption key that is used to encrypt data at rest in your domain is disabled"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -258,7 +259,8 @@ locals {
}

aos_key_inaccessible = {
alarm_name = "aos_key_inaccessible"
create = var.encrypt_kms_key_id != null
alarm_name = "${aws_opensearch_domain.this.domain_name}_aos_key_inaccessible"
alarm_description = "the AWS KMS encryption key that is used to encrypt data at rest in your domain has been deleted or has revoked its grants to OpenSearch Service"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -280,7 +282,7 @@ locals {

# 5xx errors
server_errors = {
alarm_name = "server_errors"
alarm_name = "${aws_opensearch_domain.this.domain_name}_server_errors"
alarm_description = "One or more data nodes might be overloaded, or requests are failing to complete within the idle timeout period"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -301,7 +303,7 @@ locals {

# threadpool
threadpool_high_write_avg = {
alarm_name = "high_threadpool_write_queue_avg"
alarm_name = "${aws_opensearch_domain.this.domain_name}_high_threadpool_write_queue_avg"
alarm_description = "the cluster is experiencing high indexing concurrency"

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -321,7 +323,7 @@ locals {
}

threadpool_high_search_avg = {
alarm_name = "high_threadpool_search_avg"
alarm_name = "${aws_opensearch_domain.this.domain_name}_high_threadpool_search_avg"
alarm_description = " The cluster is experiencing high search concurrency. Consider scaling your cluster. You can also increase the search queue size, but increasing it excessively can cause out of memory errors."

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -341,7 +343,7 @@ locals {
}

threadpool_high_search_max = {
alarm_name = "high_threadpool_search_max"
alarm_name = "${aws_opensearch_domain.this.domain_name}_high_threadpool_search_max"
alarm_description = " The cluster is experiencing high search concurrency. Consider scaling your cluster. You can also increase the search queue size, but increasing it excessively can cause out of memory errors."

comparison_operator = "GreaterThanOrEqualToThreshold"
Expand All @@ -366,7 +368,7 @@ locals {

module "cloudwatch_alarms" {
source = "terraform-aws-modules/cloudwatch/aws//wrappers/metric-alarm"
version = "~> 4.5.0"
version = "~> 5.4.0"

items = local.alarms
}
Loading