Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add AWS RDS Read/Write IOPS alarms #16

Merged
merged 1 commit into from
Feb 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -193,3 +193,42 @@ resource "aws_cloudwatch_metric_alarm" "maximum_used_transaction_ids_too_high" {
alarm_actions = var.actions_alarm
ok_actions = var.actions_ok
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The link to the DB instance is missing here; add:

  dimensions = {
    DBInstanceIdentifier = var.db_instance_id
  }


# SOC2 requirements
# Alarm on the RDS "ReadIOPS" metric (statistic: Average) exceeding
# var.read_iops_too_high_threshold. Created only when
# var.create_read_iops_alarm is true.
resource "aws_cloudwatch_metric_alarm" "read_iops_too_high" {
  count               = var.create_read_iops_alarm ? 1 : 0
  alarm_name          = "${var.prefix}rds-${var.db_instance_id}-read-iops-too-high"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = var.evaluation_period
  metric_name         = "ReadIOPS"
  namespace           = "AWS/RDS"
  period              = var.statistic_period
  statistic           = "Average"
  threshold           = var.read_iops_too_high_threshold
  alarm_description   = "Average Read IO over last ${(var.evaluation_period * var.statistic_period / 60)} minutes too high, performance may suffer"
  alarm_actions       = var.actions_alarm
  ok_actions          = var.actions_ok

  dimensions = {
    # Must be the bare instance identifier (as flagged in review): the
    # dimension selects which DB instance's metric is watched. Appending the
    # alarm-name suffix pointed the alarm at an instance id that does not exist.
    DBInstanceIdentifier = var.db_instance_id
  }
}

# Alarm on the RDS "WriteIOPS" metric (statistic: Average) exceeding
# var.write_iops_too_high_threshold. Created only when
# var.create_write_iops_alarm is true.
resource "aws_cloudwatch_metric_alarm" "write_iops_too_high" {
  count               = var.create_write_iops_alarm ? 1 : 0
  alarm_name          = "${var.prefix}rds-${var.db_instance_id}-write-iops-too-high"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = var.evaluation_period
  metric_name         = "WriteIOPS"
  namespace           = "AWS/RDS"
  period              = var.statistic_period
  statistic           = "Average"
  threshold           = var.write_iops_too_high_threshold
  alarm_description   = "Average Write IO over last ${(var.evaluation_period * var.statistic_period / 60)} minutes too high, performance may suffer"
  alarm_actions       = var.actions_alarm
  ok_actions          = var.actions_ok

  dimensions = {
    # Must be the bare instance identifier (as flagged in review): the
    # dimension selects which DB instance's metric is watched. The previous
    # value was the alarm name, which is not a DB instance identifier.
    DBInstanceIdentifier = var.db_instance_id
  }
}
14 changes: 7 additions & 7 deletions outputs.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
output "alarm_cpu_utilization_too_high" {
# For older terraform support...
value = var.create_high_cpu_alarm ? aws_cloudwatch_metric_alarm.cpu_utilization_too_high[0] : null
value = var.create_high_cpu_alarm ? aws_cloudwatch_metric_alarm.cpu_utilization_too_high[0] : null
# For Terraform 0.15+, eventually use this much nicer code instead...
# value = one(aws_cloudwatch_metric_alarm.cpu_utilization_too_high.*)
description = "The CloudWatch Metric Alarm resource block for high CPU Utilization"
Expand All @@ -16,47 +16,47 @@ output "alarm_cpu_credit_balance_too_low" {

# Exposes the disk-queue-depth alarm resource, or null when
# var.create_high_queue_depth_alarm is false (the resource then has count = 0).
output "alarm_disk_queue_depth_too_high" {
  # For older terraform support...
  value = var.create_high_queue_depth_alarm ? aws_cloudwatch_metric_alarm.disk_queue_depth_too_high[0] : null
  # For Terraform 0.15+, eventually use this much nicer code instead...
  # value = one(aws_cloudwatch_metric_alarm.disk_queue_depth_too_high.*)
  description = "The CloudWatch Metric Alarm resource block for high Disk Queue Depth"
}

# Exposes the free-storage-space alarm resource, or null when
# var.create_low_disk_space_alarm is false.
output "alarm_disk_free_storage_space_too_low" {
  # For older terraform support...
  value = var.create_low_disk_space_alarm ? aws_cloudwatch_metric_alarm.disk_free_storage_space_too_low[0] : null
  # For Terraform 0.15+, eventually use this much nicer code instead...
  # value = one(aws_cloudwatch_metric_alarm.disk_free_storage_space_too_low.*)
  description = "The CloudWatch Metric Alarm resource block for low Free Storage Space"
}

# Exposes the disk-burst-balance alarm resource, or null when
# var.create_low_disk_burst_alarm is false.
output "alarm_disk_burst_balance_too_low" {
  # For older terraform support...
  value = var.create_low_disk_burst_alarm ? aws_cloudwatch_metric_alarm.disk_burst_balance_too_low[0] : null
  # For Terraform 0.15+, eventually use this much nicer code instead...
  # value = one(aws_cloudwatch_metric_alarm.disk_burst_balance_too_low.*)
  description = "The CloudWatch Metric Alarm resource block for low Disk Burst Balance"
}

# Exposes the freeable-memory alarm resource, or null when
# var.create_low_memory_alarm is false.
output "alarm_memory_freeable_too_low" {
  # For older terraform support...
  value = var.create_low_memory_alarm ? aws_cloudwatch_metric_alarm.memory_freeable_too_low[0] : null
  # For Terraform 0.15+, eventually use this much nicer code instead...
  # value = one(aws_cloudwatch_metric_alarm.memory_freeable_too_low.*)
  description = "The CloudWatch Metric Alarm resource block for low Freeable Memory"
}

# Exposes the swap-usage alarm resource, or null when
# var.create_swap_alarm is false.
output "alarm_memory_swap_usage_too_high" {
  # For older terraform support...
  value = var.create_swap_alarm ? aws_cloudwatch_metric_alarm.memory_swap_usage_too_high[0] : null
  # For Terraform 0.15+, eventually use this much nicer code instead...
  # value = one(aws_cloudwatch_metric_alarm.memory_swap_usage_too_high.*)
  description = "The CloudWatch Metric Alarm resource block for high Memory Swap Usage"
}

output "alarm_connection_count_anomalous" {
# For older terraform support...
value = var.create_anomaly_alarm ? aws_cloudwatch_metric_alarm.connection_count_anomalous[0] : null
value = var.create_anomaly_alarm ? aws_cloudwatch_metric_alarm.connection_count_anomalous[0] : null
# For Terraform 0.15+, eventually use this much nicer code instead...
# value = one(aws_cloudwatch_metric_alarm.connection_count_anomalous.*)
description = "The CloudWatch Metric Alarm resource block for anomalous Connection Count"
Expand Down
34 changes: 29 additions & 5 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ variable "create_anomaly_alarm" {
description = "Whether or not to create the fairly noisy anomaly alarm. Default is to create it (for backwards compatible support), but recommended to disable this for non-production databases"
}

# Feature toggle for the Read IOPS alarm; defaults to enabled.
variable "create_read_iops_alarm" {
  description = "Whether or not to create the Read IOPS too high alarm. Default is to create it."
  type        = bool
  default     = true
}

# Feature toggle for the Write IOPS alarm; defaults to enabled.
variable "create_write_iops_alarm" {
  description = "Whether or not to create the Write IOPS too high alarm. Default is to create it."
  type        = bool
  default     = true
}

variable "anomaly_period" {
type = string
default = "600"
Expand All @@ -82,13 +94,13 @@ variable "anomaly_band_width" {
}

# Actions passed to each alarm's `alarm_actions`; empty by default.
variable "actions_alarm" {
  # Explicit element type: the bare `list` keyword is the legacy shorthand.
  type        = list(any)
  default     = []
  description = "A list of actions to take when alarms are triggered. Will likely be an SNS topic for event distribution."
}

# Actions passed to each alarm's `ok_actions`; empty by default.
variable "actions_ok" {
  # Explicit element type: the bare `list` keyword is the legacy shorthand.
  type        = list(any)
  default     = []
  description = "A list of actions to take when alarms are cleared. Will likely be an SNS topic for event distribution."
}
Expand Down Expand Up @@ -135,21 +147,33 @@ variable "memory_swap_usage_too_high_threshold" {
description = "Alarm threshold for the 'highSwapUsage' alarm"
}

# Threshold consumed by the read_iops_too_high alarm (compared against the
# Average ReadIOPS statistic with GreaterThanThreshold).
variable "read_iops_too_high_threshold" {
  description = "Alarm threshold for the 'read-iops-too-high' alarm"
  type        = string
  default     = "100"
}

# Threshold consumed by the write_iops_too_high alarm (compared against the
# Average WriteIOPS statistic with GreaterThanThreshold).
variable "write_iops_too_high_threshold" {
  description = "Alarm threshold for the 'write-iops-too-high' alarm"
  type        = string
  default     = "10000"
}

# Optional string-to-string tag map; empty unless the caller supplies one.
variable "tags" {
  description = "Tags to attach to each alarm"
  type        = map(string)
  default     = {}
}

# Required input (no default): the instance class of the monitored database.
variable "db_instance_class" {
  type        = string
  description = "The rds instance class, e.g. db.t3.medium"
}

# Optional engine name; defaults to the empty string when not provided.
variable "engine" {
  type        = string
  description = "The RDS engine being used. Used for postgres or mysql specific alarms"
  default     = ""
}

variable "maximum_used_transaction_ids_too_high_threshold" {
Expand Down