Skip to content

Commit

Permalink
Merge pull request #16 from evidentid/iops-alerts
Browse files Browse the repository at this point in the history
Add AWS RDS Read/Write IOPS alarms
  • Loading branch information
lorenzoaiello authored Feb 17, 2024
2 parents 605f984 + 1c7dc5f commit 54e9f42
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 12 deletions.
39 changes: 39 additions & 0 deletions main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,42 @@ resource "aws_cloudwatch_metric_alarm" "maximum_used_transaction_ids_too_high" {
alarm_actions = var.actions_alarm
ok_actions = var.actions_ok
}

# SOC2 requirements
resource "aws_cloudwatch_metric_alarm" "read_iops_too_high" {
  # Alarm on the average ReadIOPS of the monitored RDS instance.
  # Created only when var.create_read_iops_alarm is true.
  count               = var.create_read_iops_alarm ? 1 : 0
  alarm_name          = "${var.prefix}rds-${var.db_instance_id}-read-iops-too-high"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = var.evaluation_period
  metric_name         = "ReadIOPS"
  namespace           = "AWS/RDS"
  period              = var.statistic_period
  statistic           = "Average"
  threshold           = var.read_iops_too_high_threshold
  # evaluation_period * statistic_period is the total window in seconds; / 60 renders it in minutes.
  alarm_description = "Average Read IO over last ${(var.evaluation_period * var.statistic_period / 60)} minutes too high, performance may suffer"
  alarm_actions     = var.actions_alarm
  ok_actions        = var.actions_ok

  # The dimension value must be the bare DB instance identifier. Appending the
  # alarm-name suffix points the alarm at a metric stream that no instance
  # publishes, so it would never receive data and never fire.
  dimensions = {
    DBInstanceIdentifier = var.db_instance_id
  }

  # var.tags is declared as "Tags to attach to each alarm"; defaults to {}.
  tags = var.tags
}

resource "aws_cloudwatch_metric_alarm" "write_iops_too_high" {
  # Alarm on the average WriteIOPS of the monitored RDS instance.
  # Created only when var.create_write_iops_alarm is true.
  count               = var.create_write_iops_alarm ? 1 : 0
  alarm_name          = "${var.prefix}rds-${var.db_instance_id}-write-iops-too-high"
  comparison_operator = "GreaterThanThreshold"
  evaluation_periods  = var.evaluation_period
  metric_name         = "WriteIOPS"
  namespace           = "AWS/RDS"
  period              = var.statistic_period
  statistic           = "Average"
  threshold           = var.write_iops_too_high_threshold
  # evaluation_period * statistic_period is the total window in seconds; / 60 renders it in minutes.
  alarm_description = "Average Write IO over last ${(var.evaluation_period * var.statistic_period / 60)} minutes too high, performance may suffer"
  alarm_actions     = var.actions_alarm
  ok_actions        = var.actions_ok

  # The dimension value must be the bare DB instance identifier, not the alarm
  # name. With the alarm name here the alarm watches a metric stream that no
  # instance publishes, so it would never receive data and never fire.
  dimensions = {
    DBInstanceIdentifier = var.db_instance_id
  }

  # var.tags is declared as "Tags to attach to each alarm"; defaults to {}.
  tags = var.tags
}
14 changes: 7 additions & 7 deletions outputs.tf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
output "alarm_cpu_utilization_too_high" {
# For older terraform support...
value = var.create_high_cpu_alarm ? aws_cloudwatch_metric_alarm.cpu_utilization_too_high[0] : null
value = var.create_high_cpu_alarm ? aws_cloudwatch_metric_alarm.cpu_utilization_too_high[0] : null
# For Terraform 0.15+, eventually use this much nicer code instead...
# value = one(aws_cloudwatch_metric_alarm.cpu_utilization_too_high.*)
description = "The CloudWatch Metric Alarm resource block for high CPU Utilization"
Expand All @@ -16,47 +16,47 @@ output "alarm_cpu_credit_balance_too_low" {

# Exposes the high Disk Queue Depth alarm resource, or null when that alarm is disabled.
output "alarm_disk_queue_depth_too_high" {
  # For older terraform support...
  # Index [0] is only evaluated when the create flag is true; otherwise the output is null.
  value = var.create_high_queue_depth_alarm ? aws_cloudwatch_metric_alarm.disk_queue_depth_too_high[0] : null
  # For Terraform 0.15+, eventually use this much nicer code instead...
  # value = one(aws_cloudwatch_metric_alarm.disk_queue_depth_too_high.*)
  description = "The CloudWatch Metric Alarm resource block for high Disk Queue Depth"
}

# Exposes the low Free Storage Space alarm resource, or null when that alarm is disabled.
output "alarm_disk_free_storage_space_too_low" {
  # For older terraform support...
  # Index [0] is only evaluated when the create flag is true; otherwise the output is null.
  value = var.create_low_disk_space_alarm ? aws_cloudwatch_metric_alarm.disk_free_storage_space_too_low[0] : null
  # For Terraform 0.15+, eventually use this much nicer code instead...
  # value = one(aws_cloudwatch_metric_alarm.disk_free_storage_space_too_low.*)
  description = "The CloudWatch Metric Alarm resource block for low Free Storage Space"
}

# Exposes the low Disk Burst Balance alarm resource, or null when that alarm is disabled.
output "alarm_disk_burst_balance_too_low" {
  # For older terraform support...
  # Index [0] is only evaluated when the create flag is true; otherwise the output is null.
  value = var.create_low_disk_burst_alarm ? aws_cloudwatch_metric_alarm.disk_burst_balance_too_low[0] : null
  # For Terraform 0.15+, eventually use this much nicer code instead...
  # value = one(aws_cloudwatch_metric_alarm.disk_burst_balance_too_low.*)
  description = "The CloudWatch Metric Alarm resource block for low Disk Burst Balance"
}

# Exposes the low Freeable Memory alarm resource, or null when that alarm is disabled.
output "alarm_memory_freeable_too_low" {
  # For older terraform support...
  # Index [0] is only evaluated when the create flag is true; otherwise the output is null.
  value = var.create_low_memory_alarm ? aws_cloudwatch_metric_alarm.memory_freeable_too_low[0] : null
  # For Terraform 0.15+, eventually use this much nicer code instead...
  # value = one(aws_cloudwatch_metric_alarm.memory_freeable_too_low.*)
  description = "The CloudWatch Metric Alarm resource block for low Freeable Memory"
}

# Exposes the high Memory Swap Usage alarm resource, or null when that alarm is disabled.
output "alarm_memory_swap_usage_too_high" {
  # For older terraform support...
  # Index [0] is only evaluated when the create flag is true; otherwise the output is null.
  value = var.create_swap_alarm ? aws_cloudwatch_metric_alarm.memory_swap_usage_too_high[0] : null
  # For Terraform 0.15+, eventually use this much nicer code instead...
  # value = one(aws_cloudwatch_metric_alarm.memory_swap_usage_too_high.*)
  description = "The CloudWatch Metric Alarm resource block for high Memory Swap Usage"
}

output "alarm_connection_count_anomalous" {
# For older terraform support...
value = var.create_anomaly_alarm ? aws_cloudwatch_metric_alarm.connection_count_anomalous[0] : null
value = var.create_anomaly_alarm ? aws_cloudwatch_metric_alarm.connection_count_anomalous[0] : null
# For Terraform 0.15+, eventually use this much nicer code instead...
# value = one(aws_cloudwatch_metric_alarm.connection_count_anomalous.*)
description = "The CloudWatch Metric Alarm resource block for anomalous Connection Count"
Expand Down
34 changes: 29 additions & 5 deletions variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,18 @@ variable "create_anomaly_alarm" {
description = "Whether or not to create the fairly noisy anomaly alarm. Default is to create it (for backwards compatible support), but recommended to disable this for non-production databases"
}

# Feature toggle for the Read IOPS alarm; the alarm is created unless this is set to false.
variable "create_read_iops_alarm" {
  description = "Whether or not to create the Read IOPS too high alarm. Default is to create it."
  type        = bool
  default     = true
}

# Feature toggle for the Write IOPS alarm; the alarm is created unless this is set to false.
variable "create_write_iops_alarm" {
  description = "Whether or not to create the Write IOPS too high alarm. Default is to create it."
  type        = bool
  default     = true
}

variable "anomaly_period" {
type = string
default = "600"
Expand All @@ -82,13 +94,13 @@ variable "anomaly_band_width" {
}

# Actions wired into `alarm_actions` of the alarms (fired on transition to ALARM).
variable "actions_alarm" {
  # Explicit element type; the bare `list` keyword is the legacy shorthand for list(any).
  type        = list(any)
  default     = []
  description = "A list of actions to take when alarms are triggered. Will likely be an SNS topic for event distribution."
}

# Actions wired into `ok_actions` of the alarms (fired on transition back to OK).
variable "actions_ok" {
  # Explicit element type; the bare `list` keyword is the legacy shorthand for list(any).
  type        = list(any)
  default     = []
  description = "A list of actions to take when alarms are cleared. Will likely be an SNS topic for event distribution."
}
Expand Down Expand Up @@ -135,21 +147,33 @@ variable "memory_swap_usage_too_high_threshold" {
description = "Alarm threshold for the 'highSwapUsage' alarm"
}

# Threshold compared (GreaterThanThreshold) against the average ReadIOPS metric.
# NOTE(review): this default is 100x lower than the write IOPS default ("10000") - confirm that is intentional.
variable "read_iops_too_high_threshold" {
  description = "Alarm threshold for the 'read-iops-too-high' alarm"
  type        = string
  default     = "100"
}

# Threshold compared (GreaterThanThreshold) against the average WriteIOPS metric.
variable "write_iops_too_high_threshold" {
  description = "Alarm threshold for the 'write-iops-too-high' alarm"
  type        = string
  default     = "10000"
}

# Optional string-to-string tag map for the alarms; empty by default.
variable "tags" {
  description = "Tags to attach to each alarm"
  type        = map(string)
  default     = {}
}

# Required input (no default): the instance class of the monitored RDS instance.
variable "db_instance_class" {
  type        = string
  description = "The rds instance class, e.g. db.t3.medium"
}

# Optional engine name; the empty-string default means no engine was specified.
variable "engine" {
  type        = string
  description = "The RDS engine being used. Used for postgres or mysql specific alarms"
  default     = ""
}

variable "maximum_used_transaction_ids_too_high_threshold" {
Expand Down

0 comments on commit 54e9f42

Please sign in to comment.