diff --git a/main.tf b/main.tf
index 7231492..1d6afc5 100644
--- a/main.tf
+++ b/main.tf
@@ -193,3 +193,44 @@ resource "aws_cloudwatch_metric_alarm" "maximum_used_transaction_ids_too_high" {
   alarm_actions       = var.actions_alarm
   ok_actions          = var.actions_ok
 }
+
+# SOC2 requirements
+resource "aws_cloudwatch_metric_alarm" "read_iops_too_high" {
+  count               = var.create_read_iops_alarm ? 1 : 0
+  alarm_name          = "${var.prefix}rds-${var.db_instance_id}-read-iops-too-high"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods  = var.evaluation_period
+  metric_name         = "ReadIOPS"
+  namespace           = "AWS/RDS"
+  period              = var.statistic_period
+  statistic           = "Average"
+  threshold           = var.read_iops_too_high_threshold
+  alarm_description   = "Average Read IO over last ${(var.evaluation_period * var.statistic_period / 60)} minutes too high, performance may suffer"
+  alarm_actions       = var.actions_alarm
+  ok_actions          = var.actions_ok
+
+  dimensions = {
+    # Must be the bare RDS instance identifier (not the alarm name), or the alarm matches no metric data.
+    DBInstanceIdentifier = var.db_instance_id
+  }
+}
+
+resource "aws_cloudwatch_metric_alarm" "write_iops_too_high" {
+  count               = var.create_write_iops_alarm ? 1 : 0
+  alarm_name          = "${var.prefix}rds-${var.db_instance_id}-write-iops-too-high"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods  = var.evaluation_period
+  metric_name         = "WriteIOPS"
+  namespace           = "AWS/RDS"
+  period              = var.statistic_period
+  statistic           = "Average"
+  threshold           = var.write_iops_too_high_threshold
+  alarm_description   = "Average Write IO over last ${(var.evaluation_period * var.statistic_period / 60)} minutes too high, performance may suffer"
+  alarm_actions       = var.actions_alarm
+  ok_actions          = var.actions_ok
+
+  dimensions = {
+    # Must be the bare RDS instance identifier (not the alarm name), or the alarm matches no metric data.
+    DBInstanceIdentifier = var.db_instance_id
+  }
+}
diff --git a/outputs.tf b/outputs.tf
index 4386cec..6086e97 100644
--- a/outputs.tf
+++ b/outputs.tf
@@ -1,6 +1,6 @@
 output "alarm_cpu_utilization_too_high" {
   # For older terraform support...
-  value       = var.create_high_cpu_alarm ? aws_cloudwatch_metric_alarm.cpu_utilization_too_high[0] : null
+  value = var.create_high_cpu_alarm ? aws_cloudwatch_metric_alarm.cpu_utilization_too_high[0] : null
   # For Terraform 0.15+, eventually use this much nicer code instead...
   # value       = one(aws_cloudwatch_metric_alarm.cpu_utilization_too_high.*)
   description = "The CloudWatch Metric Alarm resource block for high CPU Utilization"
@@ -16,7 +16,7 @@ output "alarm_cpu_credit_balance_too_low" {
 
 output "alarm_disk_queue_depth_too_high" {
   # For older terraform support...
-  value       = var.create_high_queue_depth_alarm ? aws_cloudwatch_metric_alarm.disk_queue_depth_too_high[0] : null
+  value = var.create_high_queue_depth_alarm ? aws_cloudwatch_metric_alarm.disk_queue_depth_too_high[0] : null
   # For Terraform 0.15+, eventually use this much nicer code instead...
   # value       = one(aws_cloudwatch_metric_alarm.disk_queue_depth_too_high.*)
   description = "The CloudWatch Metric Alarm resource block for high Disk Queue Depth"
@@ -24,7 +24,7 @@ output "alarm_disk_queue_depth_too_high" {
 
 output "alarm_disk_free_storage_space_too_low" {
   # For older terraform support...
-  value       = var.create_low_disk_space_alarm ? aws_cloudwatch_metric_alarm.disk_free_storage_space_too_low[0] : null
+  value = var.create_low_disk_space_alarm ? aws_cloudwatch_metric_alarm.disk_free_storage_space_too_low[0] : null
   # For Terraform 0.15+, eventually use this much nicer code instead...
   # value       = one(aws_cloudwatch_metric_alarm.disk_free_storage_space_too_low.*)
   description = "The CloudWatch Metric Alarm resource block for low Free Storage Space"
@@ -32,7 +32,7 @@ output "alarm_disk_free_storage_space_too_low" {
 
 output "alarm_disk_burst_balance_too_low" {
   # For older terraform support...
-  value       = var.create_low_disk_burst_alarm ? aws_cloudwatch_metric_alarm.disk_burst_balance_too_low[0] : null
+  value = var.create_low_disk_burst_alarm ? aws_cloudwatch_metric_alarm.disk_burst_balance_too_low[0] : null
   # For Terraform 0.15+, eventually use this much nicer code instead...
   # value       = one(aws_cloudwatch_metric_alarm.disk_burst_balance_too_low.*)
   description = "The CloudWatch Metric Alarm resource block for low Disk Burst Balance"
@@ -40,7 +40,7 @@ output "alarm_disk_burst_balance_too_low" {
 
 output "alarm_memory_freeable_too_low" {
   # For older terraform support...
-  value       = var.create_low_memory_alarm ? aws_cloudwatch_metric_alarm.memory_freeable_too_low[0] : null
+  value = var.create_low_memory_alarm ? aws_cloudwatch_metric_alarm.memory_freeable_too_low[0] : null
   # For Terraform 0.15+, eventually use this much nicer code instead...
   # value       = one(aws_cloudwatch_metric_alarm.memory_freeable_too_low.*)
   description = "The CloudWatch Metric Alarm resource block for low Freeable Memory"
@@ -48,7 +48,7 @@ output "alarm_memory_freeable_too_low" {
 
 output "alarm_memory_swap_usage_too_high" {
   # For older terraform support...
-  value       = var.create_swap_alarm ? aws_cloudwatch_metric_alarm.memory_swap_usage_too_high[0] : null
+  value = var.create_swap_alarm ? aws_cloudwatch_metric_alarm.memory_swap_usage_too_high[0] : null
   # For Terraform 0.15+, eventually use this much nicer code instead...
   # value       = one(aws_cloudwatch_metric_alarm.memory_swap_usage_too_high.*)
   description = "The CloudWatch Metric Alarm resource block for high Memory Swap Usage"
@@ -56,7 +56,7 @@ output "alarm_memory_swap_usage_too_high" {
 
 output "alarm_connection_count_anomalous" {
   # For older terraform support...
-  value       = var.create_anomaly_alarm ? aws_cloudwatch_metric_alarm.connection_count_anomalous[0] : null
+  value = var.create_anomaly_alarm ? aws_cloudwatch_metric_alarm.connection_count_anomalous[0] : null
   # For Terraform 0.15+, eventually use this much nicer code instead...
   # value       = one(aws_cloudwatch_metric_alarm.connection_count_anomalous.*)
   description = "The CloudWatch Metric Alarm resource block for anomalous Connection Count"
diff --git a/variables.tf b/variables.tf
index 8e39738..155ba92 100644
--- a/variables.tf
+++ b/variables.tf
@@ -69,6 +69,18 @@ variable "create_anomaly_alarm" {
   description = "Whether or not to create the fairly noisy anomaly alarm. Default is to create it (for backwards compatible support), but recommended to disable this for non-production databases"
 }
 
+variable "create_read_iops_alarm" {
+  type        = bool
+  default     = true
+  description = "Whether or not to create the Read IOPS too high alarm. Default is to create it."
+}
+
+variable "create_write_iops_alarm" {
+  type        = bool
+  default     = true
+  description = "Whether or not to create the Write IOPS too high alarm. Default is to create it."
+}
+
 variable "anomaly_period" {
   type        = string
   default     = "600"
@@ -82,13 +94,13 @@ variable "anomaly_band_width" {
 }
 
 variable "actions_alarm" {
-  type        = list
+  type        = list(any)
   default     = []
   description = "A list of actions to take when alarms are triggered. Will likely be an SNS topic for event distribution."
 }
 
 variable "actions_ok" {
-  type        = list
+  type        = list(any)
   default     = []
   description = "A list of actions to take when alarms are cleared. Will likely be an SNS topic for event distribution."
 }
@@ -135,6 +147,18 @@ variable "memory_swap_usage_too_high_threshold" {
   description = "Alarm threshold for the 'highSwapUsage' alarm"
 }
 
+variable "read_iops_too_high_threshold" {
+  type        = string
+  default     = "100"
+  description = "Alarm threshold for the 'read-iops-too-high' alarm"
+}
+
+variable "write_iops_too_high_threshold" {
+  type        = string
+  default     = "10000"
+  description = "Alarm threshold for the 'write-iops-too-high' alarm"
+}
+
 variable "tags" {
   type        = map(string)
   default     = {}
@@ -142,14 +166,14 @@ variable "tags" {
 }
 
 variable "db_instance_class" {
-  type = string
+  type        = string
   description = "The rds instance class, e.g. db.t3.medium"
 }
 
 variable "engine" {
-  type = string
+  type        = string
   description = "The RDS engine being used. Used for postgres or mysql specific alarms"
-  default = ""
+  default     = ""
 }
 
 variable "maximum_used_transaction_ids_too_high_threshold" {