Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2372 alert for excessive 5xx responses; descriptions for alerts #2373

Merged
merged 23 commits into from
Dec 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
83f5f9e
Add log metric filters and alarms for remaining CloudWatch controls.
Matthew-Grayson Oct 19, 2023
c89d2ac
Fix log_metric_namespace definition in prod.tfvars.
Matthew-Grayson Oct 19, 2023
f9f968f
Remove tags from log_filters.
Matthew-Grayson Oct 19, 2023
4d67c64
Add statistic field to alarms.
Matthew-Grayson Oct 19, 2023
bbc537f
Merge branch 'master' into 2328-add-log-metric-filtersalarms-for-aws-…
Matthew-Grayson Oct 19, 2023
a20074d
Add system shutdown filter and alarm; refactor log metric names to be…
Matthew-Grayson Oct 20, 2023
8354649
Merge branch 'master' into 2328-add-log-metric-filtersalarms-for-aws-…
Matthew-Grayson Oct 20, 2023
85f3f2a
Merge branch 'master' into 2328-add-log-metric-filtersalarms-for-aws-…
Matthew-Grayson Oct 20, 2023
d28de8b
Merge branch 'master' into 2328-add-log-metric-filtersalarms-for-aws-…
Matthew-Grayson Oct 24, 2023
39d6bb9
Fix capitalization of tags for resources in cloudwatch.tf, log_alarms…
Matthew-Grayson Oct 24, 2023
4d6a6cf
Add severity tag to alarms; refactor alarm name for ec2 instance shut…
Matthew-Grayson Oct 25, 2023
cc2786c
Fix formatting.
Matthew-Grayson Oct 25, 2023
8f9251a
Refactor ec2 shutdown metric vars.
Matthew-Grayson Oct 25, 2023
674b00f
Add filters/alarms for db instance shutdowns and deletions.
Matthew-Grayson Oct 26, 2023
b3d1e4f
Add api_error_rate alert and vars.
Matthew-Grayson Nov 9, 2023
ca98473
Define period.
Matthew-Grayson Nov 9, 2023
f681a4b
Merge from master.
Matthew-Grayson Nov 16, 2023
245f8f7
Terraform formatting.
Matthew-Grayson Nov 16, 2023
fd5d42d
Fix typo in api_error_rate caomparison_operator.
Matthew-Grayson Nov 16, 2023
c0da13c
Add alarm descriptions.
Matthew-Grayson Nov 17, 2023
9af86af
Merge branch 'master' into 2372-api-gateway-alert-threshold
Matthew-Grayson Nov 22, 2023
d26ade8
Merge branch 'master' into 2372-api-gateway-alert-threshold
Matthew-Grayson Dec 27, 2023
550385c
Update description for AWS Config alarm.
Matthew-Grayson Dec 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions infrastructure/api_gateway_alarms.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Alarm on the API Gateway 5xx error rate.
#
# Notifies the shared alarms SNS topic when the average of the 5XXError
# metric is at or above the threshold for two consecutive one-minute periods.
# Periods with no datapoints are treated as healthy (notBreaching).
resource "aws_cloudwatch_metric_alarm" "api_error_rate" {
  alarm_name        = "${var.log_metric_api_error_rate}-alarm"
  alarm_description = "The percentage of API calls returning a 5xx error exceeds 5%"

  # 5XXError is published by API Gateway under the AWS/ApiGateway namespace;
  # the namespace must be given so CloudWatch can resolve the metric.
  namespace   = "AWS/ApiGateway"
  metric_name = "5XXError"

  # NOTE(review): API Gateway metrics are published per ApiName / Stage
  # dimension. No dimensions are set here, so confirm this alarm actually
  # receives datapoints; if not, add a `dimensions` block for the target API.

  alarm_actions       = [aws_sns_topic.alarms.arn]
  comparison_operator = "GreaterThanOrEqualToThreshold"
  period              = 60
  evaluation_periods  = 2

  # The Average statistic of 5XXError is the fraction of requests that
  # returned a 5xx response, so 0.05 corresponds to a 5% error rate.
  threshold          = 0.05
  statistic          = "Average"
  unit               = "Count"
  treat_missing_data = "notBreaching"

  tags = {
    Project  = var.project
    Stage    = var.stage
    Severity = var.severity_medium
  }
}

16 changes: 16 additions & 0 deletions infrastructure/log_alarms.tf
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
resource "aws_cloudwatch_metric_alarm" "root_user" {
alarm_name = "${var.log_metric_root_user}-alarm"
alarm_description = "The root user account signed into AWS"
metric_name = var.log_metric_root_user
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -18,6 +19,7 @@ resource "aws_cloudwatch_metric_alarm" "root_user" {

resource "aws_cloudwatch_metric_alarm" "unauthorized_api_call" {
alarm_name = "${var.log_metric_unauthorized_api_call}-alarm"
alarm_description = "An API call returned an unauthorized error"
metric_name = var.log_metric_unauthorized_api_call
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -36,6 +38,7 @@ resource "aws_cloudwatch_metric_alarm" "unauthorized_api_call" {

resource "aws_cloudwatch_metric_alarm" "login_without_mfa" {
alarm_name = "${var.log_metric_login_without_mfa}-alarm"
alarm_description = "A user logged into AWS without MFA"
metric_name = var.log_metric_login_without_mfa
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -54,6 +57,7 @@ resource "aws_cloudwatch_metric_alarm" "login_without_mfa" {

resource "aws_cloudwatch_metric_alarm" "iam_policy" {
alarm_name = "${var.log_metric_iam_policy}-alarm"
alarm_description = "An IAM policy was modified"
metric_name = var.log_metric_iam_policy
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -72,6 +76,7 @@ resource "aws_cloudwatch_metric_alarm" "iam_policy" {

resource "aws_cloudwatch_metric_alarm" "cloudtrail" {
alarm_name = "${var.log_metric_cloudtrail}-alarm"
alarm_description = "CloudTrail configurations were modified"
metric_name = var.log_metric_cloudtrail
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -90,6 +95,7 @@ resource "aws_cloudwatch_metric_alarm" "cloudtrail" {

resource "aws_cloudwatch_metric_alarm" "login_failure" {
alarm_name = "${var.log_metric_login_failure}-alarm"
alarm_description = "A user sign in to AWS failed"
metric_name = var.log_metric_login_failure
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -108,6 +114,7 @@ resource "aws_cloudwatch_metric_alarm" "login_failure" {

resource "aws_cloudwatch_metric_alarm" "cmk_delete_disable" {
alarm_name = "${var.log_metric_cmk_delete_disable}-alarm"
alarm_description = "A customer-managed key was disabled or scheduled for deletion"
metric_name = var.log_metric_cmk_delete_disable
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -126,6 +133,7 @@ resource "aws_cloudwatch_metric_alarm" "cmk_delete_disable" {

resource "aws_cloudwatch_metric_alarm" "s3_bucket_policy" {
alarm_name = "${var.log_metric_s3_bucket_policy}-alarm"
alarm_description = "An S3 bucket policy was modified"
metric_name = var.log_metric_s3_bucket_policy
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -144,6 +152,7 @@ resource "aws_cloudwatch_metric_alarm" "s3_bucket_policy" {

resource "aws_cloudwatch_metric_alarm" "aws_config" {
alarm_name = "${var.log_metric_aws_config}-alarm"
alarm_description = "AWS Config was modified"
metric_name = var.log_metric_aws_config
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -162,6 +171,7 @@ resource "aws_cloudwatch_metric_alarm" "aws_config" {

resource "aws_cloudwatch_metric_alarm" "security_group" {
alarm_name = "${var.log_metric_security_group}-alarm"
alarm_description = "A security group was modified"
metric_name = var.log_metric_security_group
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -180,6 +190,7 @@ resource "aws_cloudwatch_metric_alarm" "security_group" {

resource "aws_cloudwatch_metric_alarm" "nacl" {
alarm_name = "${var.log_metric_nacl}-alarm"
alarm_description = "A network ACL was modified"
metric_name = var.log_metric_nacl
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -198,6 +209,7 @@ resource "aws_cloudwatch_metric_alarm" "nacl" {

resource "aws_cloudwatch_metric_alarm" "network_gateway" {
alarm_name = "${var.log_metric_network_gateway}-alarm"
alarm_description = "A network gateway was modified"
metric_name = var.log_metric_network_gateway
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -216,6 +228,7 @@ resource "aws_cloudwatch_metric_alarm" "network_gateway" {

resource "aws_cloudwatch_metric_alarm" "route_table" {
alarm_name = "${var.log_metric_route_table}-alarm"
alarm_description = "A route table was modified"
metric_name = var.log_metric_route_table
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand Down Expand Up @@ -252,6 +265,7 @@ resource "aws_cloudwatch_metric_alarm" "vpc" {

resource "aws_cloudwatch_metric_alarm" "ec2_shutdown" {
alarm_name = "${var.log_metric_ec2_shutdown}-alarm"
alarm_description = "An EC2 instance was shut down"
metric_name = var.log_metric_ec2_shutdown
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -270,6 +284,7 @@ resource "aws_cloudwatch_metric_alarm" "ec2_shutdown" {

resource "aws_cloudwatch_metric_alarm" "db_shutdown" {
alarm_name = "${var.log_metric_db_shutdown}-alarm"
alarm_description = "An RDS instance was shut down"
metric_name = var.log_metric_db_shutdown
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand All @@ -288,6 +303,7 @@ resource "aws_cloudwatch_metric_alarm" "db_shutdown" {

resource "aws_cloudwatch_metric_alarm" "db_deletion" {
alarm_name = "${var.log_metric_db_deletion}-alarm"
alarm_description = "An RDS instance was deleted"
metric_name = var.log_metric_db_deletion
namespace = var.log_metric_namespace
alarm_actions = [aws_sns_topic.alarms.arn]
Expand Down
1 change: 1 addition & 0 deletions infrastructure/prod.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ db_port = 5432
db_table_name = "cfproddb"
db_instance_class = "db.t3.2xlarge"
log_metric_namespace = "LogMetrics"
log_metric_api_error_rate = "crossfeed-prod-APIErrorRate"
log_metric_root_user = "crossfeed-prod-RootUserAccess"
log_metric_unauthorized_api_call = "crossfeed-prod-UnauthorizedApiCall"
log_metric_login_without_mfa = "crossfeed-prod-ConsoleSignInWithoutMFA"
Expand Down
1 change: 1 addition & 0 deletions infrastructure/stage.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ db_table_name = "cfstagingdb"
db_instance_class = "db.t3.2xlarge"
log_metric_namespace = "LogMetrics"
log_metric_root_user = "crossfeed-staging-RootUserAccess"
log_metric_api_error_rate = "crossfeed-staging-APIErrorRate"
log_metric_unauthorized_api_call = "crossfeed-staging-UnauthorizedApiCall"
log_metric_login_without_mfa = "crossfeed-staging-ConsoleSignInWithoutMFA"
log_metric_iam_policy = "crossfeed-staging-IAMPolicyChange"
Expand Down
6 changes: 6 additions & 0 deletions infrastructure/vars.tf
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ variable "log_metric_namespace" {
default = "LogMetrics"
}

# Base name used to build the API error-rate alarm name
# ("<value>-alarm" in api_gateway_alarms.tf). Overridden per environment
# in the *.tfvars files.
variable "log_metric_api_error_rate" {
  type        = string
  description = "log_metric_filter_api_error_rate"
  default     = "crossfeed-staging-APIErrorRate"
}

variable "log_metric_root_user" {
description = "log_metric_filter_root_user"
type = string
Expand Down
Loading