From 842954ed104f6b49fd7cc1b8c2c2f9f20020fc4f Mon Sep 17 00:00:00 2001 From: Joshua Ford Date: Thu, 4 Apr 2024 19:17:59 -0500 Subject: [PATCH] fix: Support AWS GovCloud regions in lambda-promtail A broken regex prevents AWS GovCloud regions from properly being supported with Lambda Promtail. The regex matches only on regions with three parts in their filenames (e.g., us-east-1), and the GovCloud regions have four parts (e.g., us-gov-east-1). The fix is to add an additional, non-greedy, non-capturing group to match on the fourth part. Initially, I attempted to use `(?:[\w-]+)` for its simplicity and to match on the region-parsing logic we already have. However, this breaks the tests for vpcflowlogs because of the greediness of the `+` operator. As such, I opted to write something more verbose, but non-greedy. Tests have been added to prevent future breakages, as this tends to be an edge case. --- tools/lambda-promtail/lambda-promtail/s3.go | 4 +- .../lambda-promtail/s3_test.go | 101 ++++++++++++++++++ 2 files changed, 103 insertions(+), 2 deletions(-) diff --git a/tools/lambda-promtail/lambda-promtail/s3.go b/tools/lambda-promtail/lambda-promtail/s3.go index 5dca5cf7d609..77694ba60343 100644 --- a/tools/lambda-promtail/lambda-promtail/s3.go +++ b/tools/lambda-promtail/lambda-promtail/s3.go @@ -75,9 +75,9 @@ var ( // source: https://docs.aws.amazon.com/waf/latest/developerguide/logging-s3.html // format: aws-waf-logs-suffix[/prefix]/AWSLogs/aws-account-id/WAFLogs/region/webacl-name/year/month/day/hour/minute/aws-account-id_waflogs_region_webacl-name_timestamp_hash.log.gz // example: aws-waf-logs-test/AWSLogs/11111111111/WAFLogs/us-east-1/TEST-WEBACL/2021/10/28/19/50/11111111111_waflogs_us-east-1_TEST-WEBACL_20211028T1950Z_e0ca43b5.log.gz - defaultFilenameRegex = regexp.MustCompile(`AWSLogs\/(?P\d+)\/(?P[a-zA-Z0-9_\-]+)\/(?P[\w-]+)\/(?P\d+)\/(?P\d+)\/(?P\d+)\/\d+\_(?:elasticloadbalancing|vpcflowlogs)\_\w+-\w+-\d_(?:(?Papp|net)\.*?)?(?P[a-zA-Z0-9\-]+)`) + 
defaultFilenameRegex = regexp.MustCompile(`AWSLogs\/(?P\d+)\/(?P[a-zA-Z0-9_\-]+)\/(?P[\w-]+)\/(?P\d+)\/(?P\d+)\/(?P\d+)\/\d+\_(?:elasticloadbalancing|vpcflowlogs)_(?:\w+-\w+-(?:\w+-)?\d)_(?:(?Papp|net)\.*?)?(?P[a-zA-Z0-9\-]+)`) defaultTimestampRegex = regexp.MustCompile(`(?P\d+-\d+-\d+T\d+:\d+:\d+(?:\.\d+Z)?)`) - cloudtrailFilenameRegex = regexp.MustCompile(`AWSLogs\/(?Po-[a-z0-9]{10,32})?\/?(?P\d+)\/(?P[a-zA-Z0-9_\-]+)\/(?P[\w-]+)\/(?P\d+)\/(?P\d+)\/(?P\d+)\/\d+\_(?:CloudTrail|CloudTrail-Digest)\_\w+-\w+-\d_(?:(?:app|nlb|net)\.*?)?.+_(?P[a-zA-Z0-9\-]+)`) + cloudtrailFilenameRegex = regexp.MustCompile(`AWSLogs\/(?Po-[a-z0-9]{10,32})?\/?(?P\d+)\/(?P[a-zA-Z0-9_\-]+)\/(?P[\w-]+)\/(?P\d+)\/(?P\d+)\/(?P\d+)\/\d+\_(?:CloudTrail|CloudTrail-Digest)_(?:\w+-\w+-(?:\w+-)?\d)_(?:(?:app|nlb|net)\.*?)?.+_(?P[a-zA-Z0-9\-]+)`) cloudfrontFilenameRegex = regexp.MustCompile(`(?P.*)\/(?P[A-Z0-9]+)\.(?P\d+)-(?P\d+)-(?P\d+)-(.+)`) cloudfrontTimestampRegex = regexp.MustCompile(`(?P\d+-\d+-\d+\s\d+:\d+:\d+)`) wafFilenameRegex = regexp.MustCompile(`AWSLogs\/(?P\d+)\/(?PWAFLogs)\/(?P[\w-]+)\/(?P[\w-]+)\/(?P\d+)\/(?P\d+)\/(?P\d+)\/(?P\d+)\/(?P\d+)\/\d+\_waflogs\_[\w-]+_[\w-]+_\d+T\d+Z_\w+`) diff --git a/tools/lambda-promtail/lambda-promtail/s3_test.go b/tools/lambda-promtail/lambda-promtail/s3_test.go index 60a22abba7a3..644ad12f1727 100644 --- a/tools/lambda-promtail/lambda-promtail/s3_test.go +++ b/tools/lambda-promtail/lambda-promtail/s3_test.go @@ -126,6 +126,39 @@ func Test_getLabels(t *testing.T) { }, wantErr: false, }, + { + name: "s3_govcloud_flow_logs", + args: args{ + record: events.S3EventRecord{ + AWSRegion: "us-gov-east-1", + S3: events.S3Entity{ + Bucket: events.S3Bucket{ + Name: "vpc_logs_test", + OwnerIdentity: events.S3UserIdentity{ + PrincipalID: "test", + }, + }, + Object: events.S3Object{ + Key: "my-bucket/AWSLogs/123456789012/vpcflowlogs/us-gov-east-1/2022/01/24/123456789012_vpcflowlogs_us-gov-east-1_fl-1234abcd_20180620T1620Z_fe123456.log.gz", + }, + }, + }, + }, + 
want: map[string]string{ + "account_id": "123456789012", + "bucket": "vpc_logs_test", + "bucket_owner": "test", + "bucket_region": "us-gov-east-1", + "day": "24", + "key": "my-bucket/AWSLogs/123456789012/vpcflowlogs/us-gov-east-1/2022/01/24/123456789012_vpcflowlogs_us-gov-east-1_fl-1234abcd_20180620T1620Z_fe123456.log.gz", + "month": "01", + "region": "us-gov-east-1", + "src": "fl-1234abcd", + "type": FLOW_LOG_TYPE, + "year": "2022", + }, + wantErr: false, + }, { name: "cloudtrail_digest_logs", args: args{ @@ -192,6 +225,39 @@ func Test_getLabels(t *testing.T) { }, wantErr: false, }, + { + name: "cloudtrail_govcloud_logs", + args: args{ + record: events.S3EventRecord{ + AWSRegion: "us-gov-east-1", + S3: events.S3Entity{ + Bucket: events.S3Bucket{ + Name: "cloudtrail_logs_test", + OwnerIdentity: events.S3UserIdentity{ + PrincipalID: "test", + }, + }, + Object: events.S3Object{ + Key: "my-bucket/AWSLogs/123456789012/CloudTrail/us-gov-east-1/2022/01/24/123456789012_CloudTrail_us-gov-east-1_20220124T0000Z_4jhzXFO2Jlvu2b3y.json.gz", + }, + }, + }, + }, + want: map[string]string{ + "account_id": "123456789012", + "bucket": "cloudtrail_logs_test", + "bucket_owner": "test", + "bucket_region": "us-gov-east-1", + "day": "24", + "key": "my-bucket/AWSLogs/123456789012/CloudTrail/us-gov-east-1/2022/01/24/123456789012_CloudTrail_us-gov-east-1_20220124T0000Z_4jhzXFO2Jlvu2b3y.json.gz", + "month": "01", + "region": "us-gov-east-1", + "src": "4jhzXFO2Jlvu2b3y", + "type": CLOUDTRAIL_LOG_TYPE, + "year": "2022", + }, + wantErr: false, + }, { name: "organization_cloudtrail_logs", args: args{ @@ -293,6 +359,41 @@ func Test_getLabels(t *testing.T) { }, wantErr: false, }, + { + name: "s3_govcloud_waf", + args: args{ + record: events.S3EventRecord{ + AWSRegion: "us-gov-east-1", + S3: events.S3Entity{ + Bucket: events.S3Bucket{ + Name: "waf_logs_test", + OwnerIdentity: events.S3UserIdentity{ + PrincipalID: "test", + }, + }, + Object: events.S3Object{ + Key: 
"prefix/AWSLogs/11111111111/WAFLogs/us-gov-east-1/TEST-WEBACL/2021/10/28/19/50/11111111111_waflogs_us-gov-east-1_TEST-WEBACL_20211028T1950Z_e0ca43b5.log.gz", + }, + }, + }, + }, + want: map[string]string{ + "account_id": "11111111111", + "bucket_owner": "test", + "bucket_region": "us-gov-east-1", + "bucket": "waf_logs_test", + "day": "28", + "hour": "19", + "key": "prefix/AWSLogs/11111111111/WAFLogs/us-gov-east-1/TEST-WEBACL/2021/10/28/19/50/11111111111_waflogs_us-gov-east-1_TEST-WEBACL_20211028T1950Z_e0ca43b5.log.gz", + "minute": "50", + "month": "10", + "region": "us-gov-east-1", + "src": "TEST-WEBACL", + "type": WAF_LOG_TYPE, + "year": "2021", + }, + wantErr: false, + }, { name: "missing_type", args: args{