Fix support case partitioning (#5)
* remove resource bucket as we use a workshop static url for the lambda package

* update readme

* update readme

* change the sequence for the shell to avoid errors if CFN fails

* check if bucket exists and permissions with boto3

* remove unused import

* fix support case partitioning

* add a convenient script to package lambda

* add changelog

* update gitignore

* fix import and timezone warning

* update lambda collector version

* update formatting of the deployment script

* Change the way the data is partitioned in S3 to avoid duplicates

* update changelog

* fix pylint warning

* update instructions
Jonathan34 authored Aug 14, 2024
1 parent ed611a0 commit e548498
Showing 11 changed files with 97 additions and 47 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -6,3 +6,4 @@ src/support_collector/__pycache__
 src/support_collector/.python-version
 src/support_collector/individual-account-deployments/temp_dir/
 .DS_Store
+src/support_collector/temp_dir/
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -0,0 +1,10 @@
+# Changelog
+
+## Support Collector Lambda v1.0.1
+
+* Partition support cases and Health data using their creation date in S3 (YYYY/MM) to avoid saving duplicates on the daily sync
+* Flatten Trusted Advisor checks in S3 to avoid duplicates during daily sync.
+
+## Support Collector Lambda v1.0.0
+
+* Update to Python 3.11 runtime
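
For illustration, here is a minimal sketch of the key layouts this release note describes. The account ID, case ID, check ID, and dates are made up; the key formats themselves are taken from the upload_cases.py and upload_ta.py diffs below.

```python
# Sketch of the S3 key layouts implied by this commit.
# Account ID, case ID, check ID, and dates are illustrative only.
account_id = "111122223333"
case_id = "1234567890"

# v1.0.0 partitioned by the date of the sync run, so the same case
# synced on two different days landed under two prefixes (duplicates):
#   support-cases/111122223333/2024-08-13/1234567890.json
#   support-cases/111122223333/2024-08-14/1234567890.json
old_key = f"support-cases/{account_id}/2024-08-14/{case_id}.json"

# v1.0.1 partitions by the case's own creation month (YYYY/MM), so a
# daily re-sync overwrites the same object instead of adding a new one:
new_key = f"support-cases/{account_id}/2024/07/{case_id}.json"

# Trusted Advisor checks are flattened entirely: one object per check
# per account, with no date component in the key:
ta_key = f"ta/{account_id}/exampleCheckId.json"  # made-up check ID
```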
2 changes: 1 addition & 1 deletion src/support_collector/README.md
@@ -93,7 +93,7 @@ To deploy this solution, you will need to know the `<organization-id>`, `<root-i
 - Update the bucket policy for the support data bucket to allow member accounts to upload their support data.
 - Deploy a stackset to run a one time sync to fetch historical support data and load to S3 data bucket.
 
-The bucket policy for the support data is generated in the file `output_bucket_policy.json` and is similar to the following one:
+The bucket policy for the support data is generated in the file `output_bucket_policy.json` and the script will ask you if you want to overwrite the bucket policy. If you decline, then you will have to update it manually. The policy is similar to the following one:
 
 ```json
 {
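
For reference, a minimal sketch of applying the generated policy by hand with boto3, in case you decline the automatic overwrite. The bucket name is a placeholder; output_bucket_policy.json is the file named above.

```python
# Sketch: manually applying the generated bucket policy with boto3.
# "my-support-data-bucket" is a placeholder bucket name.
import json

import boto3

s3 = boto3.client("s3")

with open("output_bucket_policy.json", encoding="utf-8") as f:
    policy = json.load(f)

# put_bucket_policy replaces the entire existing policy, which is why
# the deploy script below warns that it will OVERWRITE the current one.
s3.put_bucket_policy(Bucket="my-support-data-bucket", Policy=json.dumps(policy))
```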
17 changes: 9 additions & 8 deletions src/support_collector/deploy_collector.sh
@@ -1,22 +1,23 @@
 #!/bin/bash
-echo "This script will deploy the solution to an organization.\n"
+printf "This script will deploy the solution to an organization.\n\n"
 
-echo "Enter the OU IDs separated by commas (ie: ou-xxxxxxxxxx1,ou-xxxxxxxxxx2): "
+printf "Enter the OU IDs separated by commas (ie: ou-xxxxxxxxxx1,ou-xxxxxxxxxx2): "
 read OU_IDS
-echo ""
+printf "\n\n"
 
-echo "Enter the data collection S3 bucket name in the management account: "
+printf "Enter the data collection S3 bucket name in the management account: "
 read DATA_BUCKET_NAME
-echo ""
+printf "\n\n"
 
-echo "Do you want the script to overwrite the data collection bucket policy on your behalf?\nThis requires PutBucketPolicy permission and it will OVERWRITE the current policy.\nIf the policy is not set, member accounts may not be able to store their data properly. (Y/N, default: Y): "
+printf "Do you want the script to overwrite the data collection bucket policy on your behalf?\nThis requires PutBucketPolicy permission and it will OVERWRITE the current policy.\nIf the policy is not set, member accounts may not be able to store their data properly. (Y/N, default: Y): "
 read OVERWRITE_DATA_BUCKET_POLICY_ANSWER
-if [ "$OVERWRITE_DATA_BUCKET_POLICY_ANSWER" != "${OVERWRITE_DATA_BUCKET_POLICY_ANSWER#[Yy]}" ] ;then
+if [ "$OVERWRITE_DATA_BUCKET_POLICY_ANSWER" != "${OVERWRITE_DATA_BUCKET_POLICY_ANSWER#[Yy]}" ] ;then
     OVERWRITE_DATA_BUCKET_POLICY=--overwrite-data-bucket-policy
 else
     OVERWRITE_DATA_BUCKET_POLICY="--no-overwrite-data-bucket-policy"
 fi
-echo ""
+printf "\n\n"
 
-echo "Invoking deploy_infrastructure.py..."
+printf "Invoking deploy_infrastructure.py...\n"
 python3 deploy_infrastructure.py --data-bucket "${DATA_BUCKET_NAME}" --ou-ids "${OU_IDS}" "${OVERWRITE_DATA_BUCKET_POLICY}"
2 changes: 1 addition & 1 deletion src/support_collector/member_account_resources.yaml
@@ -57,7 +57,7 @@ Resources:
       Role: !GetAtt SupportInsightsLambdaExecutionRole.Arn
       Code:
         S3Bucket: ws-assets-prod-iad-r-iad-ed304a55c2ca1aee
-        S3Key: 741340b0-0c84-4a80-8ccf-e351dcc826c0/support-collector-lambda.zip
+        S3Key: 741340b0-0c84-4a80-8ccf-e351dcc826c0/support-collector-lambda-v1.0.1.zip
       Runtime: python3.11
       Timeout: 900
 
16 changes: 16 additions & 0 deletions src/support_collector/package_lambda.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+echo "Cleaning up old files..."
+rm -rf support-collector-lambda.zip support-collector-lambda-layer.zip python temp_dir
+
+echo "Installing dependencies into a temporary directory..."
+mkdir temp_dir
+pip3 install -r requirements.txt -t temp_dir/
+
+echo "Copying dependencies to the Lambda directory..."
+cp -r temp_dir/* support-collector-lambda/
+
+echo "Creating deployment package..."
+cd support-collector-lambda
+zip -r ../support-collector-lambda.zip . -x '*.DS_Store' 2>/dev/null || true
+cd ..
@@ -1,9 +1,6 @@
-import importlib
 import boto3
 
 
 def lambda_handler(event, context):
     lambda_client = boto3.client("lambda")
     account_id = context.invoked_function_arn.split(":")[4]
 
     # Get PAST_NO_OF_DAYS from event parameters
39 changes: 22 additions & 17 deletions src/support_collector/support-collector-lambda/upload_cases.py
@@ -1,32 +1,38 @@
 import json
-import datetime
+from datetime import datetime, timedelta, timezone
 from collections import defaultdict
 import logging
 import boto3
 from botocore.exceptions import ClientError
 
+from utils import convert_time_to_month_year
+
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
 
 session = boto3.Session()
 
 
 def save_to_s3(cases_by_account, bucket_name):
     region = session.region_name
     s3 = session.client("s3", region_name=region)
-    current_date = datetime.datetime.now().strftime(
-        "%Y-%m-%d"
-    )  # Format the date as YYYY-MM-DD
 
     print(f"The Support cases are being uploaded to S3 bucket {bucket_name}...")
     for account_id, cases in cases_by_account.items():
         for case in cases:
-            case_id = case["case"]["displayId"]  # Extracting case ID for filename
-            case_json = json.dumps(case, ensure_ascii=False).encode(
-                "utf-8"
-            )  # Serialize case data to JSON with UTF-8 encoding
-            file_key = f"support-cases/{account_id}/{current_date}/{case_id}.json"
+            # Extracting case ID for filename
+            case_id = case["case"]["displayId"]
+
+            # Extracting creation time for partitioning in S3
+            time_created = case["case"]["timeCreated"]
+            # Convert the time_created in the format "2024-07-23T15:49:29.995Z" to "2024/07"
+            creation_date = convert_time_to_month_year(iso_datetime=time_created)
+
+            # Serialize case data to JSON with UTF-8 encoding
+            case_json = json.dumps(case, ensure_ascii=False).encode("utf-8")
+
+            file_key = f"support-cases/{account_id}/{creation_date}/{case_id}.json"
             s3.put_object(Bucket=bucket_name, Key=file_key, Body=case_json)
 
             print(f"Uploaded {file_key}")
     print("Support cases upload done!")
 
@@ -45,14 +51,14 @@ def get_support_cases(credentials):
     return cases
 
 
-def describe_cases(after_time, resolved):
+def describe_cases(after_time, include_resolved):
     """
     Describe support cases over a period of time, optionally filtering
     by status.
 
     :param after_time: The start time to include for cases.
     :param before_time: The end time to include for cases.
-    :param resolved: True to include resolved cases in the results,
+    :param include_resolved: True to include resolved cases in the results,
         otherwise results are open cases.
     :return: The final status of the case.
     """
@@ -62,7 +68,7 @@ def describe_cases(after_time, resolved):
     paginator = support_client.get_paginator("describe_cases")
     for page in paginator.paginate(
         afterTime=after_time,
-        includeResolvedCases=resolved,
+        includeResolvedCases=include_resolved,
         includeCommunications=True,
         language="en",
     ):
@@ -85,11 +91,10 @@ def describe_cases(after_time, resolved):
 
 
 def list_all_cases(days):
-    include_communications = True
-    end_date = datetime.datetime.utcnow().date()
-    start_date = end_date - datetime.timedelta(days)
+    include_resolved = True
+    start_date = datetime.now(timezone.utc).date() - timedelta(days)
     start_time = str(start_date)
-    all_cases = describe_cases(start_time, include_communications)
+    all_cases = describe_cases(start_time, include_resolved)
 
     return all_cases
 
19 changes: 12 additions & 7 deletions src/support_collector/support-collector-lambda/upload_health.py
@@ -4,6 +4,7 @@
 import logging
 import boto3
 
+
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 
@@ -20,22 +21,26 @@ def default(self, o):
 def save_to_s3(events_by_account, bucket_name):
     region = session.region_name
     s3 = session.client("s3", region_name=region)
-    current_date = datetime.datetime.utcnow().strftime(
-        "%Y-%m-%d"
-    )  # Format the date as YYYY-MM-DD
 
     print(f"The Health events are being uploaded to S3 bucket {bucket_name}...")
     for account_id, account_events in events_by_account.items():
         for event_dict in account_events:
             event = event_dict["event"]
+
+            # Clean ARN for use as filename
+            arn = event["arn"].split(":")[-1].replace("/", "_")
-            arn = (
-                event["arn"].split(":")[-1].replace("/", "_")
-            )  # Clean ARN for use as filename
             event_json = json.dumps(
                 event, cls=DatetimeEncoder, ensure_ascii=False
             ).encode("utf-8")
-            file_key = f"health/{account_id}/{current_date}/{arn}.json"  # Construct the file key using account_id, date, and arn
+
+            # Extracting start time for partitioning in S3
+            dt = event["startTime"]
+            start_date = f"{dt.year}/{dt.month}"
+
+            # Construct the file key using account_id, date, and arn
+            file_key = f"health/{account_id}/{start_date}/{arn}.json"
             s3.put_object(Bucket=bucket_name, Key=file_key, Body=event_json)
 
             print(f"Uploaded {file_key}")
     print("Health upload done!")
 
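
A side note for readers comparing partition formats: the Health prefix above is built inline with f"{dt.year}/{dt.month}", which does not zero-pad the month, whereas the support-case path uses convert_time_to_month_year (added in utils.py below), which does. A small illustration with a made-up startTime:

```python
# Illustration of the two month formats in this commit.
# The datetime is made up; event["startTime"] arrives as a datetime
# object in the AWS Health API response (hence the DatetimeEncoder).
from datetime import datetime, timezone

dt = datetime(2024, 7, 23, 15, 49, 29, tzinfo=timezone.utc)

print(f"{dt.year}/{dt.month}")      # 2024/7  -> upload_health.py prefix
print(f"{dt.year}/{dt.month:02d}")  # 2024/07 -> utils.convert_time_to_month_year
```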
18 changes: 8 additions & 10 deletions src/support_collector/support-collector-lambda/upload_ta.py
@@ -1,5 +1,4 @@
 import json
-import datetime
 from collections import defaultdict
 import boto3
 
@@ -19,34 +18,33 @@
 def save_to_s3(recommendations_by_account, bucket_name):
     region = session.region_name
     s3 = session.client("s3", region_name=region)
-    current_date = datetime.datetime.utcnow().strftime(
-        "%Y-%m-%d"
-    )  # Using UTC date to standardize the timestamps across regions
 
     print(f"The TA recommendations are being uploaded to S3 bucket {bucket_name}...")
     for account_id, recommendations in recommendations_by_account.items():
         for recommendation in recommendations:
             status = recommendation["recommendation"]["status"].lower()
+            # Filter for warning or error status
             if status in [
                 "warning",
                 "error",
                 "yellow",
                 "red",
-            ]:  # Filter for warning or error status
-                check_id = recommendation["recommendation"][
-                    "checkId"
-                ]  # Extract the checkId from the recommendation
+            ]:
+                # Extract the checkId from the recommendation
+                check_id = recommendation["recommendation"]["checkId"]
+                # Get the description from the checks_info_dict
                 description = checks_info_dict.get(check_id, {}).get(
                     "description", "No description provided"
-                )  # Get the description from the checks_info_dict
+                )
                 # Update the recommendation with name and modified description
                 recommendation["recommendation"][
                     "description"
                 ] = f"The Trusted Advisor (TA) recommendation is for AWS account Id {account_id} that has TA status as '{status}'. This status {status} indicates the account owner should take action on the resources stated here as per this recommendation. The recommendation is as follows: {description}"
                 recommendation_json = json.dumps(
                     recommendation, ensure_ascii=False
                 ).encode("utf-8")
-                file_key = f"ta/{account_id}/{current_date}/{check_id}.json"  # Construct the file key using account_id, date, and checkId
+                # Construct the file key using account_id, date, and checkId
+                file_key = f"ta/{account_id}/{check_id}.json"
                 s3.put_object(
                     Bucket=bucket_name, Key=file_key, Body=recommendation_json
                 )
17 changes: 17 additions & 0 deletions src/support_collector/support-collector-lambda/utils.py
@@ -0,0 +1,17 @@
+from datetime import datetime
+
+
+def convert_time_to_month_year(iso_datetime):
+    # Parse the time_created string into a datetime object
+    # dt = datetime.strptime(iso_datetime, "%Y-%m-%dT%H:%M:%S.%fZ")
+    iso_date = iso_datetime.replace("Z", "+00:00")
+    dt = datetime.fromisoformat(iso_date)
+
+    # Extract the year and month components
+    year = dt.year
+    month = dt.month
+
+    # Format the year and month as "YYYY/MM"
+    month_year = f"{year}/{month:02d}"
+
+    return month_year
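
A quick usage sketch of the new helper, fed the sample timestamp quoted in the upload_cases.py comment above:

```python
# Usage sketch for convert_time_to_month_year; the input is the example
# timestamp quoted in upload_cases.py ("2024-07-23T15:49:29.995Z").
from utils import convert_time_to_month_year

month_year = convert_time_to_month_year(iso_datetime="2024-07-23T15:49:29.995Z")
print(month_year)  # 2024/07
```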
