From e54849843f9d2ca728da569285c0d1bc27d7f0f0 Mon Sep 17 00:00:00 2001
From: "Jonathan D." <3976137+Jonathan34@users.noreply.github.com>
Date: Wed, 14 Aug 2024 11:31:30 -0500
Subject: [PATCH] Fix support case partitioning (#5)

* remove resource bucket as we use a workshop static url for the lambda package
* update readme
* update readme
* change the sequence for the shell to avoid errors if CFN fails
* check if bucket exists and permission with boto 3
* remove unused import
* fix support case partitioning
* add a convenient script to package lambda
* add changelog
* update gitignore
* fix import and timezone warning
* update lambda collector version
* update formatting of the deployment script
* Change the way the data is partitioned in S3 to avoid duplicates
* update changelog
* fix pylint warning
* update instructions
---
 .gitignore                                     |  1 +
 CHANGELOG.md                                   | 10 +++++
 src/support_collector/README.md                |  2 +-
 src/support_collector/deploy_collector.sh      | 17 ++++----
 .../member_account_resources.yaml              |  2 +-
 src/support_collector/package_lambda.sh        | 16 ++++++++
 .../lambda_function.py                         |  3 --
 .../support-collector-lambda/upload_cases.py   | 39 +++++++++++--------
 .../support-collector-lambda/upload_health.py  | 19 +++++----
 .../support-collector-lambda/upload_ta.py      | 18 ++++-----
 .../support-collector-lambda/utils.py          | 17 ++++++++
 11 files changed, 97 insertions(+), 47 deletions(-)
 create mode 100644 CHANGELOG.md
 create mode 100755 src/support_collector/package_lambda.sh
 create mode 100644 src/support_collector/support-collector-lambda/utils.py

diff --git a/.gitignore b/.gitignore
index 3b08ee7..b1a6abf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ src/support_collector/__pycache__
 src/support_collector/.python-version
 src/support_collector/individual-account-deployments/temp_dir/
 .DS_Store
+src/support_collector/temp_dir/
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..94761d6
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,10 @@
+# Changelog
+
+## Support Collector Lambda v1.0.1
+
+* Partition support cases and Health data using their creation date in S3 (YYYY/MM) to avoid saving duplicates on the daily sync
+* Flatten Trusted Advisor checks in S3 to avoid duplicates during daily sync.
+
+## Support Collector Lambda v1.0.0
+
+* Update to Python 3.11 runtime
diff --git a/src/support_collector/README.md b/src/support_collector/README.md
index 5da3578..ad3f149 100644
--- a/src/support_collector/README.md
+++ b/src/support_collector/README.md
@@ -93,7 +93,7 @@ To deploy this solution, you will need to know the ``, `/dev/null || true
+cd ..
diff --git a/src/support_collector/support-collector-lambda/lambda_function.py b/src/support_collector/support-collector-lambda/lambda_function.py
index 599a044..c45dbaa 100644
--- a/src/support_collector/support-collector-lambda/lambda_function.py
+++ b/src/support_collector/support-collector-lambda/lambda_function.py
@@ -1,9 +1,6 @@
 import importlib
 
-import boto3
-
 
 def lambda_handler(event, context):
-    lambda_client = boto3.client("lambda")
     account_id = context.invoked_function_arn.split(":")[4]
     # Get PAST_NO_OF_DAYS from event parameters
diff --git a/src/support_collector/support-collector-lambda/upload_cases.py b/src/support_collector/support-collector-lambda/upload_cases.py
index e5b2859..ce34053 100644
--- a/src/support_collector/support-collector-lambda/upload_cases.py
+++ b/src/support_collector/support-collector-lambda/upload_cases.py
@@ -1,32 +1,38 @@
 import json
-import datetime
+from datetime import datetime, timedelta, timezone
 from collections import defaultdict
 import logging
 
 import boto3
 from botocore.exceptions import ClientError
 
+from utils import convert_time_to_month_year
+
 logger = logging.getLogger()
 logger.setLevel(logging.INFO)
 
 session = boto3.Session()
 
-
 def save_to_s3(cases_by_account, bucket_name):
     region = session.region_name
     s3 = session.client("s3", region_name=region)
-    current_date = datetime.datetime.now().strftime(
-        "%Y-%m-%d"
-    )  # Format the date as YYYY-MM-DD
     print(f"The Support cases are being uploaded to S3 bucket {bucket_name}...")
     for account_id, cases in cases_by_account.items():
         for case in cases:
-            case_id = case["case"]["displayId"]  # Extracting case ID for filename
-            case_json = json.dumps(case, ensure_ascii=False).encode(
-                "utf-8"
-            )  # Serialize case data to JSON with UTF-8 encoding
-            file_key = f"support-cases/{account_id}/{current_date}/{case_id}.json"
+            # Extracting case ID for filename
+            case_id = case["case"]["displayId"]
+
+            # Extracting creation time for partitioning in S3
+            time_created = case["case"]["timeCreated"]
+            # Convert the time_created in the format "2024-07-23T15:49:29.995Z" to "2024/07"
+            creation_date = convert_time_to_month_year(iso_datetime=time_created)
+
+            # Serialize case data to JSON with UTF-8 encoding
+            case_json = json.dumps(case, ensure_ascii=False).encode("utf-8")
+
+            file_key = f"support-cases/{account_id}/{creation_date}/{case_id}.json"
             s3.put_object(Bucket=bucket_name, Key=file_key, Body=case_json)
+            print(f"Uploaded {file_key}")
     print("Support cases upload done!")
@@ -45,14 +51,14 @@ def get_support_cases(credentials):
     return cases
 
 
-def describe_cases(after_time, resolved):
+def describe_cases(after_time, include_resolved):
     """
     Describe support cases over a period of time, optionally filtering by status.
 
     :param after_time: The start time to include for cases.
     :param before_time: The end time to include for cases.
-    :param resolved: True to include resolved cases in the results,
+    :param include_resolved: True to include resolved cases in the results,
         otherwise results are open cases.
     :return: The final status of the case.
""" @@ -62,7 +68,7 @@ def describe_cases(after_time, resolved): paginator = support_client.get_paginator("describe_cases") for page in paginator.paginate( afterTime=after_time, - includeResolvedCases=resolved, + includeResolvedCases=include_resolved, includeCommunications=True, language="en", ): @@ -85,11 +91,10 @@ def describe_cases(after_time, resolved): def list_all_cases(days): - include_communications = True - end_date = datetime.datetime.utcnow().date() - start_date = end_date - datetime.timedelta(days) + include_resolved = True + start_date = datetime.now(timezone.utc).date() - timedelta(days) start_time = str(start_date) - all_cases = describe_cases(start_time, include_communications) + all_cases = describe_cases(start_time, include_resolved) return all_cases diff --git a/src/support_collector/support-collector-lambda/upload_health.py b/src/support_collector/support-collector-lambda/upload_health.py index 79a1e2a..a3861f0 100644 --- a/src/support_collector/support-collector-lambda/upload_health.py +++ b/src/support_collector/support-collector-lambda/upload_health.py @@ -4,6 +4,7 @@ import logging import boto3 + # Set up logging logging.basicConfig(level=logging.INFO) @@ -20,22 +21,26 @@ def default(self, o): def save_to_s3(events_by_account, bucket_name): region = session.region_name s3 = session.client("s3", region_name=region) - current_date = datetime.datetime.utcnow().strftime( - "%Y-%m-%d" - ) # Format the date as YYYY-MM-DD print(f"The Health events are being uploaded to S3 bucket {bucket_name}...") for account_id, account_events in events_by_account.items(): for event_dict in account_events: event = event_dict["event"] - arn = ( - event["arn"].split(":")[-1].replace("/", "_") - ) # Clean ARN for use as filename + + # Clean ARN for use as filename + arn = event["arn"].split(":")[-1].replace("/", "_") event_json = json.dumps( event, cls=DatetimeEncoder, ensure_ascii=False ).encode("utf-8") - file_key = f"health/{account_id}/{current_date}/{arn}.json" # Construct the file key using account_id, date, and arn + + # Extracting start time for partitioning in S3 + dt = event["startTime"] + start_date = f"{dt.year}/{dt.month}" + + # Construct the file key using account_id, date, and arn + file_key = f"health/{account_id}/{start_date}/{arn}.json" s3.put_object(Bucket=bucket_name, Key=file_key, Body=event_json) + print(f"Uploaded {file_key}") print("Health upload done!") diff --git a/src/support_collector/support-collector-lambda/upload_ta.py b/src/support_collector/support-collector-lambda/upload_ta.py index cb18e66..bf67450 100644 --- a/src/support_collector/support-collector-lambda/upload_ta.py +++ b/src/support_collector/support-collector-lambda/upload_ta.py @@ -1,5 +1,4 @@ import json -import datetime from collections import defaultdict import boto3 @@ -19,26 +18,24 @@ def save_to_s3(recommendations_by_account, bucket_name): region = session.region_name s3 = session.client("s3", region_name=region) - current_date = datetime.datetime.utcnow().strftime( - "%Y-%m-%d" - ) # Using UTC date to standardize the timestamps across regions print(f"The TA recommendations are being uploaded to S3 bucket {bucket_name}...") for account_id, recommendations in recommendations_by_account.items(): for recommendation in recommendations: status = recommendation["recommendation"]["status"].lower() + # Filter for warning or error status if status in [ "warning", "error", "yellow", "red", - ]: # Filter for warning or error status - check_id = recommendation["recommendation"][ - "checkId" - ] # Extract 
+            ]:
+                # Extract the checkId from the recommendation
+                check_id = recommendation["recommendation"]["checkId"]
+                # Get the description from the checks_info_dict
                 description = checks_info_dict.get(check_id, {}).get(
                     "description", "No description provided"
-                )  # Get the description from the checks_info_dict
+                )
                 # Update the recommendation with name and modified description
                 recommendation["recommendation"][
                     "description"
                 ] =
                 recommendation_json = json.dumps(
                     recommendation, ensure_ascii=False
                 ).encode("utf-8")
-                file_key = f"ta/{account_id}/{current_date}/{check_id}.json"  # Construct the file key using account_id, date, and checkId
+                # Construct the file key using account_id, date, and checkId
+                file_key = f"ta/{account_id}/{check_id}.json"
                 s3.put_object(
                     Bucket=bucket_name, Key=file_key, Body=recommendation_json
                 )
diff --git a/src/support_collector/support-collector-lambda/utils.py b/src/support_collector/support-collector-lambda/utils.py
new file mode 100644
index 0000000..558abfb
--- /dev/null
+++ b/src/support_collector/support-collector-lambda/utils.py
@@ -0,0 +1,17 @@
+from datetime import datetime
+
+
+def convert_time_to_month_year(iso_datetime):
+    # Parse the time_created string into a datetime object
+    # dt = datetime.strptime(iso_datetime, "%Y-%m-%dT%H:%M:%S.%fZ")
+    iso_date = iso_datetime.replace("Z", "+00:00")
+    dt = datetime.fromisoformat(iso_date)
+
+    # Extract the year and month components
+    year = dt.year
+    month = dt.month
+
+    # Format the year and month as "YYYY/MM"
+    month_year = f"{year}/{month:02d}"
+
+    return month_year
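
Reviewer sketch, not part of the patch: a minimal, runnable illustration of the S3 keys this change produces. The account ID, case payload, and check ID below are made-up sample values, and the date conversion simply mirrors the convert_time_to_month_year helper added in utils.py.

from datetime import datetime

def month_year(iso_datetime):
    # Mirrors utils.convert_time_to_month_year: "2024-07-23T15:49:29.995Z" -> "2024/07"
    dt = datetime.fromisoformat(iso_datetime.replace("Z", "+00:00"))
    return f"{dt.year}/{dt.month:02d}"

# Hypothetical sample values for illustration only
account_id = "111122223333"
case = {"case": {"displayId": "1234567890", "timeCreated": "2024-07-23T15:49:29.995Z"}}

# Support cases (and Health events) are now partitioned by creation month,
# so a daily re-run writes the same key again instead of adding a new dated copy.
case_key = (
    f"support-cases/{account_id}/"
    f"{month_year(case['case']['timeCreated'])}/{case['case']['displayId']}.json"
)
print(case_key)  # support-cases/111122223333/2024/07/1234567890.json

# Trusted Advisor results drop the date segment entirely: one object per checkId.
ta_key = f"ta/{account_id}/exampleCheckId.json"
print(ta_key)  # ta/111122223333/exampleCheckId.json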