Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhance snapshot replicator #39

Merged
merged 3 commits into from
Oct 11, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions snapshot-replicator/functions/remove_snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@ def deleteSnapshots(region):
for page in page_iterator:
snapshots.extend(page['DBSnapshots'])
for snapshot in snapshots:
create_ts = snapshot['SnapshotCreateTime'].replace(tzinfo=None)
if create_ts < datetime.datetime.now() - datetime.timedelta(days=int(duration)):
print("Deleting snapshot id:", snapshot['DBSnapshotIdentifier'])
try:
response = rds.delete_db_snapshot(DBSnapshotIdentifier=snapshot['DBSnapshotIdentifier'])
print response
except botocore.exceptions.ClientError as e:
raise Exception("Could not issue delete command: %s" % e)
if snapshot['SnapshotType'] == 'manual':
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

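Maybe you could filter out automatic snapshots directly in the `describe_db_snapshots` API call (for example by passing `SnapshotType='manual'`) instead of checking the type inside the loop?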

create_ts = snapshot['SnapshotCreateTime'].replace(tzinfo=None)
if create_ts < datetime.datetime.now() - datetime.timedelta(days=int(duration)):
print("Deleting snapshot id:", snapshot['DBSnapshotIdentifier'])
try:
response = rds.delete_db_snapshot(DBSnapshotIdentifier=snapshot['DBSnapshotIdentifier'])
print response
except botocore.exceptions.ClientError as e:
raise Exception("Could not issue delete command: %s" % e)

deleteSnapshots(region=source_region)
deleteSnapshots(region=target_region)
Expand Down
56 changes: 19 additions & 37 deletions snapshot-replicator/functions/shipper.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,34 @@
import boto3
import botocore
import datetime
import re
import os

source_region = os.environ['SOURCE_REGION']
target_region = os.environ['TARGET_REGION']
kms_key_id = os.environ['KMS_KEY_ID']
iam = boto3.client('iam')
instances = os.environ['DB_INSTANCES']

print('Loading function')

def byTimestamp(snap):
    """Return an ISO-8601 string usable as a sort key for a snapshot.

    Args:
        snap: mapping describing one snapshot. Its optional
            'SnapshotCreateTime' entry must be a datetime object.

    Returns:
        The ISO-8601 form of 'SnapshotCreateTime' when the key is present,
        otherwise the ISO-8601 form of the current local time.
    """
    if 'SnapshotCreateTime' in snap:
        return snap['SnapshotCreateTime'].isoformat()
    # No creation time recorded for this entry: fall back to the current
    # time so the caller's sort still gets a comparable string key.
    return datetime.datetime.now().isoformat()

def lambda_handler(event, context):
if("Finished" in event['Records'][0]['Sns']['Message']):
account_ids = []
try:
iam.get_user()
except Exception as e:
account_ids.append(re.search(r'(arn:aws:sts::)([0-9]+)', str(e)).groups()[1])
account = account_ids[0]

if("Manual snapshot created" in event['Records'][0]['Sns']['Message']):
source = boto3.client('rds', region_name=source_region)

source_snap = event['Records'][0]['Sns']['Source']
snapshot_details = source.describe_db_snapshots(DBSnapshotIdentifier=source_snap)['DBSnapshots'][0]
for instance in instances.split(','):
source_instances = source.describe_db_instances(DBInstanceIdentifier=instance)
source_snaps = source.describe_db_snapshots(DBInstanceIdentifier=instance)['DBSnapshots']
source_snap = sorted(source_snaps, key=byTimestamp, reverse=True)[0]['DBSnapshotIdentifier']
source_snap_arn = 'arn:aws:rds:%s:%s:snapshot:%s' % (source_region, account, source_snap)
target_snap_id = (re.sub('rds:', '', source_snap))
print('Will Copy %s to %s' % (source_snap_arn, target_snap_id))
target = boto3.client('rds', region_name=target_region)

try:
response = target.copy_db_snapshot(
SourceDBSnapshotIdentifier=source_snap_arn,
TargetDBSnapshotIdentifier=target_snap_id,
SourceRegion=source_region,
KmsKeyId=kms_key_id,
CopyTags = True)
print(response)
except botocore.exceptions.ClientError as e:
raise Exception("Could not issue copy command: %s" % e)
copied_snaps = target.describe_db_snapshots(SnapshotType='manual', DBInstanceIdentifier=instance)['DBSnapshots']

if instance in snapshot_detailts['DBInstanceIdentifier']:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you can replace this `if` and the `for` loop above with a single membership check:

if snapshot_detailts['DBInstanceIdentifier'] in instances.split(','):

source_snap_arn = snapshot_detailts['DBSnapshotArn'])
target_snap_id = (re.sub('rds:', '', source_snap))
target = boto3.client('rds', region_name=target_region)
print('Will Copy %s to %s' % (source_snap_arn, target_snap_id))
try:
response = target.copy_db_snapshot(
SourceDBSnapshotIdentifier=source_snap_arn,
TargetDBSnapshotIdentifier=target_snap_id,
SourceRegion=source_region,
KmsKeyId=kms_key_id,
CopyTags = True)
print(response)
except botocore.exceptions.ClientError as e:
raise Exception("Could not issue copy command: %s" % e)

7 changes: 3 additions & 4 deletions snapshot-replicator/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ resource "aws_iam_role_policy_attachment" "attach_lambda_copy_policy_to_role" {
resource "aws_iam_role_policy_attachment" "lambda_exec_role" {
count = var.enable ? 1 : 0
role = aws_iam_role.iam_for_lambda[0].name
policy_arn = "arn:aws:iam::aws:policy/AWSLambdaBasicExecutionRole"
policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
}

resource "aws_iam_policy" "rds_lambda_create_snapshot" {
Expand Down Expand Up @@ -232,11 +232,10 @@ resource "aws_db_event_subscription" "default" {
name = "rds-manual-snapshot-${var.environment}"
sns_topic = aws_sns_topic.rds_backup_events[0].arn

source_type = "db-instance"
source_ids = var.db_instances
source_type = "snapshots"

event_categories = [
"backup",
"creation",
]
}

Expand Down
15 changes: 10 additions & 5 deletions snapshot-replicator/monitoring.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# Alarm periods, in seconds, used as the `period` of the CloudWatch metric
# alarms defined in this file.
locals {
# 3600 seconds multiplied by var.custom_snapshot_rate
# (presumably the snapshot rate is expressed in hours — TODO confirm).
cw_alarm_custom_period = 3600 * var.custom_snapshot_rate
# 24 hours expressed in seconds (86400).
cw_alarm_daily_period = 3600 * 24
}

resource "aws_cloudwatch_metric_alarm" "lambda_rds_snapshot_copy_errors" {
count = var.enable ? 1 : 0
alarm_name = "rds_snapshot_copy_invocation_${var.environment}_errors"
Expand All @@ -8,7 +13,7 @@ resource "aws_cloudwatch_metric_alarm" "lambda_rds_snapshot_copy_errors" {
comparison_operator = "GreaterThanThreshold"
threshold = 1
evaluation_periods = 1
period = 21600 # 6 hours
period = local.cw_alarm_custom_period

alarm_actions = [var.sns_topic_arn]
ok_actions = [var.sns_topic_arn]
Expand All @@ -28,7 +33,7 @@ resource "aws_cloudwatch_metric_alarm" "lambda_rds_snapshot_create_errors" {
comparison_operator = "GreaterThanThreshold"
threshold = 1
evaluation_periods = 1
period = 21600 # 6 hours
period = local.cw_alarm_custom_period

alarm_actions = [var.sns_topic_arn]
ok_actions = [var.sns_topic_arn]
Expand All @@ -48,7 +53,7 @@ resource "aws_cloudwatch_metric_alarm" "lambda_rds_snapshot_cleanup_errors" {
comparison_operator = "GreaterThanThreshold"
threshold = 1
evaluation_periods = 1
period = 86400 # 24 hours
period = local.cw_alarm_daily_period

alarm_actions = [var.sns_topic_arn]
ok_actions = [var.sns_topic_arn]
Expand All @@ -68,7 +73,7 @@ resource "aws_cloudwatch_metric_alarm" "invoke_rds_snapshot_lambda" {
comparison_operator = "GreaterThanThreshold"
threshold = 1
evaluation_periods = 1
period = 21600 # 6 hours
period = local.cw_alarm_custom_period

alarm_actions = [var.sns_topic_arn]
ok_actions = [var.sns_topic_arn]
Expand All @@ -88,7 +93,7 @@ resource "aws_cloudwatch_metric_alarm" "invoke_rds_cleanup_lambda" {
comparison_operator = "GreaterThanThreshold"
threshold = 1
evaluation_periods = 1
period = 86400 # 24 hours
period = local.cw_alarm_daily_period

alarm_actions = [var.sns_topic_arn]
ok_actions = [var.sns_topic_arn]
Expand Down