diff --git a/packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts b/packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts index d3f508f668ecc..da537c149f013 100644 --- a/packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts +++ b/packages/@aws-cdk/aws-logs/lib/log-retention-provider/index.ts @@ -18,14 +18,39 @@ interface SdkRetryOptions { * @param options CloudWatch API SDK options. */ async function createLogGroupSafe(logGroupName: string, region?: string, options?: SdkRetryOptions) { - try { // Try to create the log group - const cloudwatchlogs = new AWS.CloudWatchLogs({ apiVersion: '2014-03-28', region, ...options }); - await cloudwatchlogs.createLogGroup({ logGroupName }).promise(); - } catch (e) { - if (e.code !== 'ResourceAlreadyExistsException') { - throw e; + // If we set the log retention for a lambda, then due to the async nature of + // Lambda logging there could be a race condition when the same log group is + // already being created by the lambda execution. This can sometime result in + // an error "OperationAbortedException: A conflicting operation is currently + // in progress...Please try again." + // To avoid an error, we do as requested and try again. + let retryCount = options?.maxRetries == undefined ? 10 : options.maxRetries; + const delay = options?.retryOptions?.base == undefined ? 10 : options.retryOptions.base; + do { + try { + const cloudwatchlogs = new AWS.CloudWatchLogs({ apiVersion: '2014-03-28', region, ...options }); + await cloudwatchlogs.createLogGroup({ logGroupName }).promise(); + return; + } catch (error) { + if (error.code === 'ResourceAlreadyExistsException') { + // The log group is already created by the lambda execution + return; + } + if (error.code === 'OperationAbortedException') { + if (retryCount > 0) { + retryCount--; + await new Promise(resolve => setTimeout(resolve, delay)); + continue; + } else { + // The log group is still being created by another execution but we are out of retries + throw new Error('Out of attempts to create a logGroup'); + } + } + // Any other error + console.error(error); + throw error; } - } + } while (true); // exit happens on retry count check } /** @@ -64,21 +89,16 @@ export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent await setRetentionPolicy(logGroupName, logGroupRegion, retryOptions, parseInt(event.ResourceProperties.RetentionInDays, 10)); if (event.RequestType === 'Create') { - // Set a retention policy of 1 day on the logs of this function. The log - // group for this function should already exist at this stage because we - // already logged the event but due to the async nature of Lambda logging - // there could be a race condition. So we also try to create the log group - // of this function first. If multiple LogRetention constructs are present - // in the stack, they will try to act on this function's log group at the - // same time. This can sometime result in an OperationAbortedException. To - // avoid this and because this operation is not critical we catch all errors. - try { - const region = process.env.AWS_REGION; - await createLogGroupSafe(`/aws/lambda/${context.functionName}`, region, retryOptions); - await setRetentionPolicy(`/aws/lambda/${context.functionName}`, region, retryOptions, 1); - } catch (e) { - console.log(e); - } + // Set a retention policy of 1 day on the logs of this very function. + // Due to the async nature of the log group creation, the log group for this function might + // still be not created yet at this point. Therefore we attempt to create it. + // In case it is being created, createLogGroupSafe will handle the conflic. + const region = process.env.AWS_REGION; + await createLogGroupSafe(`/aws/lambda/${context.functionName}`, region, retryOptions); + // If createLogGroupSafe fails, the log group is not created even after multiple attempts + // In this case we have nothing to set the retention policy on but an exception will skip + // the next line. + await setRetentionPolicy(`/aws/lambda/${context.functionName}`, region, retryOptions, 1); } } diff --git a/packages/@aws-cdk/aws-logs/test/test.log-retention-provider.ts b/packages/@aws-cdk/aws-logs/test/test.log-retention-provider.ts index a08ff060dc2a4..ba67371ca9d60 100644 --- a/packages/@aws-cdk/aws-logs/test/test.log-retention-provider.ts +++ b/packages/@aws-cdk/aws-logs/test/test.log-retention-provider.ts @@ -27,6 +27,14 @@ function createRequest(type: string) { .reply(200); } +class MyError extends Error { + code: string; + constructor(message: string, code: string) { + super(message); + this.code = code; + } +} + export = { 'tearDown'(callback: any) { AWS.restore(); @@ -231,10 +239,60 @@ export = { test.done(); }, - async 'does not fail when operations on provider log group fail'(test: Test) { + async 'does not if when operations on provider log group fails'(test: Test) { + let attempt = 2; const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => { if (params.logGroupName === '/aws/lambda/provider') { - return Promise.reject(new Error('OperationAbortedException')); + if (attempt > 0) { + attempt--; + return Promise.reject(new MyError( + 'A conflicting operation is currently in progress against this resource. Please try again.', + 'OperationAbortedException')); + } else { + return Promise.resolve({}); + } + } + return Promise.resolve({}); + }; + + const putRetentionPolicyFake = sinon.fake.resolves({}); + const deleteRetentionPolicyFake = sinon.fake.resolves({}); + + AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake); + AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake); + AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake); + + const event = { + ...eventCommon, + RequestType: 'Create', + ResourceProperties: { + ServiceToken: 'token', + RetentionInDays: '30', + LogGroupName: 'group', + }, + }; + + const request = createRequest('SUCCESS'); + + await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context); + + test.equal(request.isDone(), true); + + test.done(); + }, + + async 'does not fail if operations on CDK lambda log group fails twice'(test: Test) { + let attempt = 2; + const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => { + if (params.logGroupName === 'group') { + if (attempt > 0) { + attempt--; + return Promise.reject(new MyError( + 'A conflicting operation is currently in progress against this resource. Please try again.', + 'OperationAbortedException')); + } else { + return Promise.resolve({}); + } } return Promise.resolve({}); }; @@ -265,6 +323,42 @@ export = { test.done(); }, + async 'does fail if operations on CDK lambda log group fails indefinitely'(test: Test) { + const createLogGroupFake = (params: AWSSDK.CloudWatchLogs.CreateLogGroupRequest) => { + if (params.logGroupName === 'group') { + return Promise.reject(new MyError( + 'A conflicting operation is currently in progress against this resource. Please try again.', + 'OperationAbortedException')); + } + return Promise.resolve({}); + }; + + const putRetentionPolicyFake = sinon.fake.resolves({}); + const deleteRetentionPolicyFake = sinon.fake.resolves({}); + + AWS.mock('CloudWatchLogs', 'createLogGroup', createLogGroupFake); + AWS.mock('CloudWatchLogs', 'putRetentionPolicy', putRetentionPolicyFake); + AWS.mock('CloudWatchLogs', 'deleteRetentionPolicy', deleteRetentionPolicyFake); + + const event = { + ...eventCommon, + RequestType: 'Create', + ResourceProperties: { + ServiceToken: 'token', + RetentionInDays: '30', + LogGroupName: 'group', + }, + }; + + const request = createRequest('FAILED'); + + await provider.handler(event as AWSLambda.CloudFormationCustomResourceCreateEvent, context); + + test.equal(request.isDone(), true); + + test.done(); + }, + async 'response data contains the log group name'(test: Test) { AWS.mock('CloudWatchLogs', 'createLogGroup', sinon.fake.resolves({})); AWS.mock('CloudWatchLogs', 'putRetentionPolicy', sinon.fake.resolves({}));