From c94d3865eaaaa4639bd3d2d30928c4a415c40f17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=F0=9F=91=A8=F0=9F=8F=BC=E2=80=8D=F0=9F=92=BB=20Romain=20M?= =?UTF-8?q?arcadier-Muller?= Date: Wed, 24 Jun 2020 14:26:41 +0200 Subject: [PATCH] fix(toolkit): CLI tool fails on CloudFormation Throttling The CDK (particularly, `cdk deploy`) might crash after getting throttled by CloudFormation, after the default configured 6 retries have been reached. This changes the retry configuration of the CloudFormation client (and only that one) to allow up to 10 retries with a backoff base of 1 second. This makes the maximum cumulative back-off about 17 minutes, which I hope will be plenty even for the 1 TPM calls. This should allow heavily parallel deployments on the same account and region to avoid getting killed by a throttle; but will reduce the responsiveness of the progress UI. Additionally, this configures a custom logger for the SDK, which logs the SDK calls to the console when running in debug mode, allowing the users to gain visibility on more information for troubleshooting purposes. Fixes #5637 --- packages/aws-cdk/lib/api/aws-auth/sdk.ts | 25 ++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/packages/aws-cdk/lib/api/aws-auth/sdk.ts b/packages/aws-cdk/lib/api/aws-auth/sdk.ts index 239f85fef51bc..871b36c6002d3 100644 --- a/packages/aws-cdk/lib/api/aws-auth/sdk.ts +++ b/packages/aws-cdk/lib/api/aws-auth/sdk.ts @@ -42,16 +42,17 @@ export class SDK implements ISDK { private readonly config: ConfigurationOptions; /** - * Default retry options for SDK clients - * - * Biggest bottleneck is CloudFormation, with a 1tps call rate. We want to be - * a little more tenacious than the defaults, and with a little more breathing - * room between calls (defaults are {retries=3, base=100}). + * Default retry options for SDK clients.
+ */ + private readonly retryOptions = { maxRetries: 6, retryDelayOptions: { base: 300 } }; + + /** + * The more generous retry policy for CloudFormation, which has a 1 TPM limit on certain APIs, + * which are abundantly used for deployment tracking, ... * - * I've left this running in a tight loop for an hour and the throttle errors - * haven't escaped the retry mechanism. + * So we're allowing way more retries, but waiting a bit more. */ - private readonly retryOptions = { maxRetries: 6, retryDelayOptions: { base: 300 }}; + private readonly cloudFormationRetryOptions = { maxRetries: 10, retryDelayOptions: { base: 1_000 } }; constructor(private readonly credentials: AWS.Credentials, region: string, httpOptions: ConfigurationOptions = {}) { this.config = { @@ -59,12 +60,16 @@ export class SDK implements ISDK { ...this.retryOptions, credentials, region, + logger: { log: (...messages) => messages.forEach(m => debug('%s', m)) }, }; this.currentRegion = region; } public cloudFormation(): AWS.CloudFormation { - return wrapServiceErrorHandling(new AWS.CloudFormation(this.config)); + return wrapServiceErrorHandling(new AWS.CloudFormation({ + ...this.config, + ...this.cloudFormationRetryOptions, + })); } public ec2(): AWS.EC2 { @@ -212,4 +217,4 @@ function allChainedExceptionMessages(e: Error | undefined) { e = (e as any).originalError; } return ret.join(': '); -} \ No newline at end of file +}