From 746513e5eb64e603efce93aa3cbe114e511c91d0 Mon Sep 17 00:00:00 2001 From: Hsing-Hui Hsu <hsinghui@amazon.com> Date: Wed, 2 Jun 2021 16:35:31 -0700 Subject: [PATCH] fix(ecs): allow both Fargate and ASG capacity providers in a cluster Previously, adding/enabling Fargate capacity providers would add the FARGATE and FARGATE_SPOT capacity providers a part of the [capacityProviders field](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ecs-cluster.html#cfn-ecs-cluster-capacityproviders) on the ECS::Cluster resource. However, ASG Capacity Providers can only be specified as part of the [capacityProviders field on the ECS::ClusterCapacityProviderAssociation](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-resource-ecs-clustercapacityproviderassociations.html#cfn-ecs-clustercapacityproviderassociations-capacityproviders) resource, due to circular dependency issues. However, when a cluster is created, any capacity providers regardless of type must all be specified *either* on the cluster *or* as a ClusterCapacityProviderAssociation. Therefore, in order to support both types on the same cluster, Fargate capacity providers must also be specified as a ClusterCapacityProviderAssociation. This change removes the Fargate capacity providers from the Cluster resource and adds them as a ClusterCapacityProviderAssociation. Fixes #14730. --- packages/@aws-cdk/aws-ecs/lib/cluster.ts | 37 ++- .../@aws-cdk/aws-ecs/test/cluster.test.ts | 24 +- .../ec2/integ.capacity-provider.expected.json | 291 +++++++++++++++++- .../test/ec2/integ.capacity-provider.ts | 4 +- .../test/fargate/fargate-service.test.ts | 8 + .../integ.capacity-providers.expected.json | 11 +- 6 files changed, 352 insertions(+), 23 deletions(-) diff --git a/packages/@aws-cdk/aws-ecs/lib/cluster.ts b/packages/@aws-cdk/aws-ecs/lib/cluster.ts index 5c6e910e4507e..b68fe6ff29f7b 100644 --- a/packages/@aws-cdk/aws-ecs/lib/cluster.ts +++ b/packages/@aws-cdk/aws-ecs/lib/cluster.ts @@ -126,14 +126,14 @@ export class Cluster extends Resource implements ICluster { public readonly clusterName: string; /** - * The cluster-level (FARGATE, FARGATE_SPOT) capacity providers. + * The EC2 Auto Scaling Group capacity providers associated with the cluster. */ - private _fargateCapacityProviders: string[] = []; + private _asgCapacityProviders: AsgCapacityProvider[] = []; /** - * The EC2 Auto Scaling Group capacity providers associated with the cluster. + * The names of both ASG and Fargate capacity providers associated with the cluster. */ - private _asgCapacityProviders: AsgCapacityProvider[] = []; + private _capacityProviderNames: string[] = []; /** * The AWS Cloud Map namespace to associate with the cluster. @@ -173,7 +173,7 @@ export class Cluster extends Resource implements ICluster { clusterSettings = [{ name: 'containerInsights', value: props.containerInsights ? ContainerInsights.ENABLED : ContainerInsights.DISABLED }]; } - this._fargateCapacityProviders = props.capacityProviders ?? []; + this._capacityProviderNames = props.capacityProviders ?? []; if (props.enableFargateCapacityProviders) { this.enableFargateCapacityProviders(); } @@ -189,7 +189,6 @@ export class Cluster extends Resource implements ICluster { const cluster = new CfnCluster(this, 'Resource', { clusterName: this.physicalName, clusterSettings, - capacityProviders: Lazy.list({ produce: () => this._fargateCapacityProviders }, { omitEmpty: true }), configuration: this._executeCommandConfiguration && this.renderExecuteCommandConfiguration(), }); @@ -216,7 +215,7 @@ export class Cluster extends Resource implements ICluster { // since it's harmless, but we'd prefer not to add unexpected new // resources to the stack which could surprise users working with // brown-field CDK apps and stacks. - Aspects.of(this).add(new MaybeCreateCapacityProviderAssociations(this, id, this._asgCapacityProviders)); + Aspects.of(this).add(new MaybeCreateCapacityProviderAssociations(this, id, this._capacityProviderNames)); } /** @@ -224,8 +223,8 @@ export class Cluster extends Resource implements ICluster { */ public enableFargateCapacityProviders() { for (const provider of ['FARGATE', 'FARGATE_SPOT']) { - if (!this._fargateCapacityProviders.includes(provider)) { - this._fargateCapacityProviders.push(provider); + if (!this._capacityProviderNames.includes(provider)) { + this._capacityProviderNames.push(provider); } } } @@ -341,6 +340,7 @@ export class Cluster extends Resource implements ICluster { }); this._asgCapacityProviders.push(provider); + this._capacityProviderNames.push(provider.capacityProviderName); } /** @@ -462,8 +462,8 @@ export class Cluster extends Resource implements ICluster { throw new Error('CapacityProvider not supported'); } - if (!this._fargateCapacityProviders.includes(provider)) { - this._fargateCapacityProviders.push(provider); + if (!this._capacityProviderNames.includes(provider)) { + this._capacityProviderNames.push(provider); } } @@ -1340,22 +1340,25 @@ export class AsgCapacityProvider extends CoreConstruct { class MaybeCreateCapacityProviderAssociations implements IAspect { private scope: CoreConstruct; private id: string; - private capacityProviders: AsgCapacityProvider[] + private capacityProviders: string[] + private resource?: CfnClusterCapacityProviderAssociations - constructor(scope: CoreConstruct, id: string, capacityProviders: AsgCapacityProvider[]) { + constructor(scope: CoreConstruct, id: string, capacityProviders: string[] ) { this.scope = scope; this.id = id; this.capacityProviders = capacityProviders; } + public visit(node: IConstruct): void { if (node instanceof Cluster) { - const providers = this.capacityProviders.map(p => p.capacityProviderName).filter(p => p !== 'FARGATE' && p !== 'FARGATE_SPOT'); - if (providers.length > 0) { - new CfnClusterCapacityProviderAssociations(this.scope, this.id, { + if (this.capacityProviders.length > 0 && !this.resource) { + const resource = new CfnClusterCapacityProviderAssociations(this.scope, this.id, { cluster: node.clusterName, defaultCapacityProviderStrategy: [], - capacityProviders: Lazy.list({ produce: () => providers }), + // capacityProviders: this.capacityProviders, + capacityProviders: Lazy.list({ produce: () => this.capacityProviders }), }); + this.resource = resource; } } } diff --git a/packages/@aws-cdk/aws-ecs/test/cluster.test.ts b/packages/@aws-cdk/aws-ecs/test/cluster.test.ts index a580cb5dca197..8b5ee66d6b3ff 100644 --- a/packages/@aws-cdk/aws-ecs/test/cluster.test.ts +++ b/packages/@aws-cdk/aws-ecs/test/cluster.test.ts @@ -1709,6 +1709,10 @@ nodeunitShim({ // THEN expect(stack).to(haveResource('AWS::ECS::Cluster', { + CapacityProviders: ABSENT, + })); + + expect(stack).to(haveResource('AWS::ECS::ClusterCapacityProviderAssociations', { CapacityProviders: ['FARGATE_SPOT'], })); @@ -1727,6 +1731,10 @@ nodeunitShim({ // THEN expect(stack).to(haveResource('AWS::ECS::Cluster', { + CapacityProviders: ABSENT, + })); + + expect(stack).to(haveResource('AWS::ECS::ClusterCapacityProviderAssociations', { CapacityProviders: ['FARGATE', 'FARGATE_SPOT'], })); @@ -1744,6 +1752,10 @@ nodeunitShim({ // THEN expect(stack).to(haveResource('AWS::ECS::Cluster', { + CapacityProviders: ABSENT, + })); + + expect(stack).to(haveResource('AWS::ECS::ClusterCapacityProviderAssociations', { CapacityProviders: ['FARGATE', 'FARGATE_SPOT'], })); @@ -1762,6 +1774,10 @@ nodeunitShim({ // THEN expect(stack).to(haveResource('AWS::ECS::Cluster', { + CapacityProviders: ABSENT, + })); + + expect(stack).to(haveResource('AWS::ECS::ClusterCapacityProviderAssociations', { CapacityProviders: ['FARGATE'], })); @@ -1779,7 +1795,11 @@ nodeunitShim({ cluster.addCapacityProvider('FARGATE'); // does not add twice // THEN - expect(stack).to(haveResource('AWS::ECS::Cluster', { + // expect(stack).to(haveResource('AWS::ECS::Cluster', { + // CapacityProviders: ABSENT, + // })); + + expect(stack).to(haveResource('AWS::ECS::ClusterCapacityProviderAssociations', { CapacityProviders: ['FARGATE'], })); @@ -1941,6 +1961,8 @@ nodeunitShim({ Ref: 'EcsCluster97242B84', }, CapacityProviders: [ + 'FARGATE', + 'FARGATE_SPOT', { Ref: 'providerD3FF4D3A', }, diff --git a/packages/@aws-cdk/aws-ecs/test/ec2/integ.capacity-provider.expected.json b/packages/@aws-cdk/aws-ecs/test/ec2/integ.capacity-provider.expected.json index d4e008750fe93..555d9acc46764 100644 --- a/packages/@aws-cdk/aws-ecs/test/ec2/integ.capacity-provider.expected.json +++ b/packages/@aws-cdk/aws-ecs/test/ec2/integ.capacity-provider.expected.json @@ -362,6 +362,8 @@ "Type": "AWS::ECS::ClusterCapacityProviderAssociations", "Properties": { "CapacityProviders": [ + "FARGATE", + "FARGATE_SPOT", { "Ref": "EC2CapacityProvider5A2E35CD" } @@ -582,7 +584,6 @@ "LaunchConfigurationName": { "Ref": "ASGLaunchConfigC00AF12B" }, - "NewInstancesProtectedFromScaleIn": true, "Tags": [ { "Key": "Name", @@ -605,6 +606,292 @@ } } }, + "ASGDrainECSHookFunctionServiceRoleC12963BB": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "lambda.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + ] + ] + } + ], + "Tags": [ + { + "Key": "Name", + "Value": "integ-ec2-capacity-provider/ASG" + } + ] + } + }, + "ASGDrainECSHookFunctionServiceRoleDefaultPolicy16848A27": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": [ + "ec2:DescribeInstances", + "ec2:DescribeInstanceAttribute", + "ec2:DescribeInstanceStatus", + "ec2:DescribeHosts" + ], + "Effect": "Allow", + "Resource": "*" + }, + { + "Action": "autoscaling:CompleteLifecycleAction", + "Effect": "Allow", + "Resource": { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":autoscaling:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":autoScalingGroup:*:autoScalingGroupName/", + { + "Ref": "ASG46ED3070" + } + ] + ] + } + }, + { + "Action": [ + "ecs:DescribeContainerInstances", + "ecs:DescribeTasks" + ], + "Condition": { + "ArnEquals": { + "ecs:cluster": { + "Fn::GetAtt": [ + "EC2CPClusterD5F0FD32", + "Arn" + ] + } + } + }, + "Effect": "Allow", + "Resource": "*" + }, + { + "Action": [ + "ecs:ListContainerInstances", + "ecs:SubmitContainerStateChange", + "ecs:SubmitTaskStateChange" + ], + "Effect": "Allow", + "Resource": { + "Fn::GetAtt": [ + "EC2CPClusterD5F0FD32", + "Arn" + ] + } + }, + { + "Action": [ + "ecs:UpdateContainerInstancesState", + "ecs:ListTasks" + ], + "Condition": { + "ArnEquals": { + "ecs:cluster": { + "Fn::GetAtt": [ + "EC2CPClusterD5F0FD32", + "Arn" + ] + } + } + }, + "Effect": "Allow", + "Resource": "*" + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "ASGDrainECSHookFunctionServiceRoleDefaultPolicy16848A27", + "Roles": [ + { + "Ref": "ASGDrainECSHookFunctionServiceRoleC12963BB" + } + ] + } + }, + "ASGDrainECSHookFunction5F24CF4D": { + "Type": "AWS::Lambda::Function", + "Properties": { + "Code": { + "ZipFile": "import boto3, json, os, time\n\necs = boto3.client('ecs')\nautoscaling = boto3.client('autoscaling')\n\n\ndef lambda_handler(event, context):\n print(json.dumps(event))\n cluster = os.environ['CLUSTER']\n snsTopicArn = event['Records'][0]['Sns']['TopicArn']\n lifecycle_event = json.loads(event['Records'][0]['Sns']['Message'])\n instance_id = lifecycle_event.get('EC2InstanceId')\n if not instance_id:\n print('Got event without EC2InstanceId: %s', json.dumps(event))\n return\n\n instance_arn = container_instance_arn(cluster, instance_id)\n print('Instance %s has container instance ARN %s' % (lifecycle_event['EC2InstanceId'], instance_arn))\n\n if not instance_arn:\n return\n\n task_arns = container_instance_task_arns(cluster, instance_arn)\n \n if task_arns:\n print('Instance ARN %s has task ARNs %s' % (instance_arn, ', '.join(task_arns)))\n\n while has_tasks(cluster, instance_arn, task_arns):\n time.sleep(10)\n\n try:\n print('Terminating instance %s' % instance_id)\n autoscaling.complete_lifecycle_action(\n LifecycleActionResult='CONTINUE',\n **pick(lifecycle_event, 'LifecycleHookName', 'LifecycleActionToken', 'AutoScalingGroupName'))\n except Exception as e:\n # Lifecycle action may have already completed.\n print(str(e))\n\n\ndef container_instance_arn(cluster, instance_id):\n \"\"\"Turn an instance ID into a container instance ARN.\"\"\"\n arns = ecs.list_container_instances(cluster=cluster, filter='ec2InstanceId==' + instance_id)['containerInstanceArns']\n if not arns:\n return None\n return arns[0]\n\ndef container_instance_task_arns(cluster, instance_arn):\n \"\"\"Fetch tasks for a container instance ARN.\"\"\"\n arns = ecs.list_tasks(cluster=cluster, containerInstance=instance_arn)['taskArns']\n return arns\n\ndef has_tasks(cluster, instance_arn, task_arns):\n \"\"\"Return True if the instance is running tasks for the given cluster.\"\"\"\n instances = ecs.describe_container_instances(cluster=cluster, containerInstances=[instance_arn])['containerInstances']\n if not instances:\n return False\n instance = instances[0]\n\n if instance['status'] == 'ACTIVE':\n # Start draining, then try again later\n set_container_instance_to_draining(cluster, instance_arn)\n return True\n\n task_count = None\n\n if task_arns:\n # Fetch details for tasks running on the container instance\n tasks = ecs.describe_tasks(cluster=cluster, tasks=task_arns)['tasks']\n if tasks:\n # Consider any non-stopped tasks as running\n task_count = sum(task['lastStatus'] != 'STOPPED' for task in tasks) + instance['pendingTasksCount']\n \n if not task_count:\n # Fallback to instance task counts if detailed task information is unavailable\n task_count = instance['runningTasksCount'] + instance['pendingTasksCount']\n \n print('Instance %s has %s tasks' % (instance_arn, task_count))\n\n return task_count > 0\n\ndef set_container_instance_to_draining(cluster, instance_arn):\n ecs.update_container_instances_state(\n cluster=cluster,\n containerInstances=[instance_arn], status='DRAINING')\n\n\ndef pick(dct, *keys):\n \"\"\"Pick a subset of a dict.\"\"\"\n return {k: v for k, v in dct.items() if k in keys}\n" + }, + "Role": { + "Fn::GetAtt": [ + "ASGDrainECSHookFunctionServiceRoleC12963BB", + "Arn" + ] + }, + "Environment": { + "Variables": { + "CLUSTER": { + "Ref": "EC2CPClusterD5F0FD32" + } + } + }, + "Handler": "index.lambda_handler", + "Runtime": "python3.6", + "Tags": [ + { + "Key": "Name", + "Value": "integ-ec2-capacity-provider/ASG" + } + ], + "Timeout": 310 + }, + "DependsOn": [ + "ASGDrainECSHookFunctionServiceRoleDefaultPolicy16848A27", + "ASGDrainECSHookFunctionServiceRoleC12963BB" + ] + }, + "ASGDrainECSHookFunctionAllowInvokeintegec2capacityproviderASGLifecycleHookDrainHookTopic4714B3C1EB63E78F": { + "Type": "AWS::Lambda::Permission", + "Properties": { + "Action": "lambda:InvokeFunction", + "FunctionName": { + "Fn::GetAtt": [ + "ASGDrainECSHookFunction5F24CF4D", + "Arn" + ] + }, + "Principal": "sns.amazonaws.com", + "SourceArn": { + "Ref": "ASGLifecycleHookDrainHookTopicA8AD4ACB" + } + } + }, + "ASGDrainECSHookFunctionTopicD6FC59F7": { + "Type": "AWS::SNS::Subscription", + "Properties": { + "Protocol": "lambda", + "TopicArn": { + "Ref": "ASGLifecycleHookDrainHookTopicA8AD4ACB" + }, + "Endpoint": { + "Fn::GetAtt": [ + "ASGDrainECSHookFunction5F24CF4D", + "Arn" + ] + } + } + }, + "ASGLifecycleHookDrainHookRoleD640316C": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "autoscaling.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "Tags": [ + { + "Key": "Name", + "Value": "integ-ec2-capacity-provider/ASG" + } + ] + } + }, + "ASGLifecycleHookDrainHookRoleDefaultPolicy3EEFDE57": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": "sns:Publish", + "Effect": "Allow", + "Resource": { + "Ref": "ASGLifecycleHookDrainHookTopicA8AD4ACB" + } + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "ASGLifecycleHookDrainHookRoleDefaultPolicy3EEFDE57", + "Roles": [ + { + "Ref": "ASGLifecycleHookDrainHookRoleD640316C" + } + ] + } + }, + "ASGLifecycleHookDrainHookTopicA8AD4ACB": { + "Type": "AWS::SNS::Topic", + "Properties": { + "Tags": [ + { + "Key": "Name", + "Value": "integ-ec2-capacity-provider/ASG" + } + ] + } + }, + "ASGLifecycleHookDrainHookFE4AFEBE": { + "Type": "AWS::AutoScaling::LifecycleHook", + "Properties": { + "AutoScalingGroupName": { + "Ref": "ASG46ED3070" + }, + "LifecycleTransition": "autoscaling:EC2_INSTANCE_TERMINATING", + "DefaultResult": "CONTINUE", + "HeartbeatTimeout": 300, + "NotificationTargetARN": { + "Ref": "ASGLifecycleHookDrainHookTopicA8AD4ACB" + }, + "RoleARN": { + "Fn::GetAtt": [ + "ASGLifecycleHookDrainHookRoleD640316C", + "Arn" + ] + } + }, + "DependsOn": [ + "ASGLifecycleHookDrainHookRoleDefaultPolicy3EEFDE57", + "ASGLifecycleHookDrainHookRoleD640316C" + ] + }, "EC2CapacityProvider5A2E35CD": { "Type": "AWS::ECS::CapacityProvider", "Properties": { @@ -616,7 +903,7 @@ "Status": "ENABLED", "TargetCapacity": 100 }, - "ManagedTerminationProtection": "ENABLED" + "ManagedTerminationProtection": "DISABLED" } } }, diff --git a/packages/@aws-cdk/aws-ecs/test/ec2/integ.capacity-provider.ts b/packages/@aws-cdk/aws-ecs/test/ec2/integ.capacity-provider.ts index f82ce6a9f9f56..3cc93ebaf2eb2 100644 --- a/packages/@aws-cdk/aws-ecs/test/ec2/integ.capacity-provider.ts +++ b/packages/@aws-cdk/aws-ecs/test/ec2/integ.capacity-provider.ts @@ -10,6 +10,7 @@ const vpc = new ec2.Vpc(stack, 'Vpc', { maxAzs: 2 }); const cluster = new ecs.Cluster(stack, 'EC2CPCluster', { vpc, + enableFargateCapacityProviders: true, }); const taskDefinition = new ecs.Ec2TaskDefinition(stack, 'TaskDef'); @@ -27,6 +28,8 @@ const autoScalingGroup = new autoscaling.AutoScalingGroup(stack, 'ASG', { const cp = new ecs.AsgCapacityProvider(stack, 'EC2CapacityProvider', { autoScalingGroup, + // This is to allow cdk destroy to work; otherwise deletion will hang bc ASG cannot be deleted + enableManagedTerminationProtection: false, }); cluster.addAsgCapacityProvider(cp); @@ -43,4 +46,3 @@ new ecs.Ec2Service(stack, 'EC2Service', { }); app.synth(); - diff --git a/packages/@aws-cdk/aws-ecs/test/fargate/fargate-service.test.ts b/packages/@aws-cdk/aws-ecs/test/fargate/fargate-service.test.ts index 33a5f2baf28d4..f1fdbedc01064 100644 --- a/packages/@aws-cdk/aws-ecs/test/fargate/fargate-service.test.ts +++ b/packages/@aws-cdk/aws-ecs/test/fargate/fargate-service.test.ts @@ -148,6 +148,10 @@ nodeunitShim({ // THEN expect(stack).to(haveResource('AWS::ECS::Cluster', { + CapacityProviders: ABSENT, + })); + + expect(stack).to(haveResource('AWS::ECS::ClusterCapacityProviderAssociations', { CapacityProviders: ['FARGATE', 'FARGATE_SPOT'], })); @@ -234,6 +238,10 @@ nodeunitShim({ // THEN expect(stack).to(haveResource('AWS::ECS::Cluster', { + CapacityProviders: ABSENT, + })); + + expect(stack).to(haveResource('AWS::ECS::ClusterCapacityProviderAssociations', { CapacityProviders: ['FARGATE', 'FARGATE_SPOT'], })); diff --git a/packages/@aws-cdk/aws-ecs/test/fargate/integ.capacity-providers.expected.json b/packages/@aws-cdk/aws-ecs/test/fargate/integ.capacity-providers.expected.json index c0281dc8e14ae..7eb0eada4f244 100644 --- a/packages/@aws-cdk/aws-ecs/test/fargate/integ.capacity-providers.expected.json +++ b/packages/@aws-cdk/aws-ecs/test/fargate/integ.capacity-providers.expected.json @@ -356,12 +356,19 @@ } }, "FargateCPCluster668E71F2": { - "Type": "AWS::ECS::Cluster", + "Type": "AWS::ECS::Cluster" + }, + "FargateCPClusterBFD66A36": { + "Type": "AWS::ECS::ClusterCapacityProviderAssociations", "Properties": { "CapacityProviders": [ "FARGATE", "FARGATE_SPOT" - ] + ], + "Cluster": { + "Ref": "FargateCPCluster668E71F2" + }, + "DefaultCapacityProviderStrategy": [] } }, "TaskDefTaskRole1EDB4A67": {