Skip to content

Commit

Permalink
fix(eks): kubectl resources fail before fargate profiles are created
Browse files Browse the repository at this point in the history
When a Fargate profile is being created, the Kubernetes API server in EKS sometimes rejects requests. This means that kubectl-related resources such as KubernetesResources Helm charts may fail during deployment.

To address this, we add a "barrier resource" (in the form of an SSM parameter) which waits for all fargate profiles to be created before allowing kubectl resources to continue. This is done by the barrier taking a dependency on all FargateProfile resources and all kubectl resources taking a dependency on the barrier.

Fixes #8854


This commit also fixes #8574 by adding `iam:ListAttachedRolePolicies` to the cluster's creation role IAM policy.
  • Loading branch information
Elad Ben-Israel committed Jul 2, 2020
1 parent bae9af4 commit 28f2717
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 26 deletions.
2 changes: 1 addition & 1 deletion packages/@aws-cdk/aws-eks/lib/cluster-resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ export class ClusterResource extends Construct {
}));

this.creationRole.addToPolicy(new iam.PolicyStatement({
actions: [ 'iam:GetRole' ],
actions: [ 'iam:GetRole', 'iam:listAttachedRolePolicies' ],
resources: [ '*' ],
}));

Expand Down
90 changes: 71 additions & 19 deletions packages/@aws-cdk/aws-eks/lib/cluster.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import * as autoscaling from '@aws-cdk/aws-autoscaling';
import * as ec2 from '@aws-cdk/aws-ec2';
import * as iam from '@aws-cdk/aws-iam';
import * as ssm from '@aws-cdk/aws-ssm';
import { CfnOutput, Construct, IResource, Resource, Stack, Tag, Token } from '@aws-cdk/core';
import { CfnOutput, CfnResource, Construct, IResource, Resource, Stack, Tag, Token } from '@aws-cdk/core';
import * as fs from 'fs';
import * as path from 'path';
import * as YAML from 'yaml';
Expand Down Expand Up @@ -392,6 +392,20 @@ export class Cluster extends Resource implements ICluster {

private readonly version: string | undefined;

/**
* A dummy CloudFormation resource that is used as a wait barrier which
* represents that the cluster is ready to receive "kubectl" commands.
*
* Specifically, all fargate profiles are automatically added as a dependency
* of this barrier, which means that it will only be "signaled" when all
* fargate profiles have been successfully created.
*
* When kubectl resources call `_attachKubectlResourceScope()`, this resource
* is added as their dependency which implies that they can only be deployed
* after the cluster is ready.
*/
private readonly _kubectlReadyBarrier?: CfnResource;

/**
* Initiates an EKS Cluster with the supplied arguments
*
Expand Down Expand Up @@ -448,6 +462,18 @@ export class Cluster extends Resource implements ICluster {
if (this.kubectlEnabled) {
resource = new ClusterResource(this, 'Resource', clusterProps);
this._clusterResource = resource;

// we use an SSM parameter as a barrier because it's free and fast.
this._kubectlReadyBarrier = new CfnResource(this, 'KubectlReadyBarrier', {
type: 'AWS::SSM::Parameter',
properties: {
Type: 'String',
Value: 'aws:cdk:eks:kubectl-ready',
},
});

// add the cluster resource itself as a dependency of the barrier
this._kubectlReadyBarrier.node.addDependency(this._clusterResource);
} else {
resource = new CfnCluster(this, 'Resource', clusterProps);
}
Expand Down Expand Up @@ -502,7 +528,7 @@ export class Cluster extends Resource implements ICluster {
}

if (this.kubectlEnabled) {
this.defineCoreDnsComputeType(props.coreDnsComputeType || CoreDnsComputeType.EC2);
this.defineCoreDnsComputeType(props.coreDnsComputeType ?? CoreDnsComputeType.EC2);
}
}

Expand Down Expand Up @@ -794,33 +820,59 @@ export class Cluster extends Resource implements ICluster {
}

/**
* Returns the custom resource provider for kubectl-related resources.
* Internal API used by `FargateProfile` to keep inventory of Fargate profiles associated with
* this cluster, for the sake of ensuring the profiles are created sequentially.
*
* @returns the list of FargateProfiles attached to this cluster, including the one just attached.
* @internal
*/
public get _kubectlProvider(): KubectlProvider {
if (!this._clusterResource) {
throw new Error('Unable to perform this operation since kubectl is not enabled for this cluster');
}

const uid = '@aws-cdk/aws-eks.KubectlProvider';
const provider = this.stack.node.tryFindChild(uid) as KubectlProvider || new KubectlProvider(this.stack, uid);
public _attachFargateProfile(fargateProfile: FargateProfile): FargateProfile[] {
this._fargateProfiles.push(fargateProfile);

// allow the kubectl provider to assume the cluster creation role.
this._clusterResource.addTrustedRole(provider.role);
// add all profiles as a dependency of the "kubectl-ready" barrier because all kubectl-
// resources can only be deployed after all fargate profiles are created.
if (this._kubectlReadyBarrier) {
this._kubectlReadyBarrier.node.addDependency(fargateProfile);
}

return provider;
return this._fargateProfiles;
}

/**
* Internal API used by `FargateProfile` to keep inventory of Fargate profiles associated with
* this cluster, for the sake of ensuring the profiles are created sequentially.
* Adds a resource scope that requires `kubectl` to this cluster and returns
* the `KubectlProvider` which is the custom resource provider that should be
* used as the resource provider.
*
* Called from `HelmResource` and `KubernetesResource`
*
* @property resourceScope the construct scope in which kubectl resources are defined.
*
* @returns the list of FargateProfiles attached to this cluster, including the one just attached.
* @internal
*/
public _attachFargateProfile(fargateProfile: FargateProfile): FargateProfile[] {
this._fargateProfiles.push(fargateProfile);
return this._fargateProfiles;
public _attachKubectlResourceScope(resourceScope: Construct): KubectlProvider {
const uid = '@aws-cdk/aws-eks.KubectlProvider';

if (!this._clusterResource) {
throw new Error('Unable to perform this operation since kubectl is not enabled for this cluster');
}

// singleton
let provider = this.stack.node.tryFindChild(uid) as KubectlProvider;
if (!provider) {
// create the provider.
provider = new KubectlProvider(this.stack, uid);

// allow the kubectl provider to assume the cluster creation role.
this._clusterResource.addTrustedRole(provider.role);
}

if (!this._kubectlReadyBarrier) {
throw new Error('unexpected: kubectl enabled clusters should have a kubectl-ready barrier resource');
}

resourceScope.node.addDependency(this._kubectlReadyBarrier);

return provider;
}

/**
Expand Down
2 changes: 1 addition & 1 deletion packages/@aws-cdk/aws-eks/lib/helm-chart.ts
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ export class HelmChart extends Construct {

const stack = Stack.of(this);

const provider = props.cluster._kubectlProvider;
const provider = props.cluster._attachKubectlResourceScope(this);

const timeout = props.timeout?.toSeconds();
if (timeout && timeout > 900) {
Expand Down
2 changes: 1 addition & 1 deletion packages/@aws-cdk/aws-eks/lib/k8s-patch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ export class KubernetesPatch extends Construct {
super(scope, id);

const stack = Stack.of(this);
const provider = props.cluster._kubectlProvider;
const provider = props.cluster._attachKubectlResourceScope(this);

new CustomResource(this, 'Resource', {
serviceToken: provider.serviceToken,
Expand Down
2 changes: 1 addition & 1 deletion packages/@aws-cdk/aws-eks/lib/k8s-resource.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ export class KubernetesResource extends Construct {
super(scope, id);

const stack = Stack.of(this);
const provider = props.cluster._kubectlProvider;
const provider = props.cluster._attachKubectlResourceScope(this);

new CustomResource(this, 'Resource', {
serviceToken: provider.serviceToken,
Expand Down
1 change: 1 addition & 0 deletions packages/@aws-cdk/aws-eks/lib/kubectl-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ export class KubectlProvider extends NestedStack {
runtime: lambda.Runtime.PYTHON_3_7,
handler: 'index.handler',
timeout: Duration.minutes(15),
description: 'onEvent handler for EKS kubectl resource provider',
layers: [ KubectlLayer.getOrCreate(this, { version: '2.0.0' }) ],
memorySize: 256,
});
Expand Down
53 changes: 51 additions & 2 deletions packages/@aws-cdk/aws-eks/test/test.cluster.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import * as YAML from 'yaml';
import * as eks from '../lib';
import { KubectlLayer } from '../lib/kubectl-layer';
import { testFixture, testFixtureNoVpc } from './util';
import { HelmChart, KubernetesPatch } from '../lib';

// tslint:disable:max-line-length

Expand Down Expand Up @@ -956,7 +957,7 @@ export = {
},
},
{
Action: 'iam:GetRole',
Action: [ 'iam:GetRole', 'iam:listAttachedRolePolicies' ],
Effect: 'Allow',
Resource: '*',
},
Expand Down Expand Up @@ -1025,7 +1026,7 @@ export = {
Resource: '*',
},
{
Action: 'iam:GetRole',
Action: [ 'iam:GetRole', 'iam:listAttachedRolePolicies' ],
Effect: 'Allow',
Resource: '*',
},
Expand Down Expand Up @@ -1177,4 +1178,52 @@ export = {
}));
test.done();
},

'kubectl resources are always created after all fargate profiles'(test: Test) {
// GIVEN
const { stack, app } = testFixture();
const cluster = new eks.Cluster(stack, 'Cluster');

// WHEN
cluster.addFargateProfile('profile1', { selectors: [ { namespace: 'profile1' } ]});
cluster.addResource('resource1', { foo: 123 });
cluster.addFargateProfile('profile2', { selectors: [ { namespace: 'profile2' } ]});
new HelmChart(stack, 'chart', { cluster, chart: 'mychart' });
cluster.addFargateProfile('profile3', { selectors: [ { namespace: 'profile3' } ]});
new KubernetesPatch(stack, 'patch1', {
cluster,
applyPatch: { foo: 123 },
restorePatch: { bar: 123 },
resourceName: 'foo/bar',
});
cluster.addFargateProfile('profile4', { selectors: [ { namespace: 'profile4' } ]});

// THEN
const template = app.synth().getStackArtifact(stack.artifactId).template;

const barrier = template.Resources.ClusterKubectlReadyBarrier200052AF;

test.deepEqual(barrier.DependsOn, [
'Clusterfargateprofileprofile1PodExecutionRoleE85F87B5',
'Clusterfargateprofileprofile129AEA3C6',
'Clusterfargateprofileprofile2PodExecutionRole22670AF8',
'Clusterfargateprofileprofile233B9A117',
'Clusterfargateprofileprofile3PodExecutionRole475C0D8F',
'Clusterfargateprofileprofile3D06F3076',
'Clusterfargateprofileprofile4PodExecutionRole086057FB',
'Clusterfargateprofileprofile4A0E3BBE8',
'ClusterCreationRoleDefaultPolicyE8BDFC7B',
'ClusterCreationRole360249B6',
'Cluster9EE0221C',
]);

const kubectlResources = [ 'chartF2447AFC', 'patch1B964AC93', 'Clustermanifestresource10B1C9505', 'ClusterAwsAuthmanifestFE51F8AE' ];

// check that all kubectl resources depend on the barrier
for (const r of kubectlResources) {
test.deepEqual(template.Resources[r].DependsOn, [ 'ClusterKubectlReadyBarrier200052AF' ]);
}

test.done();
},
}};
2 changes: 1 addition & 1 deletion packages/@aws-cdk/aws-eks/test/test.fargate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ export = {
Resource: '*',
},
{
Action: 'iam:GetRole',
Action: [ 'iam:GetRole', 'iam:listAttachedRolePolicies' ],
Effect: 'Allow',
Resource: '*',
},
Expand Down

0 comments on commit 28f2717

Please sign in to comment.