-
Notifications
You must be signed in to change notification settings - Fork 3.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(redshift): optionally reboot Clusters to apply parameter changes #22063
Changes from 22 commits
8b8798c
68ebe28
f262a52
6c1122b
c61f994
6fe9387
63400f6
a5d1ee5
153345a
f31b6dd
a01c334
52916e6
c445b0f
26fbbaa
b732dcf
692d1ab
762f2bf
dae7883
75ebd28
b6c323d
1c69185
80848a9
f91ffeb
a76b887
16ba85f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
// eslint-disable-next-line import/no-extraneous-dependencies | ||
import { Redshift } from 'aws-sdk'; | ||
|
||
const redshift = new Redshift(); | ||
|
||
/**
 * Entry point for the custom resource event.
 *
 * A `Delete` request is a no-op. Every other request type forwards the
 * `ClusterId` and `ParameterGroupName` resource properties to
 * `rebootClusterIfRequired`.
 *
 * @param event the CloudFormation custom resource event being handled
 */
export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent): Promise<void> {
  // Nothing to do when the resource is being removed.
  if (event.RequestType === 'Delete') {
    return;
  }

  const resourceProps = event.ResourceProperties;
  return rebootClusterIfRequired(resourceProps?.ClusterId, resourceProps?.ParameterGroupName);
}
|
||
/**
 * Reboots the cluster when the apply status of the given parameter group
 * indicates that a reboot is needed, polling while the status is still
 * settling.
 *
 * @param clusterId identifier of the cluster to inspect and, if needed, reboot
 * @param parameterGroupName name of the parameter group whose apply status is checked
 * @throws Error when the cluster reports no parameter groups, or none with the given name
 * @throws any error raised by `rebootCluster` other than `InvalidClusterState`
 */
async function rebootClusterIfRequired(clusterId: string, parameterGroupName: string): Promise<void> {
  /**
   * Acts on a parameter apply status after waiting `retryDurationMs` (default 0).
   * Status values: https://docs.aws.amazon.com/redshift/latest/APIReference/API_ClusterParameterStatus.html
   */
  async function executeActionForStatus(status: string, retryDurationMs?: number): Promise<void> {
    await sleep(retryDurationMs ?? 0);
    if (['pending-reboot', 'apply-deferred', 'apply-error'].includes(status)) {
      try {
        await redshift.rebootCluster({ ClusterIdentifier: clusterId }).promise();
      } catch (err) {
        // The reboot was rejected with InvalidClusterState: wait 30 seconds
        // and try the reboot again with the same status.
        if ((err as { code?: string } | null | undefined)?.code === 'InvalidClusterState') {
          return await executeActionForStatus(status, 30000);
        }
        throw err;
      }
      return;
    } else if (['applying', 'retry'].includes(status)) {
      // Not settled yet: fetch the status again and re-evaluate after 30 seconds.
      return executeActionForStatus(await getApplyStatus(), 30000);
    }
    // Any other status requires no action.
    return;
  }

  /**
   * Looks up the apply status of `parameterGroupName` on the cluster.
   * Returns 'retry' when the matching group carries no status.
   */
  async function getApplyStatus(): Promise<string> {
    const clusterDetails = await redshift.describeClusters({ ClusterIdentifier: clusterId }).promise();
    // `?.[0]?.` also covers an empty `Clusters` array, so that case reaches the
    // descriptive error below instead of failing with a TypeError.
    const parameterGroups = clusterDetails.Clusters?.[0]?.ClusterParameterGroups;
    if (parameterGroups === undefined) {
      throw new Error(`Unable to find any Parameter Groups associated with ClusterId "${clusterId}".`);
    }
    for (const group of parameterGroups) {
      if (group.ParameterGroupName === parameterGroupName) {
        return group.ParameterApplyStatus ?? 'retry';
      }
    }
    throw new Error(`Unable to find Parameter Group named "${parameterGroupName}" associated with ClusterId "${clusterId}".`);
  }

  return executeActionForStatus(await getApplyStatus());
}
|
||
/**
 * Returns a promise that resolves once a `setTimeout` of `ms` milliseconds fires.
 *
 * @param ms delay passed to `setTimeout`, in milliseconds
 */
function sleep(ms: number) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,18 @@ | ||
import * as path from 'path'; | ||
import * as ec2 from '@aws-cdk/aws-ec2'; | ||
import * as iam from '@aws-cdk/aws-iam'; | ||
import * as kms from '@aws-cdk/aws-kms'; | ||
import * as lambda from '@aws-cdk/aws-lambda'; | ||
import * as s3 from '@aws-cdk/aws-s3'; | ||
import * as secretsmanager from '@aws-cdk/aws-secretsmanager'; | ||
import { Duration, IResource, Lazy, RemovalPolicy, Resource, SecretValue, Token } from '@aws-cdk/core'; | ||
import { AwsCustomResource, AwsCustomResourcePolicy, PhysicalResourceId } from '@aws-cdk/custom-resources'; | ||
import { ArnFormat, CustomResource, Duration, IResource, Lazy, RemovalPolicy, Resource, SecretValue, Stack, Token } from '@aws-cdk/core'; | ||
import { AwsCustomResource, AwsCustomResourcePolicy, PhysicalResourceId, Provider } from '@aws-cdk/custom-resources'; | ||
import { Construct } from 'constructs'; | ||
import { DatabaseSecret } from './database-secret'; | ||
import { Endpoint } from './endpoint'; | ||
import { ClusterParameterGroup, IClusterParameterGroup } from './parameter-group'; | ||
import { CfnCluster } from './redshift.generated'; | ||
import { ClusterSubnetGroup, IClusterSubnetGroup } from './subnet-group'; | ||
|
||
/** | ||
* Possible Node Types to use in the cluster | ||
* used for defining `ClusterProps.nodeType`. | ||
|
@@ -364,6 +365,12 @@ export interface ClusterProps { | |
*/ | ||
readonly elasticIp?: string | ||
|
||
/** | ||
* If this flag is set, the cluster will be rebooted when changes to the cluster's parameter group require a restart to apply. | ||
* @default false | ||
*/ | ||
readonly rebootForParameterChanges?: boolean | ||
|
||
/** | ||
* If this flag is set, Amazon Redshift forces all COPY and UNLOAD traffic between your cluster and your data repositories through your virtual private cloud (VPC). | ||
* | ||
|
@@ -470,6 +477,11 @@ export class Cluster extends ClusterBase { | |
*/ | ||
protected parameterGroup?: IClusterParameterGroup; | ||
|
||
/** | ||
* Whether the cluster will be rebooted when changes to the cluster's parameter group require a restart to apply. | ||
*/ | ||
protected rebootForParameterChangesEnabled?: boolean; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think we need this prop, can we remove it? It seems redundant with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Related to my other comment There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add to this docstring that this is only used to guard against repeated invocations of |
||
|
||
/** | ||
* The ARNs of the roles that will be attached to the cluster. | ||
* | ||
|
@@ -592,7 +604,9 @@ export class Cluster extends ClusterBase { | |
|
||
const defaultPort = ec2.Port.tcp(this.clusterEndpoint.port); | ||
this.connections = new ec2.Connections({ securityGroups, defaultPort }); | ||
|
||
if (props.rebootForParameterChanges) { | ||
this.enableRebootForParameterChanges(); | ||
} | ||
// Add default role if specified and also available in the roles list | ||
if (props.defaultRole) { | ||
if (props.roles?.some(x => x === props.defaultRole)) { | ||
|
@@ -689,6 +703,71 @@ export class Cluster extends ClusterBase { | |
} | ||
} | ||
|
||
/** | ||
* Enables automatic cluster rebooting when changes to the cluster's parameter group require a restart to apply. | ||
*/ | ||
public enableRebootForParameterChanges(): void { | ||
if (!this.rebootForParameterChangesEnabled) { | ||
this.rebootForParameterChangesEnabled = true; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The check on line 607 is all we need for this, no? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe we need it to make sure we don't make a duplicate Custom Resource. I could create class variables to make the function idempotent, but thought that may be overkill There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. making the function idempotent is definitely overkill, and guarding against these potential bugs (an error would be thrown complaining about duplicate construct IDs) is definitely the right call in general. In this case though I don't see how the function could be called twice, as it's only called in the constructor and it's internal-only. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I originally thought about making it constructor only, but It's currently not (example). I figured that exposing the method would be friendlier for situationally enabling the feature under specific conditions (ex. beta/prod stages, using the addToParameterGroup method and then using this method). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmmm okay. I don't love that we have this prop and this internal member, but it is a better user experience to be able to use a convenience method to set it, so we can leave it. Please change the format of this to a guard clause though, eg:
|
||
const rebootFunction = new lambda.SingletonFunction(this, 'RedshiftClusterRebooterFunction', { | ||
uuid: '511e207f-13df-4b8b-b632-c32b30b65ac2', | ||
runtime: lambda.Runtime.NODEJS_16_X, | ||
code: lambda.Code.fromAsset(path.join(__dirname, 'cluster-parameter-change-reboot-handler')), | ||
handler: 'index.handler', | ||
timeout: Duration.seconds(900), | ||
}); | ||
rebootFunction.addToRolePolicy(new iam.PolicyStatement({ | ||
actions: ['redshift:DescribeClusters'], | ||
resources: ['*'], | ||
})); | ||
rebootFunction.addToRolePolicy(new iam.PolicyStatement({ | ||
actions: ['redshift:RebootCluster'], | ||
resources: [ | ||
Stack.of(this).formatArn({ | ||
service: 'redshift', | ||
resource: 'cluster', | ||
resourceName: this.clusterName, | ||
arnFormat: ArnFormat.COLON_RESOURCE_NAME, | ||
}), | ||
], | ||
})); | ||
const provider = new Provider(this, 'ResourceProvider', { | ||
onEventHandler: rebootFunction, | ||
}); | ||
const customResource = new CustomResource(this, 'RedshiftClusterRebooterCustomResource', { | ||
resourceType: 'Custom::RedshiftClusterRebooter', | ||
serviceToken: provider.serviceToken, | ||
properties: { | ||
ClusterId: this.clusterName, | ||
ParameterGroupName: Lazy.string({ | ||
produce: () => { | ||
if (!this.parameterGroup) { | ||
throw new Error('Cannot enable reboot for parameter changes when there is no associated ClusterParameterGroup.'); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Putting implementation details aside I didn't think it would be a good user experience to allow for enabling a feature that does nothing. Using a |
||
} | ||
return this.parameterGroup.clusterParameterGroupName; | ||
}, | ||
}), | ||
ParametersString: Lazy.string({ | ||
produce: () => { | ||
if (!(this.parameterGroup instanceof ClusterParameterGroup)) { | ||
throw new Error('Cannot enable reboot for parameter changes when using an imported parameter group.'); | ||
} | ||
return JSON.stringify(this.parameterGroup.parameters); | ||
}, | ||
}), | ||
}, | ||
}); | ||
Lazy.any({ | ||
produce: () => { | ||
if (!this.parameterGroup) { | ||
throw new Error('Cannot enable reboot for parameter changes when there is no associated ClusterParameterGroup.'); | ||
} | ||
customResource.node.addDependency(this, this.parameterGroup); | ||
}, | ||
}); | ||
} | ||
} | ||
|
||
/** | ||
* Adds default IAM role to cluster. The default IAM role must be already associated to the cluster to be added as the default role. | ||
* | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
// Declaration of the `handler` entry point: accepts a CloudFormation custom resource event and returns a promise that resolves with no value.
export declare function handler(event: AWSLambda.CloudFormationCustomResourceEvent): Promise<void>; |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
// eslint-disable-next-line import/no-extraneous-dependencies | ||
import { Redshift } from 'aws-sdk'; | ||
|
||
const redshift = new Redshift(); | ||
|
||
/**
 * Handles a CloudFormation custom resource event.
 *
 * `Delete` requests return immediately. For any other request type, the
 * `ClusterId` and `ParameterGroupName` resource properties are passed to
 * `rebootClusterIfRequired`.
 *
 * @param event the incoming custom resource event
 */
export async function handler(event: AWSLambda.CloudFormationCustomResourceEvent): Promise<void> {
  const isDelete = event.RequestType === 'Delete';
  if (isDelete) {
    // Removal of the resource requires no action.
    return;
  }

  const properties = event.ResourceProperties;
  return rebootClusterIfRequired(properties?.ClusterId, properties?.ParameterGroupName);
}
|
||
/**
 * Reboots the cluster when the apply status of the given parameter group
 * indicates that a reboot is needed, polling while the status is still
 * settling.
 *
 * @param clusterId identifier of the cluster to inspect and, if needed, reboot
 * @param parameterGroupName name of the parameter group whose apply status is checked
 * @throws Error when the cluster reports no parameter groups, or none with the given name
 * @throws any error raised by `rebootCluster` other than `InvalidClusterState`
 */
async function rebootClusterIfRequired(clusterId: string, parameterGroupName: string): Promise<void> {
  /**
   * Acts on a parameter apply status after waiting `retryDurationMs` (default 0).
   * Status values: https://docs.aws.amazon.com/redshift/latest/APIReference/API_ClusterParameterStatus.html
   */
  async function executeActionForStatus(status: string, retryDurationMs?: number): Promise<void> {
    await sleep(retryDurationMs ?? 0);
    if (['pending-reboot', 'apply-deferred', 'apply-error', 'unknown-error'].includes(status)) {
      try {
        await redshift.rebootCluster({ ClusterIdentifier: clusterId }).promise();
      } catch (err) {
        // The reboot was rejected with InvalidClusterState: wait 30 seconds
        // and try the reboot again with the same status.
        if ((err as { code?: string } | null | undefined)?.code === 'InvalidClusterState') {
          return await executeActionForStatus(status, 30000);
        }
        throw err;
      }
      return;
    } else if (['applying', 'retry'].includes(status)) {
      // Not settled yet: fetch the status again and re-evaluate after 30 seconds.
      return executeActionForStatus(await getApplyStatus(), 30000);
    }
    // Any other status requires no action.
    return;
  }

  /**
   * Looks up the apply status of `parameterGroupName` on the cluster.
   * Returns 'retry' when the matching group carries no status.
   */
  async function getApplyStatus(): Promise<string> {
    const clusterDetails = await redshift.describeClusters({ ClusterIdentifier: clusterId }).promise();
    // `?.[0]?.` also covers an empty `Clusters` array, so that case reaches the
    // descriptive error below instead of failing with a TypeError.
    const parameterGroups = clusterDetails.Clusters?.[0]?.ClusterParameterGroups;
    if (parameterGroups === undefined) {
      throw new Error(`Unable to find any Parameter Groups associated with ClusterId "${clusterId}".`);
    }
    for (const group of parameterGroups) {
      if (group.ParameterGroupName === parameterGroupName) {
        return group.ParameterApplyStatus ?? 'retry';
      }
    }
    throw new Error(`Unable to find Parameter Group named "${parameterGroupName}" associated with ClusterId "${clusterId}".`);
  }

  return executeActionForStatus(await getApplyStatus());
}
|
||
/**
 * Produces a promise that settles when a timer of `ms` milliseconds elapses.
 *
 * @param ms timer duration handed to `setTimeout`, in milliseconds
 */
function sleep(ms: number) {
  return new Promise((onElapsed) => {
    setTimeout(onElapsed, ms);
  });
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I thought about doing some sort of backoff here, but found checking every 30 seconds to be sufficient in my testing.