Skip to content

Commit

Permalink
feat(aws-batch): fargate support for batch jobs
Browse files Browse the repository at this point in the history
closes: #13590
  • Loading branch information
kokachev committed Mar 16, 2021
1 parent e80a98a commit f3eadba
Show file tree
Hide file tree
Showing 5 changed files with 378 additions and 26 deletions.
17 changes: 16 additions & 1 deletion packages/@aws-cdk/aws-batch/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ For more information on **AWS Batch** visit the [AWS Docs for Batch](https://doc

## Compute Environment

At the core of AWS Batch is the compute environment. All batch jobs are processed within a compute environment, which uses resource like OnDemand or Spot EC2 instances.
At the core of AWS Batch is the compute environment. All batch jobs are processed within a compute environment, which uses resources such as OnDemand/Spot EC2 instances or Fargate.

In **MANAGED** mode, AWS will handle the provisioning of compute resources to accommodate the demand. Otherwise, in **UNMANAGED** mode, you will need to manage the provisioning of those resources.

Expand Down Expand Up @@ -74,6 +74,21 @@ const spotEnvironment = new batch.ComputeEnvironment(stack, 'MySpotEnvironment',
});
```

### Fargate Compute Environment

It is possible to have AWS Batch submit jobs to be run on Fargate compute resources. Below is an example of how this can be done:

```ts
const vpc = new ec2.Vpc(this, 'VPC');

const spotEnvironment = new batch.ComputeEnvironment(stack, 'MyFargateEnvironment', {
computeResources: {
type: batch.ComputeResourceType.FARGATE_SPOT,
vpc,
},
});
```

### Understanding Progressive Allocation Strategies

AWS Batch uses an [allocation strategy](https://docs.aws.amazon.com/batch/latest/userguide/allocation-strategies.html) to determine what compute resource will efficiently handle incoming job requests. By default, **BEST_FIT** will pick an available compute instance based on vCPU requirements. If none exist, the job will wait until resources become available. However, with this strategy, you may have jobs waiting in the queue unnecessarily despite having more powerful instances available. Below is an example of what that situation might look like:
Expand Down
117 changes: 93 additions & 24 deletions packages/@aws-cdk/aws-batch/lib/compute-environment.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,16 @@ export enum ComputeResourceType {
* Resources will be EC2 SpotFleet resources.
*/
SPOT = 'SPOT',

/**
* Resources will be Fargate resources.
*/
FARGATE = 'FARGATE',

/**
* Resources will be Fargate Spot resources.
*/
FARGATE_SPOT = 'FARGATE_SPOT',
}

/**
Expand Down Expand Up @@ -427,41 +437,100 @@ export class ComputeEnvironment extends Resource implements IComputeEnvironment
throw new Error('computeResources is missing but required on a managed compute environment');
}

// Setting a bid percentage is only allowed on SPOT resources +
// Cannot use SPOT_CAPACITY_OPTIMIZED when using ON_DEMAND
if (props.computeResources) {
if (props.computeResources.type === ComputeResourceType.ON_DEMAND) {
// VALIDATE FOR ON_DEMAND
if (props.computeResources.type === ComputeResourceType.FARGATE || props.computeResources.type === ComputeResourceType.FARGATE_SPOT) {
// VALIDATE FOR FARGATE

// Bid percentage is not allowed
// Bid percentage cannot be set for Fargate environments
if (props.computeResources.bidPercentage !== undefined) {
throw new Error('Setting the bid percentage is only allowed for SPOT type resources on a batch compute environment');
throw new Error('Bid percentage must not be set for Fargate compute environments');
}

// SPOT_CAPACITY_OPTIMIZED allocation is not allowed
if (props.computeResources.allocationStrategy && props.computeResources.allocationStrategy === AllocationStrategy.SPOT_CAPACITY_OPTIMIZED) {
throw new Error('The SPOT_CAPACITY_OPTIMIZED allocation strategy is only allowed if the environment is a SPOT type compute environment');
// Allocation strategy cannot be set for Fargate environments
if (props.computeResources.allocationStrategy !== undefined) {
throw new Error('Allocation strategy must not be set for Fargate compute environments');
}
} else {
// VALIDATE FOR SPOT

// Bid percentage must be from 0 - 100
if (props.computeResources.bidPercentage !== undefined &&
(props.computeResources.bidPercentage < 0 || props.computeResources.bidPercentage > 100)) {
throw new Error('Bid percentage can only be a value between 0 and 100');
// Desired vCPUs cannot be set for Fargate environments
if (props.computeResources.desiredvCpus !== undefined) {
throw new Error('Desired vCPUs must not be set for Fargate compute environments');
}

// Image ID cannot be set for Fargate environments
if (props.computeResources.image !== undefined) {
throw new Error('Image must not be set for Fargate compute environments');
}

// Instance types cannot be set for Fargate environments
if (props.computeResources.instanceTypes !== undefined) {
throw new Error('Instance types must not be set for Fargate compute environments');
}
}

if (props.computeResources.minvCpus) {
// minvCpus cannot be less than 0
if (props.computeResources.minvCpus < 0) {
throw new Error('Minimum vCpus for a batch compute environment cannot be less than 0');
// EC2 key pair cannot be set for Fargate environments
if (props.computeResources.ec2KeyPair !== undefined) {
throw new Error('EC2 key pair must not be set for Fargate compute environments');
}

// minvCpus cannot exceed max vCpus
if (props.computeResources.maxvCpus &&
props.computeResources.minvCpus > props.computeResources.maxvCpus) {
throw new Error('Minimum vCpus cannot be greater than the maximum vCpus');
// Instance role cannot be set for Fargate environments
if (props.computeResources.instanceRole !== undefined) {
throw new Error('Instance role must not be set for Fargate compute environments');
}

// Launch template cannot be set for Fargate environments
if (props.computeResources.launchTemplate !== undefined) {
throw new Error('Launch template must not be set for Fargate compute environments');
}

// Min vCPUs cannot be set for Fargate environments
if (props.computeResources.minvCpus !== undefined) {
throw new Error('Min vCPUs must not be set for Fargate compute environments');
}

// Placement group cannot be set for Fargate environments
if (props.computeResources.placementGroup !== undefined) {
throw new Error('Placement group must not be set for Fargate compute environments');
}

// Spot fleet role cannot be set for Fargate environments
if (props.computeResources.spotFleetRole !== undefined) {
throw new Error('Spot fleet role must not be set for Fargate compute environments');
}
} else {
// VALIDATE FOR ON_DEMAND AND SPOT
if (props.computeResources.minvCpus) {
// minvCpus cannot be less than 0
if (props.computeResources.minvCpus < 0) {
throw new Error('Minimum vCpus for a batch compute environment cannot be less than 0');
}

// minvCpus cannot exceed max vCpus
if (props.computeResources.maxvCpus &&
props.computeResources.minvCpus > props.computeResources.maxvCpus) {
throw new Error('Minimum vCpus cannot be greater than the maximum vCpus');
}
}
// Setting a bid percentage is only allowed on SPOT resources +
// Cannot use SPOT_CAPACITY_OPTIMIZED when using ON_DEMAND
if (props.computeResources.type === ComputeResourceType.ON_DEMAND) {
// VALIDATE FOR ON_DEMAND

// Bid percentage is not allowed
if (props.computeResources.bidPercentage !== undefined) {
throw new Error('Setting the bid percentage is only allowed for SPOT type resources on a batch compute environment');
}

// SPOT_CAPACITY_OPTIMIZED allocation is not allowed
if (props.computeResources.allocationStrategy && props.computeResources.allocationStrategy === AllocationStrategy.SPOT_CAPACITY_OPTIMIZED) {
throw new Error('The SPOT_CAPACITY_OPTIMIZED allocation strategy is only allowed if the environment is a SPOT type compute environment');
}
} else if (props.computeResources.type === ComputeResourceType.SPOT) {
// VALIDATE FOR SPOT

// Bid percentage must be from 0 - 100
if (props.computeResources.bidPercentage !== undefined &&
(props.computeResources.bidPercentage < 0 || props.computeResources.bidPercentage > 100)) {
throw new Error('Bid percentage can only be a value between 0 and 100');
}
}
}
}
Expand Down
68 changes: 68 additions & 0 deletions packages/@aws-cdk/aws-batch/lib/job-definition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,21 @@ export enum LogDriver {
SYSLOG = 'syslog'
}

/**
 * Platform capabilities that a job definition can require.
 */
export enum PlatformCapabilities {
  /**
   * The job definition targets an EC2 environment.
   */
  EC2 = 'EC2',

  /**
   * The job definition targets a Fargate environment.
   */
  FARGATE = 'FARGATE',
}

/**
* Log configuration options to send to a custom log driver for the container.
*/
Expand All @@ -77,6 +92,17 @@ export interface LogConfiguration {
readonly secretOptions?: ExposedSecret[];
}


/**
 * Platform settings for jobs that run on Fargate resources.
 */
export interface FargatePlatformConfiguration {
  /**
   * The Fargate platform version the job should use.
   */
  readonly platformVersion: ecs.FargatePlatformVersion;
}

/**
* Properties of a job definition container.
*/
Expand Down Expand Up @@ -197,6 +223,20 @@ export interface JobDefinitionContainer {
* @default - No data volumes will be used.
*/
readonly volumes?: ecs.Volume[];

/**
* The platform configuration for jobs that are running on Fargate resources.
*
* @default - LATEST platform version will be used
*/
readonly fargatePlatformConfiguration?: FargatePlatformConfiguration;

/**
* The IAM role that AWS Batch can assume.
*
* @default - None
*/
readonly executionRole?: iam.IRole;
}

/**
Expand Down Expand Up @@ -252,6 +292,13 @@ export interface JobDefinitionProps {
* @default - undefined
*/
readonly timeout?: Duration;

/**
* The platform capabilities required by the job definition.
*
* @default - undefined
*/
readonly platformCapabilities?: PlatformCapabilities[];
}

/**
Expand Down Expand Up @@ -382,6 +429,8 @@ export class JobDefinition extends Resource implements IJobDefinition {
physicalName: props.jobDefinitionName,
});

this.validateProps(props);

this.imageConfig = new JobDefinitionImageConfig(this, props.container);

const jobDef = new CfnJobDefinition(this, 'Resource', {
Expand All @@ -402,6 +451,7 @@ export class JobDefinition extends Resource implements IJobDefinition {
timeout: {
attemptDurationSeconds: props.timeout ? props.timeout.toSeconds() : undefined,
},
platformCapabilities: props.platformCapabilities || undefined,
});

this.jobDefinitionArn = this.getResourceArnAttribute(jobDef.ref, {
Expand All @@ -426,6 +476,20 @@ export class JobDefinition extends Resource implements IJobDefinition {
return vars;
}

/**
 * Checks the props of a new job definition before any resource is created.
 *
 * A job definition that lists FARGATE among its platform capabilities
 * must provide an execution role on its container.
 *
 * @param props the job definition properties to check
 * @throws Error when FARGATE is requested and the container has no executionRole
 */
private validateProps(props: JobDefinitionProps) {
  if (props === undefined) {
    return;
  }

  // Nothing to validate unless platform capabilities were given explicitly.
  const capabilities = props.platformCapabilities;
  if (capabilities === undefined) {
    return;
  }

  // Only Fargate jobs carry the execution role requirement.
  const targetsFargate = capabilities.includes(PlatformCapabilities.FARGATE);
  if (!targetsFargate) {
    return;
  }

  if (props.container.executionRole === undefined) {
    throw new Error('Fargate job must have executionRole set');
  }
}

private buildJobContainer(container?: JobDefinitionContainer): CfnJobDefinition.ContainerPropertiesProperty | undefined {
if (container === undefined) {
return undefined;
Expand All @@ -437,6 +501,7 @@ export class JobDefinition extends Resource implements IJobDefinition {
image: this.imageConfig.imageName,
instanceType: container.instanceType && container.instanceType.toString(),
jobRoleArn: container.jobRole && container.jobRole.roleArn,
executionRoleArn: container.executionRole && container.executionRole.roleArn,
linuxParameters: container.linuxParams
? { devices: container.linuxParams.renderLinuxParameters().devices }
: undefined,
Expand All @@ -458,6 +523,9 @@ export class JobDefinition extends Resource implements IJobDefinition {
user: container.user,
vcpus: container.vcpus || 1,
volumes: container.volumes,
fargatePlatformConfiguration: container.fargatePlatformConfiguration ? {
platformVersion: container.fargatePlatformConfiguration.platformVersion,
} : undefined,
};
}

Expand Down
Loading

0 comments on commit f3eadba

Please sign in to comment.