Skip to content

Commit

Permalink
feat(stepfunctions-tasks): support databrew startJobRun task (#12532)
Browse files Browse the repository at this point in the history
This adds support for AWS Glue DataBrew StartJobRun API as a task.

Task documentation: https://docs.aws.amazon.com/step-functions/latest/dg/connect-databrew.html

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
viditochani authored Jan 25, 2021
1 parent c2f0e15 commit eacd2f7
Show file tree
Hide file tree
Showing 7 changed files with 589 additions and 0 deletions.
13 changes: 13 additions & 0 deletions packages/@aws-cdk/aws-stepfunctions-tasks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ This module is part of the [AWS Cloud Development Kit](https://github.com/aws/aw
- [Modify Instance Fleet](#modify-instance-fleet)
- [Modify Instance Group](#modify-instance-group)
- [Glue](#glue)
- [Glue DataBrew](#glue-databrew)
- [Lambda](#lambda)
- [SageMaker](#sagemaker)
- [Create Training Job](#create-training-job)
Expand Down Expand Up @@ -680,6 +681,18 @@ new GlueStartJobRun(stack, 'Task', {
});
```

## Glue DataBrew

Step Functions supports [AWS Glue DataBrew](https://docs.aws.amazon.com/step-functions/latest/dg/connect-databrew.html) through the service integration pattern.

You can call the [`StartJobRun`](https://docs.aws.amazon.com/databrew/latest/dg/API_StartJobRun.html) API from a `Task` state.

```ts
new GlueDataBrewStartJobRun(stack, 'Task', {
  name: 'databrew-job',
});
```

## Lambda

[Invoke](https://docs.aws.amazon.com/lambda/latest/dg/API_Invoke.html) a Lambda function.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import * as iam from '@aws-cdk/aws-iam';
import * as sfn from '@aws-cdk/aws-stepfunctions';
import * as cdk from '@aws-cdk/core';
import { Construct } from 'constructs';
import { integrationResourceArn, validatePatternSupported } from '../private/task-utils';

/**
 * Configuration accepted by the `GlueDataBrewStartJobRun` task, in addition
 * to the common task-state options inherited from `sfn.TaskStateBaseProps`.
 *
 * @experimental
 */
export interface GlueDataBrewStartJobRunProps extends sfn.TaskStateBaseProps {
  /**
   * Name of the Glue DataBrew job to start.
   *
   * The value is passed as the `Name` parameter of the task. It may also be
   * an encoded JSON path, in which case the generated IAM policy is not
   * restricted to a single job.
   */
  readonly name: string;
}

/**
 * Step Functions task state that starts an AWS Glue DataBrew job run.
 *
 * The integration pattern defaults to `REQUEST_RESPONSE`; `RUN_JOB` is the
 * only other pattern listed as supported.
 *
 * @see https://docs.aws.amazon.com/step-functions/latest/dg/connect-databrew.html
 * @experimental
 */
export class GlueDataBrewStartJobRun extends sfn.TaskStateBase {

  /** Integration patterns this task accepts. */
  private static readonly SUPPORTED_INTEGRATION_PATTERNS: sfn.IntegrationPattern[] = [
    sfn.IntegrationPattern.REQUEST_RESPONSE,
    sfn.IntegrationPattern.RUN_JOB,
  ];

  protected readonly taskMetrics?: sfn.TaskMetricsConfig;
  protected readonly taskPolicies?: iam.PolicyStatement[];

  /** Pattern resolved in the constructor (caller-supplied or the default). */
  private readonly integrationPattern: sfn.IntegrationPattern;

  /**
   * @experimental
   */
  constructor(scope: Construct, id: string, private readonly props: GlueDataBrewStartJobRunProps) {
    super(scope, id, props);

    const pattern = props.integrationPattern ?? sfn.IntegrationPattern.REQUEST_RESPONSE;
    this.integrationPattern = pattern;
    validatePatternSupported(pattern, GlueDataBrewStartJobRun.SUPPORTED_INTEGRATION_PATTERNS);

    // RUN_JOB is granted stop/list permissions on top of the start permission.
    const grantedActions = pattern === sfn.IntegrationPattern.RUN_JOB
      ? ['databrew:startJobRun', 'databrew:stopJobRun', 'databrew:listJobRuns']
      : ['databrew:startJobRun'];

    // If the name comes from input, we cannot target the policy to a particular ARN prefix reliably.
    const jobResourceName = sfn.JsonPath.isEncodedJsonPath(props.name) ? '*' : props.name;
    const jobArn = cdk.Stack.of(this).formatArn({
      service: 'databrew',
      resource: 'job',
      resourceName: jobResourceName,
    });

    const statement = new iam.PolicyStatement({
      resources: [jobArn],
      actions: grantedActions,
    });
    this.taskPolicies = [statement];
  }

  /**
   * Provides the Glue DataBrew Start Job Run task configuration
   *
   * Returns the `Resource` ARN for the `databrew:startJobRun` integration in
   * the resolved pattern, plus `Parameters` carrying the job name as `Name`.
   *
   * @internal
   */
  protected _renderTask(): any {
    const resourceArn = integrationResourceArn('databrew', 'startJobRun', this.integrationPattern);
    const parameters = sfn.FieldUtils.renderObject({
      Name: this.props.name,
    });

    return {
      Resource: resourceArn,
      Parameters: parameters,
    };
  }
}

1 change: 1 addition & 0 deletions packages/@aws-cdk/aws-stepfunctions-tasks/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,4 @@ export * from './athena/start-query-execution';
export * from './athena/stop-query-execution';
export * from './athena/get-query-execution';
export * from './athena/get-query-results';
export * from './databrew/start-job-run';
2 changes: 2 additions & 0 deletions packages/@aws-cdk/aws-stepfunctions-tasks/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
"@aws-cdk/aws-batch": "0.0.0",
"@aws-cdk/aws-cloudwatch": "0.0.0",
"@aws-cdk/aws-codebuild": "0.0.0",
"@aws-cdk/aws-databrew": "0.0.0",
"@aws-cdk/aws-dynamodb": "0.0.0",
"@aws-cdk/aws-ec2": "0.0.0",
"@aws-cdk/aws-ecr": "0.0.0",
Expand All @@ -96,6 +97,7 @@
"@aws-cdk/aws-batch": "0.0.0",
"@aws-cdk/aws-cloudwatch": "0.0.0",
"@aws-cdk/aws-codebuild": "0.0.0",
"@aws-cdk/aws-databrew": "0.0.0",
"@aws-cdk/aws-dynamodb": "0.0.0",
"@aws-cdk/aws-ec2": "0.0.0",
"@aws-cdk/aws-ecr": "0.0.0",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
{
"Resources": {
"JobOutputBucketACE3BC7B": {
"Type": "AWS::S3::Bucket",
"UpdateReplacePolicy": "Delete",
"DeletionPolicy": "Delete"
},
"DataBrewRole7E60F80D": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Statement": [
{
"Action": "sts:AssumeRole",
"Effect": "Allow",
"Principal": {
"Service": "databrew.amazonaws.com"
}
}
],
"Version": "2012-10-17"
},
"ManagedPolicyArns": [
"arn:aws:iam::aws:policy/service-role/AWSGlueDataBrewServiceRole"
],
"Path": "/",
"Policies": [
{
"PolicyDocument": {
"Statement": [
{
"Action": [
"s3:GetObject",
"s3:PutObject",
"s3:DeleteObject",
"s3:ListBucket"
],
"Effect": "Allow",
"Resource": [
"arn:aws:s3:::databrew-public-datasets-test-region/*",
"arn:aws:s3:::databrew-public-datasets-test-region",
{
"Fn::Join": [
"",
[
{
"Fn::GetAtt": [
"JobOutputBucketACE3BC7B",
"Arn"
]
},
"/*"
]
]
},
{
"Fn::GetAtt": [
"JobOutputBucketACE3BC7B",
"Arn"
]
}
]
}
],
"Version": "2012-10-17"
},
"PolicyName": "DataBrewPolicy"
}
]
}
},
"DataBrewRecipe": {
"Type": "AWS::DataBrew::Recipe",
"Properties": {
"Name": "recipe-1",
"Steps": [
{
"Action": {
"Operation": "UPPER_CASE",
"Parameters": {
"sourceColumn": "description"
}
}
},
{
"Action": {
"Operation": "DELETE",
"Parameters": {
"sourceColumn": "doc_id"
}
}
}
]
}
},
"DataBrewDataset": {
"Type": "AWS::DataBrew::Dataset",
"Properties": {
"Input": {
"S3InputDefinition": {
"Bucket": "databrew-public-datasets-test-region",
"Key": "votes.csv"
}
},
"Name": "dataset-1"
}
},
"DataBrewProject": {
"Type": "AWS::DataBrew::Project",
"Properties": {
"DatasetName": "dataset-1",
"Name": "project-1",
"RecipeName": "recipe-1",
"RoleArn": {
"Fn::GetAtt": [
"DataBrewRole7E60F80D",
"Arn"
]
}
},
"DependsOn": [
"DataBrewDataset",
"DataBrewRecipe"
]
},
"DataBrewJob": {
"Type": "AWS::DataBrew::Job",
"Properties": {
"Name": "job-1",
"RoleArn": {
"Fn::GetAtt": [
"DataBrewRole7E60F80D",
"Arn"
]
},
"Type": "RECIPE",
"Outputs": [
{
"Location": {
"Bucket": {
"Ref": "JobOutputBucketACE3BC7B"
}
}
}
],
"ProjectName": "project-1"
},
"DependsOn": [
"DataBrewProject"
]
},
"StateMachineRoleB840431D": {
"Type": "AWS::IAM::Role",
"Properties": {
"AssumeRolePolicyDocument": {
"Statement": [
{
"Action": "sts:AssumeRole",
"Effect": "Allow",
"Principal": {
"Service": {
"Fn::Join": [
"",
[
"states.",
{
"Ref": "AWS::Region"
},
".amazonaws.com"
]
]
}
}
}
],
"Version": "2012-10-17"
}
}
},
"StateMachineRoleDefaultPolicyDF1E6607": {
"Type": "AWS::IAM::Policy",
"Properties": {
"PolicyDocument": {
"Statement": [
{
"Action": "databrew:startJobRun",
"Effect": "Allow",
"Resource": {
"Fn::Join": [
"",
[
"arn:",
{
"Ref": "AWS::Partition"
},
":databrew:",
{
"Ref": "AWS::Region"
},
":",
{
"Ref": "AWS::AccountId"
},
":job/job-1"
]
]
}
}
],
"Version": "2012-10-17"
},
"PolicyName": "StateMachineRoleDefaultPolicyDF1E6607",
"Roles": [
{
"Ref": "StateMachineRoleB840431D"
}
]
}
},
"StateMachine2E01A3A5": {
"Type": "AWS::StepFunctions::StateMachine",
"Properties": {
"RoleArn": {
"Fn::GetAtt": [
"StateMachineRoleB840431D",
"Arn"
]
},
"DefinitionString": {
"Fn::Join": [
"",
[
"{\"StartAt\":\"Start DataBrew Job run\",\"States\":{\"Start DataBrew Job run\":{\"End\":true,\"Type\":\"Task\",\"Resource\":\"arn:",
{
"Ref": "AWS::Partition"
},
":states:::databrew:startJobRun\",\"Parameters\":{\"Name\":\"job-1\"}}},\"TimeoutSeconds\":30}"
]
]
}
},
"DependsOn": [
"StateMachineRoleDefaultPolicyDF1E6607",
"StateMachineRoleB840431D"
]
}
},
"Outputs": {
"stateMachineArn": {
"Value": {
"Ref": "StateMachine2E01A3A5"
}
}
}
}
Loading

0 comments on commit eacd2f7

Please sign in to comment.