diff --git a/packages/@aws-cdk/aws-glue-alpha/README.md b/packages/@aws-cdk/aws-glue-alpha/README.md index 64ac86f8c6b9b..86415d5de15dc 100644 --- a/packages/@aws-cdk/aws-glue-alpha/README.md +++ b/packages/@aws-cdk/aws-glue-alpha/README.md @@ -105,6 +105,26 @@ new glue.Job(this, 'RayJob', { }); ``` +### Enable Spark UI + +Enable the Spark UI by setting the `sparkUI` property. + +```ts +new glue.Job(this, 'EnableSparkUI', { + jobName: 'EtlJobWithSparkUIPrefix', + sparkUI: { + enabled: true, + }, + executable: glue.JobExecutable.pythonEtl({ + glueVersion: glue.GlueVersion.V3_0, + pythonVersion: glue.PythonVersion.THREE, + script: glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')), + }), +}); +``` + +The `sparkUI` property also allows the specification of an S3 bucket and a bucket prefix. + See [documentation](https://docs.aws.amazon.com/glue/latest/dg/add-job.html) for more information on adding jobs in Glue. ## Connection diff --git a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts index 6962f801aa3a7..825511c1fdfab 100644 --- a/packages/@aws-cdk/aws-glue-alpha/lib/job.ts +++ b/packages/@aws-cdk/aws-glue-alpha/lib/job.ts @@ -1,3 +1,4 @@ +import { EOL } from 'os'; import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; import * as events from 'aws-cdk-lib/aws-events'; import * as iam from 'aws-cdk-lib/aws-iam'; @@ -389,8 +390,9 @@ export interface SparkUIProps { /** * The path inside the bucket (objects prefix) where the Glue job stores the logs. + * Use format `'/foo/bar'` * - * @default '/' - the logs will be written at the root of the bucket + * @default - the logs will be written at the root of the bucket */ readonly prefix?: string; } @@ -813,8 +815,9 @@ export class Job extends JobBase { throw new Error('Spark UI is not available for JobType.RAY jobs'); } + this.validatePrefix(props.prefix); const bucket = props.bucket ?? 
new s3.Bucket(this, 'SparkUIBucket'); - bucket.grantReadWrite(role); + bucket.grantReadWrite(role, this.cleanPrefixForGrant(props.prefix)); const args = { '--enable-spark-ui': 'true', '--spark-event-logs-path': bucket.s3UrlForObject(props.prefix), @@ -829,6 +832,31 @@ export class Job extends JobBase { }; } + private validatePrefix(prefix?: string): void { + if (!prefix || cdk.Token.isUnresolved(prefix)) { + // skip validation if prefix is not specified or is a token + return; + } + + const errors: string[] = []; + + if (!prefix.startsWith('/')) { + errors.push('Prefix must begin with \'/\''); + } + + if (prefix.endsWith('/')) { + errors.push('Prefix must not end with \'/\''); + } + + if (errors.length > 0) { + throw new Error(`Invalid prefix format (value: ${prefix})${EOL}${errors.join(EOL)}`); + } + } + + private cleanPrefixForGrant(prefix?: string): string | undefined { + return prefix !== undefined ? prefix.slice(1) + '/*' : undefined; + } + private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) { const args: {[key: string]: string} = { '--enable-continuous-cloudwatch-log': 'true', diff --git a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts index 81245f43929af..cfea34c396147 100644 --- a/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts +++ b/packages/@aws-cdk/aws-glue-alpha/test/job.test.ts @@ -1,3 +1,4 @@ +import { EOL } from 'os'; import { Template } from 'aws-cdk-lib/assertions'; import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch'; import * as events from 'aws-cdk-lib/aws-events'; @@ -629,12 +630,28 @@ describe('Job', () => { }); }); }); - describe('with bucket and path provided', () => { const sparkUIBucketName = 'sparkbucketname'; - const prefix = 'some/path/'; + const prefix = '/foob/bart'; + const badPrefix = 'foob/bart/'; let sparkUIBucket: s3.IBucket; + const expectedErrors = [ + `Invalid prefix format (value: ${badPrefix})`, + 'Prefix must begin with \'/\'', + 'Prefix 
must not end with \'/\'', + ].join(EOL); + it('fails if path is mis-formatted', () => { + expect(() => new glue.Job(stack, 'BadPrefixJob', { + ...defaultProps, + sparkUI: { + enabled: true, + bucket: sparkUIBucket, + prefix: badPrefix, + }, + })).toThrow(expectedErrors); + }); + beforeEach(() => { sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName); job = new glue.Job(stack, 'Job', { @@ -642,16 +659,66 @@ describe('Job', () => { sparkUI: { enabled: true, bucket: sparkUIBucket, - prefix, + prefix: prefix, }, }); }); - test('should set spark arguments on the job', () => { + it('should grant the role read/write permissions spark ui bucket prefixed folder', () => { + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:PutObjectLegalHold', + 's3:PutObjectRetention', + 's3:PutObjectTagging', + 's3:PutObjectVersionTagging', + 's3:Abort*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':s3:::sparkbucketname', + ], + ], + }, + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + `:s3:::sparkbucketname${prefix}/*`, + ], + ], + }, + ], + }, + codeBucketAccessStatement, + ], + Version: '2012-10-17', + }, + PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D', + Roles: [{ Ref: 'JobServiceRole4F432993' }], + }); + }); + + it('should set spark arguments on the job', () => { Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', { DefaultArguments: { '--enable-spark-ui': 'true', - '--spark-event-logs-path': `s3://${sparkUIBucketName}/${prefix}`, + '--spark-event-logs-path': `s3://${sparkUIBucketName}${prefix}`, }, }); });