Skip to content

Commit

Permalink
feat(glue): Job construct does not honor SparkUIProps S3 prefix when …
Browse files Browse the repository at this point in the history
…granting S3 access (#26696)

The `SparkUIProps.prefix` attribute of `glue.Job` now has a consistent format, is validated, and the bucket ReadWrite role is only given permissions to the folder of the bucket specified by the `prefix` if provided.

Adds a suggested format for the prefix: `/foo/bar` and throws and error if the prefix does not start with a `/` or ends without a `/`.

This may be a breaking change for users who configured their prefix in a style different than this enforces. However, I believe it is the correct standardization going forward.

Closes #19862.

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
scanlonp committed Aug 10, 2023
1 parent 68b96c6 commit 42250f1
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 7 deletions.
20 changes: 20 additions & 0 deletions packages/@aws-cdk/aws-glue-alpha/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,26 @@ new glue.Job(this, 'RayJob', {
});
```

### Enable Spark UI

Enable Spark UI setting the `sparkUI` property.

```ts
new glue.Job(this, 'EnableSparkUI', {
jobName: 'EtlJobWithSparkUIPrefix',
sparkUI: {
enabled: true,
},
executable: glue.JobExecutable.pythonEtl({
glueVersion: glue.GlueVersion.V3_0,
pythonVersion: glue.PythonVersion.THREE,
script: glue.Code.fromAsset(path.join(__dirname, 'job-script/hello_world.py')),
}),
});
```

The `sparkUI` property also allows the specification of an s3 bucket and a bucket prefix.

See [documentation](https://docs.aws.amazon.com/glue/latest/dg/add-job.html) for more information on adding jobs in Glue.

## Connection
Expand Down
32 changes: 30 additions & 2 deletions packages/@aws-cdk/aws-glue-alpha/lib/job.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { EOL } from 'os';
import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch';
import * as events from 'aws-cdk-lib/aws-events';
import * as iam from 'aws-cdk-lib/aws-iam';
Expand Down Expand Up @@ -389,8 +390,9 @@ export interface SparkUIProps {

/**
* The path inside the bucket (objects prefix) where the Glue job stores the logs.
* Use format `'/foo/bar'`
*
* @default '/' - the logs will be written at the root of the bucket
* @default - the logs will be written at the root of the bucket
*/
readonly prefix?: string;
}
Expand Down Expand Up @@ -813,8 +815,9 @@ export class Job extends JobBase {
throw new Error('Spark UI is not available for JobType.RAY jobs');
}

this.validatePrefix(props.prefix);
const bucket = props.bucket ?? new s3.Bucket(this, 'SparkUIBucket');
bucket.grantReadWrite(role);
bucket.grantReadWrite(role, this.cleanPrefixForGrant(props.prefix));
const args = {
'--enable-spark-ui': 'true',
'--spark-event-logs-path': bucket.s3UrlForObject(props.prefix),
Expand All @@ -829,6 +832,31 @@ export class Job extends JobBase {
};
}

private validatePrefix(prefix?: string): void {
if (!prefix || cdk.Token.isUnresolved(prefix)) {
// skip validation if prefix is not specified or is a token
return;
}

const errors: string[] = [];

if (!prefix.startsWith('/')) {
errors.push('Prefix must begin with \'/\'');
}

if (prefix.endsWith('/')) {
errors.push('Prefix must not end with \'/\'');
}

if (errors.length > 0) {
throw new Error(`Invalid prefix format (value: ${prefix})${EOL}${errors.join(EOL)}`);
}
}

private cleanPrefixForGrant(prefix?: string): string | undefined {
return prefix !== undefined ? prefix.slice(1) + '/*' : undefined;
}

private setupContinuousLogging(role: iam.IRole, props: ContinuousLoggingProps) {
const args: {[key: string]: string} = {
'--enable-continuous-cloudwatch-log': 'true',
Expand Down
77 changes: 72 additions & 5 deletions packages/@aws-cdk/aws-glue-alpha/test/job.test.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { EOL } from 'os';
import { Template } from 'aws-cdk-lib/assertions';
import * as cloudwatch from 'aws-cdk-lib/aws-cloudwatch';
import * as events from 'aws-cdk-lib/aws-events';
Expand Down Expand Up @@ -629,29 +630,95 @@ describe('Job', () => {
});
});
});

describe('with bucket and path provided', () => {
const sparkUIBucketName = 'sparkbucketname';
const prefix = 'some/path/';
const prefix = '/foob/bart';
const badPrefix = 'foob/bart/';
let sparkUIBucket: s3.IBucket;

const expectedErrors = [
`Invalid prefix format (value: ${badPrefix})`,
'Prefix must begin with \'/\'',
'Prefix must not end with \'/\'',
].join(EOL);
it('fails if path is mis-formatted', () => {
expect(() => new glue.Job(stack, 'BadPrefixJob', {
...defaultProps,
sparkUI: {
enabled: true,
bucket: sparkUIBucket,
prefix: badPrefix,
},
})).toThrow(expectedErrors);
});

beforeEach(() => {
sparkUIBucket = s3.Bucket.fromBucketName(stack, 'BucketId', sparkUIBucketName);
job = new glue.Job(stack, 'Job', {
...defaultProps,
sparkUI: {
enabled: true,
bucket: sparkUIBucket,
prefix,
prefix: prefix,
},
});
});

test('should set spark arguments on the job', () => {
it('should grant the role read/write permissions spark ui bucket prefixed folder', () => {
Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', {
PolicyDocument: {
Statement: [
{
Action: [
's3:GetObject*',
's3:GetBucket*',
's3:List*',
's3:DeleteObject*',
's3:PutObject',
's3:PutObjectLegalHold',
's3:PutObjectRetention',
's3:PutObjectTagging',
's3:PutObjectVersionTagging',
's3:Abort*',
],
Effect: 'Allow',
Resource: [
{
'Fn::Join': [
'',
[
'arn:',
{ Ref: 'AWS::Partition' },
':s3:::sparkbucketname',
],
],
},
{
'Fn::Join': [
'',
[
'arn:',
{ Ref: 'AWS::Partition' },
`:s3:::sparkbucketname${prefix}/*`,
],
],
},
],
},
codeBucketAccessStatement,
],
Version: '2012-10-17',
},
PolicyName: 'JobServiceRoleDefaultPolicy03F68F9D',
Roles: [{ Ref: 'JobServiceRole4F432993' }],
});
});

it('should set spark arguments on the job', () => {
Template.fromStack(stack).hasResourceProperties('AWS::Glue::Job', {
DefaultArguments: {
'--enable-spark-ui': 'true',
'--spark-event-logs-path': `s3://${sparkUIBucketName}/${prefix}`,
'--spark-event-logs-path': `s3://${sparkUIBucketName}${prefix}`,
},
});
});
Expand Down

0 comments on commit 42250f1

Please sign in to comment.