From 374fc23d51d4402e5e0102d1f3f1fed035f2bcab Mon Sep 17 00:00:00 2001 From: kaizen3031593 Date: Fri, 10 Dec 2021 18:43:37 -0500 Subject: [PATCH 01/10] first attempt at custom resource for partition index --- packages/@aws-cdk/aws-glue/lib/table.ts | 91 ++++++++++++++++++++++++- packages/@aws-cdk/aws-glue/package.json | 1 + 2 files changed, 91 insertions(+), 1 deletion(-) diff --git a/packages/@aws-cdk/aws-glue/lib/table.ts b/packages/@aws-cdk/aws-glue/lib/table.ts index 14ff98dd3b3c4..9102116aaacf3 100644 --- a/packages/@aws-cdk/aws-glue/lib/table.ts +++ b/packages/@aws-cdk/aws-glue/lib/table.ts @@ -1,13 +1,31 @@ import * as iam from '@aws-cdk/aws-iam'; import * as kms from '@aws-cdk/aws-kms'; import * as s3 from '@aws-cdk/aws-s3'; -import { ArnFormat, Fn, IResource, Resource, Stack } from '@aws-cdk/core'; +import { ArnFormat, Fn, IResource, Names, Resource, Stack } from '@aws-cdk/core'; +import * as cr from '@aws-cdk/custom-resources'; import { Construct } from 'constructs'; import { DataFormat } from './data-format'; import { IDatabase } from './database'; import { CfnTable } from './glue.generated'; import { Column } from './schema'; +/** + * Properties of a Partition Index. + */ +export interface PartitionIndexProps { + /** + * The name of the partition index. + * + * @default - a name will be generated for you. + */ + readonly indexName: string; + + /** + * The partition index keys. These keys + * must be a subet of the table's partition keys. + */ + readonly keys: string[]; +} export interface ITable extends IResource { /** * @attribute @@ -230,6 +248,8 @@ export class Table extends Resource implements ITable { */ public readonly partitionKeys?: Column[]; + private partitionIndecies: number = 0; + constructor(scope: Construct, id: string, props: TableProps) { super(scope, id, { physicalName: props.tableName, @@ -289,6 +309,59 @@ export class Table extends Resource implements ITable { this.node.defaultChild = tableResource; } + /** + * Add a partition index to the table. + * @see https://docs.aws.amazon.com/glue/latest/dg/partition-indexes.html + * + * Partition index keys must be a subset of the tables partition keys. + */ + public addPartitionIndex(props: PartitionIndexProps) { + if (this.partitionIndecies >= 3) { + throw new Error('Table can have a maximum of 3 partition indecies'); + } + this.partitionIndecies++; + this.validatePartitionIndex(props); + const partitionIndex = new cr.AwsCustomResource(this, 'table-partition-index', { + onCreate: { + service: 'Glue', + action: 'createPartitionIndex', + parameters: { + DatabaseName: this.database.databaseName, + TableName: this.tableName, + PartitionIndex: { + IndexName: props.indexName ?? this.generateName(), + Keys: props.keys, + }, + }, + physicalResourceId: cr.PhysicalResourceId.of( + 'CreatePartitionIndex', + ), + }, + policy: cr.AwsCustomResourcePolicy.fromSdkCalls({ + resources: cr.AwsCustomResourcePolicy.ANY_RESOURCE, + }), + }); + + this.grantToUnderlyingResources(partitionIndex, ['glue:UpdateTable']); + } + + private generateName(): string { + return Names.uniqueId(this); + } + + private validatePartitionIndex(props: PartitionIndexProps) { + if (props.indexName && !props.indexName.match(/^[A-Za-z0-9\_\-])/)) { + throw new Error(`Index name can only have letters, numbers, hyphens, or underscores but received ${props.indexName}`); + } + if (!this.partitionKeys || this.partitionKeys.length === 0) { + throw new Error('To create a partition index the table must have partition keys'); + } + const keyNames = this.partitionKeys.map(pk => pk.name); + if (!props.keys.every(k => keyNames.includes(k))) { + throw new Error(`All index keys must also be partition keys. Got ${props.keys} but partition key names are ${keyNames}`); + } + } + /** * Grant read permissions to the table and the underlying data stored in S3 to an IAM principal. * @@ -336,6 +409,22 @@ export class Table extends Resource implements ITable { }); } + /** + * Grant the given identity custom permissions to ALL underlying resources of the table. + * Permissions will be granted to the catalog, the database, and the table. + */ + public grantToUnderlyingResources(grantee: iam.IGrantable, actions: string[]) { + return iam.Grant.addToPrincipal({ + grantee, + resourceArns: [ + this.tableArn, + this.database.catalogArn, + this.database.databaseArn, + ], + actions, + }); + } + private getS3PrefixForGrant() { return this.s3Prefix + '*'; } diff --git a/packages/@aws-cdk/aws-glue/package.json b/packages/@aws-cdk/aws-glue/package.json index 8c622099adadd..fb3b86e48046e 100644 --- a/packages/@aws-cdk/aws-glue/package.json +++ b/packages/@aws-cdk/aws-glue/package.json @@ -99,6 +99,7 @@ "@aws-cdk/aws-s3": "0.0.0", "@aws-cdk/aws-s3-assets": "0.0.0", "@aws-cdk/core": "0.0.0", + "@aws-cdk/custom-resources": "0.0.0", "constructs": "^3.3.69" }, "homepage": "https://github.com/aws/aws-cdk", From e070acd00d20678c146a0a4621b42e044a4bc205 Mon Sep 17 00:00:00 2001 From: kaizen3031593 Date: Mon, 13 Dec 2021 14:40:26 -0500 Subject: [PATCH 02/10] finishing touches to table partition index --- packages/@aws-cdk/aws-glue/lib/table.ts | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/packages/@aws-cdk/aws-glue/lib/table.ts b/packages/@aws-cdk/aws-glue/lib/table.ts index 9102116aaacf3..43bd780d10876 100644 --- a/packages/@aws-cdk/aws-glue/lib/table.ts +++ b/packages/@aws-cdk/aws-glue/lib/table.ts @@ -248,8 +248,6 @@ export class Table extends Resource implements ITable { */ public readonly partitionKeys?: Column[]; - private partitionIndecies: number = 0; - constructor(scope: Construct, id: string, props: TableProps) { super(scope, id, { physicalName: props.tableName, @@ -316,10 +314,6 @@ export class Table extends Resource implements ITable { * Partition index keys must be a subset of the tables partition keys. */ public addPartitionIndex(props: PartitionIndexProps) { - if (this.partitionIndecies >= 3) { - throw new Error('Table can have a maximum of 3 partition indecies'); - } - this.partitionIndecies++; this.validatePartitionIndex(props); const partitionIndex = new cr.AwsCustomResource(this, 'table-partition-index', { onCreate: { @@ -329,7 +323,7 @@ export class Table extends Resource implements ITable { DatabaseName: this.database.databaseName, TableName: this.tableName, PartitionIndex: { - IndexName: props.indexName ?? this.generateName(), + IndexName: props.indexName ?? this.generateName(props.keys), Keys: props.keys, }, }, @@ -345,8 +339,12 @@ export class Table extends Resource implements ITable { this.grantToUnderlyingResources(partitionIndex, ['glue:UpdateTable']); } - private generateName(): string { - return Names.uniqueId(this); + private generateName(keys: string[]): string { + const prefix = keys.join('-'); + const uniqueId = Names.uniqueId(this); + const maxIndexLength = 80; // self-specified + const startIndex = Math.max(0, uniqueId.length - (maxIndexLength - prefix.length)); + return prefix + uniqueId.substring(startIndex); } private validatePartitionIndex(props: PartitionIndexProps) { From e4cb1925566948197fb133931fb366513a42024d Mon Sep 17 00:00:00 2001 From: kaizen3031593 Date: Mon, 13 Dec 2021 15:24:59 -0500 Subject: [PATCH 03/10] refactor some tests --- packages/@aws-cdk/aws-glue/package.json | 1 + packages/@aws-cdk/aws-glue/test/table.test.ts | 834 +++++++++--------- 2 files changed, 420 insertions(+), 415 deletions(-) diff --git a/packages/@aws-cdk/aws-glue/package.json b/packages/@aws-cdk/aws-glue/package.json index fb3b86e48046e..22e1a7927879c 100644 --- a/packages/@aws-cdk/aws-glue/package.json +++ b/packages/@aws-cdk/aws-glue/package.json @@ -114,6 +114,7 @@ "@aws-cdk/aws-s3": "0.0.0", "@aws-cdk/aws-s3-assets": "0.0.0", "@aws-cdk/core": "0.0.0", + "@aws-cdk/custom-resources": "0.0.0", "constructs": "^3.3.69" }, "engines": { diff --git a/packages/@aws-cdk/aws-glue/test/table.test.ts b/packages/@aws-cdk/aws-glue/test/table.test.ts index e4dac06728e19..e4ff31b2d9bcf 100644 --- a/packages/@aws-cdk/aws-glue/test/table.test.ts +++ b/packages/@aws-cdk/aws-glue/test/table.test.ts @@ -933,501 +933,505 @@ test('explicit s3 bucket and with empty prefix', () => { }); }); -test('grants: custom', () => { - const stack = new cdk.Stack(); - const user = new iam.User(stack, 'User'); - const database = new glue.Database(stack, 'Database', { - databaseName: 'database', - }); +describe('grants', () => { + test('custom permissions', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); - const table = new glue.Table(stack, 'Table', { - database, - tableName: 'table', - columns: [{ - name: 'col', - type: glue.Schema.STRING, - }], - compressed: true, - dataFormat: glue.DataFormat.JSON, - }); + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); - table.grant(user, ['glue:UpdateTable']); + table.grant(user, ['glue:UpdateTable']); - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: 'glue:UpdateTable', - Effect: 'Allow', - Resource: { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':glue:', - { - Ref: 'AWS::Region', - }, - ':', - { - Ref: 'AWS::AccountId', - }, - ':table/', - { - Ref: 'DatabaseB269D8BB', - }, - '/', - { - Ref: 'Table4C2D914F', - }, + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: 'glue:UpdateTable', + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', + }, + ], ], - ], + }, }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', }, ], - Version: '2012-10-17', - }, - PolicyName: 'UserDefaultPolicy1F97781E', - Users: [ - { - Ref: 'User00B015A1', - }, - ], + }); }); -}); -test('grants: read only', () => { - const stack = new cdk.Stack(); - const user = new iam.User(stack, 'User'); - const database = new glue.Database(stack, 'Database', { - databaseName: 'database', - }); + test('read only', () => { + const stack = new cdk.Stack(); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); - const table = new glue.Table(stack, 'Table', { - database, - tableName: 'table', - columns: [{ - name: 'col', - type: glue.Schema.STRING, - }], - compressed: true, - dataFormat: glue.DataFormat.JSON, - }); + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); - table.grantRead(user); + table.grantRead(user); - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 'glue:BatchGetPartition', - 'glue:GetPartition', - 'glue:GetPartitions', - 'glue:GetTable', - 'glue:GetTables', - 'glue:GetTableVersion', - 'glue:GetTableVersions', - ], - Effect: 'Allow', - Resource: { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':glue:', - { - Ref: 'AWS::Region', - }, - ':', - { - Ref: 'AWS::AccountId', - }, - ':table/', - { - Ref: 'DatabaseB269D8BB', - }, - '/', - { - Ref: 'Table4C2D914F', - }, - ], + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:BatchGetPartition', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetTableVersion', + 'glue:GetTableVersions', ], - }, - }, - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], - }, - { + Effect: 'Allow', + Resource: { 'Fn::Join': [ '', [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', }, - '/*', ], ], }, - ], + }, + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', }, ], - Version: '2012-10-17', - }, - PolicyName: 'UserDefaultPolicy1F97781E', - Users: [ - { - Ref: 'User00B015A1', - }, - ], + }); }); -}); -testFutureBehavior('grants: write only', s3GrantWriteCtx, cdk.App, (app) => { - const stack = new cdk.Stack(app); - const user = new iam.User(stack, 'User'); - const database = new glue.Database(stack, 'Database', { - databaseName: 'database', - }); + testFutureBehavior('write only', s3GrantWriteCtx, cdk.App, (app) => { + const stack = new cdk.Stack(app); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); - const table = new glue.Table(stack, 'Table', { - database, - tableName: 'table', - columns: [{ - name: 'col', - type: glue.Schema.STRING, - }], - compressed: true, - dataFormat: glue.DataFormat.JSON, - }); + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); - table.grantWrite(user); + table.grantWrite(user); - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 'glue:BatchCreatePartition', - 'glue:BatchDeletePartition', - 'glue:CreatePartition', - 'glue:DeletePartition', - 'glue:UpdatePartition', - ], - Effect: 'Allow', - Resource: { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':glue:', - { - Ref: 'AWS::Region', - }, - ':', - { - Ref: 'AWS::AccountId', - }, - ':table/', - { - Ref: 'DatabaseB269D8BB', - }, - '/', - { - Ref: 'Table4C2D914F', - }, - ], + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:CreatePartition', + 'glue:DeletePartition', + 'glue:UpdatePartition', ], - }, - }, - { - Action: [ - 's3:DeleteObject*', - 's3:PutObject', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], - }, - { + Effect: 'Allow', + Resource: { 'Fn::Join': [ '', [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', }, - '/*', ], ], }, - ], + }, + { + Action: [ + 's3:DeleteObject*', + 's3:PutObject', + 's3:Abort*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', }, ], - Version: '2012-10-17', - }, - PolicyName: 'UserDefaultPolicy1F97781E', - Users: [ - { - Ref: 'User00B015A1', - }, - ], + }); }); -}); -testFutureBehavior('grants: read and write', s3GrantWriteCtx, cdk.App, (app) => { - const stack = new cdk.Stack(app); - const user = new iam.User(stack, 'User'); - const database = new glue.Database(stack, 'Database', { - databaseName: 'database', - }); + testFutureBehavior('read and write', s3GrantWriteCtx, cdk.App, (app) => { + const stack = new cdk.Stack(app); + const user = new iam.User(stack, 'User'); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); - const table = new glue.Table(stack, 'Table', { - database, - tableName: 'table', - columns: [{ - name: 'col', - type: glue.Schema.STRING, - }], - compressed: true, - dataFormat: glue.DataFormat.JSON, - }); + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + compressed: true, + dataFormat: glue.DataFormat.JSON, + }); - table.grantReadWrite(user); + table.grantReadWrite(user); - Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { - PolicyDocument: { - Statement: [ - { - Action: [ - 'glue:BatchGetPartition', - 'glue:GetPartition', - 'glue:GetPartitions', - 'glue:GetTable', - 'glue:GetTables', - 'glue:GetTableVersion', - 'glue:GetTableVersions', - 'glue:BatchCreatePartition', - 'glue:BatchDeletePartition', - 'glue:CreatePartition', - 'glue:DeletePartition', - 'glue:UpdatePartition', - ], - Effect: 'Allow', - Resource: { - 'Fn::Join': [ - '', - [ - 'arn:', - { - Ref: 'AWS::Partition', - }, - ':glue:', - { - Ref: 'AWS::Region', - }, - ':', - { - Ref: 'AWS::AccountId', - }, - ':table/', - { - Ref: 'DatabaseB269D8BB', - }, - '/', - { - Ref: 'Table4C2D914F', - }, - ], + Template.fromStack(stack).hasResourceProperties('AWS::IAM::Policy', { + PolicyDocument: { + Statement: [ + { + Action: [ + 'glue:BatchGetPartition', + 'glue:GetPartition', + 'glue:GetPartitions', + 'glue:GetTable', + 'glue:GetTables', + 'glue:GetTableVersion', + 'glue:GetTableVersions', + 'glue:BatchCreatePartition', + 'glue:BatchDeletePartition', + 'glue:CreatePartition', + 'glue:DeletePartition', + 'glue:UpdatePartition', ], - }, - }, - { - Action: [ - 's3:GetObject*', - 's3:GetBucket*', - 's3:List*', - 's3:DeleteObject*', - 's3:PutObject', - 's3:Abort*', - ], - Effect: 'Allow', - Resource: [ - { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], - }, - { + Effect: 'Allow', + Resource: { 'Fn::Join': [ '', [ + 'arn:', { - 'Fn::GetAtt': [ - 'TableBucketDA42407C', - 'Arn', - ], + Ref: 'AWS::Partition', + }, + ':glue:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':table/', + { + Ref: 'DatabaseB269D8BB', + }, + '/', + { + Ref: 'Table4C2D914F', }, - '/*', ], ], }, - ], + }, + { + Action: [ + 's3:GetObject*', + 's3:GetBucket*', + 's3:List*', + 's3:DeleteObject*', + 's3:PutObject', + 's3:Abort*', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'TableBucketDA42407C', + 'Arn', + ], + }, + '/*', + ], + ], + }, + ], + }, + ], + Version: '2012-10-17', + }, + PolicyName: 'UserDefaultPolicy1F97781E', + Users: [ + { + Ref: 'User00B015A1', }, ], - Version: '2012-10-17', - }, - PolicyName: 'UserDefaultPolicy1F97781E', - Users: [ - { - Ref: 'User00B015A1', - }, - ], + }); }); }); -test('validate: at least one column', () => { - expect(() => { - createTable({ - columns: [], - tableName: 'name', - }); - }).toThrowError('you must specify at least one column for the table'); +describe('validate', () => { + test('at least one column', () => { + expect(() => { + createTable({ + columns: [], + tableName: 'name', + }); + }).toThrowError('you must specify at least one column for the table'); -}); + }); -test('validate: unique column names', () => { - expect(() => { - createTable({ - tableName: 'name', - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }, { - name: 'col1', - type: glue.Schema.STRING, - }], - }); - }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); + test('unique column names', () => { + expect(() => { + createTable({ + tableName: 'name', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }, { + name: 'col1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); -}); + }); + + test('unique partition keys', () => { + expect(() => { + createTable({ + tableName: 'name', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'p1', + type: glue.Schema.STRING, + }, { + name: 'p1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'p1' is duplicated"); -test('validate: unique partition keys', () => { - expect(() => { - createTable({ + }); + + test('column names and partition keys are all unique', () => { + expect(() => { + createTable({ + tableName: 'name', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + }); + }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); + + }); + + test('can not specify an explicit bucket and encryption', () => { + expect(() => { + createTable({ + tableName: 'name', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), + encryption: glue.TableEncryption.KMS, + }); + }).toThrowError('you can not specify encryption settings if you also provide a bucket'); + }); + + test('can explicitly pass bucket if Encryption undefined', () => { + expect(() => createTable({ tableName: 'name', columns: [{ name: 'col1', type: glue.Schema.STRING, }], - partitionKeys: [{ - name: 'p1', - type: glue.Schema.STRING, - }, { - name: 'p1', - type: glue.Schema.STRING, - }], - }); - }).toThrowError("column names and partition keys must be unique, but 'p1' is duplicated"); - -}); + bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), + encryption: undefined, + })).not.toThrow(); + }); -test('validate: column names and partition keys are all unique', () => { - expect(() => { - createTable({ + test('can explicitly pass bucket if Unencrypted', () => { + expect(() => createTable({ tableName: 'name', columns: [{ name: 'col1', type: glue.Schema.STRING, }], - partitionKeys: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - }); - }).toThrowError("column names and partition keys must be unique, but 'col1' is duplicated"); - -}); + bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), + encryption: undefined, + })).not.toThrow(); + }); -test('validate: can not specify an explicit bucket and encryption', () => { - expect(() => { - createTable({ + test('can explicitly pass bucket if ClientSideKms', () => { + expect(() => createTable({ tableName: 'name', columns: [{ name: 'col1', type: glue.Schema.STRING, }], bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), - encryption: glue.TableEncryption.KMS, - }); - }).toThrowError('you can not specify encryption settings if you also provide a bucket'); -}); - -test('validate: can explicitly pass bucket if Encryption undefined', () => { - expect(() => createTable({ - tableName: 'name', - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), - encryption: undefined, - })).not.toThrow(); -}); - -test('validate: can explicitly pass bucket if Unencrypted', () => { - expect(() => createTable({ - tableName: 'name', - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), - encryption: undefined, - })).not.toThrow(); -}); - -test('validate: can explicitly pass bucket if ClientSideKms', () => { - expect(() => createTable({ - tableName: 'name', - columns: [{ - name: 'col1', - type: glue.Schema.STRING, - }], - bucket: new s3.Bucket(new cdk.Stack(), 'Bucket'), - encryption: glue.TableEncryption.CLIENT_SIDE_KMS, - })).not.toThrow(); + encryption: glue.TableEncryption.CLIENT_SIDE_KMS, + })).not.toThrow(); + }); }); test('Table.fromTableArn', () => { From 176eb00ab4a13a81422153b7b703f4b2b3c82866 Mon Sep 17 00:00:00 2001 From: kaizen3031593 Date: Mon, 13 Dec 2021 15:40:43 -0500 Subject: [PATCH 04/10] add synth-time check tests --- packages/@aws-cdk/aws-glue/lib/table.ts | 4 +- packages/@aws-cdk/aws-glue/test/table.test.ts | 76 +++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/packages/@aws-cdk/aws-glue/lib/table.ts b/packages/@aws-cdk/aws-glue/lib/table.ts index 43bd780d10876..b35f7d8913b52 100644 --- a/packages/@aws-cdk/aws-glue/lib/table.ts +++ b/packages/@aws-cdk/aws-glue/lib/table.ts @@ -348,11 +348,11 @@ export class Table extends Resource implements ITable { } private validatePartitionIndex(props: PartitionIndexProps) { - if (props.indexName && !props.indexName.match(/^[A-Za-z0-9\_\-])/)) { + if (props.indexName && !props.indexName.match(/^[A-Za-z0-9\_\-]/)) { throw new Error(`Index name can only have letters, numbers, hyphens, or underscores but received ${props.indexName}`); } if (!this.partitionKeys || this.partitionKeys.length === 0) { - throw new Error('To create a partition index the table must have partition keys'); + throw new Error('The table must have partition keys to create a partition index'); } const keyNames = this.partitionKeys.map(pk => pk.name); if (!props.keys.every(k => keyNames.includes(k))) { diff --git a/packages/@aws-cdk/aws-glue/test/table.test.ts b/packages/@aws-cdk/aws-glue/test/table.test.ts index e4ff31b2d9bcf..420e80d6eae0c 100644 --- a/packages/@aws-cdk/aws-glue/test/table.test.ts +++ b/packages/@aws-cdk/aws-glue/test/table.test.ts @@ -933,6 +933,82 @@ test('explicit s3 bucket and with empty prefix', () => { }); }); +describe('add partition index', () => { + test('fails if no partition keys', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); + + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + dataFormat: glue.DataFormat.JSON, + }); + + expect(() => table.addPartitionIndex({ + indexName: 'my-part', + keys: ['part'], + })).toThrowError(/The table must have partition keys to create a partition index/); + }); + + test('fails if partition index does not match partition keys', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); + + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + }); + + expect(() => table.addPartitionIndex({ + indexName: 'my-part', + keys: ['not-part'], + })).toThrowError(/All index keys must also be partition keys/); + }); + + test('fails with bad index name', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); + + const table = new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + dataFormat: glue.DataFormat.JSON, + }); + + expect(() => table.addPartitionIndex({ + indexName: '$my-part', + keys: ['part'], + })).toThrowError(/Index name can only have letters, numbers, hyphens, or underscores/); + }); +}); + describe('grants', () => { test('custom permissions', () => { const stack = new cdk.Stack(); From 3cae7bd9f3a0f4664aec51bbadb9b00cc036ecd4 Mon Sep 17 00:00:00 2001 From: kaizen3031593 Date: Mon, 13 Dec 2021 16:18:51 -0500 Subject: [PATCH 05/10] integ test partition indecies --- packages/@aws-cdk/aws-glue/lib/table.ts | 2 +- .../test/integ.partition-index.expected.json | 534 ++++++++++++++++++ .../aws-glue/test/integ.partition-index.ts | 76 +++ .../@aws-cdk/aws-glue/test/integ.table.ts | 5 +- 4 files changed, 612 insertions(+), 5 deletions(-) create mode 100644 packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json create mode 100644 packages/@aws-cdk/aws-glue/test/integ.partition-index.ts diff --git a/packages/@aws-cdk/aws-glue/lib/table.ts b/packages/@aws-cdk/aws-glue/lib/table.ts index b35f7d8913b52..ce5fef445ade2 100644 --- a/packages/@aws-cdk/aws-glue/lib/table.ts +++ b/packages/@aws-cdk/aws-glue/lib/table.ts @@ -18,7 +18,7 @@ export interface PartitionIndexProps { * * @default - a name will be generated for you. */ - readonly indexName: string; + readonly indexName?: string; /** * The partition index keys. These keys diff --git a/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json b/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json new file mode 100644 index 0000000000000..3c770de92eff3 --- /dev/null +++ b/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json @@ -0,0 +1,534 @@ +{ + "Resources": { + "DataBucketE3889A50": { + "Type": "AWS::S3::Bucket", + "UpdateReplacePolicy": "Retain", + "DeletionPolicy": "Retain" + }, + "MyDatabase1E2517DB": { + "Type": "AWS::Glue::Database", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseInput": { + "Name": "my_database" + } + } + }, + "CSVTableE499CABA": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "csv_table generated by CDK", + "Name": "csv_table", + "Parameters": { + "classification": "csv", + "has_encrypted_data": false + }, + "PartitionKeys": [ + { + "Name": "year", + "Type": "smallint" + }, + { + "Name": "month", + "Type": "bigint" + } + ], + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "string" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.apache.hadoop.hive.serde2.OpenCSVSerde" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, + "CSVTabletablepartitionindexCustomResourcePolicy956F04EE": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": "glue:CreatePartitionIndex", + "Effect": "Allow", + "Resource": "*" + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "CSVTabletablepartitionindexCustomResourcePolicy956F04EE", + "Roles": [ + { + "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + } + ] + } + }, + "CSVTabletablepartitionindex17C63E99": { + "Type": "Custom::AWS", + "Properties": { + "ServiceToken": { + "Fn::GetAtt": [ + "AWS679f53fac002430cb0da5b7982bd22872D164C4C", + "Arn" + ] + }, + "Create": { + "Fn::Join": [ + "", + [ + "{\"service\":\"Glue\",\"action\":\"createPartitionIndex\",\"parameters\":{\"DatabaseName\":\"", + { + "Ref": "MyDatabase1E2517DB" + }, + "\",\"TableName\":\"", + { + "Ref": "CSVTableE499CABA" + }, + "\",\"PartitionIndex\":{\"IndexName\":\"my-index\",\"Keys\":[\"year\"]}},\"physicalResourceId\":{\"id\":\"CreatePartitionIndex\"}}" + ] + ] + }, + "InstallLatestAwsSdk": true + }, + "DependsOn": [ + "CSVTabletablepartitionindexCustomResourcePolicy956F04EE" + ], + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" + }, + "JSONTable00348F1D": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "json_table generated by CDK", + "Name": "json_table", + "Parameters": { + "classification": "json", + "has_encrypted_data": false + }, + "PartitionKeys": [ + { + "Name": "year", + "Type": "smallint" + }, + { + "Name": "month", + "Type": "bigint" + } + ], + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "string" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, + "JSONTabletablepartitionindexCustomResourcePolicy44275402": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": "glue:CreatePartitionIndex", + "Effect": "Allow", + "Resource": "*" + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "JSONTabletablepartitionindexCustomResourcePolicy44275402", + "Roles": [ + { + "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + } + ] + } + }, + "JSONTabletablepartitionindex2F55AB66": { + "Type": "Custom::AWS", + "Properties": { + "ServiceToken": { + "Fn::GetAtt": [ + "AWS679f53fac002430cb0da5b7982bd22872D164C4C", + "Arn" + ] + }, + "Create": { + "Fn::Join": [ + "", + [ + "{\"service\":\"Glue\",\"action\":\"createPartitionIndex\",\"parameters\":{\"DatabaseName\":\"", + { + "Ref": "MyDatabase1E2517DB" + }, + "\",\"TableName\":\"", + { + "Ref": "JSONTable00348F1D" + }, + "\",\"PartitionIndex\":{\"IndexName\":\"year-monthawscdkglueJSONTable937C116B\",\"Keys\":[\"year\",\"month\"]}},\"physicalResourceId\":{\"id\":\"CreatePartitionIndex\"}}" + ] + ] + }, + "InstallLatestAwsSdk": true + }, + "DependsOn": [ + "JSONTabletablepartitionindexCustomResourcePolicy44275402" + ], + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" + }, + "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2": { + "Type": "AWS::IAM::Role", + "Properties": { + "AssumeRolePolicyDocument": { + "Statement": [ + { + "Action": "sts:AssumeRole", + "Effect": "Allow", + "Principal": { + "Service": "lambda.amazonaws.com" + } + } + ], + "Version": "2012-10-17" + }, + "ManagedPolicyArns": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" + ] + ] + } + ] + } + }, + "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleDefaultPolicyD28E1A5E": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": "glue:UpdateTable", + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":table/", + { + "Ref": "MyDatabase1E2517DB" + }, + "/", + { + "Ref": "CSVTableE499CABA" + } + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":catalog" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":database/", + { + "Ref": "MyDatabase1E2517DB" + } + ] + ] + } + ] + }, + { + "Action": "glue:UpdateTable", + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":table/", + { + "Ref": "MyDatabase1E2517DB" + }, + "/", + { + "Ref": "JSONTable00348F1D" + } + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":catalog" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":database/", + { + "Ref": "MyDatabase1E2517DB" + } + ] + ] + } + ] + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleDefaultPolicyD28E1A5E", + "Roles": [ + { + "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + } + ] + } + }, + "AWS679f53fac002430cb0da5b7982bd22872D164C4C": { + "Type": "AWS::Lambda::Function", + "Properties": { + "Code": { + "S3Bucket": { + "Ref": "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2S3BucketF482197E" + }, + "S3Key": { + "Fn::Join": [ + "", + [ + { + "Fn::Select": [ + 0, + { + "Fn::Split": [ + "||", + { + "Ref": "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2S3VersionKey38B69632" + } + ] + } + ] + }, + { + "Fn::Select": [ + 1, + { + "Fn::Split": [ + "||", + { + "Ref": "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2S3VersionKey38B69632" + } + ] + } + ] + } + ] + ] + } + }, + "Role": { + "Fn::GetAtt": [ + "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2", + "Arn" + ] + }, + "Handler": "index.handler", + "Runtime": "nodejs12.x", + "Timeout": 120 + }, + "DependsOn": [ + "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleDefaultPolicyD28E1A5E", + "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + ] + } + }, + "Parameters": { + "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2S3BucketF482197E": { + "Type": "String", + "Description": "S3 bucket for asset \"6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2\"" + }, + "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2S3VersionKey38B69632": { + "Type": "String", + "Description": "S3 key for asset version \"6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2\"" + }, + "AssetParameters6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2ArtifactHash4BE92B79": { + "Type": "String", + "Description": "Artifact hash for asset \"6ee0a36dd10d630708c265bcf7616c64030040c1bbc383b34150db74b744cad2\"" + } + }, + "Outputs": { + "CatalogId": { + "Value": { + "Ref": "AWS::AccountId" + } + } + } +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts b/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts new file mode 100644 index 0000000000000..e32b563300aef --- /dev/null +++ b/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts @@ -0,0 +1,76 @@ +#!/usr/bin/env node +import * as s3 from '@aws-cdk/aws-s3'; +import * as cdk from '@aws-cdk/core'; +import * as glue from '../lib'; + +/** + * Stack verification steps: + * * aws cloudformation describe-stacks --stack-name aws-cdk-glue --query Stacks[0].Outputs[0].OutputValue + * * aws glue get-partition-indexes --catalog-id --database-name my_database --table-name csv_table + * returns an index with name 'my-index' and one key with name 'year' + * * aws glue get-partition-indexes --catalog-id --database-name my_database --table-name json_table + * returns an index with name 'year-month...' and keys 'year' and 'month' + */ + +const app = new cdk.App(); +const stack = new cdk.Stack(app, 'aws-cdk-glue'); +const bucket = new s3.Bucket(stack, 'DataBucket'); +const database = new glue.Database(stack, 'MyDatabase', { + databaseName: 'my_database', +}); + +const columns = [{ + name: 'col1', + type: glue.Schema.STRING, +}, { + name: 'col2', + type: glue.Schema.STRING, +}, { + name: 'col3', + type: glue.Schema.STRING, +}]; + +const partitionKeys = [{ + name: 'year', + type: glue.Schema.SMALL_INT, +}, { + name: 'month', + type: glue.Schema.BIG_INT, +}]; + +const csvTable = new glue.Table(stack, 'CSVTable', { + database, + bucket, + tableName: 'csv_table', + columns, + partitionKeys, + dataFormat: glue.DataFormat.CSV, +}); + +const jsonTable = new glue.Table(stack, 'JSONTable', { + database, + bucket, + tableName: 'json_table', + columns, + partitionKeys, + dataFormat: glue.DataFormat.JSON, +}); + +const partitionIndexProps: glue.PartitionIndexProps = { + indexName: 'my-index', + keys: ['year'], +}; + +const partitionIndexPropsWithoutName: glue.PartitionIndexProps = { + keys: ['year', 'month'], +}; + +csvTable.addPartitionIndex(partitionIndexProps); +jsonTable.addPartitionIndex(partitionIndexPropsWithoutName); + +// output necessary for stack verification +new cdk.CfnOutput(stack, 'CatalogId', { + value: database.catalogId, +}); + +app.synth(); diff --git a/packages/@aws-cdk/aws-glue/test/integ.table.ts b/packages/@aws-cdk/aws-glue/test/integ.table.ts index 59eede985e5ce..e9d54d659921e 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.table.ts +++ b/packages/@aws-cdk/aws-glue/test/integ.table.ts @@ -81,10 +81,7 @@ const encryptedTable = new glue.Table(stack, 'MyEncryptedTable', { database, tableName: 'my_encrypted_table', columns, - partitionKeys: [{ - name: 'year', - type: glue.Schema.SMALL_INT, - }], + partitionKeys, dataFormat: glue.DataFormat.JSON, encryption: glue.TableEncryption.KMS, encryptionKey: new kms.Key(stack, 'MyKey'), From 6d38daf765dd92d7a39d9bb72bf5b5c842e1980d Mon Sep 17 00:00:00 2001 From: kaizen3031593 Date: Mon, 13 Dec 2021 16:22:04 -0500 Subject: [PATCH 06/10] last comments --- packages/@aws-cdk/aws-glue/lib/table.ts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/packages/@aws-cdk/aws-glue/lib/table.ts b/packages/@aws-cdk/aws-glue/lib/table.ts index ce5fef445ade2..880d90d4a44ab 100644 --- a/packages/@aws-cdk/aws-glue/lib/table.ts +++ b/packages/@aws-cdk/aws-glue/lib/table.ts @@ -308,10 +308,11 @@ export class Table extends Resource implements ITable { } /** - * Add a partition index to the table. - * @see https://docs.aws.amazon.com/glue/latest/dg/partition-indexes.html + * Add a partition index to the table. You can have a maximum of 3 partition + * indecies to a table. Partition index keys must be a subet of the table's + * partition keys. * - * Partition index keys must be a subset of the tables partition keys. + * @see https://docs.aws.amazon.com/glue/latest/dg/partition-indexes.html */ public addPartitionIndex(props: PartitionIndexProps) { this.validatePartitionIndex(props); @@ -323,7 +324,7 @@ export class Table extends Resource implements ITable { DatabaseName: this.database.databaseName, TableName: this.tableName, PartitionIndex: { - IndexName: props.indexName ?? this.generateName(props.keys), + IndexName: props.indexName ?? this.generateIndexName(props.keys), Keys: props.keys, }, }, @@ -339,10 +340,10 @@ export class Table extends Resource implements ITable { this.grantToUnderlyingResources(partitionIndex, ['glue:UpdateTable']); } - private generateName(keys: string[]): string { + private generateIndexName(keys: string[]): string { const prefix = keys.join('-'); const uniqueId = Names.uniqueId(this); - const maxIndexLength = 80; // self-specified + const maxIndexLength = 80; // arbitrarily specified const startIndex = Math.max(0, uniqueId.length - (maxIndexLength - prefix.length)); return prefix + uniqueId.substring(startIndex); } From d6a42cb81d9f51df04b213132789d5cb8bd2fe4d Mon Sep 17 00:00:00 2001 From: kaizen3031593 Date: Mon, 13 Dec 2021 19:13:57 -0500 Subject: [PATCH 07/10] add partitionindexes property too --- packages/@aws-cdk/aws-glue/lib/table.ts | 62 ++++++--- .../test/integ.partition-index.expected.json | 130 ------------------ .../aws-glue/test/integ.partition-index.ts | 27 ++-- packages/@aws-cdk/aws-glue/test/table.test.ts | 32 ++++- 4 files changed, 85 insertions(+), 166 deletions(-) diff --git a/packages/@aws-cdk/aws-glue/lib/table.ts b/packages/@aws-cdk/aws-glue/lib/table.ts index 880d90d4a44ab..c11068fecb5b7 100644 --- a/packages/@aws-cdk/aws-glue/lib/table.ts +++ b/packages/@aws-cdk/aws-glue/lib/table.ts @@ -12,7 +12,7 @@ import { Column } from './schema'; /** * Properties of a Partition Index. */ -export interface PartitionIndexProps { +export interface PartitionIndex { /** * The name of the partition index. * @@ -21,10 +21,11 @@ export interface PartitionIndexProps { readonly indexName?: string; /** - * The partition index keys. These keys - * must be a subet of the table's partition keys. + * The partition key names that comprise the partition + * index. The names must correspond to a name in the + * table's partition keys. */ - readonly keys: string[]; + readonly keyNames: string[]; } export interface ITable extends IResource { /** @@ -120,7 +121,16 @@ export interface TableProps { * * @default table is not partitioned */ - readonly partitionKeys?: Column[] + readonly partitionKeys?: Column[]; + + /** + * Partition indecies on the table. A maximum of 3 indexes + * are allowed on a table. Keys in the index must be part + * of the table's partition keys. + * + * @default table has no partition indecies + */ + readonly partitionIndexes?: PartitionIndex[]; /** * Storage type of the table's data. @@ -248,6 +258,11 @@ export class Table extends Resource implements ITable { */ public readonly partitionKeys?: Column[]; + /** + * This table's partition indexes. + */ + public readonly partitionIndexes?: PartitionIndex[]; + constructor(scope: Construct, id: string, props: TableProps) { super(scope, id, { physicalName: props.tableName, @@ -261,6 +276,14 @@ export class Table extends Resource implements ITable { this.columns = props.columns; this.partitionKeys = props.partitionKeys; + if (props.partitionIndexes) { + if (props.partitionIndexes.length > 3) { + throw new Error(`Maximum number of partition indexes allowed is 3 but got ${props.partitionIndexes.length}`); + } + this.partitionIndexes = props.partitionIndexes; + this.partitionIndexes.forEach((index) => this.addPartitionIndex(index)); + } + this.compressed = props.compressed ?? false; const { bucket, encryption, encryptionKey } = createBucket(this, props); this.bucket = bucket; @@ -314,9 +337,11 @@ export class Table extends Resource implements ITable { * * @see https://docs.aws.amazon.com/glue/latest/dg/partition-indexes.html */ - public addPartitionIndex(props: PartitionIndexProps) { - this.validatePartitionIndex(props); - const partitionIndex = new cr.AwsCustomResource(this, 'table-partition-index', { + public addPartitionIndex(index: PartitionIndex) { + this.validatePartitionIndex(index); + + const indexName = index.indexName ?? this.generateIndexName(index.keyNames); + const partitionIndexCustomResource = new cr.AwsCustomResource(this, `table-partition-index-${indexName}`, { onCreate: { service: 'Glue', action: 'createPartitionIndex', @@ -324,40 +349,39 @@ export class Table extends Resource implements ITable { DatabaseName: this.database.databaseName, TableName: this.tableName, PartitionIndex: { - IndexName: props.indexName ?? this.generateIndexName(props.keys), - Keys: props.keys, + IndexName: indexName, + Keys: index.keyNames, }, }, physicalResourceId: cr.PhysicalResourceId.of( - 'CreatePartitionIndex', + indexName, ), }, policy: cr.AwsCustomResourcePolicy.fromSdkCalls({ resources: cr.AwsCustomResourcePolicy.ANY_RESOURCE, }), }); - - this.grantToUnderlyingResources(partitionIndex, ['glue:UpdateTable']); + this.grantToUnderlyingResources(partitionIndexCustomResource, ['glue:UpdateTable']); } private generateIndexName(keys: string[]): string { - const prefix = keys.join('-'); + const prefix = keys.join('-') + '-'; const uniqueId = Names.uniqueId(this); const maxIndexLength = 80; // arbitrarily specified const startIndex = Math.max(0, uniqueId.length - (maxIndexLength - prefix.length)); return prefix + uniqueId.substring(startIndex); } - private validatePartitionIndex(props: PartitionIndexProps) { - if (props.indexName && !props.indexName.match(/^[A-Za-z0-9\_\-]/)) { - throw new Error(`Index name can only have letters, numbers, hyphens, or underscores but received ${props.indexName}`); + private validatePartitionIndex(index: PartitionIndex) { + if (index.indexName && !index.indexName.match(/^[A-Za-z0-9\_\-]/)) { + throw new Error(`Index name can only have letters, numbers, hyphens, or underscores but received ${index.indexName}`); } if (!this.partitionKeys || this.partitionKeys.length === 0) { throw new Error('The table must have partition keys to create a partition index'); } const keyNames = this.partitionKeys.map(pk => pk.name); - if (!props.keys.every(k => keyNames.includes(k))) { - throw new Error(`All index keys must also be partition keys. Got ${props.keys} but partition key names are ${keyNames}`); + if (!index.keyNames.every(k => keyNames.includes(k))) { + throw new Error(`All index keys must also be partition keys. Got ${index.keyNames} but partition key names are ${keyNames}`); } } diff --git a/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json b/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json index 3c770de92eff3..48780578b55c2 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json +++ b/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json @@ -81,60 +81,6 @@ } } }, - "CSVTabletablepartitionindexCustomResourcePolicy956F04EE": { - "Type": "AWS::IAM::Policy", - "Properties": { - "PolicyDocument": { - "Statement": [ - { - "Action": "glue:CreatePartitionIndex", - "Effect": "Allow", - "Resource": "*" - } - ], - "Version": "2012-10-17" - }, - "PolicyName": "CSVTabletablepartitionindexCustomResourcePolicy956F04EE", - "Roles": [ - { - "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" - } - ] - } - }, - "CSVTabletablepartitionindex17C63E99": { - "Type": "Custom::AWS", - "Properties": { - "ServiceToken": { - "Fn::GetAtt": [ - "AWS679f53fac002430cb0da5b7982bd22872D164C4C", - "Arn" - ] - }, - "Create": { - "Fn::Join": [ - "", - [ - "{\"service\":\"Glue\",\"action\":\"createPartitionIndex\",\"parameters\":{\"DatabaseName\":\"", - { - "Ref": "MyDatabase1E2517DB" - }, - "\",\"TableName\":\"", - { - "Ref": "CSVTableE499CABA" - }, - "\",\"PartitionIndex\":{\"IndexName\":\"my-index\",\"Keys\":[\"year\"]}},\"physicalResourceId\":{\"id\":\"CreatePartitionIndex\"}}" - ] - ] - }, - "InstallLatestAwsSdk": true - }, - "DependsOn": [ - "CSVTabletablepartitionindexCustomResourcePolicy956F04EE" - ], - "UpdateReplacePolicy": "Delete", - "DeletionPolicy": "Delete" - }, "JSONTable00348F1D": { "Type": "AWS::Glue::Table", "Properties": { @@ -290,82 +236,6 @@ "Properties": { "PolicyDocument": { "Statement": [ - { - "Action": "glue:UpdateTable", - "Effect": "Allow", - "Resource": [ - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":glue:", - { - "Ref": "AWS::Region" - }, - ":", - { - "Ref": "AWS::AccountId" - }, - ":table/", - { - "Ref": "MyDatabase1E2517DB" - }, - "/", - { - "Ref": "CSVTableE499CABA" - } - ] - ] - }, - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":glue:", - { - "Ref": "AWS::Region" - }, - ":", - { - "Ref": "AWS::AccountId" - }, - ":catalog" - ] - ] - }, - { - "Fn::Join": [ - "", - [ - "arn:", - { - "Ref": "AWS::Partition" - }, - ":glue:", - { - "Ref": "AWS::Region" - }, - ":", - { - "Ref": "AWS::AccountId" - }, - ":database/", - { - "Ref": "MyDatabase1E2517DB" - } - ] - ] - } - ] - }, { "Action": "glue:UpdateTable", "Effect": "Allow", diff --git a/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts b/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts index e32b563300aef..b7ea8127fba6d 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts +++ b/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts @@ -7,9 +7,9 @@ import * as glue from '../lib'; * Stack verification steps: * * aws cloudformation describe-stacks --stack-name aws-cdk-glue --query Stacks[0].Outputs[0].OutputValue * * aws glue get-partition-indexes --catalog-id --database-name my_database --table-name csv_table - * returns an index with name 'my-index' and one key with name 'year' + * returns two indexes named 'index1' and 'index2' * * aws glue get-partition-indexes --catalog-id --database-name my_database --table-name json_table - * returns an index with name 'year-month...' and keys 'year' and 'month' + * returns an index with name 'year-month...' */ const app = new cdk.App(); @@ -38,12 +38,19 @@ const partitionKeys = [{ type: glue.Schema.BIG_INT, }]; -const csvTable = new glue.Table(stack, 'CSVTable', { +new glue.Table(stack, 'CSVTable', { database, bucket, tableName: 'csv_table', columns, partitionKeys, + partitionIndexes: [{ + indexName: 'index1', + keyNames: ['month'], + }, { + indexName: 'index2', + keyNames: ['month', 'year'], + }], dataFormat: glue.DataFormat.CSV, }); @@ -56,17 +63,9 @@ const jsonTable = new glue.Table(stack, 'JSONTable', { dataFormat: glue.DataFormat.JSON, }); -const partitionIndexProps: glue.PartitionIndexProps = { - indexName: 'my-index', - keys: ['year'], -}; - -const partitionIndexPropsWithoutName: glue.PartitionIndexProps = { - keys: ['year', 'month'], -}; - -csvTable.addPartitionIndex(partitionIndexProps); -jsonTable.addPartitionIndex(partitionIndexPropsWithoutName); +jsonTable.addPartitionIndex({ + keyNames: ['year', 'month'], +}); // output necessary for stack verification new cdk.CfnOutput(stack, 'CatalogId', { diff --git a/packages/@aws-cdk/aws-glue/test/table.test.ts b/packages/@aws-cdk/aws-glue/test/table.test.ts index 420e80d6eae0c..51de2e4b6c922 100644 --- a/packages/@aws-cdk/aws-glue/test/table.test.ts +++ b/packages/@aws-cdk/aws-glue/test/table.test.ts @@ -952,7 +952,7 @@ describe('add partition index', () => { expect(() => table.addPartitionIndex({ indexName: 'my-part', - keys: ['part'], + keyNames: ['part'], })).toThrowError(/The table must have partition keys to create a partition index/); }); @@ -978,7 +978,7 @@ describe('add partition index', () => { expect(() => table.addPartitionIndex({ indexName: 'my-part', - keys: ['not-part'], + keyNames: ['not-part'], })).toThrowError(/All index keys must also be partition keys/); }); @@ -1004,9 +1004,35 @@ describe('add partition index', () => { expect(() => table.addPartitionIndex({ indexName: '$my-part', - keys: ['part'], + keyNames: ['part'], })).toThrowError(/Index name can only have letters, numbers, hyphens, or underscores/); }); + + test('fails with > 3 indexes', () => { + const stack = new cdk.Stack(); + const database = new glue.Database(stack, 'Database', { + databaseName: 'database', + }); + + const index = { + keyNames: ['part'], + }; + + expect(() => new glue.Table(stack, 'Table', { + database, + tableName: 'table', + columns: [{ + name: 'col', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'part', + type: glue.Schema.SMALL_INT, + }], + partitionIndexes: [index, index, index, index], + dataFormat: glue.DataFormat.JSON, + })).toThrowError('Maximum number of partition indexes allowed is 3 but got 4'); + }); }); describe('grants', () => { From 2c606a9b3fd6724696afb06890e5105e782ae53e Mon Sep 17 00:00:00 2001 From: kaizen3031593 Date: Mon, 13 Dec 2021 20:07:19 -0500 Subject: [PATCH 08/10] last touch --- packages/@aws-cdk/aws-glue/lib/table.ts | 36 +- .../test/integ.partition-index.expected.json | 314 ++++++++++++++---- 2 files changed, 280 insertions(+), 70 deletions(-) diff --git a/packages/@aws-cdk/aws-glue/lib/table.ts b/packages/@aws-cdk/aws-glue/lib/table.ts index c11068fecb5b7..48d132bffaec2 100644 --- a/packages/@aws-cdk/aws-glue/lib/table.ts +++ b/packages/@aws-cdk/aws-glue/lib/table.ts @@ -3,6 +3,7 @@ import * as kms from '@aws-cdk/aws-kms'; import * as s3 from '@aws-cdk/aws-s3'; import { ArnFormat, Fn, IResource, Names, Resource, Stack } from '@aws-cdk/core'; import * as cr from '@aws-cdk/custom-resources'; +import { AwsCustomResource } from '@aws-cdk/custom-resources'; import { Construct } from 'constructs'; import { DataFormat } from './data-format'; import { IDatabase } from './database'; @@ -263,6 +264,13 @@ export class Table extends Resource implements ITable { */ public readonly partitionIndexes?: PartitionIndex[]; + /** + * Partition indexes must be created one at a time. To avoid + * race conditions, we store the resource and add dependencies + * each time a new partition index is created. + */ + private partitionIndexCustomResources: AwsCustomResource[] = []; + constructor(scope: Construct, id: string, props: TableProps) { super(scope, id, { physicalName: props.tableName, @@ -276,14 +284,6 @@ export class Table extends Resource implements ITable { this.columns = props.columns; this.partitionKeys = props.partitionKeys; - if (props.partitionIndexes) { - if (props.partitionIndexes.length > 3) { - throw new Error(`Maximum number of partition indexes allowed is 3 but got ${props.partitionIndexes.length}`); - } - this.partitionIndexes = props.partitionIndexes; - this.partitionIndexes.forEach((index) => this.addPartitionIndex(index)); - } - this.compressed = props.compressed ?? false; const { bucket, encryption, encryptionKey } = createBucket(this, props); this.bucket = bucket; @@ -328,6 +328,15 @@ export class Table extends Resource implements ITable { resourceName: `${this.database.databaseName}/${this.tableName}`, }); this.node.defaultChild = tableResource; + + // Partition index creation relies on created table. + if (props.partitionIndexes) { + if (props.partitionIndexes.length > 3) { + throw new Error(`Maximum number of partition indexes allowed is 3 but got ${props.partitionIndexes.length}`); + } + this.partitionIndexes = props.partitionIndexes; + this.partitionIndexes.forEach((index) => this.addPartitionIndex(index)); + } } /** @@ -362,6 +371,17 @@ export class Table extends Resource implements ITable { }), }); this.grantToUnderlyingResources(partitionIndexCustomResource, ['glue:UpdateTable']); + + const numPartitions = this.partitionIndexCustomResources.length; + if (numPartitions > 3) { + throw new Error(`Maximum number of partition indexes allowed is 3 but got ${numPartitions}`); + } + // Depend on previous partition index if possible, to avoid race condition + if (numPartitions > 0) { + this.partitionIndexCustomResources[numPartitions-1].node.addDependency(partitionIndexCustomResource); + } + + this.partitionIndexCustomResources.push(partitionIndexCustomResource); } private generateIndexName(keys: string[]): string { diff --git a/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json b/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json index 48780578b55c2..124d1140c18ba 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json +++ b/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json @@ -81,72 +81,67 @@ } } }, - "JSONTable00348F1D": { - "Type": "AWS::Glue::Table", + "CSVTabletablepartitionindexindex1CustomResourcePolicyCADBE000": { + "Type": "AWS::IAM::Policy", "Properties": { - "CatalogId": { - "Ref": "AWS::AccountId" - }, - "DatabaseName": { - "Ref": "MyDatabase1E2517DB" - }, - "TableInput": { - "Description": "json_table generated by CDK", - "Name": "json_table", - "Parameters": { - "classification": "json", - "has_encrypted_data": false - }, - "PartitionKeys": [ - { - "Name": "year", - "Type": "smallint" - }, + "PolicyDocument": { + "Statement": [ { - "Name": "month", - "Type": "bigint" + "Action": "glue:CreatePartitionIndex", + "Effect": "Allow", + "Resource": "*" } ], - "StorageDescriptor": { - "Columns": [ + "Version": "2012-10-17" + }, + "PolicyName": "CSVTabletablepartitionindexindex1CustomResourcePolicyCADBE000", + "Roles": [ + { + "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + } + ] + }, + "DependsOn": [ + "CSVTabletablepartitionindexindex2CustomResourcePolicyC0A14188", + "CSVTabletablepartitionindexindex2DAFCAA94" + ] + }, + "CSVTabletablepartitionindexindex17C2F0FDC": { + "Type": "Custom::AWS", + "Properties": { + "ServiceToken": { + "Fn::GetAtt": [ + "AWS679f53fac002430cb0da5b7982bd22872D164C4C", + "Arn" + ] + }, + "Create": { + "Fn::Join": [ + "", + [ + "{\"service\":\"Glue\",\"action\":\"createPartitionIndex\",\"parameters\":{\"DatabaseName\":\"", { - "Name": "col1", - "Type": "string" + "Ref": "MyDatabase1E2517DB" }, + "\",\"TableName\":\"", { - "Name": "col2", - "Type": "string" + "Ref": "CSVTableE499CABA" }, - { - "Name": "col3", - "Type": "string" - } - ], - "Compressed": false, - "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", - "Location": { - "Fn::Join": [ - "", - [ - "s3://", - { - "Ref": "DataBucketE3889A50" - }, - "/" - ] - ] - }, - "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", - "SerdeInfo": { - "SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe" - }, - "StoredAsSubDirectories": false - }, - "TableType": "EXTERNAL_TABLE" - } - } + "\",\"PartitionIndex\":{\"IndexName\":\"index1\",\"Keys\":[\"month\"]}},\"physicalResourceId\":{\"id\":\"index1\"}}" + ] + ] + }, + "InstallLatestAwsSdk": true + }, + "DependsOn": [ + "CSVTabletablepartitionindexindex1CustomResourcePolicyCADBE000", + "CSVTabletablepartitionindexindex2CustomResourcePolicyC0A14188", + "CSVTabletablepartitionindexindex2DAFCAA94" + ], + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" }, - "JSONTabletablepartitionindexCustomResourcePolicy44275402": { + "CSVTabletablepartitionindexindex2CustomResourcePolicyC0A14188": { "Type": "AWS::IAM::Policy", "Properties": { "PolicyDocument": { @@ -159,7 +154,7 @@ ], "Version": "2012-10-17" }, - "PolicyName": "JSONTabletablepartitionindexCustomResourcePolicy44275402", + "PolicyName": "CSVTabletablepartitionindexindex2CustomResourcePolicyC0A14188", "Roles": [ { "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" @@ -167,7 +162,7 @@ ] } }, - "JSONTabletablepartitionindex2F55AB66": { + "CSVTabletablepartitionindexindex2DAFCAA94": { "Type": "Custom::AWS", "Properties": { "ServiceToken": { @@ -186,16 +181,16 @@ }, "\",\"TableName\":\"", { - "Ref": "JSONTable00348F1D" + "Ref": "CSVTableE499CABA" }, - "\",\"PartitionIndex\":{\"IndexName\":\"year-monthawscdkglueJSONTable937C116B\",\"Keys\":[\"year\",\"month\"]}},\"physicalResourceId\":{\"id\":\"CreatePartitionIndex\"}}" + "\",\"PartitionIndex\":{\"IndexName\":\"index2\",\"Keys\":[\"month\",\"year\"]}},\"physicalResourceId\":{\"id\":\"index2\"}}" ] ] }, "InstallLatestAwsSdk": true }, "DependsOn": [ - "JSONTabletablepartitionindexCustomResourcePolicy44275402" + "CSVTabletablepartitionindexindex2CustomResourcePolicyC0A14188" ], "UpdateReplacePolicy": "Delete", "DeletionPolicy": "Delete" @@ -236,6 +231,82 @@ "Properties": { "PolicyDocument": { "Statement": [ + { + "Action": "glue:UpdateTable", + "Effect": "Allow", + "Resource": [ + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":table/", + { + "Ref": "MyDatabase1E2517DB" + }, + "/", + { + "Ref": "CSVTableE499CABA" + } + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":catalog" + ] + ] + }, + { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":database/", + { + "Ref": "MyDatabase1E2517DB" + } + ] + ] + } + ] + }, { "Action": "glue:UpdateTable", "Effect": "Allow", @@ -378,6 +449,125 @@ "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleDefaultPolicyD28E1A5E", "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" ] + }, + "JSONTable00348F1D": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "json_table generated by CDK", + "Name": "json_table", + "Parameters": { + "classification": "json", + "has_encrypted_data": false + }, + "PartitionKeys": [ + { + "Name": "year", + "Type": "smallint" + }, + { + "Name": "month", + "Type": "bigint" + } + ], + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "string" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.openx.data.jsonserde.JsonSerDe" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, + "JSONTabletablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicyE66443B9": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": "glue:CreatePartitionIndex", + "Effect": "Allow", + "Resource": "*" + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "JSONTabletablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicyE66443B9", + "Roles": [ + { + "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" + } + ] + } + }, + "JSONTabletablepartitionindexyearmonthawscdkglueJSONTable937C116BAB62E6B0": { + "Type": "Custom::AWS", + "Properties": { + "ServiceToken": { + "Fn::GetAtt": [ + "AWS679f53fac002430cb0da5b7982bd22872D164C4C", + "Arn" + ] + }, + "Create": { + "Fn::Join": [ + "", + [ + "{\"service\":\"Glue\",\"action\":\"createPartitionIndex\",\"parameters\":{\"DatabaseName\":\"", + { + "Ref": "MyDatabase1E2517DB" + }, + "\",\"TableName\":\"", + { + "Ref": "JSONTable00348F1D" + }, + "\",\"PartitionIndex\":{\"IndexName\":\"year-month-awscdkglueJSONTable937C116B\",\"Keys\":[\"year\",\"month\"]}},\"physicalResourceId\":{\"id\":\"year-month-awscdkglueJSONTable937C116B\"}}" + ] + ] + }, + "InstallLatestAwsSdk": true + }, + "DependsOn": [ + "JSONTabletablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicyE66443B9" + ], + "UpdateReplacePolicy": "Delete", + "DeletionPolicy": "Delete" } }, "Parameters": { From d5362f001656dfeecc5d8388b9f1c9ce77190ea4 Mon Sep 17 00:00:00 2001 From: kaizen3031593 Date: Mon, 13 Dec 2021 20:24:07 -0500 Subject: [PATCH 09/10] add readme --- packages/@aws-cdk/aws-glue/README.md | 48 ++++++++++++++++++++++++- packages/@aws-cdk/aws-glue/lib/table.ts | 11 +++--- 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/packages/@aws-cdk/aws-glue/README.md b/packages/@aws-cdk/aws-glue/README.md index 069b572b6cd71..344fa61c00ae4 100644 --- a/packages/@aws-cdk/aws-glue/README.md +++ b/packages/@aws-cdk/aws-glue/README.md @@ -194,7 +194,7 @@ new glue.Table(this, 'MyTable', { By default, an S3 bucket will be created to store the table's data and stored in the bucket root. You can also manually pass the `bucket` and `s3Prefix`: -### Partitions +### Partition Keys To improve query performance, a table can specify `partitionKeys` on which data is stored and queried separately. For example, you might partition a table by `year` and `month` to optimize queries based on a time window: @@ -218,6 +218,52 @@ new glue.Table(this, 'MyTable', { }); ``` +### Partition Indexes + +Another way to improve query performance is to specify partition indexes. If no partition indexes are +present on the table, AWS Glue loads all partitions of the table and filters the loaded partitions using +the query expression. The query takes more time to run as the number of partitiosn increase. With an +index, the query will try to fetch a subset of the partitions instead of loading all partitions of the +table. + +The keys of a partition index must be a subset of the partition keys of the table. You can have a +maximum of 3 partition indexes per table. To specify a partition index, you can use the `partitionIndexes` +property: + +```ts +declare const myDatabase: glue.Database; +new glue.Table(this, 'MyTable', { + database: myDatabase, + tableName: 'my_table', + columns: [{ + name: 'col1', + type: glue.Schema.STRING, + }], + partitionKeys: [{ + name: 'year', + type: glue.Schema.SMALL_INT, + }, { + name: 'month', + type: glue.Schema.SMALL_INT, + }], + partitionIndexes: [{ + indexName: 'my-index', // optional + keyNames: ['year'], + }], // supply up to 3 indexes + dataFormat: glue.DataFormat.JSON, +}); +``` + +Alternatively, you can call the `addPartitionIndex()` function on a table: + +```ts +declare const myTable: glue.Table; +myTable.addPartitionIndex({ + indexName: 'my-index', + keyNames: ['year'], +}); +``` + ## [Encryption](https://docs.aws.amazon.com/athena/latest/ug/encryption.html) You can enable encryption on a Table's data: diff --git a/packages/@aws-cdk/aws-glue/lib/table.ts b/packages/@aws-cdk/aws-glue/lib/table.ts index 48d132bffaec2..1d7a6cd6378dd 100644 --- a/packages/@aws-cdk/aws-glue/lib/table.ts +++ b/packages/@aws-cdk/aws-glue/lib/table.ts @@ -341,12 +341,16 @@ export class Table extends Resource implements ITable { /** * Add a partition index to the table. You can have a maximum of 3 partition - * indecies to a table. Partition index keys must be a subet of the table's + * indexes to a table. Partition index keys must be a subset of the table's * partition keys. * * @see https://docs.aws.amazon.com/glue/latest/dg/partition-indexes.html */ public addPartitionIndex(index: PartitionIndex) { + const numPartitions = this.partitionIndexCustomResources.length; + if (numPartitions > 3) { + throw new Error(`Maximum number of partition indexes allowed is 3 but got ${numPartitions}`); + } this.validatePartitionIndex(index); const indexName = index.indexName ?? this.generateIndexName(index.keyNames); @@ -372,15 +376,10 @@ export class Table extends Resource implements ITable { }); this.grantToUnderlyingResources(partitionIndexCustomResource, ['glue:UpdateTable']); - const numPartitions = this.partitionIndexCustomResources.length; - if (numPartitions > 3) { - throw new Error(`Maximum number of partition indexes allowed is 3 but got ${numPartitions}`); - } // Depend on previous partition index if possible, to avoid race condition if (numPartitions > 0) { this.partitionIndexCustomResources[numPartitions-1].node.addDependency(partitionIndexCustomResource); } - this.partitionIndexCustomResources.push(partitionIndexCustomResource); } From fd1f1c2722da755e074cbd6f9500f3913f3c7311 Mon Sep 17 00:00:00 2001 From: kaizen3031593 Date: Wed, 15 Dec 2021 19:41:00 -0500 Subject: [PATCH 10/10] cr feedback --- packages/@aws-cdk/aws-glue/README.md | 2 +- packages/@aws-cdk/aws-glue/lib/table.ts | 17 ++++------ .../test/integ.partition-index.expected.json | 34 +++++++++---------- .../aws-glue/test/integ.partition-index.ts | 6 ++-- packages/@aws-cdk/aws-glue/test/table.test.ts | 25 ++++++++++---- 5 files changed, 46 insertions(+), 38 deletions(-) diff --git a/packages/@aws-cdk/aws-glue/README.md b/packages/@aws-cdk/aws-glue/README.md index 344fa61c00ae4..639198c67ed9a 100644 --- a/packages/@aws-cdk/aws-glue/README.md +++ b/packages/@aws-cdk/aws-glue/README.md @@ -222,7 +222,7 @@ new glue.Table(this, 'MyTable', { Another way to improve query performance is to specify partition indexes. If no partition indexes are present on the table, AWS Glue loads all partitions of the table and filters the loaded partitions using -the query expression. The query takes more time to run as the number of partitiosn increase. With an +the query expression. The query takes more time to run as the number of partitions increase. With an index, the query will try to fetch a subset of the partitions instead of loading all partitions of the table. diff --git a/packages/@aws-cdk/aws-glue/lib/table.ts b/packages/@aws-cdk/aws-glue/lib/table.ts index 1d7a6cd6378dd..1b17da32e5454 100644 --- a/packages/@aws-cdk/aws-glue/lib/table.ts +++ b/packages/@aws-cdk/aws-glue/lib/table.ts @@ -125,11 +125,11 @@ export interface TableProps { readonly partitionKeys?: Column[]; /** - * Partition indecies on the table. A maximum of 3 indexes + * Partition indexes on the table. A maximum of 3 indexes * are allowed on a table. Keys in the index must be part * of the table's partition keys. * - * @default table has no partition indecies + * @default table has no partition indexes */ readonly partitionIndexes?: PartitionIndex[]; @@ -331,9 +331,6 @@ export class Table extends Resource implements ITable { // Partition index creation relies on created table. if (props.partitionIndexes) { - if (props.partitionIndexes.length > 3) { - throw new Error(`Maximum number of partition indexes allowed is 3 but got ${props.partitionIndexes.length}`); - } this.partitionIndexes = props.partitionIndexes; this.partitionIndexes.forEach((index) => this.addPartitionIndex(index)); } @@ -348,13 +345,13 @@ export class Table extends Resource implements ITable { */ public addPartitionIndex(index: PartitionIndex) { const numPartitions = this.partitionIndexCustomResources.length; - if (numPartitions > 3) { - throw new Error(`Maximum number of partition indexes allowed is 3 but got ${numPartitions}`); + if (numPartitions >= 3) { + throw new Error('Maximum number of partition indexes allowed is 3'); } this.validatePartitionIndex(index); const indexName = index.indexName ?? this.generateIndexName(index.keyNames); - const partitionIndexCustomResource = new cr.AwsCustomResource(this, `table-partition-index-${indexName}`, { + const partitionIndexCustomResource = new cr.AwsCustomResource(this, `partition-index-${indexName}`, { onCreate: { service: 'Glue', action: 'createPartitionIndex', @@ -392,8 +389,8 @@ export class Table extends Resource implements ITable { } private validatePartitionIndex(index: PartitionIndex) { - if (index.indexName && !index.indexName.match(/^[A-Za-z0-9\_\-]/)) { - throw new Error(`Index name can only have letters, numbers, hyphens, or underscores but received ${index.indexName}`); + if (index.indexName !== undefined && (index.indexName.length < 1 || index.indexName.length > 255)) { + throw new Error(`Index name must be between 1 and 255 characters, but got ${index.indexName.length}`); } if (!this.partitionKeys || this.partitionKeys.length === 0) { throw new Error('The table must have partition keys to create a partition index'); diff --git a/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json b/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json index 124d1140c18ba..a4b3cad50cea3 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json +++ b/packages/@aws-cdk/aws-glue/test/integ.partition-index.expected.json @@ -12,7 +12,7 @@ "Ref": "AWS::AccountId" }, "DatabaseInput": { - "Name": "my_database" + "Name": "database" } } }, @@ -81,7 +81,7 @@ } } }, - "CSVTabletablepartitionindexindex1CustomResourcePolicyCADBE000": { + "CSVTablepartitionindexindex1CustomResourcePolicy4983F2A9": { "Type": "AWS::IAM::Policy", "Properties": { "PolicyDocument": { @@ -94,7 +94,7 @@ ], "Version": "2012-10-17" }, - "PolicyName": "CSVTabletablepartitionindexindex1CustomResourcePolicyCADBE000", + "PolicyName": "CSVTablepartitionindexindex1CustomResourcePolicy4983F2A9", "Roles": [ { "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" @@ -102,11 +102,11 @@ ] }, "DependsOn": [ - "CSVTabletablepartitionindexindex2CustomResourcePolicyC0A14188", - "CSVTabletablepartitionindexindex2DAFCAA94" + "CSVTablepartitionindexindex2CustomResourcePolicy4FF1AF9F", + "CSVTablepartitionindexindex29D554319" ] }, - "CSVTabletablepartitionindexindex17C2F0FDC": { + "CSVTablepartitionindexindex16247ABF6": { "Type": "Custom::AWS", "Properties": { "ServiceToken": { @@ -134,14 +134,14 @@ "InstallLatestAwsSdk": true }, "DependsOn": [ - "CSVTabletablepartitionindexindex1CustomResourcePolicyCADBE000", - "CSVTabletablepartitionindexindex2CustomResourcePolicyC0A14188", - "CSVTabletablepartitionindexindex2DAFCAA94" + "CSVTablepartitionindexindex1CustomResourcePolicy4983F2A9", + "CSVTablepartitionindexindex2CustomResourcePolicy4FF1AF9F", + "CSVTablepartitionindexindex29D554319" ], "UpdateReplacePolicy": "Delete", "DeletionPolicy": "Delete" }, - "CSVTabletablepartitionindexindex2CustomResourcePolicyC0A14188": { + "CSVTablepartitionindexindex2CustomResourcePolicy4FF1AF9F": { "Type": "AWS::IAM::Policy", "Properties": { "PolicyDocument": { @@ -154,7 +154,7 @@ ], "Version": "2012-10-17" }, - "PolicyName": "CSVTabletablepartitionindexindex2CustomResourcePolicyC0A14188", + "PolicyName": "CSVTablepartitionindexindex2CustomResourcePolicy4FF1AF9F", "Roles": [ { "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" @@ -162,7 +162,7 @@ ] } }, - "CSVTabletablepartitionindexindex2DAFCAA94": { + "CSVTablepartitionindexindex29D554319": { "Type": "Custom::AWS", "Properties": { "ServiceToken": { @@ -190,7 +190,7 @@ "InstallLatestAwsSdk": true }, "DependsOn": [ - "CSVTabletablepartitionindexindex2CustomResourcePolicyC0A14188" + "CSVTablepartitionindexindex2CustomResourcePolicy4FF1AF9F" ], "UpdateReplacePolicy": "Delete", "DeletionPolicy": "Delete" @@ -515,7 +515,7 @@ } } }, - "JSONTabletablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicyE66443B9": { + "JSONTablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicy92B3C1AE": { "Type": "AWS::IAM::Policy", "Properties": { "PolicyDocument": { @@ -528,7 +528,7 @@ ], "Version": "2012-10-17" }, - "PolicyName": "JSONTabletablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicyE66443B9", + "PolicyName": "JSONTablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicy92B3C1AE", "Roles": [ { "Ref": "AWS679f53fac002430cb0da5b7982bd2287ServiceRoleC1EA0FF2" @@ -536,7 +536,7 @@ ] } }, - "JSONTabletablepartitionindexyearmonthawscdkglueJSONTable937C116BAB62E6B0": { + "JSONTablepartitionindexyearmonthawscdkglueJSONTable937C116B74A5990F": { "Type": "Custom::AWS", "Properties": { "ServiceToken": { @@ -564,7 +564,7 @@ "InstallLatestAwsSdk": true }, "DependsOn": [ - "JSONTabletablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicyE66443B9" + "JSONTablepartitionindexyearmonthawscdkglueJSONTable937C116BCustomResourcePolicy92B3C1AE" ], "UpdateReplacePolicy": "Delete", "DeletionPolicy": "Delete" diff --git a/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts b/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts index b7ea8127fba6d..e3a514bfadf6e 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts +++ b/packages/@aws-cdk/aws-glue/test/integ.partition-index.ts @@ -6,9 +6,9 @@ import * as glue from '../lib'; /** * Stack verification steps: * * aws cloudformation describe-stacks --stack-name aws-cdk-glue --query Stacks[0].Outputs[0].OutputValue - * * aws glue get-partition-indexes --catalog-id --database-name my_database --table-name csv_table + * * aws glue get-partition-indexes --catalog-id --database-name database --table-name csv_table * returns two indexes named 'index1' and 'index2' - * * aws glue get-partition-indexes --catalog-id --database-name my_database --table-name json_table + * * aws glue get-partition-indexes --catalog-id --database-name database --table-name json_table * returns an index with name 'year-month...' */ @@ -16,7 +16,7 @@ const app = new cdk.App(); const stack = new cdk.Stack(app, 'aws-cdk-glue'); const bucket = new s3.Bucket(stack, 'DataBucket'); const database = new glue.Database(stack, 'MyDatabase', { - databaseName: 'my_database', + databaseName: 'database', }); const columns = [{ diff --git a/packages/@aws-cdk/aws-glue/test/table.test.ts b/packages/@aws-cdk/aws-glue/test/table.test.ts index 51de2e4b6c922..a7aa71474724f 100644 --- a/packages/@aws-cdk/aws-glue/test/table.test.ts +++ b/packages/@aws-cdk/aws-glue/test/table.test.ts @@ -6,6 +6,7 @@ import { testFutureBehavior } from '@aws-cdk/cdk-build-tools/lib/feature-flag'; import * as cdk from '@aws-cdk/core'; import * as cxapi from '@aws-cdk/cx-api'; import * as glue from '../lib'; +import { PartitionIndex } from '../lib'; import { CfnTable } from '../lib/glue.generated'; const s3GrantWriteCtx = { [cxapi.S3_GRANT_WRITE_WITHOUT_ACL]: true }; @@ -982,7 +983,7 @@ describe('add partition index', () => { })).toThrowError(/All index keys must also be partition keys/); }); - test('fails with bad index name', () => { + test('fails with index name < 1 character', () => { const stack = new cdk.Stack(); const database = new glue.Database(stack, 'Database', { databaseName: 'database', @@ -1003,9 +1004,9 @@ describe('add partition index', () => { }); expect(() => table.addPartitionIndex({ - indexName: '$my-part', + indexName: '', keyNames: ['part'], - })).toThrowError(/Index name can only have letters, numbers, hyphens, or underscores/); + })).toThrowError(/Index name must be between 1 and 255 characters, but got 0/); }); test('fails with > 3 indexes', () => { @@ -1014,9 +1015,19 @@ describe('add partition index', () => { databaseName: 'database', }); - const index = { + const indexes: PartitionIndex[] = [{ + indexName: 'ind1', keyNames: ['part'], - }; + }, { + indexName: 'ind2', + keyNames: ['part'], + }, { + indexName: 'ind3', + keyNames: ['part'], + }, { + indexName: 'ind4', + keyNames: ['part'], + }]; expect(() => new glue.Table(stack, 'Table', { database, @@ -1029,9 +1040,9 @@ describe('add partition index', () => { name: 'part', type: glue.Schema.SMALL_INT, }], - partitionIndexes: [index, index, index, index], + partitionIndexes: indexes, dataFormat: glue.DataFormat.JSON, - })).toThrowError('Maximum number of partition indexes allowed is 3 but got 4'); + })).toThrowError('Maximum number of partition indexes allowed is 3'); }); });