Skip to content

Commit

Permalink
feat(glue): support Data Quality ruleset (aws#26272)
Browse files Browse the repository at this point in the history
Glue launched Glue Data Quality.
https://aws.amazon.com/about-aws/whats-new/2023/06/aws-glue-data-quality-generally-available/
This PR adds support for Glue Data Quality to the AWS CDK.

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
moomindani authored and bmoffatt committed Jul 28, 2023
1 parent 7c144f8 commit bcc5046
Show file tree
Hide file tree
Showing 14 changed files with 925 additions and 2 deletions.
20 changes: 20 additions & 0 deletions packages/@aws-cdk/aws-glue-alpha/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -493,3 +493,23 @@ new glue.Table(this, 'MyTable', {
| array(itemType: Type) | Function | An array of some other type |
| map(keyType: Type, valueType: Type) | Function | A map of some primitive key type to any value type |
| struct(columns: Column[]) | Function | Nested structure containing individually named and typed columns |

## Data Quality Ruleset

A `DataQualityRuleset` defines a set of data quality rules, written in Data Quality Definition Language (DQDL), that are applied to a specified AWS Glue table. For example, to create a data quality ruleset for a given table:

```ts
new glue.DataQualityRuleset(this, 'MyDataQualityRuleset', {
clientToken: 'client_token',
description: 'description',
rulesetName: 'ruleset_name',
rulesetDqdl: 'ruleset_dqdl',
tags: {
key1: 'value1',
key2: 'value2',
},
targetTable: new glue.DataQualityTargetTable('database_name', 'table_name'),
});
```

For more information, see [AWS Glue Data Quality](https://docs.aws.amazon.com/glue/latest/dg/glue-data-quality.html).
139 changes: 139 additions & 0 deletions packages/@aws-cdk/aws-glue-alpha/lib/data-quality-ruleset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import * as cdk from 'aws-cdk-lib';
import * as constructs from 'constructs';
import { IResource, Resource } from 'aws-cdk-lib/core';
import { CfnDataQualityRuleset } from 'aws-cdk-lib/aws-glue';

/**
 * The Glue table that a data quality ruleset targets, identified by the
 * name of its database and the name of the table itself.
 */
export class DataQualityTargetTable {
  /** Name of the database that holds the target table. */
  public readonly databaseName: string;

  /** Name of the target table. */
  public readonly tableName: string;

  /**
   * @param databaseName name of the database that holds the target table
   * @param tableName name of the target table
   */
  constructor(databaseName: string, tableName: string) {
    // Assignment order is kept (database first, then table) so the own-property
    // order of the instance stays the same when it is serialized.
    this.databaseName = databaseName;
    this.tableName = tableName;
  }
}

/**
* Interface representing a Glue Data Quality ruleset, either one created in
* this app or one referenced by ARN or by name.
*/
export interface IDataQualityRuleset extends IResource {
/**
* The ARN of the ruleset
* @attribute
*/
readonly rulesetArn: string;

/**
* The name of the ruleset
* @attribute
*/
readonly rulesetName: string;
}

/**
* Construction properties for `DataQualityRuleset`
*/
export interface DataQualityRulesetProps {
/**
* The name of the ruleset
* @default cloudformation generated name
*/
readonly rulesetName?: string;

/**
* The client token of the ruleset
*
* @default - no client token is set on the resource
*/
readonly clientToken?: string;

/**
* The description of the ruleset
*
* @default - no description is set on the resource
*/
readonly description?: string;

/**
* The DQDL (Data Quality Definition Language) rules of the ruleset
*/
readonly rulesetDqdl: string;

/**
* Key-Value pairs that define tags for the ruleset.
* @default empty tags
*/
readonly tags?: { [key: string]: string };

/**
* The target table of the ruleset
*/
readonly targetTable: DataQualityTargetTable;
}

/**
 * A Glue Data Quality ruleset.
 *
 * Creates a `CfnDataQualityRuleset` child named `Resource` from the given
 * props and exposes the ruleset's name and ARN.
 */
export class DataQualityRuleset extends Resource implements IDataQualityRuleset {
  /**
   * Reference an existing ruleset by its ARN.
   *
   * The ruleset name is derived from the ARN via `Arn.extractResourceName`.
   *
   * @param scope the construct scope the reference is created in
   * @param id the construct id of the reference
   * @param rulesetArn the ARN of the existing ruleset
   */
  public static fromRulesetArn(scope: constructs.Construct, id: string, rulesetArn: string): IDataQualityRuleset {
    class Import extends Resource implements IDataQualityRuleset {
      public readonly rulesetArn = rulesetArn;
      // Evaluated as a field initializer, i.e. after the base construct is created.
      public readonly rulesetName = cdk.Arn.extractResourceName(rulesetArn, 'dataqualityruleset');
    }

    const imported = new Import(scope, id);
    return imported;
  }

  /**
   * Reference an existing ruleset by its name.
   *
   * The ARN is formatted against the stack that contains `scope`.
   *
   * @param scope the construct scope the reference is created in
   * @param id the construct id of the reference
   * @param rulesetName the name of the existing ruleset
   */
  public static fromRulesetName(scope: constructs.Construct, id: string, rulesetName: string): IDataQualityRuleset {
    class Import extends Resource implements IDataQualityRuleset {
      public readonly rulesetArn = DataQualityRuleset.buildRulesetArn(scope, rulesetName);
      public readonly rulesetName = rulesetName;
    }

    const imported = new Import(scope, id);
    return imported;
  }

  /**
   * Format a ruleset ARN for `rulesetName` using the stack that contains `scope`.
   *
   * NOTE(review): the resource type is spelled all-lowercase here and in
   * `fromRulesetArn`; confirm it matches the casing used in real Glue data
   * quality ruleset ARNs.
   */
  private static buildRulesetArn(scope: constructs.Construct, rulesetName: string): string {
    const owningStack = cdk.Stack.of(scope);
    return owningStack.formatArn({
      service: 'glue',
      resource: 'dataqualityruleset',
      resourceName: rulesetName,
    });
  }

  /**
   * Name of this ruleset.
   */
  public readonly rulesetName: string;

  /**
   * ARN of this ruleset.
   */
  public readonly rulesetArn: string;

  /**
   * @param scope the parent construct
   * @param id the construct id
   * @param props ruleset configuration; `rulesetName` is also used as the physical name
   */
  constructor(scope: constructs.Construct, id: string, props: DataQualityRulesetProps) {
    super(scope, id, { physicalName: props.rulesetName });

    const { clientToken, description, rulesetName, rulesetDqdl, tags, targetTable } = props;

    const cfnRuleset = new CfnDataQualityRuleset(this, 'Resource', {
      clientToken,
      description,
      name: rulesetName,
      ruleset: rulesetDqdl,
      tags,
      targetTable,
    });

    // The resource's `ref` is used as the name attribute; the ARN is built from that name.
    const resolvedName = this.getResourceNameAttribute(cfnRuleset.ref);
    this.rulesetArn = DataQualityRuleset.buildRulesetArn(this, resolvedName);
    this.rulesetName = resolvedName;
  }
}
1 change: 1 addition & 0 deletions packages/@aws-cdk/aws-glue-alpha/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

export * from './connection';
export * from './data-format';
export * from './data-quality-ruleset';
export * from './database';
export * from './job';
export * from './job-executable';
Expand Down
5 changes: 3 additions & 2 deletions packages/@aws-cdk/aws-glue-alpha/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,11 @@
"@aws-cdk/cdk-build-tools": "0.0.0",
"@aws-cdk/integ-runner": "0.0.0",
"@aws-cdk/pkglint": "0.0.0",
"@aws-cdk/integ-tests-alpha": "0.0.0",
"@types/jest": "^29.5.1",
"jest": "^29.5.0",
"aws-cdk-lib": "0.0.0",
"constructs": "^10.0.0"
"constructs": "^10.0.0",
"jest": "^29.5.0"
},
"dependencies": {},
"homepage": "https://github.com/aws/aws-cdk",
Expand Down
75 changes: 75 additions & 0 deletions packages/@aws-cdk/aws-glue-alpha/test/data-quality-ruleset.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { Template } from 'aws-cdk-lib/assertions';
import * as cdk from 'aws-cdk-lib';
import * as glue from '../lib';

// A ruleset with only name, description, DQDL and target table synthesizes
// those four properties onto the AWS::Glue::DataQualityRuleset resource.
test('a data quality ruleset', () => {
  // GIVEN
  const stack = new cdk.Stack();
  const targetTable = new glue.DataQualityTargetTable('database_name', 'table_name');

  // WHEN
  new glue.DataQualityRuleset(stack, 'DataQualityRuleset', {
    description: 'description',
    rulesetName: 'ruleset_name',
    rulesetDqdl: 'ruleset_dqdl',
    targetTable,
  });

  // THEN
  const template = Template.fromStack(stack);
  const expectedProperties = {
    Description: 'description',
    Name: 'ruleset_name',
    Ruleset: 'ruleset_dqdl',
    TargetTable: {
      DatabaseName: 'database_name',
      TableName: 'table_name',
    },
  };
  template.hasResourceProperties('AWS::Glue::DataQualityRuleset', expectedProperties);
});

// A client token passed in the props is rendered as `ClientToken` on the resource.
test('a data quality ruleset with a client token', () => {
  // GIVEN
  const stack = new cdk.Stack();
  const targetTable = new glue.DataQualityTargetTable('database_name', 'table_name');

  // WHEN
  new glue.DataQualityRuleset(stack, 'DataQualityRuleset', {
    clientToken: 'client_token',
    description: 'description',
    rulesetName: 'ruleset_name',
    rulesetDqdl: 'ruleset_dqdl',
    targetTable,
  });

  // THEN
  const template = Template.fromStack(stack);
  const expectedProperties = {
    ClientToken: 'client_token',
    Description: 'description',
    Name: 'ruleset_name',
    Ruleset: 'ruleset_dqdl',
    TargetTable: {
      DatabaseName: 'database_name',
      TableName: 'table_name',
    },
  };
  template.hasResourceProperties('AWS::Glue::DataQualityRuleset', expectedProperties);
});

// Tags passed as a key/value map in the props are rendered as the same map under `Tags`.
test('a data quality ruleset with tags', () => {
  // GIVEN
  const stack = new cdk.Stack();
  const targetTable = new glue.DataQualityTargetTable('database_name', 'table_name');
  const tags = {
    key1: 'value1',
    key2: 'value2',
  };

  // WHEN
  new glue.DataQualityRuleset(stack, 'DataQualityRuleset', {
    clientToken: 'client_token',
    description: 'description',
    rulesetName: 'ruleset_name',
    rulesetDqdl: 'ruleset_dqdl',
    tags,
    targetTable,
  });

  // THEN
  const template = Template.fromStack(stack);
  const expectedProperties = {
    ClientToken: 'client_token',
    Description: 'description',
    Name: 'ruleset_name',
    Ruleset: 'ruleset_dqdl',
    Tags: {
      key1: 'value1',
      key2: 'value2',
    },
    TargetTable: {
      DatabaseName: 'database_name',
      TableName: 'table_name',
    },
  };
  template.hasResourceProperties('AWS::Glue::DataQualityRuleset', expectedProperties);
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"version": "32.0.0",
"files": {
"b9515accbd6b765c36fbffa5adb190fc8f6d1f67573ab2655ede370368887799": {
"source": {
"path": "aws-glue-data-quality-ruleset.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "b9515accbd6b765c36fbffa5adb190fc8f6d1f67573ab2655ede370368887799.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
}
},
"dockerImages": {}
}
Loading

0 comments on commit bcc5046

Please sign in to comment.