-
Notifications
You must be signed in to change notification settings - Fork 4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Athena Databases and Tables #5237
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
import { PolicyStatement } from '@aws-cdk/aws-iam'; | ||
import { IBucket } from '@aws-cdk/aws-s3'; | ||
import { Aws, Construct, IConstruct } from '@aws-cdk/core'; | ||
import { AwsCustomResource } from '@aws-cdk/custom-resources'; | ||
|
||
/**
 * Represents an Athena Database.
 */
export interface IDatabase extends IConstruct {
  /**
   * The Database name.
   * @attribute
   */
  readonly databaseName: string;
}
|
||
/**
 * Properties used to define an Athena Database
 */
export interface DatabaseProps {
  /**
   * The name for the Database.
   */
  readonly databaseName: string;

  /**
   * Establishes the metadata value for the built-in metadata property named `comment` and the value
   * you provide for comment.
   * @default - no comment is added to the database
   */
  readonly comment?: string;

  /**
   * Specifies the location where database files and metastore will exist. The location must be an
   * Amazon S3 location.
   */
  readonly locationBucket: IBucket;

  /**
   * The path part of the S3 Location.
   * @default - the root of `locationBucket` is used
   */
  readonly locationPath?: string;

  /**
   * Allows you to specify custom metadata properties for the database definition.
   * @default - no custom properties are set
   */
  readonly databaseProperties?: Map<string, string>;
}
|
||
/**
 * An Athena Database
 *
 * The database is created by a custom resource that submits a `CREATE DATABASE` statement through the
 * Athena `startQueryExecution` API call when the resource is created.
 */
export class Database extends Construct implements IDatabase {
  /**
   * The name of the database, taken from `props.databaseName`.
   */
  public readonly databaseName: string;

  constructor(scope: Construct, id: string, props: DatabaseProps) {
    super(scope, id);
    this.databaseName = props.databaseName;

    // NOTE(review): `s3:*` is broader than this resource needs and should be scoped down.
    const s3Policy = new PolicyStatement({
      actions: ['s3:*'],
      resources: [
        props.locationBucket.arnForObjects('*'),
        props.locationBucket.bucketArn,
      ],
    });

    const athenaPolicy = new PolicyStatement({
      actions: ['athena:StartQueryExecution'],
      resources: ['*'],
    });

    // The catalog ARN belongs on the Glue statement. It was previously added to the Athena
    // statement, which left this statement without any resources.
    // NOTE(review): CREATE DATABASE presumably also needs glue:CreateDatabase — confirm.
    const gluePolicy = new PolicyStatement({
      actions: ['glue:GetDatabase', 'glue:GetTable'],
      resources: [`arn:aws:glue:${Aws.REGION}:${Aws.ACCOUNT_ID}:catalog`],
    });

    new AwsCustomResource(this, 'CreateAthenaDatabase', {
      onCreate: {
        service: 'Athena',
        action: 'startQueryExecution',
        parameters: {
          QueryString: queryStringBuilder(props),
          ResultConfiguration: {
            // Query results are written to the root of the database's own bucket.
            OutputLocation: `s3://${props.locationBucket.bucketName}/`
          },
        },
        physicalResourceIdPath: 'QueryExecutionId'
      },
      policyStatements: [s3Policy, athenaPolicy, gluePolicy]
    });
  }
}
|
||
/**
 * Builds the `CREATE DATABASE` statement for the given database properties.
 *
 * The statement has the form
 * `CREATE DATABASE name [COMMENT '...'] LOCATION 's3://bucket/[path/]' [WITH DBPROPERTIES (...)];`.
 *
 * @param props the database properties to render
 * @returns the DDL statement, terminated by a semicolon
 */
function queryStringBuilder(props: DatabaseProps): string {
  let queryString = `CREATE DATABASE ${props.databaseName}`;
  if (props.comment) {
    // The comment is a string literal, so it must be quoted and embedded quotes escaped.
    const escapedComment = props.comment.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
    queryString += ` COMMENT '${escapedComment}'`;
  }
  queryString += ` LOCATION 's3://${props.locationBucket.bucketName}/`;
  if (props.locationPath) {
    // Strip surrounding slashes so the location never contains `//` or a doubled trailing slash.
    const trimmedPath = props.locationPath.replace(/^\/+|\/+$/g, '');
    if (trimmedPath) {
      queryString += `${trimmedPath}/`;
    }
  }
  queryString += "'";
  if (props.databaseProperties && props.databaseProperties.size > 0) {
    // The property list is only valid when introduced by the WITH DBPROPERTIES keyword.
    const propString = propertiesStringBuilder(props.databaseProperties);
    queryString += ` WITH DBPROPERTIES ${propString}`;
  }
  queryString += ';';
  return queryString;
}
|
||
/**
 * Renders database properties as a parenthesised, comma-separated list of `'key'='value'` pairs.
 *
 * Backslashes and single quotes in keys and values are backslash-escaped so they cannot terminate
 * the surrounding string literal.
 *
 * @param databaseProperties the properties to render, in insertion order
 * @returns the rendered list, e.g. `('a'='1', 'b'='2')`; `()` for an empty map
 */
function propertiesStringBuilder(databaseProperties: Map<string, string>): string {
  const escapeLiteral = (text: string): string => text.replace(/\\/g, '\\\\').replace(/'/g, "\\'");
  const pairs: string[] = [];
  databaseProperties.forEach((value: string, key: string) => {
    pairs.push(`'${escapeLiteral(key)}'='${escapeLiteral(value)}'`);
  });
  return `(${pairs.join(', ')})`;
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
/**
 * Supported Datatypes in Athena
 */
export enum DataType {
  /**
   * Values are true and false.
   */
  BOOLEAN = 'BOOLEAN',
  /**
   * A 8-bit signed INTEGER in two's complement format, with a minimum value of -2^7 and a maximum value of 2^7-1.
   */
  TINYINT = 'TINYINT',
  /**
   * A 16-bit signed INTEGER in two's complement format, with a minimum value of -2^15 and a maximum value of 2^15-1.
   */
  SMALLINT = 'SMALLINT',
  /**
   * Athena combines two different implementations of the INTEGER data type. In Data Definition Language (DDL)
   * queries, Athena uses the INT data type. In all other queries, Athena uses the INTEGER data type, where INTEGER
   * is represented as a 32-bit signed value in two's complement format, with a minimum value of -2^31 and a maximum
   * value of 2^31-1. In the JDBC driver, INTEGER is returned, to ensure compatibility with business analytics
   * applications.
   */
  INT = 'INT',
  /**
   * A 64-bit signed INTEGER in two's complement format, with a minimum value of -2^63 and a maximum value of 2^63-1.
   */
  BIGINT = 'BIGINT',
  /**
   * Floating-point types
   */
  DOUBLE = 'DOUBLE',
  /**
   * Floating-point types
   */
  FLOAT = 'FLOAT',
  /**
   * DECIMAL [ (precision, scale) ], where precision is the total number of digits, and scale (optional) is the
   * number of digits in fractional part, the default is 0. For example, use these type definitions:
   * DECIMAL(11,5), DECIMAL(15).
   * To specify decimal values as literals, such as when selecting rows with a specific decimal value in a query DDL
   * expression, specify the DECIMAL type definition, and list the decimal value as a literal (in single quotes) in
   * your query, as in this example: decimal_value = DECIMAL '0.12'.
   *
   * NOTE(review): precision and scale cannot be specified through this enum yet; only the bare type is emitted.
   */
  DECIMAL = 'DECIMAL',
  /**
   * Fixed length character data, with a specified length between 1 and 255, such as char(10).
   * For more information, see CHAR Hive Data Type.
   * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-char
   */
  CHAR = 'CHAR',
  /**
   * Variable length character data, with a specified length between 1 and 65535, such as varchar(10).
   * For more information, see VARCHAR Hive Data Type.
   * https://cwiki.apache.org/confluence/display/Hive/LanguageManual+Types#LanguageManualTypes-varchar
   */
  VARCHAR = 'VARCHAR',
  /**
   * String type. Noted by the original author as not officially documented, but accepted by Athena.
   */
  STRING = 'STRING',
  /**
   * (for data in Parquet)
   */
  BINARY = 'BINARY',
  /**
   * DATE, in the UNIX format, such as YYYY-MM-DD.
   */
  DATE = 'DATE',
  /**
   * Instant in time and date in the UNIX format, such as yyyy-mm-dd hh:mm:ss[.f...].
   * For example, TIMESTAMP '2008-09-15 03:04:05.324'. This format uses the session time zone.
   */
  TIMESTAMP = 'TIMESTAMP',
  // NOTE(review): ARRAY, MAP and STRUCT take type arguments that a plain enum value cannot carry.
  // Their values are still placeholders; a better way to represent these data structures inside
  // the schema is needed.
  /**
   * < data_type >
   */
  ARRAY = 'ARRAYTODO',
  /**
   * < primitive_type, data_type >
   */
  MAP = 'MAPTODO',
  /**
   * < col_name : data_type [COMMENT col_comment] [, ...] >
   */
  STRUCT = 'STRUCTTODO',
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,4 @@ | ||
export * from './table'; | ||
export * from './datatypes'; | ||
// AWS::Athena CloudFormation Resources: | ||
export * from './athena.generated'; | ||
export * from './athena.generated'; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import { PolicyStatement } from '@aws-cdk/aws-iam'; | ||
import { IBucket } from '@aws-cdk/aws-s3'; | ||
import { Aws, Construct, IConstruct } from '@aws-cdk/core'; | ||
import { AwsCustomResource } from '@aws-cdk/custom-resources'; | ||
import { DataType } from './datatypes'; | ||
|
||
/**
 * Represents an Athena Table.
 */
export interface ITable extends IConstruct {
  /**
   * The Table name.
   * @attribute
   */
  readonly tableName: string;
}
|
||
/**
 * Properties used to define an Athena Table
 */
export interface TableProps {
  /**
   * The database to add the Table to.
   */
  readonly databaseName: string;

  /**
   * The name for the Table.
   */
  readonly tableName: string;

  /**
   * Specifies the name for each column to be created, along with the column's data type.
   * Column names do not allow special characters other than underscore (_).
   * If col_name begins with an underscore, enclose the column name in backticks, for example `_mycolumn`.
   */
  readonly schema: {[key: string]: DataType};

  /**
   * Table comment.
   * @default - no comment is added to the table
   */
  readonly comment?: string;

  /**
   * The Bucket used as the S3 location of this Table, and as the output location of the query that
   * creates it.
   */
  readonly queryBucket: IBucket;
}
|
||
/**
 * An Athena Table
 *
 * The table is created by a custom resource that submits a `CREATE EXTERNAL TABLE IF NOT EXISTS`
 * statement through the Athena `startQueryExecution` API call when the resource is created.
 */
export class Table extends Construct implements ITable {
  /**
   * The name of the table, taken from `props.tableName`.
   */
  public readonly tableName: string;

  constructor(scope: Construct, id: string, props: TableProps) {
    super(scope, id);
    this.tableName = props.tableName;

    // NOTE(review): `s3:*` is broader than this resource needs and should be scoped down.
    const s3Policy = new PolicyStatement({
      actions: ['s3:*'],
      resources: [
        props.queryBucket.arnForObjects('*'),
        props.queryBucket.bucketArn,
      ],
    });

    const athenaPolicy = new PolicyStatement({
      actions: ['athena:StartQueryExecution'],
      resources: ['*'],
    });

    // The catalog ARN belongs on the Glue statement. It was previously added to the Athena
    // statement, which left this statement without any resources.
    // NOTE(review): CREATE TABLE presumably also needs glue:CreateTable — confirm.
    const gluePolicy = new PolicyStatement({
      actions: ['glue:GetDatabase', 'glue:GetTable'],
      resources: [`arn:aws:glue:${Aws.REGION}:${Aws.ACCOUNT_ID}:catalog`],
    });

    // The comment is a string literal, so it is quoted and embedded quotes are escaped.
    const commentClause = props.comment
      ? ` COMMENT '${props.comment.replace(/\\/g, '\\\\').replace(/'/g, "\\'")}'`
      : '';

    // schemaStringBuilder already returns the column list wrapped in parentheses, so it is not
    // wrapped again here. The closing quote of LOCATION must come after the trailing slash.
    const createTableQuery =
      `CREATE EXTERNAL TABLE IF NOT EXISTS ${props.databaseName}.${props.tableName} ` +
      `${schemaStringBuilder(props.schema)}${commentClause} ` +
      `LOCATION 's3://${props.queryBucket.bucketName}/'`;

    new AwsCustomResource(this, 'CreateAthenaTable', {
      onCreate: {
        service: 'Athena',
        action: 'startQueryExecution',
        parameters: {
          QueryString: createTableQuery,
          ResultConfiguration: {
            // NOTE(review): the output of this query is not used, but the field is required.
            // A better place than the root of the table's own bucket is needed.
            OutputLocation: `s3://${props.queryBucket.bucketName}/`
          },
        },
        physicalResourceIdPath: 'QueryExecutionId'
      },
      policyStatements: [s3Policy, athenaPolicy, gluePolicy]
    });
  }
}
|
||
/**
 * Renders a table schema as a parenthesised, comma-separated column definition list.
 *
 * Each column name is wrapped in backticks and followed by its data type, in the key order of
 * `schema`.
 *
 * @param schema mapping of column name to column data type
 * @returns the rendered list, e.g. (`id` INT, `name` STRING); `()` for an empty schema
 */
function schemaStringBuilder(schema: {[key: string]: DataType}): string {
  // Array.prototype.concat returns a new array; the previous code discarded it, so the result
  // was always `()`. Build the column list directly instead.
  const columns = Object.keys(schema).map(key => '`' + key + '` ' + schema[key]);
  return `(${columns.join(', ')})`;
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,14 +68,18 @@ | |
"pkglint": "1.18.0" | ||
}, | ||
"dependencies": { | ||
"@aws-cdk/core": "1.18.0" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Leave the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It was just pushed down a line so the dependencies are in alphabetical order |
||
"@aws-cdk/aws-s3": "1.18.0", | ||
"@aws-cdk/core": "1.18.0", | ||
"@aws-cdk/custom-resources": "1.18.0" | ||
}, | ||
"homepage": "https://github.com/aws/aws-cdk", | ||
"peerDependencies": { | ||
"@aws-cdk/core": "1.18.0" | ||
"@aws-cdk/aws-s3": "1.18.0", | ||
"@aws-cdk/core": "1.18.0", | ||
"@aws-cdk/custom-resources": "1.18.0" | ||
}, | ||
"engines": { | ||
"node": ">= 10.3.0" | ||
}, | ||
"stability": "experimental" | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this should be
gluePolicy.addResources(...