diff --git a/packages/@aws-cdk/aws-glue/lib/data-format.ts b/packages/@aws-cdk/aws-glue/lib/data-format.ts
index a6c51b0aa4e22..15c97d9fb0f54 100644
--- a/packages/@aws-cdk/aws-glue/lib/data-format.ts
+++ b/packages/@aws-cdk/aws-glue/lib/data-format.ts
@@ -2,14 +2,43 @@
  * Absolute class name of the Hadoop `InputFormat` to use when reading table files.
  */
 export class InputFormat {
+  /**
+   * InputFormat for Avro files.
+   *
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/ql/io/avro/AvroContainerInputFormat.html
+   */
+  public static readonly AVRO = new InputFormat('org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat');
+
+  /**
+   * InputFormat for Cloudtrail Logs.
+   *
+   * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
+   */
+  public static readonly CLOUDTRAIL = new InputFormat('com.amazon.emr.cloudtrail.CloudTrailInputFormat');
+
+  /**
+   * InputFormat for Orc files.
+   *
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.html
+   */
+  public static readonly ORC = new InputFormat('org.apache.hadoop.hive.ql.io.orc.OrcInputFormat');
+
+  /**
+   * InputFormat for Parquet files.
+   *
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.html
+   */
+  public static readonly PARQUET = new InputFormat('org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat');
+
   /**
    * An InputFormat for plain text files. Files are broken into lines. Either linefeed or
    * carriage-return are used to signal end of line. Keys are the position in the file, and
    * values are the line of text.
+   * JSON & CSV files are examples of this InputFormat
    *
    * @see https://hadoop.apache.org/docs/stable/api/org/apache/hadoop/mapred/TextInputFormat.html
    */
-  public static readonly TEXT_INPUT_FORMAT = new InputFormat('org.apache.hadoop.mapred.TextInputFormat');
+  public static readonly TEXT = new InputFormat('org.apache.hadoop.mapred.TextInputFormat');
 
   constructor(public readonly className: string) {}
 }
@@ -23,7 +52,28 @@ export class OutputFormat {
    *
    * @see https://hive.apache.org/javadocs/r2.2.0/api/org/apache/hadoop/hive/ql/io/HiveIgnoreKeyTextOutputFormat.html
    */
-  public static readonly HIVE_IGNORE_KEY_TEXT_OUTPUT_FORMAT = new OutputFormat('org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat');
+  public static readonly HIVE_IGNORE_KEY_TEXT = new OutputFormat('org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat');
+
+  /**
+   * OutputFormat for Avro files.
+   *
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/ql/io/avro/AvroContainerOutputFormat.html
+   */
+  public static readonly AVRO = new OutputFormat('org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat');
+
+  /**
+   * OutputFormat for Orc files.
+   *
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/ql/io/orc/OrcOutputFormat.html
+   */
+  public static readonly ORC = new OutputFormat('org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat');
+
+  /**
+   * OutputFormat for Parquet files.
+   *
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/ql/io/parquet/MapredParquetOutputFormat.html
+   */
+  public static readonly PARQUET = new OutputFormat('org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat');
 
   constructor(public readonly className: string) {}
 }
@@ -35,15 +85,55 @@ export class OutputFormat {
  */
 export class SerializationLibrary {
   /**
-   * @see https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-JSON
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/serde2/avro/AvroSerDe.html
+   */
+  public static readonly AVRO = new SerializationLibrary('org.apache.hadoop.hive.serde2.avro.AvroSerDe');
+
+  /**
+   * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
+   */
+  public static readonly CLOUDTRAIL = new SerializationLibrary('com.amazon.emr.hive.serde.CloudTrailSerde');
+
+  /**
+   * @see https://docs.aws.amazon.com/athena/latest/ug/grok.html
+   */
+  public static readonly GROK = new SerializationLibrary('com.amazonaws.glue.serde.GrokSerDe');
+
+  /**
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hive/hcatalog/data/JsonSerDe.html
    */
   public static readonly HIVE_JSON = new SerializationLibrary('org.apache.hive.hcatalog.data.JsonSerDe');
 
+  /**
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/serde2/lazy/LazySimpleSerDe.html
+   */
+  public static readonly LAZY_SIMPLE = new SerializationLibrary('org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe');
+
+  /**
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/serde2/OpenCSVSerde.html
+   */
+  public static readonly OPEN_CSV = new SerializationLibrary('org.apache.hadoop.hive.serde2.OpenCSVSerde');
+
   /**
    * @see https://github.com/rcongiu/Hive-JSON-Serde
    */
   public static readonly OPENX_JSON = new SerializationLibrary('org.openx.data.jsonserde.JsonSerDe');
 
+  /**
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/ql/io/orc/OrcSerde.html
+   */
+  public static readonly ORC = new SerializationLibrary('org.apache.hadoop.hive.ql.io.orc.OrcSerde');
+
+  /**
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.html
+   */
+  public static readonly PARQUET = new SerializationLibrary('org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe');
+
+  /**
+   * @see https://hive.apache.org/javadocs/r1.2.2/api/org/apache/hadoop/hive/serde2/RegexSerDe.html
+   */
+  public static readonly REGEXP = new SerializationLibrary('org.apache.hadoop.hive.serde2.RegexSerDe');
+
   constructor(public readonly className: string) {}
 }
@@ -69,15 +159,102 @@ export namespace DataFormat {
   /**
-   * Stored as plain text files in JSON format.
+   * DataFormat for Apache Web Server Logs. Also works for CloudFront logs
+   *
+   * @see https://docs.aws.amazon.com/athena/latest/ug/apache.html
+   */
+  export const ApacheLogs: DataFormat = {
+    inputFormat: InputFormat.TEXT,
+    outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
+    serializationLibrary: SerializationLibrary.REGEXP
+  };
+
+  /**
+   * DataFormat for Apache Avro
+   *
+   * @see https://docs.aws.amazon.com/athena/latest/ug/avro.html
+   */
+  export const Avro: DataFormat = {
+    inputFormat: InputFormat.AVRO,
+    outputFormat: OutputFormat.AVRO,
+    serializationLibrary: SerializationLibrary.AVRO
+  };
+
+  /**
+   * DataFormat for CloudTrail logs stored on S3
+   *
+   * @see https://docs.aws.amazon.com/athena/latest/ug/cloudtrail.html
+   */
+  export const CloudTrailLogs: DataFormat = {
+    inputFormat: InputFormat.CLOUDTRAIL,
+    outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
+    serializationLibrary: SerializationLibrary.CLOUDTRAIL
+  };
+
+  /**
+   * DataFormat for CSV Files
    *
+   * @see https://docs.aws.amazon.com/athena/latest/ug/csv.html
+   */
+  export const CSV: DataFormat = {
+    inputFormat: InputFormat.TEXT,
+    outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
+    serializationLibrary: SerializationLibrary.OPEN_CSV
+  };
+
+  /**
+   * Stored as plain text files in JSON format.
    * Uses OpenX Json SerDe for serialization and deseralization.
    *
    * @see https://docs.aws.amazon.com/athena/latest/ug/json.html
    */
   export const Json: DataFormat = {
-    inputFormat: InputFormat.TEXT_INPUT_FORMAT,
-    outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT_OUTPUT_FORMAT,
+    inputFormat: InputFormat.TEXT,
+    outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
     serializationLibrary: SerializationLibrary.OPENX_JSON
   };
-}
\ No newline at end of file
+
+  /**
+   * DataFormat for Logstash Logs, using the GROK SerDe
+   *
+   * @see https://docs.aws.amazon.com/athena/latest/ug/grok.html
+   */
+  export const Logstash: DataFormat = {
+    inputFormat: InputFormat.TEXT,
+    outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
+    serializationLibrary: SerializationLibrary.GROK
+  };
+
+  /**
+   * DataFormat for Apache ORC (Optimized Row Columnar)
+   *
+   * @see https://docs.aws.amazon.com/athena/latest/ug/orc.html
+   */
+  export const Orc: DataFormat = {
+    inputFormat: InputFormat.ORC,
+    outputFormat: OutputFormat.ORC,
+    serializationLibrary: SerializationLibrary.ORC
+  };
+
+  /**
+   * DataFormat for Apache Parquet
+   *
+   * @see https://docs.aws.amazon.com/athena/latest/ug/parquet.html
+   */
+  export const Parquet: DataFormat = {
+    inputFormat: InputFormat.PARQUET,
+    outputFormat: OutputFormat.PARQUET,
+    serializationLibrary: SerializationLibrary.PARQUET
+  };
+
+  /**
+   * DataFormat for TSV (Tab-Separated Values)
+   *
+   * @see https://docs.aws.amazon.com/athena/latest/ug/lazy-simple-serde.html
+   */
+  export const TSV: DataFormat = {
+    inputFormat: InputFormat.TEXT,
+    outputFormat: OutputFormat.HIVE_IGNORE_KEY_TEXT,
+    serializationLibrary: SerializationLibrary.LAZY_SIMPLE
+  };
+}
diff --git a/packages/@aws-cdk/aws-glue/package.json b/packages/@aws-cdk/aws-glue/package.json
index b981ac7e31c3c..b610d765bb419 100644
--- a/packages/@aws-cdk/aws-glue/package.json
+++ b/packages/@aws-cdk/aws-glue/package.json
@@ -108,8 +108,16 @@
       "docs-public-apis:@aws-cdk/aws-glue.Table.fromTableArn",
       "docs-public-apis:@aws-cdk/aws-glue.Schema.DOUBLE",
       "docs-public-apis:@aws-cdk/aws-glue.Schema.FLOAT",
+      "docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.AVRO",
+      "docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.CLOUDTRAIL",
+      "docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.GROK",
"docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.HIVE_JSON", + "docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.LAZY_SIMPLE", + "docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.OPEN_CSV", "docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.OPENX_JSON", + "docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.ORC", + "docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.PARQUET", + "docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.REGEXP", "docs-public-apis:@aws-cdk/aws-glue.SerializationLibrary.className" ] } diff --git a/packages/@aws-cdk/aws-glue/test/integ.table.expected.json b/packages/@aws-cdk/aws-glue/test/integ.table.expected.json index 56fffdcde397e..95332bde0e762 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.table.expected.json +++ b/packages/@aws-cdk/aws-glue/test/integ.table.expected.json @@ -1,9 +1,14 @@ { "Resources": { + "DataBucketE3889A50": { + "Type": "AWS::S3::Bucket", + "UpdateReplacePolicy": "Retain", + "DeletionPolicy": "Retain" + }, "MyDatabaseBucketCA6D21E6": { "Type": "AWS::S3::Bucket", - "DeletionPolicy": "Retain", - "UpdateReplacePolicy": "Retain" + "UpdateReplacePolicy": "Retain", + "DeletionPolicy": "Retain" }, "MyDatabase1E2517DB": { "Type": "AWS::Glue::Database", @@ -28,12 +33,76 @@ } } }, - "MyTableBucketD51E7EDC": { - "Type": "AWS::S3::Bucket", - "DeletionPolicy": "Retain", - "UpdateReplacePolicy": "Retain" + "AVROTable58646ABF": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "avro_table generated by CDK", + "Name": "avro_table", + "Parameters": { + "has_encrypted_data": false + }, + "PartitionKeys": [ + { + "Name": "year", + "Type": "smallint" + } + ], + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Comment": "col2 comment", + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "array" + }, + { + "Name": "col4", + "Type": "map" + }, + { + "Name": "col5", + "Type": "struct" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/data/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.apache.hadoop.hive.serde2.avro.AvroSerDe" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } }, - "MyTable85A92380": { + "CSVTableE499CABA": { "Type": "AWS::Glue::Table", "Properties": { "CatalogId": { @@ -43,8 +112,8 @@ "Ref": "MyDatabase1E2517DB" }, "TableInput": { - "Description": "my_table generated by CDK", - "Name": "my_table", + "Description": "csv_table generated by CDK", + "Name": "csv_table", "Parameters": { "has_encrypted_data": false }, @@ -86,7 +155,76 @@ [ "s3://", { - "Ref": "MyTableBucketD51E7EDC" + "Ref": "DataBucketE3889A50" + }, + "/data/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.apache.hadoop.hive.serde2.OpenCSVSerde" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, + "JSONTable00348F1D": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "json_table generated by CDK", 
+ "Name": "json_table", + "Parameters": { + "has_encrypted_data": false + }, + "PartitionKeys": [ + { + "Name": "year", + "Type": "smallint" + } + ], + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Comment": "col2 comment", + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "array" + }, + { + "Name": "col4", + "Type": "map" + }, + { + "Name": "col5", + "Type": "struct" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.mapred.TextInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" }, "/data/" ] @@ -102,6 +240,75 @@ } } }, + "ParquetTableE84E985F": { + "Type": "AWS::Glue::Table", + "Properties": { + "CatalogId": { + "Ref": "AWS::AccountId" + }, + "DatabaseName": { + "Ref": "MyDatabase1E2517DB" + }, + "TableInput": { + "Description": "parquet_table generated by CDK", + "Name": "parquet_table", + "Parameters": { + "has_encrypted_data": false + }, + "PartitionKeys": [ + { + "Name": "year", + "Type": "smallint" + } + ], + "StorageDescriptor": { + "Columns": [ + { + "Name": "col1", + "Type": "string" + }, + { + "Comment": "col2 comment", + "Name": "col2", + "Type": "string" + }, + { + "Name": "col3", + "Type": "array" + }, + { + "Name": "col4", + "Type": "map" + }, + { + "Name": "col5", + "Type": "struct" + } + ], + "Compressed": false, + "InputFormat": "org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat", + "Location": { + "Fn::Join": [ + "", + [ + "s3://", + { + "Ref": "DataBucketE3889A50" + }, + "/data/" + ] + ] + }, + "OutputFormat": "org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat", + "SerdeInfo": { + "SerializationLibrary": "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe" + }, + "StoredAsSubDirectories": false + }, + "TableType": "EXTERNAL_TABLE" + } + } + }, "MyKey6AB29FA6": { "Type": "AWS::KMS::Key", "Properties": { @@ -167,8 +374,8 @@ "Version": "2012-10-17" } }, - "DeletionPolicy": "Retain", - "UpdateReplacePolicy": "Retain" + "UpdateReplacePolicy": "Retain", + "DeletionPolicy": "Retain" }, "MyEncryptedTableBucket7B28486D": { "Type": "AWS::S3::Bucket", @@ -189,8 +396,8 @@ ] } }, - "DeletionPolicy": "Retain", - "UpdateReplacePolicy": "Retain" + "UpdateReplacePolicy": "Retain", + "DeletionPolicy": "Retain" }, "MyEncryptedTable981A88C6": { "Type": "AWS::Glue::Table", @@ -307,7 +514,7 @@ }, "/", { - "Ref": "MyTable85A92380" + "Ref": "CSVTableE499CABA" } ] ] @@ -326,7 +533,7 @@ "Resource": [ { "Fn::GetAtt": [ - "MyTableBucketD51E7EDC", + "DataBucketE3889A50", "Arn" ] }, @@ -336,7 +543,7 @@ [ { "Fn::GetAtt": [ - "MyTableBucketD51E7EDC", + "DataBucketE3889A50", "Arn" ] }, @@ -449,6 +656,190 @@ } ] } + }, + "AnotherUser254B09E3": { + "Type": "AWS::IAM::User" + }, + "AnotherUserDefaultPolicyDBDB9923": { + "Type": "AWS::IAM::Policy", + "Properties": { + "PolicyDocument": { + "Statement": [ + { + "Action": [ + "glue:BatchDeletePartition", + "glue:BatchGetPartition", + "glue:GetPartition", + "glue:GetPartitions", + "glue:GetTable", + "glue:GetTables", + "glue:GetTableVersions", + "glue:BatchCreatePartition", + "glue:BatchDeletePartition", + "glue:CreatePartition", + "glue:DeletePartition", + "glue:UpdatePartition" + ], + "Effect": "Allow", + "Resource": { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":table/", + { + "Ref": "MyDatabase1E2517DB" + }, + "/", + { + "Ref": "AVROTable58646ABF" + } + ] + ] + } + }, + { + 
"Action": [ + "s3:GetObject*", + "s3:GetBucket*", + "s3:List*", + "s3:DeleteObject*", + "s3:PutObject*", + "s3:Abort*" + ], + "Effect": "Allow", + "Resource": [ + { + "Fn::GetAtt": [ + "DataBucketE3889A50", + "Arn" + ] + }, + { + "Fn::Join": [ + "", + [ + { + "Fn::GetAtt": [ + "DataBucketE3889A50", + "Arn" + ] + }, + "/data/" + ] + ] + } + ] + }, + { + "Action": [ + "glue:BatchDeletePartition", + "glue:BatchGetPartition", + "glue:GetPartition", + "glue:GetPartitions", + "glue:GetTable", + "glue:GetTables", + "glue:GetTableVersions", + "glue:BatchCreatePartition", + "glue:BatchDeletePartition", + "glue:CreatePartition", + "glue:DeletePartition", + "glue:UpdatePartition" + ], + "Effect": "Allow", + "Resource": { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":table/", + { + "Ref": "MyDatabase1E2517DB" + }, + "/", + { + "Ref": "JSONTable00348F1D" + } + ] + ] + } + }, + { + "Action": [ + "glue:BatchDeletePartition", + "glue:BatchGetPartition", + "glue:GetPartition", + "glue:GetPartitions", + "glue:GetTable", + "glue:GetTables", + "glue:GetTableVersions", + "glue:BatchCreatePartition", + "glue:BatchDeletePartition", + "glue:CreatePartition", + "glue:DeletePartition", + "glue:UpdatePartition" + ], + "Effect": "Allow", + "Resource": { + "Fn::Join": [ + "", + [ + "arn:", + { + "Ref": "AWS::Partition" + }, + ":glue:", + { + "Ref": "AWS::Region" + }, + ":", + { + "Ref": "AWS::AccountId" + }, + ":table/", + { + "Ref": "MyDatabase1E2517DB" + }, + "/", + { + "Ref": "ParquetTableE84E985F" + } + ] + ] + } + } + ], + "Version": "2012-10-17" + }, + "PolicyName": "AnotherUserDefaultPolicyDBDB9923", + "Users": [ + { + "Ref": "AnotherUser254B09E3" + } + ] + } } } -} +} \ No newline at end of file diff --git a/packages/@aws-cdk/aws-glue/test/integ.table.ts b/packages/@aws-cdk/aws-glue/test/integ.table.ts index 373384e9e2934..240a498fc80d5 100644 --- a/packages/@aws-cdk/aws-glue/test/integ.table.ts +++ b/packages/@aws-cdk/aws-glue/test/integ.table.ts @@ -1,6 +1,7 @@ #!/usr/bin/env node import * as iam from '@aws-cdk/aws-iam'; import * as kms from '@aws-cdk/aws-kms'; +import * as s3 from '@aws-cdk/aws-s3'; import * as cdk from '@aws-cdk/core'; import * as glue from '../lib'; @@ -8,63 +9,78 @@ const app = new cdk.App(); const stack = new cdk.Stack(app, 'aws-cdk-glue'); +const bucket = new s3.Bucket(stack, 'DataBucket'); + const database = new glue.Database(stack, 'MyDatabase', { databaseName: 'my_database', }); -const ordinaryTable = new glue.Table(stack, 'MyTable', { - database, - tableName: 'my_table', - columns: [{ +const columns = [{ + name: 'col1', + type: glue.Schema.STRING +}, { + name: 'col2', + type: glue.Schema.STRING, + comment: 'col2 comment' +}, { + name: 'col3', + type: glue.Schema.array(glue.Schema.STRING) +}, { + name: 'col4', + type: glue.Schema.map(glue.Schema.STRING, glue.Schema.STRING) +}, { + name: 'col5', + type: glue.Schema.struct([{ name: 'col1', type: glue.Schema.STRING - }, { - name: 'col2', - type: glue.Schema.STRING, - comment: 'col2 comment' - }, { - name: 'col3', - type: glue.Schema.array(glue.Schema.STRING) - }, { - name: 'col4', - type: glue.Schema.map(glue.Schema.STRING, glue.Schema.STRING) - }, { - name: 'col5', - type: glue.Schema.struct([{ - name: 'col1', - type: glue.Schema.STRING - }]) - }], - partitionKeys: [{ - name: 'year', - type: glue.Schema.SMALL_INT - }], + }]) +}]; + +const partitionKeys = [{ + name: 'year', + type: glue.Schema.SMALL_INT +}]; + +const 
+const avroTable = new glue.Table(stack, 'AVROTable', {
+  database,
+  bucket,
+  tableName: 'avro_table',
+  columns,
+  partitionKeys,
+  dataFormat: glue.DataFormat.Avro
+});
+
+const csvTable = new glue.Table(stack, 'CSVTable', {
+  database,
+  bucket,
+  tableName: 'csv_table',
+  columns,
+  partitionKeys,
+  dataFormat: glue.DataFormat.CSV
+});
+
+const jsonTable = new glue.Table(stack, 'JSONTable', {
+  database,
+  bucket,
+  tableName: 'json_table',
+  columns,
+  partitionKeys,
   dataFormat: glue.DataFormat.Json
 });
 
+const parquetTable = new glue.Table(stack, 'ParquetTable', {
+  database,
+  bucket,
+  tableName: 'parquet_table',
+  columns,
+  partitionKeys,
+  dataFormat: glue.DataFormat.Parquet
+});
+
 const encryptedTable = new glue.Table(stack, 'MyEncryptedTable', {
   database,
   tableName: 'my_encrypted_table',
-  columns: [{
-    name: 'col1',
-    type: glue.Schema.STRING
-  }, {
-    name: 'col2',
-    type: glue.Schema.STRING,
-    comment: 'col2 comment'
-  }, {
-    name: 'col3',
-    type: glue.Schema.array(glue.Schema.STRING)
-  }, {
-    name: 'col4',
-    type: glue.Schema.map(glue.Schema.STRING, glue.Schema.STRING)
-  }, {
-    name: 'col5',
-    type: glue.Schema.struct([{
-      name: 'col1',
-      type: glue.Schema.STRING
-    }])
-  }],
+  columns,
   partitionKeys: [{
     name: 'year',
     type: glue.Schema.SMALL_INT
@@ -75,7 +91,12 @@ const encryptedTable = new glue.Table(stack, 'MyEncryptedTable', {
 });
 
 const user = new iam.User(stack, 'MyUser');
-ordinaryTable.grantReadWrite(user);
+csvTable.grantReadWrite(user);
 encryptedTable.grantReadWrite(user);
 
+const anotherUser = new iam.User(stack, 'AnotherUser');
+avroTable.grantReadWrite(anotherUser);
+jsonTable.grantReadWrite(anotherUser);
+parquetTable.grantReadWrite(anotherUser);
+
 app.synth();
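
Usage sketch (hypothetical, not part of the patch above): a minimal consumer of the DataFormat presets added in data-format.ts, written against the same glue.Table API that integ.table.ts exercises; construct IDs and names below are illustrative only.

import * as s3 from '@aws-cdk/aws-s3';
import * as cdk from '@aws-cdk/core';
import * as glue from '@aws-cdk/aws-glue';

const app = new cdk.App();
const stack = new cdk.Stack(app, 'aws-cdk-glue-data-format-sketch');

// A database and an S3 bucket to back the table, mirroring the integ test setup.
const database = new glue.Database(stack, 'SketchDatabase', {
  databaseName: 'sketch_db',
});
const bucket = new s3.Bucket(stack, 'SketchBucket');

// Each DataFormat preset bundles the matching InputFormat, OutputFormat and
// SerializationLibrary, so a single property selects all three.
new glue.Table(stack, 'SketchOrcTable', {
  database,
  bucket,
  tableName: 'sketch_orc_table',
  columns: [{ name: 'id', type: glue.Schema.STRING }],
  dataFormat: glue.DataFormat.Orc,
});

app.synth();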