Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

resource/aws_glue_crawler: Support DynamoDB targets #5152

Merged
merged 1 commit into from
Jul 26, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 58 additions & 6 deletions aws/resource_aws_glue_crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,19 @@ func resourceAwsGlueCrawler() *schema.Resource {
},
},
},
"dynamodb_target": {
Type: schema.TypeList,
Optional: true,
MinItems: 1,
Elem: &schema.Resource{
Schema: map[string]*schema.Schema{
"path": {
Type: schema.TypeString,
Required: true,
},
},
},
},
"jdbc_target": {
Type: schema.TypeList,
Optional: true,
Expand Down Expand Up @@ -235,19 +248,42 @@ func expandGlueSchemaChangePolicy(v []interface{}) *glue.SchemaChangePolicy {
func expandGlueCrawlerTargets(d *schema.ResourceData) (*glue.CrawlerTargets, error) {
crawlerTargets := &glue.CrawlerTargets{}

dynamodbTargets, dynamodbTargetsOk := d.GetOk("dynamodb_target")
jdbcTargets, jdbcTargetsOk := d.GetOk("jdbc_target")
s3Targets, s3TargetsOk := d.GetOk("s3_target")
if !jdbcTargetsOk && !s3TargetsOk {
return nil, fmt.Errorf("jdbc targets or s3 targets configuration is required")
if !dynamodbTargetsOk && !jdbcTargetsOk && !s3TargetsOk {
return nil, fmt.Errorf("One of the following configurations is required: dynamodb_target, jdbc_target, s3_target")
}

log.Print("[DEBUG] Creating crawler target")
crawlerTargets.S3Targets = expandGlueS3Targets(s3Targets.([]interface{}))
crawlerTargets.DynamoDBTargets = expandGlueDynamoDBTargets(dynamodbTargets.([]interface{}))
crawlerTargets.JdbcTargets = expandGlueJdbcTargets(jdbcTargets.([]interface{}))
crawlerTargets.S3Targets = expandGlueS3Targets(s3Targets.([]interface{}))

return crawlerTargets, nil
}

// expandGlueDynamoDBTargets converts a list of raw dynamodb_target
// configuration maps into Glue API DynamoDBTarget values, preserving order.
// An empty input yields an empty, non-nil slice.
func expandGlueDynamoDBTargets(targets []interface{}) []*glue.DynamoDBTarget {
	expanded := make([]*glue.DynamoDBTarget, 0, len(targets))
	if len(targets) == 0 {
		return expanded
	}

	for _, raw := range targets {
		// Each element is expected to be one dynamodb_target block's attribute map.
		expanded = append(expanded, expandGlueDynamoDBTarget(raw.(map[string]interface{})))
	}
	return expanded
}

// expandGlueDynamoDBTarget builds a single Glue API DynamoDBTarget from one
// dynamodb_target configuration map, reading its "path" entry as a string.
func expandGlueDynamoDBTarget(cfg map[string]interface{}) *glue.DynamoDBTarget {
	path := cfg["path"].(string)

	return &glue.DynamoDBTarget{Path: aws.String(path)}
}

func expandGlueS3Targets(targets []interface{}) []*glue.S3Target {
if len(targets) < 1 {
return []*glue.S3Target{}
Expand Down Expand Up @@ -364,12 +400,16 @@ func resourceAwsGlueCrawlerRead(d *schema.ResourceData, meta interface{}) error
}

if crawlerOutput.Crawler.Targets != nil {
if err := d.Set("s3_target", flattenGlueS3Targets(crawlerOutput.Crawler.Targets.S3Targets)); err != nil {
log.Printf("[ERR] Error setting Glue S3 Targets: %s", err)
if err := d.Set("dynamodb_target", flattenGlueDynamoDBTargets(crawlerOutput.Crawler.Targets.DynamoDBTargets)); err != nil {
return fmt.Errorf("error setting dynamodb_target: %s", err)
}

if err := d.Set("jdbc_target", flattenGlueJdbcTargets(crawlerOutput.Crawler.Targets.JdbcTargets)); err != nil {
log.Printf("[ERR] Error setting Glue JDBC Targets: %s", err)
return fmt.Errorf("error setting jdbc_target: %s", err)
}

if err := d.Set("s3_target", flattenGlueS3Targets(crawlerOutput.Crawler.Targets.S3Targets)); err != nil {
return fmt.Errorf("error setting s3_target: %s", err)
}
}

Expand All @@ -389,6 +429,18 @@ func flattenGlueS3Targets(s3Targets []*glue.S3Target) []map[string]interface{} {
return result
}

// flattenGlueDynamoDBTargets converts Glue API DynamoDBTarget values into the
// list-of-maps form stored under the dynamodb_target attribute. Each map holds
// the target's "path"; the result is always a non-nil slice.
func flattenGlueDynamoDBTargets(dynamodbTargets []*glue.DynamoDBTarget) []map[string]interface{} {
	flattened := make([]map[string]interface{}, 0, len(dynamodbTargets))

	for i := range dynamodbTargets {
		flattened = append(flattened, map[string]interface{}{
			"path": aws.StringValue(dynamodbTargets[i].Path),
		})
	}

	return flattened
}

func flattenGlueJdbcTargets(jdbcTargets []*glue.JdbcTarget) []map[string]interface{} {
result := make([]map[string]interface{}, 0)

Expand Down
85 changes: 85 additions & 0 deletions aws/resource_aws_glue_crawler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,67 @@ func testSweepGlueCrawlers(region string) error {
return nil
}

// TestAccAWSGlueCrawler_DynamodbTarget is an acceptance test that creates a
// crawler with a single dynamodb_target pointing at "table1", updates the
// target path to "table2", and finally verifies the resource can be imported.
func TestAccAWSGlueCrawler_DynamodbTarget(t *testing.T) {
	var crawler glue.Crawler
	rName := acctest.RandomWithPrefix("tf-acc-test")
	resourceName := "aws_glue_crawler.test"

	// checkCrawler returns the full attribute check for a crawler whose only
	// DynamoDB target has the given path; every other attribute is expected
	// to be the same in both steps.
	checkCrawler := func(tablePath string) resource.TestCheckFunc {
		return resource.ComposeTestCheckFunc(
			testAccCheckAWSGlueCrawlerExists(resourceName, &crawler),
			resource.TestCheckResourceAttr(resourceName, "classifiers.#", "0"),
			resource.TestCheckResourceAttr(resourceName, "configuration", ""),
			resource.TestCheckResourceAttr(resourceName, "database_name", rName),
			resource.TestCheckResourceAttr(resourceName, "description", ""),
			resource.TestCheckResourceAttr(resourceName, "dynamodb_target.#", "1"),
			resource.TestCheckResourceAttr(resourceName, "dynamodb_target.0.path", tablePath),
			resource.TestCheckResourceAttr(resourceName, "jdbc_target.#", "0"),
			resource.TestCheckResourceAttr(resourceName, "name", rName),
			resource.TestCheckResourceAttr(resourceName, "role", rName),
			resource.TestCheckResourceAttr(resourceName, "s3_target.#", "0"),
			resource.TestCheckResourceAttr(resourceName, "schedule", ""),
			resource.TestCheckResourceAttr(resourceName, "schema_change_policy.#", "1"),
			resource.TestCheckResourceAttr(resourceName, "schema_change_policy.0.delete_behavior", "DEPRECATE_IN_DATABASE"),
			resource.TestCheckResourceAttr(resourceName, "schema_change_policy.0.update_behavior", "UPDATE_IN_DATABASE"),
			resource.TestCheckResourceAttr(resourceName, "table_prefix", ""),
		)
	}

	steps := []resource.TestStep{
		// Step 1: create the crawler against the first table.
		{
			Config: testAccGlueCrawlerConfig_DynamodbTarget(rName, "table1"),
			Check:  checkCrawler("table1"),
		},
		// Step 2: change only the DynamoDB target path.
		{
			Config: testAccGlueCrawlerConfig_DynamodbTarget(rName, "table2"),
			Check:  checkCrawler("table2"),
		},
		// Step 3: import the resource and verify the imported state.
		{
			ResourceName:      resourceName,
			ImportState:       true,
			ImportStateVerify: true,
		},
	}

	resource.Test(t, resource.TestCase{
		PreCheck:     func() { testAccPreCheck(t) },
		Providers:    testAccProviders,
		CheckDestroy: testAccCheckAWSGlueCrawlerDestroy,
		Steps:        steps,
	})
}

func TestAccAWSGlueCrawler_JdbcTarget(t *testing.T) {
var crawler glue.Crawler
rName := acctest.RandomWithPrefix("tf-acc-test")
Expand All @@ -82,6 +143,7 @@ func TestAccAWSGlueCrawler_JdbcTarget(t *testing.T) {
resource.TestCheckResourceAttr(resourceName, "configuration", ""),
resource.TestCheckResourceAttr(resourceName, "database_name", rName),
resource.TestCheckResourceAttr(resourceName, "description", ""),
resource.TestCheckResourceAttr(resourceName, "dynamodb_target.#", "0"),
resource.TestCheckResourceAttr(resourceName, "jdbc_target.#", "1"),
resource.TestCheckResourceAttr(resourceName, "jdbc_target.0.connection_name", rName),
resource.TestCheckResourceAttr(resourceName, "jdbc_target.0.exclusions.#", "0"),
Expand All @@ -104,6 +166,7 @@ func TestAccAWSGlueCrawler_JdbcTarget(t *testing.T) {
resource.TestCheckResourceAttr(resourceName, "configuration", ""),
resource.TestCheckResourceAttr(resourceName, "database_name", rName),
resource.TestCheckResourceAttr(resourceName, "description", ""),
resource.TestCheckResourceAttr(resourceName, "dynamodb_target.#", "0"),
resource.TestCheckResourceAttr(resourceName, "jdbc_target.#", "1"),
resource.TestCheckResourceAttr(resourceName, "jdbc_target.0.connection_name", rName),
resource.TestCheckResourceAttr(resourceName, "jdbc_target.0.exclusions.#", "0"),
Expand Down Expand Up @@ -238,6 +301,7 @@ func TestAccAWSGlueCrawler_S3Target(t *testing.T) {
resource.TestCheckResourceAttr(resourceName, "configuration", ""),
resource.TestCheckResourceAttr(resourceName, "database_name", rName),
resource.TestCheckResourceAttr(resourceName, "description", ""),
resource.TestCheckResourceAttr(resourceName, "dynamodb_target.#", "0"),
resource.TestCheckResourceAttr(resourceName, "jdbc_target.#", "0"),
resource.TestCheckResourceAttr(resourceName, "name", rName),
resource.TestCheckResourceAttr(resourceName, "role", rName),
Expand All @@ -259,6 +323,7 @@ func TestAccAWSGlueCrawler_S3Target(t *testing.T) {
resource.TestCheckResourceAttr(resourceName, "configuration", ""),
resource.TestCheckResourceAttr(resourceName, "database_name", rName),
resource.TestCheckResourceAttr(resourceName, "description", ""),
resource.TestCheckResourceAttr(resourceName, "dynamodb_target.#", "0"),
resource.TestCheckResourceAttr(resourceName, "jdbc_target.#", "0"),
resource.TestCheckResourceAttr(resourceName, "name", rName),
resource.TestCheckResourceAttr(resourceName, "role", rName),
Expand Down Expand Up @@ -811,6 +876,26 @@ resource "aws_glue_crawler" "test" {
`, rName, description, rName)
}

// testAccGlueCrawlerConfig_DynamodbTarget returns the base crawler test
// configuration for rName followed by a catalog database and a crawler that
// has a single dynamodb_target whose path is the given value.
func testAccGlueCrawlerConfig_DynamodbTarget(rName, path string) string {
	base := testAccGlueCrawlerConfig_Base(rName)

	// Format arguments, in order: database name, crawler name, target path.
	crawler := fmt.Sprintf(`
resource "aws_glue_catalog_database" "test" {
name = %q
}

resource "aws_glue_crawler" "test" {
depends_on = ["aws_iam_role_policy_attachment.test-AWSGlueServiceRole"]

database_name = "${aws_glue_catalog_database.test.name}"
name = %q
role = "${aws_iam_role.test.name}"

dynamodb_target {
path = %q
}
}
`, rName, rName, path)

	return base + crawler
}

func testAccGlueCrawlerConfig_JdbcTarget(rName, path string) string {
return testAccGlueCrawlerConfig_Base(rName) + fmt.Sprintf(`
resource "aws_glue_catalog_database" "test" {
Expand Down
21 changes: 20 additions & 1 deletion website/docs/r/glue_crawler.html.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,20 @@ Manages a Glue Crawler. More information can be found in the [AWS Glue Developer

## Example Usage

### DynamoDB Target

```hcl
resource "aws_glue_crawler" "example" {
database_name = "${aws_glue_catalog_database.example.name}"
name = "example"
role = "${aws_iam_role.example.name}"

dynamodb_target {
path = "table-name"
}
}
```

### JDBC Target

```hcl
Expand All @@ -36,7 +50,7 @@ resource "aws_glue_crawler" "example" {
role = "${aws_iam_role.example.name}"

s3_target {
path = "s3://${aws_s3_bucket.example.bucket}
path = "s3://${aws_s3_bucket.example.bucket}"
}
}
```
Expand All @@ -53,12 +67,17 @@ The following arguments are supported:
* `classifiers` (Optional) List of custom classifiers. By default, all AWS classifiers are included in a crawl, but these custom classifiers always override the default classifiers for a given classification.
* `configuration` (Optional) JSON string of configuration information.
* `description` (Optional) Description of the crawler.
* `dynamodb_target` (Optional) List of nested DynamoDB target arguments. See below.
* `jdbc_target` (Optional) List of nested JDBC target arguments. See below.
* `s3_target` (Optional) List of nested Amazon S3 target arguments. See below.
* `schedule` (Optional) A cron expression used to specify the schedule. For more information, see [Time-Based Schedules for Jobs and Crawlers](https://docs.aws.amazon.com/glue/latest/dg/monitor-data-warehouse-schedule.html). For example, to run something every day at 12:15 UTC, you would specify: `cron(15 12 * * ? *)`.
* `schema_change_policy` (Optional) Policy for the crawler's update and deletion behavior.
* `table_prefix` (Optional) The table prefix used for catalog tables that are created.

### dynamodb_target Argument Reference

* `path` - (Required) The name of the DynamoDB table to crawl.

### jdbc_target Argument Reference

* `connection_name` - (Required) The name of the connection to use to connect to the JDBC target.
Expand Down