This repository has been archived by the owner on Feb 14, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 2
/
pipeline.json.erb
109 lines (98 loc) · 3.94 KB
/
pipeline.json.erb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
{
"objects": [
{
"id": "S3Transactions",
"type": "S3DataNode",
"filePath": "s3://swipely-reinvent-demo/data/transactions.csv"
},
{
"id": "S3SalesByDay",
"type": "S3DataNode",
"directoryPath": "s3://swipely-reinvent-demo/run/#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/sales_by_day"
},
{
"id": "RDSSalesByDay",
"type": "MySqlDataNode",
"table": "sales_by_day",
"username": "<%= username %>",
"*password": "<%= password %>",
"connectionString": "jdbc:mysql://<%= host %>:3306/<%= database %>",
"insertQuery": "INSERT INTO #{table} (store, day, total_sales) VALUES (?, ?, ? / 100);"
},
{
"id": "GenerateSalesByDay",
"type": "EmrActivity",
"runsOn": { "ref": "SalesByDayEMRCluster" },
"input": { "ref": "S3Transactions" },
"output": { "ref": "S3SalesByDay" },
"step": "/home/hadoop/contrib/streaming/hadoop-streaming.jar,-input,s3n://swipely-reinvent-demo/data/transactions.csv,-output,s3://swipely-reinvent-demo/run/#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/sales_by_day,-mapper,s3n://swipely-reinvent-demo/bin/sales_by_day_mapper.rb,-reducer,s3n://swipely-reinvent-demo/bin/sales_by_day_reducer.rb"
},
{
"id": "CopySalesByDayToAppDB",
"type": "CopyActivity",
"runsOn": { "ref": "SalesByDayEC2Resource" },
"dependsOn": { "ref": "BootstrapEnvironment" },
"input": { "ref": "S3SalesByDay" },
"output": { "ref": "RDSSalesByDay" }
},
{
"id": "SalesByDayEMRCluster",
"type": "EmrCluster",
"masterInstanceType": "m1.large",
"taskInstanceType": "m1.large",
"coreInstanceType": "m1.large",
"coreInstanceCount": "4",
"terminateAfter": "6 hours",
"enableDebugging": "true",
"installHive": "0.11.0.1",
"bootstrapAction": "s3://swipely-reinvent-demo/bin/bootstrap_emr.sh",
"emrLogUri": "s3://swipely-reinvent-demo/run/#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/log/SalesByDayEMRLogs",
"logUri": "s3://swipely-reinvent-demo/run/#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/log/SalesByDayEMRCluster"
},
{
"id": "SalesByDayEC2Resource",
"type": "Ec2Resource",
"instanceType": "m1.large",
"logUri": "s3://swipely-reinvent-demo/run/#{format(@scheduledStartTime,'YYYY-MM-dd_HHmmss')}/log/SalesByDayEC2Resource",
"terminateAfter": "6 hours",
"keyPair": "key-pipe-demo",
"securityGroups": [ "secgrp-pipe-demo" ]
},
{
"id": "BootstrapEnvironment",
"type": "ShellCommandActivity",
"runsOn": { "ref": "SalesByDayEC2Resource" },
"scriptUri": "s3://swipely-reinvent-demo/bin/bootstrap_ec2.sh"
},
{
"id": "Default",
"role": "role-pipe-demo",
"resourceRole": "role-pipe-demo-resource",
"failureAndRerunMode": "cascade",
"scheduleType": "cron",
"schedule": { "ref": "Nightly" },
"onSuccess": { "ref": "SuccessNotify" },
"onFail": { "ref": "FailureNotify" }
},
{
"id": "Nightly",
"type": "Schedule",
"startDateTime": "<%= start_date_time %>",
"period": "<%= period %>"
},
{
"id": "SuccessNotify",
"type": "SnsAlarm",
"topicArn": "arn:aws:sns:us-west-2:969468027173:topic-pipe-demo",
"subject": "[SalesByDay] SUCCESS: pipeline step #{node.name}",
"message": "SalesByDay pipeline step #{node.name} SUCCEEDED.\n\nScheduled start: #{node.@scheduledStartTime}\nActual start: #{node.@actualStartTime}\nActual end:\n#{node.@actualEndTime}"
},
{
"id": "FailureNotify",
"type": "SnsAlarm",
"topicArn": "arn:aws:sns:us-west-2:969468027173:topic-pipe-demo",
"subject": "[SalesByDay] FAILURE: pipeline step #{node.name}",
"message": "SalesByDay pipeline step FAILED #{node.name}\n\nScheduled start: #{node.@scheduledStartTime}\nError message:\n#{node.errorMessage}\nError stack trace:\n#{node.errorStackTrace}"
}
]
}