#!/usr/bin/env bash
# Exit on error. Append "|| true" if you expect an error.
set -o errexit
# Exit on error inside any functions or subshells.
set -o errtrace
# Do not allow use of undefined vars. Use ${VAR:-} to use an undefined VAR
#set -o nounset
# Catch the error in case mysqldump fails (but gzip succeeds) in `mysqldump |gzip`
set -o pipefail
# Turn on traces, useful while debugging but commented out by default
# set -o xtrace
# Set the profile variable to an empty value by default; it is reassigned in the while loop below if passed as a parameter
PROFILE=""
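# Example invocation (illustrative placeholder values; adjust bucket names, ARNs and IDs to your environment):
#   ./init.sh --dataset-name example-dataset --product-name "Example Product" \
#     --asset-bucket example-asset-bucket --manifest-bucket example-manifest-bucket \
#     --region us-east-1 --first-revision true --profile default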
while [[ $# -gt 0 ]]; do
  opt="${1}"
  shift
  case ${opt} in
    "--rdp-role-arn") export REARC_DATA_PLATFORM_ROLE_ARN="$1"; shift;;
    "--rdp-external-id") export REARC_DATA_PLATFORM_EXTERNAL_ID="$1"; shift;;
    "--customer-id") export CUSTOMER_ID="$1"; shift;;
    "--schedule-cron") export SCHEDULE_CRON="$1"; shift;;
    "--asset-bucket") export ASSET_BUCKET="$1"; shift;;
    "--manifest-bucket") export MANIFEST_BUCKET="$1"; shift;;
    "--dataset-name") export DATASET_NAME="$1"; shift;;
    "--product-name") export PRODUCT_NAME="$1"; shift;;
    "--product-id") export PRODUCT_ID="$1"; shift;;
    "--dataset-arn") export DATASET_ARN="$1"; shift;;
    "--region") export REGION="$1"; shift;;
    "--first-revision") export FIRST_REVISION="$1"; shift;;
    "--products-info-file") export PRODUCTS_INFO_FILE="$1"; shift;;
    "--source-url") export SOURCE_URL="$1"; shift;;
    "--product-code") export PRODUCT_CODE="$1"; shift;;
    "--product-url") export PRODUCT_URL="$1"; shift;;
    "--profile") PROFILE=" --profile $1"; shift;;
    *) echo "ERROR: Invalid option: \"${opt}\"" >&2; exit 1;;
  esac
done
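# Print the resolved configuration so a run can be reviewed before anything is created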
echo "------------------------------------------------------------------------------"
echo "REARC_DATA_PLATFORM_ROLE_ARN: $REARC_DATA_PLATFORM_ROLE_ARN"
echo "REARC_DATA_PLATFORM_EXTERNAL_ID: $REARC_DATA_PLATFORM_EXTERNAL_ID"
echo "CUSTOMER_ID: $CUSTOMER_ID"
echo "ASSET_BUCKET: $ASSET_BUCKET"
echo "MANIFEST_BUCKET: $MANIFEST_BUCKET"
echo "DATASET_NAME: $DATASET_NAME"
echo "DATASET_ARN: $DATASET_ARN"
echo "PRODUCT_NAME: $PRODUCT_NAME"
echo "PRODUCT_ID: $PRODUCT_ID"
echo "SCHEDULE_CRON: $SCHEDULE_CRON"
echo "REGION: $REGION"
echo "PROFILE: $PROFILE"
echo "PRODUCTS_INFO_FILE: $PRODUCTS_INFO_FILE"
echo "SOURCE_URL: $SOURCE_URL"
echo "PRODUCT_CODE: $PRODUCT_CODE"
echo "PRODUCT_URL: $PRODUCT_URL"
echo "FIRST_REVISION: $FIRST_REVISION"
echo "------------------------------------------------------------------------------"
while [[ ${#DATASET_NAME} -gt 53 ]]; do
  echo "dataset-name must be 53 characters or fewer; enter a shorter name:"
  read -p "New dataset-name: " DATASET_NAME
  case ${#DATASET_NAME} in
    [1-9]|[1-4][0-9]|5[0-3]) break;;
    * ) echo "Enter a shorter dataset-name";;
  esac
done
while [[ ${#PRODUCT_NAME} -gt 72 ]]; do
  echo "product-name must be 72 characters or fewer; enter a shorter name:"
  read -p "New product-name: " PRODUCT_NAME
  case ${#PRODUCT_NAME} in
    [1-9]|[1-6][0-9]|7[0-2]) break;;
    * ) echo "Enter a shorter product-name";;
  esac
done
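# Package the pre-processing Lambda code, excluding dependency metadata, binaries and Python caches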
echo "creating a pre-processing zip package, these commands may need to be adjusted depending on folder structure and dependencies"
(cd pre-processing/pre-processing-code && zip -r pre-processing-code.zip . -x "*.dist-info/*" -x "bin/*" -x "**/__pycache__/*")
echo "uploading pre-preprocessing.zip to s3"
aws s3 cp pre-processing/pre-processing-code/pre-processing-code.zip s3://$ASSET_BUCKET/$DATASET_NAME/automation/pre-processing-code.zip --region "$REGION" $PROFILE
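# First run (--first-revision true): create the ADX data set and upload the description assets.
# Subsequent runs: deploy the pre-processing CloudFormation stack and trigger the source Lambda.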
if [[ "$FIRST_REVISION" == "true" ]]; then
echo "creating dataset on ADX"
DATASET_COMMAND="aws dataexchange create-data-set --asset-type "S3_SNAPSHOT" --description file://dataset-description.md --name \"${PRODUCT_NAME}\" --region $REGION --output json $PROFILE"
DATASET_OUTPUT=$(eval $DATASET_COMMAND)
DATASET_ARN=$(echo $DATASET_OUTPUT | tr '\r\n' ' ' | jq -r '.Arn')
DATASET_ID=$(echo $DATASET_OUTPUT | tr '\r\n' ' ' | jq -r '.Id')
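  # Optionally record the new data set's identifiers so they can be referenced later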
if [[ -n "$PRODUCTS_INFO_FILE" ]]; then
echo "{\"PRODUCT_CODE\":\"${PRODUCT_CODE}\",\"PRODUCT_URL\":\"${PRODUCT_URL}\",\"SOURCE_URL\": \"${SOURCE_URL}\",\"DATASET_NAME\":\"${DATASET_NAME}\",\"DATASET_ARN\":\"${DATASET_ARN}\",\"DATASET_ID\":\"${DATASET_ID}\",\"PRODUCT_NAME\":\"${PRODUCT_NAME}\",\"PRODUCT_ID\":\"${PRODUCT_ID}\",\"SCHEDULE_CRON\":\"${SCHEDULE_CRON}\"}" >> "$PRODUCTS_INFO_FILE"
fi
echo "Uploading intial assets to asset_bucket for the first revision"
aws s3 cp product-description.md "s3://$ASSET_BUCKET/$DATASET_NAME/dataset/product-description.md"
aws s3 cp dataset-description.md "s3://$ASSET_BUCKET/$DATASET_NAME/dataset/dataset-description.md"
  # Create the first revision container on the new data set (assets are added to it manually below)
  REVISION_COMMAND="aws dataexchange create-revision --data-set-id \"${DATASET_ID}\" --region $REGION --output json $PROFILE"
  REVISION_OUTPUT=$(eval $REVISION_COMMAND)
echo "Manually, from ADX console, create the first revision of the dataset using
product-description.md and dataset-description.md files and
then create the ADX product.
Then manually re-run the pre-processing CloudFormation template using the following params:"
echo ""
echo "AssetBucket: $ASSET_BUCKET"
echo "ManifestBucket: $MANIFEST_BUCKET"
echo "CustomerId: $CUSTOMER_ID"
echo "DataSetName: $DATASET_NAME"
echo "DataSetArn: $DATASET_ARN"
echo "Region: $REGION"
echo "FIRST_REVISION: false"
echo ""
echo "For the ProductId param use the Product ID of the ADX product"
else
  DATASET_ID=$(echo $DATASET_ARN | awk -F/ '{print $NF}')
  echo "creating pre-processing CloudFormation stack"
  CFN_STACK_NAME="producer-${DATASET_NAME}-preprocessing"
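  # CAPABILITY_IAM/CAPABILITY_NAMED_IAM allow the template to create IAM resources;
  # CAPABILITY_AUTO_EXPAND allows it to use macros/nested stacks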
  aws cloudformation create-stack --stack-name "$CFN_STACK_NAME" \
    --template-body file://pre-processing/pre-processing-cfn.yaml \
    --parameters \
      ParameterKey=RearcDataPlatformRoleArn,ParameterValue="$REARC_DATA_PLATFORM_ROLE_ARN" \
      ParameterKey=RearcDataPlatformExternalId,ParameterValue="$REARC_DATA_PLATFORM_EXTERNAL_ID" \
      ParameterKey=AssetBucket,ParameterValue="$ASSET_BUCKET" \
      ParameterKey=ManifestBucket,ParameterValue="$MANIFEST_BUCKET" \
      ParameterKey=CustomerId,ParameterValue="$CUSTOMER_ID" \
      ParameterKey=DataSetName,ParameterValue="$DATASET_NAME" \
      ParameterKey=DataSetArn,ParameterValue="$DATASET_ARN" \
      ParameterKey=ProductId,ParameterValue="$PRODUCT_ID" \
      ParameterKey=Region,ParameterValue="$REGION" \
      ParameterKey=ScheduleCron,ParameterValue="'$SCHEDULE_CRON'" \
    --region "$REGION" --capabilities "CAPABILITY_AUTO_EXPAND" "CAPABILITY_NAMED_IAM" "CAPABILITY_IAM" $PROFILE
echo "waiting for cloudformation stack creation to complete"
aws cloudformation wait stack-create-complete --stack-name "$CFN_STACK_NAME" --region "$REGION" $PROFILE
if [[ $? -ne 0 ]]; then
echo "Cloudformation stack creation failed"
exit 1
fi
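  # The Lambda's response payload is written to response.json in the current directory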
echo "invoking the pre-processing lambda function to upload manifest file to manifest bucket"
LAMBDA_FUNCTION_NAME="source-for-${DATASET_NAME}"
# AWS CLI version 2 changes require explicitly declairing `--cli-binary-format raw-in-base64-out` for the format of the `--payload`
aws lambda invoke --function-name "$LAMBDA_FUNCTION_NAME" --invocation-type "RequestResponse" --payload '{ "test": "event" }' response.json --cli-binary-format raw-in-base64-out --region "$REGION" --query 'StatusCode' --output text $PROFILE
if [[ $? -ne 0 ]]; then
echo "Lambda invocation failed"
exit 1
fi
fi