-
Notifications
You must be signed in to change notification settings - Fork 0
/
bedrock.hcl
107 lines (101 loc) · 3.59 KB
/
bedrock.hcl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
version = "1.0"
train {
step preprocess {
image = "quay.io/basisai/workload-standard:v0.2.2"
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements.txt",
]
script = [
{spark-submit {
script = "preprocess.py"
conf {
spark.kubernetes.container.image = "quay.io/basisai/workload-standard:v0.2.2"
spark.kubernetes.pyspark.pythonVersion = "3"
spark.driver.memory = "4g"
spark.driver.cores = "2"
spark.executor.instances = "2"
spark.executor.memory = "4g"
spark.executor.cores = "2"
spark.memory.fraction = "0.5"
spark.sql.parquet.compression.codec = "gzip"
spark.hadoop.fs.AbstractFileSystem.gs.impl = "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS"
spark.hadoop.google.cloud.auth.service.account.enable = "true"
}
}}
]
resources {
cpu = "0.5"
memory = "1G"
}
}
step generate_features {
image = "quay.io/basisai/workload-standard:v0.2.2"
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements.txt",
]
script = [
{spark-submit {
script = "generate_features.py"
conf {
spark.kubernetes.container.image = "quay.io/basisai/workload-standard:v0.2.2"
spark.kubernetes.pyspark.pythonVersion = "3"
spark.driver.memory = "4g"
spark.driver.cores = "2"
spark.executor.instances = "2"
spark.executor.memory = "4g"
spark.executor.cores = "2"
spark.memory.fraction = "0.5"
spark.sql.parquet.compression.codec = "gzip"
spark.hadoop.fs.AbstractFileSystem.gs.impl = "com.google.cloud.hadoop.fs.gcs.GoogleHadoopFS"
spark.hadoop.google.cloud.auth.service.account.enable = "true"
}
}}
]
resources {
cpu = "0.5"
memory = "1G"
}
depends_on = ["preprocess"]
}
step train {
image = "quay.io/basisai/workload-standard:v0.1.2"
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements.txt",
]
script = [{sh = ["python3 train.py"]}]
resources {
cpu = "0.5"
memory = "1G"
}
depends_on = ["generate_features"]
}
parameters {
RAW_SUBSCRIBERS_DATA = "gs://bedrock-sample/churn_data/subscribers.gz.parquet"
RAW_CALLS_DATA = "gs://bedrock-sample/churn_data/all_calls.gz.parquet"
TEMP_DATA_BUCKET = "gs://span-temp-production/"
PREPROCESSED_DATA = "churn_data/preprocessed"
FEATURES_DATA = "churn_data/features.csv"
BIGQUERY_PROJECT = "span-production"
BIGQUERY_DATASET = "churn"
DEST_SUBSCRIBER_SCORE_TABLE = "subscriber_score"
OUTPUT_MODEL_NAME = "lgb_model.pkl"
}
}
serve {
image = "python:3.7"
install = [
"pip3 install --upgrade pip",
"pip3 install -r requirements-serve.txt",
]
script = [
{sh = [
"gunicorn --bind=:${BEDROCK_SERVER_PORT:-8080} --worker-class=gthread --workers=${WORKERS} --timeout=300 --preload serve_http:app"
]}
]
parameters {
WORKERS = "1"
}
}