-
Notifications
You must be signed in to change notification settings - Fork 11
/
config.properties
106 lines (92 loc) · 4.24 KB
/
config.properties
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/bin/bash
# This file contains all customizable configuration items for the project
# core version to be used at re:Invent 23 builder sessions
######################################################################
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. #
# SPDX-License-Identifier: MIT-0 #
######################################################################
# Project settings
# verbose - logging verbosity toggle.
# NOTE(review): the consumer of this flag is not in this file; presumably project scripts
# print extra detail when it is "true" - confirm the accepted values against the scripts.
verbose=true
# Model settings
# huggingface_model_name - identifier of the model to use.
# It is reused further down as the default model_image_name and as the first part of app_name,
# so changing it also changes those derived values.
huggingface_model_name=bert-base-multilingual-cased
# huggingface_tokenizer_class, huggingface_model_class - names of the tokenizer class and model class.
# NOTE(review): presumably Hugging Face transformers class names that must match the model above -
# confirm against the code that loads them.
huggingface_tokenizer_class=BertTokenizer
huggingface_model_class=BertForQuestionAnswering
# Compiler settings
# processor = cpu|gpu|inf1|inf2|graviton
# The value is appended to the base and model image tags and to app_name further down,
# and it selects which trace_opts_<processor> setting applies.
processor=graviton
# pipeline_cores, sequence_length, batch_size - numeric compile/trace parameters.
# NOTE(review): their consumers are not in this file; presumably cores per model pipeline,
# token sequence length and inference batch size - confirm before changing.
pipeline_cores=1
sequence_length=128
batch_size=1
# test - NOTE(review): undocumented in this file; confirm what it toggles and which values are accepted.
test=True
# account is the current AWS account ID. It is determined automatically by running the AWS CLI
# (aws sts get-caller-identity) whenever a shell evaluates this file.
# NOTE(review): if the aws command is missing or has no valid credentials, account is left empty
# and the default registry setting below loses its account prefix - confirm callers handle that.
account=$(aws sts get-caller-identity --query Account --output text)
# region is used to log in if the registry is ecr; it is also part of the default registry URI below.
region=us-west-2
# Container settings
# registry - URI prefix placed in front of image names.
# Default is the private ECR registry in the current AWS account, built from account and region above.
# If registry is set, include the registry URI up to the image name and end the setting with /
# The setting below is for locally built images uploaded to the private ECR registry of this account.
registry=${account}.dkr.ecr.${region}.amazonaws.com/
# registry_type = ecr - type of the registry; when it is ecr, the region setting above is used to log in.
registry_type=ecr
# Base image name and tag. The tag starts with ":" and ends with the processor value.
# NOTE(review): presumably the full image reference is formed by concatenating
# registry, image name and image tag - confirm against the build scripts.
base_image_name=aws-do-inference-base
base_image_tag=:v15-${processor}
# Model image name and tag. The name defaults to huggingface_model_name; the tag follows the base image tag format.
model_image_name=${huggingface_model_name}
model_image_tag=:v15-${processor}
# To use the pre-built model image from the public ECR registry (may require authentication),
# uncomment the following settings so that they override registry, model_image_name and model_image_tag above.
#registry=public.ecr.aws/a2u7h5w3/
#model_image_name=bert-base-workshop
#model_image_tag=:v15-${processor}
# Trace settings
# trace_opts_$processor is a processor-specific setting used by the docker run command in the trace.sh script.
# One variable is defined for each supported processor value (cpu, gpu, inf1, inf2, graviton);
# the one matching your processor value is selected automatically, so there is no need to edit the others.
# Each value is a string of extra docker run arguments; an empty string adds no arguments.
trace_opts_cpu=""
# NOTE(review): docker run --gpus accepts a GPU count, "all" or device=<id>;
# confirm that "0" exposes the intended GPU on the trace host.
trace_opts_gpu="--gpus 0"
# inf1 and inf2 use the same options: set AWS_NEURON_VISIBLE_DEVICES=ALL inside the container and run it privileged.
trace_opts_inf1="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
trace_opts_inf2="-e AWS_NEURON_VISIBLE_DEVICES=ALL --privileged"
trace_opts_graviton=""
# Deployment settings
# Some of these settings apply only when the runtime is kubernetes.
# runtime = docker | kubernetes - where the model servers are deployed
runtime=kubernetes
# num_models - number of models per model server
num_models=16
# quiet = False | True - sets whether the model server should print logs
# NOTE(review): presumably True suppresses the logs - confirm against the model server code.
quiet=False
# postprocess = True | False - sets whether tensors returned from the model should be
# translated back to text (True) or returned as they are (False)
postprocess=True
# service_port - port on which the model service will be exposed (8080 here)
service_port=8080
# Kubernetes-specific deployment settings
# instance_type = c5.xxx | g4dn.xlarge | g4dn.12xlarge | inf1.xlarge | inf2.8xlarge | c7g.4xlarge...
# A node group with the specified instance_type must exist in the cluster.
# The instance type must have the processor configured above.
# Example: processor=graviton, instance_type=c7g.4xlarge
instance_type=c7g.4xlarge
# num_servers - number of model servers to deploy
# Note that more than one model server can run on a node with multiple cpu/gpu/inferentia chips.
# Example: 4 model servers fit on one inf1.6xlarge instance as it has 4 inferentia chips.
num_servers=1
# namespace - Kubernetes namespace used for the deployment
namespace=mpi
# app_name - Kubernetes app name, derived as <huggingface_model_name>-<processor>
# (with the values in this file: bert-base-multilingual-cased-graviton)
app_name=${huggingface_model_name}-${processor}
# app_dir - directory name derived as app-<app_name>-<instance_type>
# (with the values in this file: app-bert-base-multilingual-cased-graviton-c7g.4xlarge)
# NOTE(review): presumably the directory where the deployment manifests are stored - confirm against the deploy scripts.
app_dir=app-${app_name}-${instance_type}
# Test image settings
# Option 1 - locally built test image: uncomment the two settings below and comment out option 2.
#test_image_name=test-${huggingface_model_name}
#test_image_tag=:v15-cpu
# Option 2 (active) - pre-built test image for CPU architecture, available in the public ECR registry (may require authentication):
test_image_name=bert-base-workshop
test_image_tag=:test-v15-cpu
# request_frequency - time to sleep between two consecutive requests in curl tests
# NOTE(review): the unit is not stated here; presumably seconds - confirm against the test scripts.
request_frequency=0.01
# num_requests - stop the random request test after this number of requests
num_requests=30
# num_test_containers - number of test containers to launch (default=1), use > 1 for scale testing.
# This file currently sets it to 5.
num_test_containers=5
# test_instance_type - when runtime is kubernetes, node instance type on which test pods will run
test_instance_type=c5.4xlarge
# test_namespace - when runtime is kubernetes, namespace where test pods will be created
test_namespace=mpi
# test_dir - when runtime is kubernetes, directory where test job/pod manifests are stored.
# It is derived as app-<test_image_name>-<instance_type>, i.e. from the instance type of the model servers,
# not from test_instance_type (with the values in this file: app-bert-base-workshop-c7g.4xlarge).
# NOTE(review): confirm that using instance_type rather than test_instance_type here is intentional.
test_dir=app-${test_image_name}-${instance_type}