This repository has been archived by the owner on May 28, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 94
/
rayllm-cluster.yaml
87 lines (80 loc) · 2.39 KB
/
rayllm-cluster.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# A unique identifier for the head node and workers of this cluster.
cluster_name: rayllm-deploy

# Cloud-provider specific configuration.
provider:
  type: aws
  region: us-west-2
  # Terminate (rather than stop) nodes on scale-down so no stopped
  # instances linger. Canonical lowercase boolean (yamllint `truthy`).
  cache_stopped_nodes: false

# All Ray processes run inside this container on every node.
docker:
  image: "anyscale/ray-llm:latest"
  container_name: "rayllm"
  run_options:
    # Clear the image's default entrypoint so the autoscaler can run
    # its own commands inside the container.
    - --entrypoint ""
# Every command below wraps itself in `(conda activate || true)` so it
# executes inside the image's Python 3.10 conda environment; the
# `|| true` keeps the command from failing where no env exists.
setup_commands:
  - echo "(conda activate || true)" >> ~/.bashrc

head_setup_commands:
  # boto3 is needed on the head for the AWS autoscaler.
  - (conda activate || true) && pip install 'boto3>=1.4.8'

# No extra setup required on workers beyond the common setup_commands.
worker_setup_commands: []

head_start_ray_commands:
  - (conda activate || true) && ray stop
  # Raise the fd limit before starting Ray; expose the dashboard on all
  # interfaces so it is reachable from outside the node.
  - (conda activate || true) && ulimit -n 65536; ray start --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host=0.0.0.0

worker_start_ray_commands:
  - (conda activate || true) && ray stop
  # Persist RAY_HEAD_IP into ~/.bashrc first so it remains visible in
  # shells opened after `conda activate`.
  - export RAY_HEAD_IP && echo "export RAY_HEAD_IP=$RAY_HEAD_IP" >> ~/.bashrc && (conda activate || true) && ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-manager-port=8076
# Node types the autoscaler may launch. Custom `resources` entries
# (instance_type_*, accelerator_type_*) let Ray tasks target a specific
# instance family or GPU generation.
# NOTE(review): the autoscaler's file-level `max_workers` (default 2)
# is not set here — confirm it is provided elsewhere or the per-type
# limits below will be capped by that default.
available_node_types:
  head_node_type:
    node_config:
      InstanceType: m5.xlarge
      # Anchor reused by every worker type: one 256 GiB gp3 root volume.
      BlockDeviceMappings: &mount
        - DeviceName: /dev/sda1
          Ebs:
            VolumeSize: 256
            VolumeType: gp3
    resources:
      head_node: 1
      instance_type_m5: 1

  # A10G workers (4 GPUs per g5.12xlarge).
  gpu_worker_g5:
    node_config:
      InstanceType: g5.12xlarge
      BlockDeviceMappings: *mount
    resources:
      worker_node: 1
      instance_type_g5: 1
      accelerator_type_a10: 1
    min_workers: 0
    max_workers: 8

  # A100-40GB workers (8 GPUs per p4d.24xlarge).
  gpu_worker_p4d:
    node_config:
      InstanceType: p4d.24xlarge
      BlockDeviceMappings: *mount
    resources:
      worker_node: 1
      instance_type_p4d: 1
      accelerator_type_a100_40g: 1
    min_workers: 0
    max_workers: 8

  # A100-80GB workers (8 GPUs per p4de.24xlarge).
  gpu_worker_p4de:
    node_config:
      InstanceType: p4de.24xlarge
      BlockDeviceMappings: *mount
    resources:
      worker_node: 1
      instance_type_p4de: 1
      accelerator_type_a100_80g: 1
    min_workers: 0
    max_workers: 8

  # CPU-only workers for non-GPU tasks.
  cpu_worker:
    node_config:
      InstanceType: m5.xlarge
      BlockDeviceMappings: *mount
    resources:
      worker_node: 1
      instance_type_m5: 1
      accelerator_type_cpu: 1
    min_workers: 0
    max_workers: 16

# Which entry of available_node_types the head node uses.
head_node_type: head_node_type