forked from skypilot-org/skypilot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchatbot-hf.yaml
40 lines (32 loc) · 1.12 KB
/
chatbot-hf.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
resources:
accelerators: A100:1
disk_size: 1024
disk_tier: best
memory: 32+
envs:
MODEL_SIZE: 7
HF_TOKEN: # TODO: Fill with your own huggingface token, or use --env to pass.
setup: |
conda activate chatbot
if [ $? -ne 0 ]; then
conda create -n chatbot python=3.9 -y
conda activate chatbot
fi
# Install dependencies
pip install "fschat[model_worker,webui]"
python -c "import huggingface_hub; huggingface_hub.login('${HF_TOKEN}')"
run: |
conda activate chatbot
echo 'Starting controller...'
python -u -m fastchat.serve.controller --host 127.0.0.1 > ~/controller.log 2>&1 &
sleep 10
echo 'Starting model worker...'
python -u -m fastchat.serve.model_worker \
--model-path meta-llama/Llama-2-${MODEL_SIZE}b-chat-hf \
--num-gpus $SKYPILOT_NUM_GPUS_PER_NODE 2>&1 \
--host 127.0.0.1 \
| tee model_worker.log &
echo 'Waiting for model worker to start...'
while ! `cat model_worker.log | grep -q 'Uvicorn running on'`; do sleep 1; done
echo 'Starting gradio server...'
python -u -m fastchat.serve.gradio_web_server --share | tee ~/gradio.log