Skip to content

Commit 053ac33

Browse files
authored
fix: readme instructions for worker running (#2266)
1 parent dbb4caa commit 053ac33

File tree

3 files changed

+4
-8
lines changed

3 files changed

+4
-8
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -115,11 +115,11 @@ Dynamo provides a simple way to spin up a local set of inference components incl
115 115

116 116
```
117 117
# Start an OpenAI compatible HTTP server, a pre-processor (prompt templating and tokenization) and a router:
118-
python -m dynamo.frontend [--http-port 8080]
118+
python -m dynamo.frontend --http-port 8080
119 119
120 120
# Start the SGLang engine, connecting to NATS and etcd to receive requests. You can run several of these,
121 121
# both for the same model and for multiple models. The frontend node will discover them.
122-
python -m dynamo.sglang.worker deepseek-ai/DeepSeek-R1-Distill-Llama-8B
122+
python -m dynamo.sglang.worker --model deepseek-ai/DeepSeek-R1-Distill-Llama-8B --skip-tokenizer-init
123 123
```
124 124

125 125
#### Send a Request

components/backends/sglang/docs/dsr1-wideep-gb200.md

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -67,8 +67,6 @@ docker run \
67 67
```bash
68 68
# run ingress
69 69
python3 -m dynamo.frontend --http-port=8000 &
70-
# optionally run the http server that allows you to flush the kv cache for all workers (see benchmarking section below)
71-
python3 utils/sgl_http_server.py --ns dynamo &
72 70
# run prefill worker
73 71
SGLANG_DEEPEP_NUM_MAX_DISPATCH_TOKENS_PER_RANK=2048 \
74 72
MC_TE_METRIC=true \
@@ -82,15 +80,14 @@ NCCL_CUMEM_ENABLE=1 \
82 80
SGLANG_USE_MESSAGE_QUEUE_BROADCASTER=0 \
83 81
SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=1 \
84 82
PYTHONUNBUFFERED=1 \
85-
python3 components/worker.py \
83+
python3 -m dynamo.sglang.worker \
86 84
--served-model-name deepseek-ai/DeepSeek-R1 \
87 85
--model-path /model/ \
88 86
--skip-tokenizer-init \
89 87
--trust-remote-code \
90 88
--disaggregation-mode prefill \
91 89
--dist-init-addr ${HEAD_PREFILL_NODE_IP}:29500 \
92 90
--disaggregation-bootstrap-port 30001 \
93-
--disaggregation-transfer-backend nixl \
94 91
--nnodes 2 \
95 92
--node-rank 0 \
96 93
--tp-size 8 \
@@ -134,7 +131,7 @@ NCCL_CUMEM_ENABLE=1 \
134 131
SGLANG_USE_MESSAGE_QUEUE_BROADCASTER=0 \
135 132
SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK=1 \
136 133
PYTHONUNBUFFERED=1 \
137-
python3 components/decode_worker.py \
134+
python3 -m dynamo.sglang.decode_worker \
138 135
--served-model-name deepseek-ai/DeepSeek-R1 \
139 136
--model-path /model/ \
140 137
--skip-tokenizer-init \

components/backends/sglang/slurm_jobs/scripts/gb200.sh

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,7 +94,6 @@ if [ "$mode" = "prefill" ]; then
94 94
--disaggregation-mode prefill \
95 95
--dist-init-addr "$HOST_IP:$PORT" \
96 96
--disaggregation-bootstrap-port 30001 \
97-
--disaggregation-transfer-backend nixl \
98 97
--nnodes "$TOTAL_NODES" \
99 98
--node-rank "$RANK" \
100 99
--tp-size "$TOTAL_GPUS" \

0 commit comments

Comments
 (0)