diff --git a/benchmarks/README.md b/benchmarks/README.md index 4a8ab895e18e..ecab570bb31c 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -146,10 +146,9 @@ python3 vllm/benchmarks/benchmark_serving.py \ ``` bash VLLM_USE_V1=1 vllm serve meta-llama/Meta-Llama-3-8B-Instruct \ - --speculative-model "[ngram]" \ --ngram_prompt_lookup_min 2 \ --ngram-prompt-lookup-max 5 \ - --num_speculative_tokens 5 + --speculative_config '{"model": "[ngram]", "num_speculative_tokens": 5}' ``` ``` bash @@ -274,10 +273,9 @@ python3 vllm/benchmarks/benchmark_throughput.py \ --output-len=100 \ --num-prompts=2048 \ --async-engine \ - --speculative-model="[ngram]" \ --ngram_prompt_lookup_min=2 \ --ngram-prompt-lookup-max=5 \ - --num_speculative_tokens=5 + --speculative_config '{"model": "[ngram]", "num_speculative_tokens": 5}' ``` ```