diff --git a/benchmarks/benchmark_prefix_caching.py b/benchmarks/benchmark_prefix_caching.py index 5e9381f712e10..3ab421a89c935 100644 --- a/benchmarks/benchmark_prefix_caching.py +++ b/benchmarks/benchmark_prefix_caching.py @@ -10,7 +10,8 @@ --model meta-llama/Llama-2-7b-chat-hf \ --enable-prefix-caching \ --num-prompts 1 \ - --repeat-count 100 + --repeat-count 100 \ + --input-length-range 128:256 ShareGPT example usage: # This command samples 20 prompts with input lengths