From 00ad41466c914021de07cf3a6ff22ea545e6e7e2 Mon Sep 17 00:00:00 2001
From: DearPlanet
Date: Wed, 19 Jun 2024 18:19:08 +0800
Subject: [PATCH] [Misc]Add param max-model-len in benchmark_latency.py (#5629)

---
 benchmarks/benchmark_latency.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/benchmarks/benchmark_latency.py b/benchmarks/benchmark_latency.py
index 98e0be2779922..e9d1048c89b64 100644
--- a/benchmarks/benchmark_latency.py
+++ b/benchmarks/benchmark_latency.py
@@ -29,6 +29,7 @@ def main(args: argparse.Namespace):
         tensor_parallel_size=args.tensor_parallel_size,
         trust_remote_code=args.trust_remote_code,
         dtype=args.dtype,
+        max_model_len=args.max_model_len,
         enforce_eager=args.enforce_eager,
         kv_cache_dtype=args.kv_cache_dtype,
         quantization_param_path=args.quantization_param_path,
@@ -150,6 +151,12 @@ def run_to_completion(profile_dir: Optional[str] = None):
     parser.add_argument('--trust-remote-code',
                         action='store_true',
                         help='trust remote code from huggingface')
+    parser.add_argument(
+        '--max-model-len',
+        type=int,
+        default=None,
+        help='Maximum length of a sequence (including prompt and output). '
+        'If None, will be derived from the model.')
     parser.add_argument(
         '--dtype',
         type=str,