diff --git a/tests/entrypoints/llm/test_generate.py b/tests/entrypoints/llm/test_generate.py index 3c3281c34d56..707891f6bdd8 100644 --- a/tests/entrypoints/llm/test_generate.py +++ b/tests/entrypoints/llm/test_generate.py @@ -125,4 +125,7 @@ def test_max_model_len(): for output in outputs: num_total_tokens = len(output.prompt_token_ids) + len( output.outputs[0].token_ids) - assert num_total_tokens == max_model_len + # Total tokens must not exceed max_model_len. + # It can be less if generation finishes due to other reasons (e.g., EOS) + # before reaching the absolute model length limit. + assert num_total_tokens <= max_model_len