vllm-project · noooop · Oct 11, 2025
@@ -22,7 +22,13 @@ def test_classify_models(
     example_prompts = example_prompts * 2
 
     with vllm_runner(
-        model, max_model_len=512, dtype=dtype, enable_prefix_caching=True
+        model,
+        max_model_len=512,
+        dtype=dtype,
+        enable_prefix_caching=True,
+        # VllmRunner defaults `enable_chunked_prefill` to `False`, not `None`.
+        # TODO: how does this relate to `enable_prefix_caching`?
+        enable_chunked_prefill=True,
     ) as vllm_model:
         cache_config = vllm_model.llm.llm_engine.cache_config
         assert cache_config.enable_prefix_caching