Skip to content

Commit e2e4c4a

Browse files
committed
[CI] enable chunked prefill by default
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
1 parent f10acdd commit e2e4c4a

File tree

3 files changed

+1
-3
lines changed

3 files changed

+1
-3
lines changed

tests/e2e/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -280,7 +280,7 @@ def __init__(
280280
disable_log_stats: bool = True,
281281
tensor_parallel_size: int = 1,
282282
block_size: int = 16,
283-
enable_chunked_prefill: bool = False,
283+
enable_chunked_prefill: bool = True,
284284
swap_space: int = 4,
285285
enforce_eager: Optional[bool] = False,
286286
quantization: Optional[str] = None,

tests/e2e/multicard/test_prefix_caching.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
]
5959

6060

61-
@pytest.mark.skip(reason="Fix me, the accuracy is not correct")
6261
@pytest.mark.parametrize("model", MODELS)
6362
@pytest.mark.parametrize("max_tokens", [50])
6463
def test_prefix_cache_with_v1_scheduler(model: str, max_tokens: int) -> None:

tests/e2e/singlecard/spec_decode_v1/test_v1_spec_decode.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ def test_eagle_correctness(
117117
spec_model_name = eagle3_model_name() if use_eagle3 else eagle_model_name()
118118
with VllmRunner(
119119
model_name,
120-
enable_chunked_prefill=True,
121120
max_num_seqs=1,
122121
max_num_batched_tokens=2048,
123122
gpu_memory_utilization=0.6,

0 commit comments

Comments (0)