We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 86222a3, commit 870c374 (Copy full SHA for 870c374)
vllm/v1/core/kv_cache_manager.py
@@ -205,8 +205,6 @@ def allocate_slots(
205
# Should not exceed the maximum number of blocks per request.
206
# This is especially because the block table has the shape
207
# [..., max_num_blocks_per_req].
208
- # TODO(woosuk): Check and reject requests if
209
- # num_prompt_tokens + max_tokens > max_model_len.
210
self.max_num_blocks_per_req - len(req_blocks),
211
)
212
assert num_new_blocks > 0
0 commit comments