We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent bf13d40, commit 66233af (Copy full SHA for 66233af)
vllm/worker/cache_engine.py
@@ -1,8 +1,8 @@
1
# SPDX-License-Identifier: Apache-2.0
2
"""CacheEngine class for managing the KV cache."""
3
+from math import prod
4
from typing import List
5
-import numpy as np
6
import torch
7
8
from vllm import envs
@@ -90,7 +90,7 @@ def _allocate_kv_cache(
90
# NOTE this assumption currently only holds for MLA so we only apply
91
# this optimization when `use_mla` is true
92
entry_shape = kv_cache_shape[2:]
93
- entry_size = np.prod(entry_shape)
+ entry_size = prod(entry_shape)
94
alloc_entry_size = align_to_256bytes(entry_size, self.dtype)
95
alloc_shape = (*kv_cache_shape[:2], alloc_entry_size)
96
else:
0 commit comments