Use math.prod instead of np.prod for trivial ops (#14142)

zhanwenchen · web-flow · commit 66233af7b6e4 · 2025-03-03T21:09:22.000-08:00
diff --git a/vllm/worker/cache_engine.py b/vllm/worker/cache_engine.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 """CacheEngine class for managing the KV cache."""
+from math import prod
 from typing import List
 
-import numpy as np
 import torch
 
 from vllm import envs
@@ -90,7 +90,7 @@ def _allocate_kv_cache(
             # NOTE this assumption currently only holds for MLA so we only apply
             # this optimization when `use_mla` is true
             entry_shape = kv_cache_shape[2:]
-            entry_size = np.prod(entry_shape)
+            entry_size = prod(entry_shape)
             alloc_entry_size = align_to_256bytes(entry_size, self.dtype)
             alloc_shape = (*kv_cache_shape[:2], alloc_entry_size)
         else: