Skip to content

Commit 41507c7

Browse files
committed
Add aligned memory allocation for dense model
Signed-off-by: ganyi <pleaplusone.gy@gmail.com>
1 parent 564a686 commit 41507c7

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

vllm_ascend/worker/model_runner_v1.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
import types
2424
import weakref
2525
import copy
26+
import math
2627
from contextlib import contextmanager, nullcontext
2728
from dataclasses import dataclass
2829
from typing import TYPE_CHECKING, Dict, List, Optional, Union
@@ -1739,8 +1740,11 @@ def align_memory(tensor: torch.Tensor, alignment: int) -> torch.Tensor:
17391740
kv_cache_list = []
17401741
for i in range(num_caches):
17411742
cache_shape = kv_cache_shape[1:]
1742-
kv_cache_for_compute = torch.zeros(cache_shape, dtype=dtype, device=self.device)
1743-
kv_cache_list.append(kv_cache_for_compute)
1743+
cache_size = math.prod(cache_shape)
1744+
cache_size_aligned = cache_size + alignment
1745+
kv_cache = torch.zeros(cache_size_aligned, dtype=dtype, device=self.device)
1746+
kv_cache = align_memory(kv_cache, alignment)[:cache_size].view(cache_shape)
1747+
kv_cache_list.append(kv_cache)
17441748
kv_caches[layer_name] = kv_cache_list
17451749
# torch_npu.npu_format_cast(kv_caches[layer_name], 2)
17461750
else:

0 commit comments

Comments
 (0)