Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion vllm/attention/ops/ipex_attn.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def get_kv_cache_shape(
head_size: int,
*args,
) -> Tuple[int, ...]:
return (2, num_blocks, block_size * num_kv_heads * head_size)
return 2, num_blocks, block_size * num_kv_heads * head_size

@staticmethod
def split_kv_cache(
Expand Down
8 changes: 4 additions & 4 deletions vllm/worker/cpu_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""A CPU worker class."""
import os
from importlib import util
from typing import Dict, List, Optional, Set, Tuple, Type
from typing import List, Optional, Set, Tuple, Type

import torch
import torch.distributed
Expand Down Expand Up @@ -88,13 +88,13 @@ def _allocate_kv_cache(
torch.empty(kv_cache_shape, dtype=self.dtype, device="cpu"))
return kv_cache

def swap_in(self, src_to_dst: Dict[int, int]) -> None:
def swap_in(self, src_to_dst: torch.Tensor) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Changing the type hint for src_to_dst from Dict[int, int] to torch.Tensor improves type consistency with the blocks_to_swap_in field in the WorkerInput dataclass and the swap_blocks method in the IPEXAttnBackend.

src_to_dst: torch.Tensor

raise NotImplementedError("Swap is not supported in CPUCacheEngine.")

def swap_out(self, src_to_dst: Dict[int, int]) -> None:
def swap_out(self, src_to_dst: torch.Tensor) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Updating the type hint for src_to_dst to torch.Tensor improves type consistency with the blocks_to_swap_out field in WorkerInput and the IPEXAttnBackend.swap_blocks method signature.

src_to_dst: torch.Tensor

raise NotImplementedError("Swap is not supported in CPUCacheEngine.")

def copy(self, src_to_dsts: Dict[int, List[int]]) -> None:
def copy(self, src_to_dsts: torch.Tensor) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Changing the type hint for src_to_dsts from Dict[int, List[int]] to torch.Tensor improves type consistency with blocks_to_copy, which is a torch.Tensor in CPUWorker.prepare_worker_input, and with the IPEXAttnBackend.copy_blocks method, which expects src_to_dists: torch.Tensor.

src_to_dsts: torch.Tensor

self.attn_backend.copy_blocks(self.cpu_cache, src_to_dsts)

@staticmethod
Expand Down