diff --git a/vllm/lora/ops/triton_ops/lora_kernel_metadata.py b/vllm/lora/ops/triton_ops/lora_kernel_metadata.py
index 39e647b9b88a..e27604728ed0 100644
--- a/vllm/lora/ops/triton_ops/lora_kernel_metadata.py
+++ b/vllm/lora/ops/triton_ops/lora_kernel_metadata.py
@@ -83,8 +83,8 @@ def prepare_tensors(self, token_lora_mapping: torch.Tensor) -> None:
         Prepare kernel metadata tensors for the current forward pass.
 
         Args:
-            token_lora_tensor (torch.Tensor): Tensor containing lora indices
-                for each input token.
+            token_lora_mapping (torch.Tensor): Tensor containing lora indices
+                for each input token.
         """
         self._reset()
 
@@ -136,7 +136,7 @@ def meta_args(
 
         Args:
             token_nums (int): Number of input tokens in the current forward
-                pass.
+                pass of the kernel.
         """
         return (
             self.token_lora_mapping[:token_nums],
diff --git a/vllm/lora/ops/xla_ops/lora_ops.py b/vllm/lora/ops/xla_ops/lora_ops.py
index 9118f3351ef0..29bfd5753a58 100644
--- a/vllm/lora/ops/xla_ops/lora_ops.py
+++ b/vllm/lora/ops/xla_ops/lora_ops.py
@@ -93,7 +93,6 @@ def bgmv_shrink(
         inputs (torch.Tensor): Input tensor of shape [num_tokens, hidden_size].
         lora_b_weights (torch.Tensor): LoRA weights of shape
             [num_loras, lora_rank, hidden_size].
-        output_tensor (torch.Tensor): (Unused) output tensor (placeholder).
         lora_indices_tensor (torch.Tensor): Tensor of shape [num_tokens]
             indicating which LoRA matrix to use for each token.
         scaling (float, optional): Scalar multiplier applied to the output.
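
Review note: the corrected `bgmv_shrink` docstring pins down the argument shapes, so a shape-level reference may help reviewers sanity-check it. The sketch below is a minimal eager-PyTorch rendering of the documented contract, not the actual XLA kernel; the name `bgmv_shrink_reference` and the `scaling` default of `1.0` are illustrative assumptions, not part of the diff.

```python
import torch


def bgmv_shrink_reference(
    inputs: torch.Tensor,               # [num_tokens, hidden_size]
    lora_b_weights: torch.Tensor,       # [num_loras, lora_rank, hidden_size]
    lora_indices_tensor: torch.Tensor,  # [num_tokens], LoRA index per token
    scaling: float = 1.0,               # assumed default, per "optional"
) -> torch.Tensor:
    # Gather each token's LoRA matrix: [num_tokens, lora_rank, hidden_size].
    selected = lora_b_weights[lora_indices_tensor]
    # Batched matrix-vector product: project each token from hidden_size
    # down to lora_rank, then apply the scalar multiplier.
    out = torch.einsum("td,trd->tr", inputs, selected)
    return out * scaling
```

Note how the per-token gather makes the now-removed `output_tensor` placeholder unnecessary here: the result is simply returned with shape `[num_tokens, lora_rank]`.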