Skip to content

Commit c536881

Browse files
LucasWilkinson and simon-mo
authored and committed
[BugFix] ChunkedLocalAttention is currently not CG compatible (#26034)
Signed-off-by: Lucas Wilkinson <lwilkins@redhat.com> Signed-off-by: simon-mo <simon.mo@hey.com>
1 parent ebce361 commit c536881

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

vllm/attention/layers/chunked_local_attention.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
import functools
4-
from typing import List, Optional
4+
from typing import ClassVar, List, Optional
55

66
import torch
77

@@ -11,8 +11,8 @@
1111
from vllm.attention.selector import get_attn_backend
1212
from vllm.config import CacheConfig, QuantizationConfig
1313
from vllm.v1.attention.backends.utils import (
14-
CommonAttentionMetadata, make_local_attention_virtual_batches,
15-
subclass_attention_backend)
14+
AttentionCGSupport, CommonAttentionMetadata,
15+
make_local_attention_virtual_batches, subclass_attention_backend)
1616

1717
from ..layer import Attention
1818

@@ -28,6 +28,8 @@ def create_chunked_local_attention_backend(
2828
underlying_builder = underlying_attn_backend.get_builder_cls()
2929

3030
class ChunkedLocalAttentionBuilder(underlying_builder): # type: ignore
31+
cudagraph_support: ClassVar[AttentionCGSupport] = \
32+
AttentionCGSupport.NEVER
3133

3234
def build(self,
3335
common_prefix_len: int,

0 commit comments

Comments
 (0)