Commit e236564

MatthewBonanni authored and karan committed
[Attention] Move Backend enum into registry (vllm-project#25893)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Signed-off-by: Karan Goel <3261985+karan@users.noreply.github.com>
1 parent f296d16 · commit e236564

31 files changed: +99 -66 lines changed
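The change is mechanical across the test files below: _Backend is now imported from its new canonical home, vllm.attention.backends.registry, rather than from the modules that previously exposed it (tests.v1.attention.utils, vllm.attention.selector, vllm.platforms, and vllm.platforms.interface). A minimal before/after sketch of the import pattern; the old locations vary per file, so the "before" lines are illustrative rather than a single real file:

# Before: _Backend pulled from one of several re-export locations
# from tests.v1.attention.utils import _Backend
# from vllm.attention.selector import _Backend
# from vllm.platforms.interface import _Backend

# After: the single canonical import used throughout this commit
from vllm.attention.backends.registry import _Backend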

tests/compile/test_full_graph.py
Lines changed: 1 addition & 1 deletion

@@ -11,8 +11,8 @@
 import torch
 
 from tests.quantization.utils import is_quant_method_supported
-from tests.v1.attention.utils import _Backend
 from vllm import LLM, SamplingParams
+from vllm.attention.backends.registry import _Backend
 from vllm.attention.selector import global_force_attn_backend_context_manager
 from vllm.config import (CompilationConfig, CompilationLevel, CUDAGraphMode,
                          PassConfig)
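tests/compile/test_full_graph.py combines the relocated enum with global_force_attn_backend_context_manager from vllm.attention.selector. A minimal usage sketch under the new import path, assuming the context manager accepts a _Backend member and that FLASH_ATTN is one such member (neither detail is confirmed by this diff):

from vllm.attention.backends.registry import _Backend
from vllm.attention.selector import global_force_attn_backend_context_manager

# Assumption: the context manager forces the given backend for attention layers
# created inside the block and restores the previous selection on exit.
with global_force_attn_backend_context_manager(_Backend.FLASH_ATTN):
    ...  # build the model / run the test under the forced backend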

tests/compile/test_fusion_attn.py
Lines changed: 2 additions & 2 deletions

@@ -8,11 +8,11 @@
 
 from tests.compile.backend import LazyInitPass, TestBackend
 from tests.models.utils import check_outputs_equal
-from tests.v1.attention.utils import (BatchSpec, _Backend,
-                                      create_common_attn_metadata)
+from tests.v1.attention.utils import BatchSpec, create_common_attn_metadata
 from vllm import LLM, SamplingParams
 from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
 from vllm.attention import Attention, AttentionMetadata
+from vllm.attention.backends.registry import _Backend
 from vllm.attention.selector import global_force_attn_backend_context_manager
 from vllm.compilation.fusion import QUANT_OPS
 from vllm.compilation.fusion_attn import ATTN_OP, AttnFusionPass

tests/kernels/attention/test_mha_attn.py
Lines changed: 2 additions & 1 deletion

@@ -10,8 +10,9 @@
 import pytest
 import torch
 
+from vllm.attention.backends.registry import _Backend
 from vllm.attention.layer import MultiHeadAttention
-from vllm.attention.selector import _Backend, _cached_get_attn_backend
+from vllm.attention.selector import _cached_get_attn_backend
 from vllm.platforms import current_platform
 from vllm.platforms.cpu import CpuPlatform
 from vllm.platforms.cuda import CudaPlatform

tests/kernels/utils.py
Lines changed: 1 addition & 1 deletion

@@ -15,10 +15,10 @@
 
 from tests.kernels.quant_utils import native_w8a8_block_matmul
 from vllm.attention import AttentionBackend, AttentionMetadata, AttentionType
+from vllm.attention.backends.registry import _Backend
 from vllm.model_executor.layers.activation import SiluAndMul
 from vllm.model_executor.layers.fused_moe.utils import (
     moe_kernel_quantize_input)
-from vllm.platforms.interface import _Backend
 from vllm.utils import (STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL,
                         STR_XFORMERS_ATTN_VAL, make_tensor_with_pad)
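tests/kernels/utils.py also imports STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL, and STR_XFORMERS_ATTN_VAL alongside the relocated enum. A minimal sketch of the string-based selection path those constants suggest, assuming STR_BACKEND_ENV_VAR names the backend-override environment variable and the STR_*_VAL constants are the string counterparts of _Backend members (assumptions, not shown in this diff):

import os

from vllm.utils import STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL

# Assumption: setting this environment variable before engine or layer
# construction overrides the automatically selected attention backend.
os.environ[STR_BACKEND_ENV_VAR] = STR_FLASH_ATTN_VAL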

tests/v1/attention/test_attention_backends.py
Lines changed: 2 additions & 2 deletions

@@ -8,11 +8,11 @@
 import torch
 from torch.nn.attention.flex_attention import create_block_mask, flex_attention
 
-from tests.v1.attention.utils import (BatchSpec, _Backend,
-                                      create_common_attn_metadata,
+from tests.v1.attention.utils import (BatchSpec, create_common_attn_metadata,
                                       create_standard_kv_cache_spec,
                                       create_vllm_config,
                                       get_attention_backend)
+from vllm.attention.backends.registry import _Backend
 from vllm.config import ModelConfig
 from vllm.platforms import current_platform
 from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, cdiv, is_torch_equal_or_newer

tests/v1/attention/test_mla_backends.py
Lines changed: 2 additions & 2 deletions

@@ -6,12 +6,12 @@
 import pytest
 import torch
 
-from tests.v1.attention.utils import (BatchSpec, _Backend,
-                                      create_common_attn_metadata,
+from tests.v1.attention.utils import (BatchSpec, create_common_attn_metadata,
                                       create_standard_kv_cache_spec,
                                       create_vllm_config,
                                       get_attention_backend)
 from vllm import _custom_ops as ops
+from vllm.attention.backends.registry import _Backend
 from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, cdiv
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
 from vllm.v1.kv_cache_interface import FullAttentionSpec

tests/v1/attention/utils.py
Lines changed: 2 additions & 1 deletion

@@ -8,10 +8,11 @@
 import pytest
 import torch
 
+from vllm.attention.backends.registry import _Backend
 from vllm.config import (CacheConfig, CompilationConfig, DeviceConfig,
                          LoadConfig, ModelConfig, ModelDType, ParallelConfig,
                          SchedulerConfig, VllmConfig)
-from vllm.platforms import _Backend, current_platform
+from vllm.platforms import current_platform
 from vllm.utils import resolve_obj_by_qualname
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
 from vllm.v1.kv_cache_interface import FullAttentionSpec

tests/v1/spec_decode/test_eagle.py
Lines changed: 2 additions & 2 deletions

@@ -8,10 +8,10 @@
 import torch
 
 from tests.utils import get_attn_backend_list_based_on_platform
-from tests.v1.attention.utils import (BatchSpec, _Backend,
-                                      create_common_attn_metadata,
+from tests.v1.attention.utils import (BatchSpec, create_common_attn_metadata,
                                       create_standard_kv_cache_spec,
                                       get_attention_backend)
+from vllm.attention.backends.registry import _Backend
 from vllm.config import (CacheConfig, DeviceConfig, ModelConfig,
                          ParallelConfig, SchedulerConfig, SpeculativeConfig,
                          VllmConfig)

tests/v1/spec_decode/test_mtp.py
Lines changed: 2 additions & 2 deletions

@@ -6,10 +6,10 @@
 import pytest
 import torch
 
-from tests.v1.attention.utils import (BatchSpec, _Backend,
-                                      create_common_attn_metadata,
+from tests.v1.attention.utils import (BatchSpec, create_common_attn_metadata,
                                       create_standard_kv_cache_spec,
                                       get_attention_backend)
+from vllm.attention.backends.registry import _Backend
 from vllm.config import (CacheConfig, DeviceConfig, ModelConfig,
                          ParallelConfig, SchedulerConfig, SpeculativeConfig,
                          VllmConfig)

tests/v1/spec_decode/test_tree_attention.py
Lines changed: 2 additions & 1 deletion

@@ -6,9 +6,10 @@
 
 import torch
 
-from tests.v1.attention.utils import (_Backend, create_standard_kv_cache_spec,
+from tests.v1.attention.utils import (create_standard_kv_cache_spec,
                                       create_vllm_config,
                                       get_attention_backend)
+from vllm.attention.backends.registry import _Backend
 from vllm.config import ParallelConfig, SpeculativeConfig
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
