```diff
@@ -37,8 +37,11 @@
 from vllm.inputs import INPUT_REGISTRY
 from vllm.logger import logger
 from vllm.model_executor.layers.fused_moe import FusedMoE
+from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
 from vllm.model_executor.model_loader import get_model
 from vllm.multimodal import MULTIMODAL_REGISTRY
+from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
+from vllm.multimodal.utils import group_mm_inputs_by_modality
 from vllm.sampling_params import SamplingType
 from vllm.sequence import IntermediateTensors
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
@@ -50,16 +53,10 @@
 from vllm.v1.sample.sampler import Sampler
 from vllm.v1.utils import bind_kv_cache
 from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch
-from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange
-
 from vllm.v1.worker.utils import (gather_mm_placeholders,
                                   sanity_check_mm_encoder_outputs,
                                   scatter_mm_placeholders)
 
-from vllm.multimodal.utils import group_mm_inputs_by_modality
-
-from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
-
 from vllm_ascend.attention.attention import AttentionMaskBuilder
 from vllm_ascend.attention.attention_v1 import AscendAttentionState
 from vllm_ascend.platform import NPUPlatform
```
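For context on the relocated imports: the two multimodal helpers regrouped here are the ones vLLM's v1 runners typically use together when batching encoder inputs. A minimal sketch of that pattern, mirroring the upstream v1 GPU model runner rather than this PR's code (the function name, `model`/`device` parameters, and return handling are illustrative assumptions):

```python
from vllm.multimodal.inputs import MultiModalKwargs
from vllm.multimodal.utils import group_mm_inputs_by_modality


def encode_mm_inputs(mm_inputs: list[MultiModalKwargs], model, device):
    """Run the multimodal encoder modality-by-modality (illustrative sketch)."""
    outputs = []
    # group_mm_inputs_by_modality() splits the flat list into groups that
    # share a modality, so each group can be stacked into a single batch.
    for group in group_mm_inputs_by_modality(mm_inputs):
        batched = MultiModalKwargs.batch(group)
        kwargs = MultiModalKwargs.as_kwargs(batched, device=device)
        # get_multimodal_embeddings() is the hook multimodal models expose
        # for running the vision/audio encoder on the batched inputs.
        outputs.extend(model.get_multimodal_embeddings(**kwargs))
    return outputs
```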