Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
424 changes: 423 additions & 1 deletion tests/models/test_vision.py

Large diffs are not rendered by default.

426 changes: 1 addition & 425 deletions tests/multimodal/test_utils.py

Large diffs are not rendered by default.

3 changes: 1 addition & 2 deletions vllm/model_executor/models/glm4_1v.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@
BaseProcessingInfo, PromptReplacement,
PromptUpdate, PromptUpdateDetails)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.multimodal.utils import run_dp_sharded_mrope_vision_model
from vllm.platforms import _Backend
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.config import uses_mrope
Expand All @@ -83,7 +82,7 @@
from .utils import (AutoWeightsLoader, WeightsMapper,
init_vllm_registered_model, maybe_prefix,
merge_multimodal_embeddings)
from .vision import get_vit_attn_backend
from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model

logger = init_logger(__name__)

Expand Down
3 changes: 2 additions & 1 deletion vllm/model_executor/models/idefics2_vision_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
RowParallelLinear)
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.multimodal.utils import run_dp_sharded_vision_model

from .vision import run_dp_sharded_vision_model


class Idefics2VisionEmbeddings(nn.Module):
Expand Down
3 changes: 2 additions & 1 deletion vllm/model_executor/models/intern_vit.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@
RowParallelLinear)
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
from vllm.multimodal.utils import run_dp_sharded_vision_model

from .vision import run_dp_sharded_vision_model

NORM2FN = {
'rms_norm': RMSNorm,
Expand Down
2 changes: 1 addition & 1 deletion vllm/model_executor/models/kimi_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,13 @@
BaseProcessingInfo, PromptReplacement,
PromptUpdate)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.multimodal.utils import run_dp_sharded_mrope_vision_model
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.configs import KimiVLConfig, MoonViTConfig
from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekV2Config
from vllm.utils.tensor_schema import TensorSchema, TensorShape

from .utils import PPMissingLayer, is_pp_missing_parameter, maybe_prefix
from .vision import run_dp_sharded_mrope_vision_model


# For dummy input only
Expand Down
2 changes: 1 addition & 1 deletion vllm/model_executor/models/mllama4.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,14 @@
BaseProcessingInfo, PromptReplacement,
PromptUpdate, PromptUpdateDetails)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.multimodal.utils import run_dp_sharded_vision_model
from vllm.sequence import IntermediateTensors
from vllm.utils.tensor_schema import TensorSchema, TensorShape

from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP
from .llama4 import Llama4ForCausalLM
from .utils import (AutoWeightsLoader, flatten_bn, maybe_prefix,
merge_multimodal_embeddings)
from .vision import run_dp_sharded_vision_model


class Llama4ImagePatchInputs(TensorSchema):
Expand Down
3 changes: 1 addition & 2 deletions vllm/model_executor/models/qwen2_5_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@
from vllm.model_executor.models.module_mapping import MultiModelKeys
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.inputs import MultiModalFieldConfig
from vllm.multimodal.utils import run_dp_sharded_mrope_vision_model
from vllm.platforms import _Backend
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.config import uses_mrope
Expand All @@ -74,7 +73,7 @@
from .utils import (AutoWeightsLoader, WeightsMapper, cast_overflow_tensors,
init_vllm_registered_model, maybe_prefix,
merge_multimodal_embeddings)
from .vision import get_vit_attn_backend
from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model

logger = init_logger(__name__)

Expand Down
3 changes: 1 addition & 2 deletions vllm/model_executor/models/qwen2_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
BaseProcessingInfo, PromptReplacement,
PromptUpdate)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.multimodal.utils import run_dp_sharded_mrope_vision_model
from vllm.platforms import _Backend, current_platform
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.config import uses_mrope
Expand All @@ -78,7 +77,7 @@
from .utils import (AutoWeightsLoader, WeightsMapper,
init_vllm_registered_model, maybe_prefix,
merge_multimodal_embeddings)
from .vision import get_vit_attn_backend
from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model

logger = init_logger(__name__)

Expand Down
6 changes: 1 addition & 5 deletions vllm/model_executor/models/qwen3_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
from .qwen3 import Qwen3ForCausalLM, Qwen3Model
from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper,
maybe_prefix, merge_multimodal_embeddings)
from .vision import get_vit_attn_backend
from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model

logger = init_logger(__name__)

Expand Down Expand Up @@ -1214,8 +1214,6 @@ def _process_image_input(
else:
pixel_values = image_input["pixel_values"].type(self.visual.dtype)
if self.use_data_parallel:
from vllm.multimodal.utils import (
run_dp_sharded_mrope_vision_model)
return run_dp_sharded_mrope_vision_model(self.visual,
pixel_values,
grid_thw_list,
Expand Down Expand Up @@ -1245,8 +1243,6 @@ def _process_video_input(
pixel_values_videos = video_input["pixel_values_videos"].type(
self.visual.dtype)
if self.use_data_parallel:
from vllm.multimodal.utils import (
run_dp_sharded_mrope_vision_model)
return run_dp_sharded_mrope_vision_model(self.visual,
pixel_values_videos,
grid_thw_list,
Expand Down
2 changes: 1 addition & 1 deletion vllm/model_executor/models/step3_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
BaseProcessingInfo, PromptReplacement,
PromptUpdate, PromptUpdateDetails)
from vllm.multimodal.profiling import BaseDummyInputsBuilder
from vllm.multimodal.utils import run_dp_sharded_vision_model
from vllm.sequence import IntermediateTensors
from vllm.transformers_utils.configs import Step3VisionEncoderConfig
from vllm.transformers_utils.tokenizer import AnyTokenizer
Expand All @@ -40,6 +39,7 @@
from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
init_vllm_registered_model, maybe_prefix,
merge_multimodal_embeddings)
from .vision import run_dp_sharded_vision_model


class Step3VLImagePixelInputs(TypedDict):
Expand Down
Loading
Loading