docs/models/supported_models.md (3 changes: 2 additions & 1 deletion)
@@ -591,7 +591,8 @@ See [this page](generative_models.md) for more information on how to use generative models
| `Gemma3ForConditionalGeneration` | Gemma 3 | T + I<sup>+</sup> | `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc. | ✅︎ | ✅︎ | ⚠️ |
| `GLM4VForCausalLM`<sup>^</sup> | GLM-4V | T + I | `THUDM/glm-4v-9b`, `THUDM/cogagent-9b-20241220`, etc. | ✅︎ | ✅︎ | ✅︎ |
| `Glm4vForConditionalGeneration` | GLM-4.1V-Thinking | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.1V-9B-Thinking`, etc. | ✅︎ | ✅︎ | ✅︎ |
- | `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `THUDM/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
+ | `Glm4MoeForCausalLM` | GLM-4.5 | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ |
+ | `Glm4v_moeForConditionalGeneration` | GLM-4.5V | T + I<sup>E+</sup> + V<sup>E+</sup> | `zai-org/GLM-4.5V-Air`, etc. | ✅︎ | ✅︎ | ✅︎ |
| `GraniteSpeechForConditionalGeneration` | Granite Speech | T + A | `ibm-granite/granite-speech-3.3-8b` | ✅︎ | ✅︎ | ✅︎ |
| `H2OVLChatModel` | H2OVL | T + I<sup>E+</sup> | `h2oai/h2ovl-mississippi-800m`, `h2oai/h2ovl-mississippi-2b`, etc. | | ✅︎ | ✅︎ |
| `Idefics3ForConditionalGeneration` | Idefics3 | T + I | `HuggingFaceM4/Idefics3-8B-Llama3`, etc. | ✅︎ | | ✅︎ |
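For readers coming from the table: a minimal offline-inference sketch for the newly listed GLM-4.5V entry. The model name comes from the row added above; the sampling settings and `tensor_parallel_size` are illustrative assumptions (not values from this PR), and a text-only prompt is shown for brevity.

```python
from vllm import LLM, SamplingParams

# Minimal sketch: load the GLM-4.5V checkpoint registered by this PR.
# tensor_parallel_size=4 is an assumption about available GPUs.
llm = LLM(model="zai-org/GLM-4.5V-Air", tensor_parallel_size=4)

outputs = llm.generate(
    ["Describe this model family in one sentence."],
    SamplingParams(temperature=0.7, max_tokens=64),
)
print(outputs[0].outputs[0].text)
```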
tests/models/registry.py (11 changes: 6 additions & 5 deletions)
@@ -377,9 +377,10 @@ def check_available_online(
"GLM4VForCausalLM": _HfExamplesInfo("THUDM/glm-4v-9b",
trust_remote_code=True,
hf_overrides={"architectures": ["GLM4VForCausalLM"]}), # noqa: E501
"Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking", min_transformers_version="4.53"), # noqa: E501
"Glm4MoeForCausalLM": _HfExamplesInfo("THUDM/GLM-4.5",
min_transformers_version="4.54",
"Glm4vForConditionalGeneration": _HfExamplesInfo("THUDM/GLM-4.1V-9B-Thinking"), # noqa: E501
"Glm4MoeForCausalLM": _HfExamplesInfo("zai-org/GLM-4.5",
min_transformers_version="4.54"), # noqa: E501
"Glm4v_moeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V-Air",
is_available_online=False), # noqa: E501
"H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m",
extras={"2b": "h2oai/h2ovl-mississippi-2b"}, # noqa: E501
@@ -515,8 +516,8 @@ def check_available_online(
is_available_online=False,
speculative_model="openbmb/MiniCPM-2B-sft-bf16",
tokenizer="openbmb/MiniCPM-2B-sft-bf16"),
"Glm4MoeMTPModel": _HfExamplesInfo("THUDM/GLM-4.5",
speculative_model="THUDM/GLM-4.5",
"Glm4MoeMTPModel": _HfExamplesInfo("zai-org/GLM-4.5",
speculative_model="zai-org/GLM-4.5",
min_transformers_version="4.54",
is_available_online=False),
"MiMoMTPModel": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL",
tests/tool_use/test_glm4_moe_tool_parser.py (2 changes: 1 addition & 1 deletion)
@@ -12,7 +12,7 @@

pytest.skip("skip glm4_moe parser test", allow_module_level=True)
# Use a common model that is likely to be available
MODEL = "THUDM/GLM-4.5"
MODEL = "zai-org/GLM-4.5"


@pytest.fixture(scope="module")
vllm/model_executor/layers/rotary_embedding.py (2 changes: 1 addition & 1 deletion)
@@ -1096,7 +1096,7 @@ def get_input_positions_tensor(
audio_feature_lengths=audio_feature_lengths,
use_audio_in_video=use_audio_in_video,
)
elif "glm4v" in hf_config.model_type:
elif hf_config.model_type in ["glm4v", "glm4v_moe"]:
return cls._glm4v_get_input_positions_tensor(
input_tokens=input_tokens,
hf_config=hf_config,
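A side note on the condition change above, illustrated with plain Python string checks: the replaced substring test would in fact already have matched the new `glm4v_moe` model type, since `in` on strings is substring containment, so the rewrite mainly makes the dispatched model types explicit rather than changing behavior for these two types. The `"not_glm"` string below is only an illustrative non-matching value.

```python
# The old condition used substring containment, which already matched the MoE
# model type because "glm4v_moe" starts with "glm4v":
assert "glm4v" in "glm4v_moe"

# The new condition enumerates the model types, so the set of configs routed
# to the GLM-4V position-id path is visible at a glance:
assert "glm4v_moe" in ["glm4v", "glm4v_moe"]
assert "not_glm" not in ["glm4v", "glm4v_moe"]
```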
vllm/model_executor/models/glm4_1v.py (21 changes: 13 additions & 8 deletions)
@@ -37,8 +37,7 @@
import torch.nn.functional as F
from einops import rearrange
from transformers import BatchFeature
- from transformers.models.glm4v.configuration_glm4v import (Glm4vConfig,
- Glm4vVisionConfig)
+ from transformers.models.glm4v.configuration_glm4v import Glm4vVisionConfig
from transformers.models.glm4v.image_processing_glm4v import (
Glm4vImageProcessor, smart_resize)
from transformers.models.glm4v.video_processing_glm4v import (
@@ -801,7 +800,7 @@ def load_weights(self, weights: Iterable[tuple[str,
class Glm4vProcessingInfo(BaseProcessingInfo):

def get_hf_config(self):
- return self.ctx.get_hf_config(Glm4vConfig)
+ return self.ctx.get_hf_config()

def get_tokenizer(self):
return self.ctx.tokenizer
@@ -1253,7 +1252,7 @@ def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:

def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__()
- config: Glm4vConfig = vllm_config.model_config.hf_config
+ config = vllm_config.model_config.hf_config
quant_config = vllm_config.quant_config
multimodal_config = vllm_config.model_config.multimodal_config

@@ -1267,12 +1266,18 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
prefix=maybe_prefix(prefix, "visual"),
)

if config.model_type == "glm4v":
architectures = ["Glm4ForCausalLM"]
elif config.model_type == "glm4v_moe":
architectures = ["Glm4MoeForCausalLM"]
else:
architectures = None

self.language_model = init_vllm_registered_model(
vllm_config=vllm_config,
prefix=maybe_prefix(prefix, ""),
architectures=["Glm4ForCausalLM"],
hf_config=self.config.get_text_config(),
)
hf_config=config.text_config,
prefix=maybe_prefix(prefix, "language_model"),
architectures=architectures)

self.make_empty_intermediate_tensors = (
self.language_model.make_empty_intermediate_tensors)
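Read in isolation, the new constructor logic chooses the language-model architecture from the multimodal config's `model_type` before handing off to `init_vllm_registered_model`. A standalone sketch of that dispatch follows; the helper name is illustrative and not part of the PR.

```python
from typing import Optional


def pick_text_architectures(model_type: str) -> Optional[list[str]]:
    """Mirror of the branch above: choose the text backbone per model_type."""
    if model_type == "glm4v":
        # Dense GLM-4.1V pairs the vision tower with the Glm4 causal LM.
        return ["Glm4ForCausalLM"]
    if model_type == "glm4v_moe":
        # GLM-4.5V pairs it with the MoE causal LM used by GLM-4.5.
        return ["Glm4MoeForCausalLM"]
    # Anything else defers to whatever architectures the text_config declares.
    return None


assert pick_text_architectures("glm4v_moe") == ["Glm4MoeForCausalLM"]
```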
vllm/model_executor/models/registry.py (1 change: 1 addition & 0 deletions)
@@ -206,6 +206,7 @@
"Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"), # noqa: E501
"GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
"Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"), # noqa: E501
"Glm4v_moeForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"), # noqa: E501
Reviewer comment (Member) on the line above: Nearly forgotten, can you also update tests/models/registry.py and docs/models/supported_models.md for Glm4v_moeForConditionalGeneration?

"GraniteSpeechForConditionalGeneration": ("granite_speech", "GraniteSpeechForConditionalGeneration"), # noqa: E501
"H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
"InternVLChatModel": ("internvl", "InternVLChatModel"),