File tree: 5 files changed, +13 -2 lines changed
  language/pooling_mteb_test
  vllm/model_executor/layers

@@ -191,7 +191,7 @@ def mteb_test_embed_models(
     with vllm_runner(
         model_info.name,
         runner="pooling",
-        max_model_len=None,
+        max_model_len=model_info.max_model_len,
         **vllm_extra_kwargs,
     ) as vllm_model:
         model_config = vllm_model.llm.llm_engine.model_config
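
The test wrapper above now forwards the per-model max_model_len instead of always passing None. For context, a minimal sketch of what that value means at the engine level, assuming vLLM's public LLM entry point; the model name is a placeholder:

from vllm import LLM

# Placeholder model name, for illustration only.
# max_model_len=None (the old behaviour) derives the context length from
# the model's own config; an explicit value such as 8192 caps it instead.
llm = LLM(model="some-org/some-embedding-model", max_model_len=8192)
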
@@ -25,6 +25,11 @@
         mteb_score=0.824413164,
         architecture="XLMRobertaModel",
         is_matryoshka=True,
+        # The default max length of the model is 8194, which will crash
+        # CUDAGraph due to the odd length for the Gemm. We set it to 8192
+        # to avoid this issue.
+        max_model_len=8192,
+        dtype="float32",
     )
 ]

@@ -23,6 +23,7 @@
         architecture="Gemma3TextModel",
         mteb_score=0.7473819294684156,
         enable_test=True,
+        dtype="float32",
     ),
 ]

@@ -369,6 +369,7 @@ class ModelInfo:
     name: str
     architecture: str = ""
     dtype: str = "auto"
+    max_model_len: Optional[int] = None
     hf_dtype: str = "float32"
     hf_overrides: Optional[dict[str, Any]] = None
     default_pooling_type: str = ""
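
A trimmed, self-contained sketch of the dataclass change above (field names taken from the diff, everything else illustrative): the new field defaults to None, so existing test entries are unaffected and only models that need a cap set it.

from dataclasses import dataclass
from typing import Optional

@dataclass
class ModelInfo:  # trimmed to the fields relevant here
    name: str
    architecture: str = ""
    dtype: str = "auto"
    max_model_len: Optional[int] = None  # None -> keep the model's own default

# Only models that need an override set it; everything else is unchanged.
capped = ModelInfo("org/embedder", architecture="XLMRobertaModel", max_model_len=8192)
assert ModelInfo("org/other-model").max_model_len is None
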
@@ -318,7 +318,11 @@ def forward_static(
         """PyTorch-native implementation equivalent to forward()."""
         orig_dtype = x.dtype
         if residual is not None:
-            x = x + residual.float() if orig_dtype == torch.float16 else x + residual
+            x = (
+                x.float() + residual.float()
+                if orig_dtype == torch.float16
+                else x + residual
+            )
             residual = x

         x = x.float()
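
For reference, a standalone sketch of the pattern the new expression spells out, assuming a standard RMSNorm formulation (this is not vLLM's implementation; the names and eps value are illustrative): for float16 activations, both x and the residual are upcast so the residual add and the norm run in float32, and only the final result is cast back to the original dtype.

import torch

def rms_norm_f32_residual(x, weight, residual=None, eps=1e-6):
    # Reference sketch only, not the vLLM kernel or its Python fallback.
    orig_dtype = x.dtype
    if residual is not None:
        # Perform the residual add in float32 for float16 activations.
        x = (
            x.float() + residual.float()
            if orig_dtype == torch.float16
            else x + residual
        )
        residual = x

    x = x.float()
    x = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + eps)
    x = (x * weight.float()).to(orig_dtype)
    return x if residual is None else (x, residual)
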