Commit 1d5b260

fix crash introduced by upstream PR 25613 and PR 23991 (#259)
vllm-project/vllm#23991 vllm-project/vllm#25613

Signed-off-by: Chendi Xue <Chendi.Xue@intel.com>
1 parent 60808d7 commit 1d5b260
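
The crash comes from the call sites below unpacking two values from FusedMoE.select_experts after the upstream changes, which, as the diffs show, now hand back a third zero_expert_result value. A minimal sketch of the failure mode and of the fix, using a stand-in function rather than the real vLLM API:

    # Stand-in for FusedMoE.select_experts after the upstream change: it now
    # returns (topk_weights, topk_ids, zero_expert_result).
    def select_experts_stub():
        return "topk_weights", "topk_ids", "zero_expert_result"

    try:
        topk_weights, topk_ids = select_experts_stub()  # old two-value unpacking
    except ValueError as err:
        print(f"crash: {err}")  # "too many values to unpack (expected 2)"

    # Fixed three-value unpacking, as applied in the diffs below:
    topk_weights, topk_ids, zero_expert_result = select_experts_stub()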

5 files changed, +68 -42 lines changed


vllm_gaudi/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -9,6 +9,7 @@ def register():
 
 def register_ops():
     """Register custom ops for the HPU platform."""
+    HpuPlatform.patch_for_pt27()
     import vllm_gaudi.v1.sample.hpu_rejection_sampler  # noqa: F401
     import vllm_gaudi.distributed.kv_transfer.kv_connector.v1.hpu_nixl_connector  # noqa: F401
     import vllm_gaudi.ops.hpu_fused_moe  # noqa: F401

vllm_gaudi/ops/hpu_compressed_tensors.py

Lines changed: 24 additions & 22 deletions
@@ -226,16 +226,17 @@ def apply(
         input_shape = x.shape
         x = x.view(-1, x.shape[-1])
         if use_grouped_topk or custom_routing_function is not None:
-            topk_weights, topk_ids = FusedMoE.select_experts(hidden_states=x,
-                router_logits=router_logits,
-                use_grouped_topk=use_grouped_topk,
-                top_k=top_k,
-                renormalize=renormalize,
-                topk_group=topk_group,
-                num_expert_group=num_expert_group,
-                custom_routing_function=custom_routing_function,
-                scoring_func=scoring_func,
-                e_score_correction_bias=e_score_correction_bias)
+            topk_weights, topk_ids, zero_expert_result = FusedMoE.select_experts(
+                hidden_states=x,
+                router_logits=router_logits,
+                use_grouped_topk=use_grouped_topk,
+                top_k=top_k,
+                renormalize=renormalize,
+                topk_group=topk_group,
+                num_expert_group=num_expert_group,
+                custom_routing_function=custom_routing_function,
+                scoring_func=scoring_func,
+                e_score_correction_bias=e_score_correction_bias)
         else:
             import torch.nn.functional as F
             topk_weights = F.softmax(router_logits, dim=1, dtype=torch.float32)

@@ -663,18 +664,19 @@ def apply(
         x = x.view(-1, x.shape[-1])
 
         if use_grouped_topk or custom_routing_function is not None:
-            topk_weights, topk_ids = FusedMoE.select_experts(hidden_states=x,
-                router_logits=router_logits,
-                use_grouped_topk=use_grouped_topk,
-                top_k=top_k,
-                renormalize=renormalize,
-                topk_group=topk_group,
-                num_expert_group=num_expert_group,
-                custom_routing_function=custom_routing_function,
-                scoring_func=scoring_func,
-                routed_scaling_factor=routed_scaling_factor,
-                e_score_correction_bias=e_score_correction_bias,
-                indices_type=self.topk_indices_dtype)
+            topk_weights, topk_ids, zero_expert_result = FusedMoE.select_experts(
+                hidden_states=x,
+                router_logits=router_logits,
+                use_grouped_topk=use_grouped_topk,
+                top_k=top_k,
+                renormalize=renormalize,
+                topk_group=topk_group,
+                num_expert_group=num_expert_group,
+                custom_routing_function=custom_routing_function,
+                scoring_func=scoring_func,
+                routed_scaling_factor=routed_scaling_factor,
+                e_score_correction_bias=e_score_correction_bias,
+                indices_type=self.topk_indices_dtype)
         else:
             import torch.nn.functional as F
             topk_weights = F.softmax(router_logits, dim=1, dtype=torch.float32)
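
The same three-value unpacking is applied in hpu_fp8.py and hpu_fused_moe.py below; zero_expert_result is captured but not otherwise used in the hunks shown. For an out-of-tree call site that has to run against both the old and the new upstream signature, one option (not what this commit does) is a small compatibility shim; the helper name here is hypothetical:

    # Hypothetical helper, not part of this commit: accept either the 2-tuple or
    # the 3-tuple return of select_experts so one call site spans both versions.
    def unpack_select_experts(result):
        if len(result) == 2:
            topk_weights, topk_ids = result
            zero_expert_result = None  # older upstream: no third value
        else:
            topk_weights, topk_ids, zero_expert_result = result
        return topk_weights, topk_ids, zero_expert_result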

vllm_gaudi/ops/hpu_fp8.py

Lines changed: 11 additions & 10 deletions
@@ -126,16 +126,17 @@ def apply(
         input_shape = x.shape
         x = x.view(-1, x.shape[-1])
         if use_grouped_topk or custom_routing_function is not None:
-            topk_weights, topk_ids = FusedMoE.select_experts(hidden_states=x,
-                router_logits=router_logits,
-                use_grouped_topk=use_grouped_topk,
-                top_k=top_k,
-                renormalize=renormalize,
-                topk_group=topk_group,
-                num_expert_group=num_expert_group,
-                custom_routing_function=custom_routing_function,
-                scoring_func=scoring_func,
-                e_score_correction_bias=e_score_correction_bias)
+            topk_weights, topk_ids, zero_expert_result = FusedMoE.select_experts(
+                hidden_states=x,
+                router_logits=router_logits,
+                use_grouped_topk=use_grouped_topk,
+                top_k=top_k,
+                renormalize=renormalize,
+                topk_group=topk_group,
+                num_expert_group=num_expert_group,
+                custom_routing_function=custom_routing_function,
+                scoring_func=scoring_func,
+                e_score_correction_bias=e_score_correction_bias)
         else:
             import torch.nn.functional as F
             topk_weights = F.softmax(router_logits, dim=1, dtype=torch.float32)

vllm_gaudi/ops/hpu_fused_moe.py

Lines changed: 11 additions & 10 deletions
@@ -53,16 +53,17 @@ def forward_oot(
         input_shape = x.shape
         x = x.view(-1, x.shape[-1])
         if use_grouped_topk or custom_routing_function is not None:
-            topk_weights, topk_ids = FusedMoE.select_experts(hidden_states=x,
-                router_logits=router_logits,
-                use_grouped_topk=use_grouped_topk,
-                top_k=top_k,
-                renormalize=renormalize,
-                topk_group=topk_group,
-                num_expert_group=num_expert_group,
-                custom_routing_function=custom_routing_function,
-                scoring_func=scoring_func,
-                e_score_correction_bias=e_score_correction_bias)
+            topk_weights, topk_ids, zero_expert_result = FusedMoE.select_experts(
+                hidden_states=x,
+                router_logits=router_logits,
+                use_grouped_topk=use_grouped_topk,
+                top_k=top_k,
+                renormalize=renormalize,
+                topk_group=topk_group,
+                num_expert_group=num_expert_group,
+                custom_routing_function=custom_routing_function,
+                scoring_func=scoring_func,
+                e_score_correction_bias=e_score_correction_bias)
         else:
             import torch.nn.functional as F
             topk_weights = F.softmax(router_logits, dim=1, dtype=torch.float32)

vllm_gaudi/platform.py

Lines changed: 21 additions & 0 deletions
@@ -195,3 +195,24 @@ def _synced_weight_loader(param, *args, **kwargs):
             return out
 
         return _synced_weight_loader
+
+    @classmethod
+    def patch_for_pt27(cls) -> None:
+
+        from vllm.utils import is_torch_equal_or_newer
+        if is_torch_equal_or_newer("2.8.0"):
+            return
+
+        from vllm.model_executor import BasevLLMParameter
+        parent_class = BasevLLMParameter.__mro__[1]
+        parent_torch_function = getattr(parent_class, "__torch_function__", None)
+
+        def torch_function(origin_cls, func, types, args=(), kwargs=None):
+            if kwargs is None:
+                kwargs = {}
+            if parent_torch_function is None:
+                return NotImplemented
+            return parent_torch_function(func, types, args, kwargs)
+
+        BasevLLMParameter.__torch_function__ = classmethod(torch_function)
+        return
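
patch_for_pt27 is a no-op on torch 2.8.0 or newer; on older torch it replaces BasevLLMParameter.__torch_function__ with a classmethod that forwards to the parent class's implementation (or returns NotImplemented if the parent has none). A minimal, self-contained illustration of that dispatch pattern on a plain torch.Tensor subclass, not the vLLM classes, showing that a forwarding __torch_function__ keeps ordinary tensor ops working:

    import torch

    class ForwardingTensor(torch.Tensor):

        # __torch_function__ is resolved as a classmethod on tensor subclasses;
        # forwarding to the parent implementation mirrors the shape of the patch.
        @classmethod
        def __torch_function__(cls, func, types, args=(), kwargs=None):
            if kwargs is None:
                kwargs = {}
            return super().__torch_function__(func, types, args, kwargs)

    t = torch.ones(2).as_subclass(ForwardingTensor)
    print(t + 1)  # dispatches through __torch_function__ and computes normally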
