PaddlePaddle
diff --git a/‎fastdeploy/model_executor/layers/image_op.py‎
Lines changed: 32 additions & 0 deletions b/‎fastdeploy/model_executor/layers/image_op.py‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎fastdeploy/model_executor/layers/moe/fused_moe_xpu_backend.py‎
Lines changed: 0 additions & 258 deletions b/‎fastdeploy/model_executor/layers/moe/fused_moe_xpu_backend.py‎
Lines changed: 0 additions & 258 deletions
diff --git a/‎fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py‎
Lines changed: 5 additions & 21 deletions b/‎fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py‎
Lines changed: 5 additions & 21 deletions
@@ -0,0 +1,32 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from fastdeploy.platforms import current_platform
+
+if current_platform.is_cuda():
+    from fastdeploy.model_executor.ops.gpu import (
+        text_image_gather_scatter,
+        text_image_index_out,
+    )
+elif current_platform.is_xpu():
+    from fastdeploy.model_executor.ops.xpu import (
+        text_image_gather_scatter,
+        text_image_index_out,
+    )
+else:
+    raise ImportError("Unsupported platform, only support CUDA and XPU")
+
+__all__ = ["text_image_gather_scatter", "text_image_index_out"]
@@ -31,11 +31,16 @@
 
 from fastdeploy.config import FDConfig
 from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce
+from fastdeploy.model_executor.forward_meta import ForwardMeta
 from fastdeploy.model_executor.graph_optimization.decorator import (
     cuda_graph_buffers,
     support_graph_optimization,
 )
 from fastdeploy.model_executor.layers.embeddings import VocabParallelEmbedding
+from fastdeploy.model_executor.layers.image_op import (
+    text_image_gather_scatter,
+    text_image_index_out,
+)
 from fastdeploy.model_executor.layers.linear import ReplicatedLinear
 from fastdeploy.model_executor.layers.lm_head import ParallelLMHead
 from fastdeploy.model_executor.layers.moe.moe import FusedMoE
@@ -45,20 +50,6 @@
     Ernie4_5_MLP,
 )
 from fastdeploy.model_executor.models.model_base import ModelForCasualLM
-from fastdeploy.platforms import current_platform
-
-if current_platform.is_cuda():
-    from fastdeploy.model_executor.ops.gpu import (
-        text_image_gather_scatter,
-        text_image_index_out,
-    )
-elif current_platform.is_xpu():
-    from fastdeploy.model_executor.ops.xpu import (
-        text_image_gather_scatter,
-        text_image_index_out,
-    )
-
-from fastdeploy.model_executor.forward_meta import ForwardMeta
 
 
 class Ernie4_5_VLMLP(Ernie4_5_MLP):
@@ -75,7 +66,6 @@ class VLMoEMeta:
     text_input: paddle.Tensor
     text_index: paddle.Tensor
     image_index: paddle.Tensor
-    image_mask: paddle.Tensor
     token_type_ids: paddle.Tensor
     image_token_num: paddle.Tensor
 
@@ -86,7 +76,6 @@ def __str__(self):
             f"  text_input: {self.text_input}, pointer: {self.text_input.data_ptr()}\n"
             f"  text_index: {self.text_index}, pointer: {self.text_index.data_ptr()}\n"
             f"  image_index: {self.image_index}, pointer: {self.image_index.data_ptr()}\n"
-            f"  image_mask: {self.image_mask}, pointer: {self.image_mask.data_ptr()}\n"
             f"  token_type_ids: {self.token_type_ids}, pointer: {self.token_type_ids.data_ptr()}\n\n"
             f")"
         )
@@ -419,11 +408,6 @@ def forward(
             "dtype": "model_config.dtype",
             "value": 1,
         },
-        "image_mask": {
-            "shape": ["parallel_config.max_model_len", "model_config.hidden_size"],
-            "dtype": "bool",
-            "value": False,
-        },
         "text_index": {
             "shape": ["parallel_config.max_model_len"],
             "dtype": "int32",