[XPU] support XPU VL model inference (#4030)

* [XPU] support XPU VL model inference

* fix image op import and device check

* rebase develop

* fix perf
Author: Lucas
Date: 2025-09-25 14:34:15 +08:00
Committed by: GitHub
Parent: e36eccfdad
Commit: 87179cb744
18 changed files with 1300 additions and 146 deletions


@@ -31,6 +31,7 @@ from paddleformers.utils.log import logger
 
 from fastdeploy.config import FDConfig
 from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce
+from fastdeploy.model_executor.forward_meta import ForwardMeta
 from fastdeploy.model_executor.graph_optimization.decorator import (
     cuda_graph_buffers,
     support_graph_optimization,
@@ -44,20 +45,15 @@ from fastdeploy.model_executor.models.ernie4_5_moe import (
     Ernie4_5_Attention,
     Ernie4_5_MLP,
 )
+from fastdeploy.model_executor.models.ernie4_5_vl.image_op import (
+    text_image_gather_scatter,
+    text_image_index_out,
+)
 from fastdeploy.model_executor.models.model_base import (
     ModelCategory,
     ModelForCasualLM,
     ModelRegistry,
 )
-from fastdeploy.platforms import current_platform
-
-if current_platform.is_cuda():
-    from fastdeploy.model_executor.ops.gpu import (
-        text_image_gather_scatter,
-        text_image_index_out,
-    )
-
-from fastdeploy.model_executor.forward_meta import ForwardMeta
 
 
 class Ernie4_5_VLMLP(Ernie4_5_MLP):
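
The hunk above is the caller side of the change: the CUDA-only guard is dropped from the model file and replaced by a single import from the new image_op dispatch module, shown in the new-file diff below, which appears to be the "device check" fix named in the commit message. The old guard had no XPU or fallback branch, so on a non-CUDA build the two op names were never bound and only failed at first use. A minimal, self-contained sketch of that failure mode; the flag and placeholder import are hypothetical stand-ins, not FastDeploy code:

# Why the old pattern breaks off-CUDA: a one-sided conditional import leaves the
# names unbound on every other platform, so the error shows up later as a
# NameError at call time rather than a clear ImportError at import time.
is_cuda = False  # stand-in for current_platform.is_cuda() on an XPU machine

if is_cuda:
    from math import sqrt as text_image_index_out  # placeholder import target

try:
    text_image_index_out  # first use of the op
except NameError:
    print("op was never imported on this platform")  # the gap image_op.py closes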


@@ -0,0 +1,32 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from fastdeploy.platforms import current_platform
+
+if current_platform.is_cuda():
+    from fastdeploy.model_executor.ops.gpu import (
+        text_image_gather_scatter,
+        text_image_index_out,
+    )
+elif current_platform.is_xpu():
+    from fastdeploy.model_executor.ops.xpu import (
+        text_image_gather_scatter,
+        text_image_index_out,
+    )
+else:
+    raise ImportError("Unsupported platform, only support CUDA and XPU")
+
+__all__ = ["text_image_gather_scatter", "text_image_index_out"]
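
With the whole platform branch living in this one module, model code stays device-agnostic and an unsupported build fails fast at import time rather than at the first kernel call. A quick smoke check for the new module, given as a hedged sketch that only assumes a FastDeploy build with either the GPU or XPU custom ops compiled in; the module path is taken from the import added to the model file above:

# Hedged smoke check: on a CUDA or XPU build the import succeeds and both ops
# are exposed; on anything else it raises the ImportError coded in image_op.py.
try:
    from fastdeploy.model_executor.models.ernie4_5_vl import image_op
except ImportError as exc:
    print(f"image_op unavailable on this platform: {exc}")
else:
    assert set(image_op.__all__) == {"text_image_gather_scatter", "text_image_index_out"}
    print("resolved ops:", image_op.text_image_gather_scatter, image_op.text_image_index_out)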