[XPU] support XPU VL model inference (#4030)

* [XPU] support XPU VL model inference

* fix image op import and device check

* rebase develop

* fix perf
Author: Lucas
Date: 2025-09-25 14:34:15 +08:00
Committed by: GitHub
Parent: e36eccfdad
Commit: 87179cb744
18 changed files with 1300 additions and 146 deletions


@@ -31,6 +31,7 @@ from paddleformers.utils.log import logger
 
 from fastdeploy.config import FDConfig
 from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce
+from fastdeploy.model_executor.forward_meta import ForwardMeta
 from fastdeploy.model_executor.graph_optimization.decorator import (
     cuda_graph_buffers,
     support_graph_optimization,
@@ -44,20 +45,15 @@ from fastdeploy.model_executor.models.ernie4_5_moe import (
     Ernie4_5_Attention,
     Ernie4_5_MLP,
 )
+from fastdeploy.model_executor.models.ernie4_5_vl.image_op import (
+    text_image_gather_scatter,
+    text_image_index_out,
+)
 from fastdeploy.model_executor.models.model_base import (
     ModelCategory,
     ModelForCasualLM,
     ModelRegistry,
 )
-from fastdeploy.platforms import current_platform
-
-if current_platform.is_cuda():
-    from fastdeploy.model_executor.ops.gpu import (
-        text_image_gather_scatter,
-        text_image_index_out,
-    )
-
-from fastdeploy.model_executor.forward_meta import ForwardMeta
 
 
 class Ernie4_5_VLMLP(Ernie4_5_MLP):
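
The hunk above is the caller side of the change: the CUDA-only guard is dropped from the model file and replaced by a single import from the new image_op dispatch module, shown in the new-file diff below, which appears to be the "device check" fix named in the commit message. The old guard had no XPU or fallback branch, so on a non-CUDA build the two op names were never bound and only failed at first use. A minimal, self-contained sketch of that failure mode; the flag and placeholder import are hypothetical stand-ins, not FastDeploy code:

# Why the old pattern breaks off-CUDA: a one-sided conditional import leaves the
# names unbound on every other platform, so the error shows up later as a
# NameError at call time rather than a clear ImportError at import time.
is_cuda = False  # stand-in for current_platform.is_cuda() on an XPU machine

if is_cuda:
    from math import sqrt as text_image_index_out  # placeholder import target

try:
    text_image_index_out  # first use of the op
except NameError:
    print("op was never imported on this platform")  # the gap image_op.py closes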


@@ -0,0 +1,32 @@
+"""
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from fastdeploy.platforms import current_platform
+
+if current_platform.is_cuda():
+    from fastdeploy.model_executor.ops.gpu import (
+        text_image_gather_scatter,
+        text_image_index_out,
+    )
+elif current_platform.is_xpu():
+    from fastdeploy.model_executor.ops.xpu import (
+        text_image_gather_scatter,
+        text_image_index_out,
+    )
+else:
+    raise ImportError("Unsupported platform, only support CUDA and XPU")
+
+__all__ = ["text_image_gather_scatter", "text_image_index_out"]
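
With the whole platform branch living in this one module, model code stays device-agnostic and an unsupported build fails fast at import time rather than at the first kernel call. A quick smoke check for the new module, given as a hedged sketch that only assumes a FastDeploy build with either the GPU or XPU custom ops compiled in; the module path is taken from the import added to the model file above:

# Hedged smoke check: on a CUDA or XPU build the import succeeds and both ops
# are exposed; on anything else it raises the ImportError coded in image_op.py.
try:
    from fastdeploy.model_executor.models.ernie4_5_vl import image_op
except ImportError as exc:
    print(f"image_op unavailable on this platform: {exc}")
else:
    assert set(image_op.__all__) == {"text_image_gather_scatter", "text_image_index_out"}
    print("resolved ops:", image_op.text_image_gather_scatter, image_op.text_image_index_out)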