From ea866e4b349f4c804d49b2fda73a4de645ef073d Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Fri, 31 Oct 2025 10:38:09 +0800
Subject: [PATCH] [XPU] [CI] Add VL case (#4649)

* Enhance CI script with health checks and logging

Updated the CI script to include health checks and logging for the VL model testing process.

* Add test for OpenAI chat completions

* Refactor chat completion user message structure

* Fix variable name for exit code in CI script

* Update text prompt to Chinese for artifact question

* Update service port and response assertions in tests

* Refactor assertion for response content comparison

* Update run_45vl.py

* Change service HTTP port from 8123 to 8188
---
 scripts/run_ci_xpu.sh            | 71 ++++++++++++++++++++++++++++++++
 tests/ci_use/XPU_45T/run_45vl.py | 51 +++++++++++++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 tests/ci_use/XPU_45T/run_45vl.py

diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
index 469364706..5633cde54 100644
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -171,6 +171,77 @@ if [ ${w4a8_test_exit_code} -ne 0 ]; then
     exit 1
 fi
 
+sleep 5
+# Start the service
+rm -rf log/*
+rm -f core*
+# pkill -9 python  # not executed in the pipeline
+# Clear the IPC message queues
+ipcrm --all=msg
+echo "============================ Starting VL model test ============================"
+export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+python -m fastdeploy.entrypoints.openai.api_server \
+    --model ${MODEL_PATH}/ERNIE-4.5-VL-424B-A47B-Paddle \
+    --port 8188 \
+    --tensor-parallel-size 8 \
+    --max-model-len 32768 \
+    --max-num-seqs 10 \
+    --quantization wint8 \
+    --enable-mm \
+    --mm-processor-kwargs '{"video_max_frames": 30}' \
+    --limit-mm-per-prompt '{"image": 10, "video": 3}' \
+    --reasoning-parser ernie-45-vl > server.log 2>&1 &
+
+sleep 60
+# Health probe
+TIMEOUT=$((15 * 60))
+INTERVAL=10  # polling interval (seconds)
+ENDPOINT="http://0.0.0.0:8188/health"
+START_TIME=$(date +%s)  # record the start timestamp
+echo "Starting service health check, waiting up to ${TIMEOUT} seconds"
+while true; do
+    # elapsed time so far
+    CURRENT_TIME=$(date +%s)
+    ELAPSED=$((CURRENT_TIME - START_TIME))
+
+    # timeout check
+    if [ $ELAPSED -ge $TIMEOUT ]; then
+        echo -e "\nService startup timed out: still not healthy after $((TIMEOUT/60)) minutes!"
+        cat server.log
+        cat log/workerlog.0
+        exit 1
+    fi
+
+    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
+
+    if [ "$HTTP_CODE" = "200" ]; then
+        echo -e "\nService started successfully in ${ELAPSED} seconds"
+        break
+    else
+        sleep $INTERVAL
+    fi
+done
+
+cat server.log
+
+# Run the serving inference test
+python -m pytest tests/ci_use/XPU_45T/run_45vl.py
+vl_test_exit_code=$?
+echo "vl_test_exit_code is ${vl_test_exit_code}"
+
+ps -efww | grep -E 'cache_transfer_manager.py' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
+ps -efww | grep -E 'api_server' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
+ps -efww | grep -E '8188' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
+lsof -t -i :8188 | xargs kill -9 || true
+
+if [ ${vl_test_exit_code} -ne 0 ]; then
+    echo "log/workerlog.0"
+    cat log/workerlog.0
+    echo "VL model test failed, please check the PR code"
+    exit 1
+fi
+
+
 echo "============================开始EP并行测试!============================"
 sleep 5
 rm -rf log/*
diff --git a/tests/ci_use/XPU_45T/run_45vl.py b/tests/ci_use/XPU_45T/run_45vl.py
new file mode 100644
index 000000000..654d095e0
--- /dev/null
+++ b/tests/ci_use/XPU_45T/run_45vl.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import openai
+
+
+def test_45vl():
+    ip = "0.0.0.0"
+    service_http_port = "8188"  # port the service is configured to use
+    client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
+    base_response = "\n这尊佛像是**北魏时期的释迦牟尼"
+    # non-streaming chat completion
+    response = client.chat.completions.create(
+        model="default",
+        messages=[
+            {
+                "role": "user",
+                "content": [
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"
+                        },
+                    },
+                    {"type": "text", "text": "图片中的文物来自哪个时代?"},
+                ],
+            },
+        ],
+        temperature=1,
+        top_p=0,
+        max_tokens=75,
+        stream=False,
+    )
+    print(response.choices[0].message.content)
+    # print(base_response)
+    assert response.choices[0].message.content == base_response
+
+
+if __name__ == "__main__":
+    test_45vl()
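
For local debugging outside the pipeline, the health-probe loop that run_ci_xpu.sh implements in bash can be mirrored in Python. The helper below is a minimal sketch and not part of this patch; it assumes the same endpoint (http://0.0.0.0:8188/health), the same 15-minute ceiling and 10-second polling interval as the script, and uses only the standard library.

import time
import urllib.request


def wait_for_health(endpoint="http://0.0.0.0:8188/health", timeout_s=15 * 60, interval_s=10):
    """Poll the health endpoint until it returns HTTP 200 or the deadline passes."""
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(endpoint, timeout=2) as resp:
                if resp.status == 200:
                    return True
        except OSError:
            pass  # connection refused or timed out: server not ready yet, keep polling
        time.sleep(interval_s)
    return False


if __name__ == "__main__":
    assert wait_for_health(), "service did not become healthy within 15 minutes"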
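
run_45vl.py only exercises the non-streaming path (stream=False) and compares the reply against a fixed prefix. The sketch below is an illustrative streaming variant of the same request rather than part of the diff; it reuses the host, port, model name, and message payload assumed by the test, and only asserts that some content was streamed back, since token-by-token output is awkward to compare against a fixed string.

import openai


def check_45vl_streaming():
    # Same endpoint and credentials as run_45vl.py (assumed, not part of the patch).
    client = openai.Client(base_url="http://0.0.0.0:8188/v1", api_key="EMPTY_API_KEY")
    stream = client.chat.completions.create(
        model="default",
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"
                        },
                    },
                    {"type": "text", "text": "图片中的文物来自哪个时代?"},
                ],
            },
        ],
        temperature=1,
        top_p=0,
        max_tokens=75,
        stream=True,  # request incremental deltas instead of one final message
    )
    pieces = []
    for chunk in stream:
        if not chunk.choices:  # skip keep-alive chunks without choices
            continue
        delta = chunk.choices[0].delta
        if delta.content:
            pieces.append(delta.content)
    full_text = "".join(pieces)
    print(full_text)
    # Loose check: only assert that the VL model streamed some content back.
    assert full_text, "expected non-empty streamed content from the VL model"


if __name__ == "__main__":
    check_45vl_streaming()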