From fc8bef2c954f97247d85b58e7bb3e2f9446811c0 Mon Sep 17 00:00:00 2001 From: plusNew001 <95567040+plusNew001@users.noreply.github.com> Date: Thu, 6 Nov 2025 14:12:23 +0800 Subject: [PATCH] [XPU][CI] Change CI VL model to 28B (#4764) * Update XPU_VISIBLE_DEVICES and model parameters * Update base response and adjust max tokens * Implement process cleanup in CI workflow Add process cleanup commands to prevent port conflicts * Remove process cleanup commands from CI workflow Removed old process cleanup commands to prevent port conflicts. --- .github/workflows/ci_xpu.yml | 1 - scripts/run_ci_xpu.sh | 6 +++--- tests/ci_use/XPU_45T/run_45vl.py | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci_xpu.yml b/.github/workflows/ci_xpu.yml index 7398af53d..36371a243 100644 --- a/.github/workflows/ci_xpu.yml +++ b/.github/workflows/ci_xpu.yml @@ -68,7 +68,6 @@ jobs: FD_API_PORT=$((9180 + gpu_id * 100)) FD_ENGINE_QUEUE_PORT=$((9150 + gpu_id * 100)) FD_METRICS_PORT=$((9170 + gpu_id * 100)) - PARENT_DIR=$(dirname "$WORKSPACE") echo "PARENT_DIR:$PARENT_DIR" docker run --rm --net=host --cap-add=SYS_PTRACE --privileged --shm-size=64G \ diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh index d59996372..c407d945a 100644 --- a/scripts/run_ci_xpu.sh +++ b/scripts/run_ci_xpu.sh @@ -187,11 +187,11 @@ rm -f core* #清空消息队列 ipcrm --all=msg echo "============================开始vl模型测试!============================" -export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" +export XPU_VISIBLE_DEVICES="0,1,2,3" python -m fastdeploy.entrypoints.openai.api_server \ - --model ${MODEL_PATH}/ERNIE-4.5-VL-424B-A47B-Paddle \ + --model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Paddle \ --port 8188 \ - --tensor-parallel-size 8 \ + --tensor-parallel-size 4 \ --max-model-len 32768 \ --max-num-seqs 10 \ --quantization wint8 \ diff --git a/tests/ci_use/XPU_45T/run_45vl.py b/tests/ci_use/XPU_45T/run_45vl.py index 654d095e0..7bbde4068 100644 --- a/tests/ci_use/XPU_45T/run_45vl.py +++ 
b/tests/ci_use/XPU_45T/run_45vl.py @@ -19,7 +19,7 @@ def test_45vl(): ip = "0.0.0.0" service_http_port = "8188" # 服务配置的 client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY") - base_response = "\n这尊佛像是**北魏时期的释迦牟尼" + base_response = "\n\n这尊佛像属于**北魏时期**" # 非流式对话 response = client.chat.completions.create( model="default", @@ -39,7 +39,7 @@ def test_45vl(): ], temperature=1, top_p=0, - max_tokens=75, + max_tokens=70, stream=False, ) print(response.choices[0].message.content)