[XPU][CI] change VL model to 28B-VL-thinking (#5169)

* Enhance run_ci_xpu.sh with caching and prefill options
* Update model path and configuration in run_ci_xpu.sh
* Add '北朝' keyword to assertion in run_45vl.py
* Enhance process termination logic in run_ci_xpu.sh
* Set timeout for CI_XPU job to 60 minutes
* Remove extra newline in stop_processes function
2025-12-24 13:28:13 +08:00 · 2025-11-24 16:50:18 +08:00
parent af03da5127
commit 5ff93d4998
3 changed files with 9 additions and 7 deletions
--- a/.github/workflows/ci_xpu.yml
+++ b/.github/workflows/ci_xpu.yml
@@ -16,6 +16,7 @@ concurrency:

 jobs:
  CI_XPU:
+    timeout-minutes: 60
    runs-on: [self-hosted, XPU-P800-8Card]
    steps:
      - name: Print current runner name
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -14,6 +14,8 @@ function stop_processes() {
    for port in $(seq $((8188 + XPU_ID * 100 + 10)) $((8188 + XPU_ID * 100 + 40))); do
        lsof -t -i :${port} | xargs kill -9 || true
    done
+    netstat -tunlp 2>/dev/null | grep $((8190 + XPU_ID * 100)) | awk '{print $NF}' | awk -F'/' '{print $1}' | xargs -r kill -9
+    netstat -tunlp 2>/dev/null | grep $((8190 + XPU_ID * 100)) | awk '{print $(NF-1)}' | cut -d/ -f1 | grep -E '^[0-9]+$' | xargs -r kill -9
 }

 stop_processes >kill.log 2>&1
@@ -230,19 +232,18 @@ else
 fi
 export port_num=$((8188 + XPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
-    --model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Paddle \
+    --model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Thinking \
    --port $port_num \
    --engine-worker-queue-port $((port_num + 1)) \
    --metrics-port $((port_num + 2)) \
    --cache-queue-port $((port_num + 47873)) \
    --tensor-parallel-size 4 \
    --max-model-len 32768 \
-    --max-num-seqs 10 \
+    --max-num-seqs 32 \
    --quantization wint8 \
-    --enable-mm \
-    --mm-processor-kwargs '{"video_max_frames": 30}' \
-    --limit-mm-per-prompt '{"image": 10, "video": 3}' \
-    --reasoning-parser ernie-45-vl \
+    --reasoning-parser ernie-45-vl-thinking \
+    --tool-call-parser ernie-45-vl-thinking \
+    --mm-processor-kwargs '{"image_max_pixels": 12845056 }' \
    --enable-chunked-prefill > server.log 2>&1 &

 sleep 60
--- a/tests/ci_use/XPU_45T/run_45vl.py
+++ b/tests/ci_use/XPU_45T/run_45vl.py
@@ -45,7 +45,7 @@ def test_45vl():
    )
    print(response.choices[0].message.content)
    # print(base_response)
-    assert any(keyword in response.choices[0].message.content for keyword in ["北魏", "北齐", "释迦牟尼"])
+    assert any(keyword in response.choices[0].message.content for keyword in ["北魏", "北齐", "释迦牟尼", "北朝"])


 if __name__ == "__main__":