[XPU][CI] change VL model to 28B-VL-thinking (#5169)

* Enhance run_ci_xpu.sh with caching and prefill options

* Update model path and configuration in run_ci_xpu.sh

* Add '北朝' keyword to assertion in run_45vl.py

* Enhance process termination logic in run_ci_xpu.sh

* Set timeout for CI_XPU job to 60 minutes

* Remove extra newline in stop_processes function
This commit is contained in:
Jiaxin Sui
2025-11-24 16:50:18 +08:00
committed by GitHub
parent af03da5127
commit 5ff93d4998
3 changed files with 9 additions and 7 deletions

View File

@@ -16,6 +16,7 @@ concurrency:
jobs:
CI_XPU:
+ timeout-minutes: 60
runs-on: [self-hosted, XPU-P800-8Card]
steps:
- name: Print current runner name

View File

@@ -14,6 +14,8 @@ function stop_processes() {
for port in $(seq $((8188 + XPU_ID * 100 + 10)) $((8188 + XPU_ID * 100 + 40))); do
lsof -t -i :${port} | xargs kill -9 || true
done
netstat -tunlp 2>/dev/null | grep $((8190 + XPU_ID * 100)) | awk '{print $NF}' | awk -F'/' '{print $1}' | xargs -r kill -9
netstat -tunlp 2>/dev/null | grep $((8190 + XPU_ID * 100)) | awk '{print $(NF-1)}' | cut -d/ -f1 | grep -E '^[0-9]+$' | xargs -r kill -9
}
stop_processes >kill.log 2>&1
@@ -230,19 +232,18 @@ else
fi
export port_num=$((8188 + XPU_ID * 100))
python -m fastdeploy.entrypoints.openai.api_server \
- --model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Paddle \
+ --model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Thinking \
--port $port_num \
--engine-worker-queue-port $((port_num + 1)) \
--metrics-port $((port_num + 2)) \
--cache-queue-port $((port_num + 47873)) \
--tensor-parallel-size 4 \
--max-model-len 32768 \
- --max-num-seqs 10 \
+ --max-num-seqs 32 \
--quantization wint8 \
--enable-mm \
--mm-processor-kwargs '{"video_max_frames": 30}' \
--limit-mm-per-prompt '{"image": 10, "video": 3}' \
- --reasoning-parser ernie-45-vl \
+ --reasoning-parser ernie-45-vl-thinking \
+ --tool-call-parser ernie-45-vl-thinking \
--mm-processor-kwargs '{"image_max_pixels": 12845056 }' \
--enable-chunked-prefill > server.log 2>&1 &
sleep 60

View File

@@ -45,7 +45,7 @@ def test_45vl():
)
print(response.choices[0].message.content)
# print(base_response)
- assert any(keyword in response.choices[0].message.content for keyword in ["北魏", "北齐", "释迦牟尼"])
+ assert any(keyword in response.choices[0].message.content for keyword in ["北魏", "北齐", "释迦牟尼", "北朝"])
if __name__ == "__main__":