mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[XPU][CI] change VL model to 28B-VL-thinking (#5169)
* Enhance run_ci_xpu.sh with caching and prefill options
* Update model path and configuration in run_ci_xpu.sh
* Add '北朝' keyword to assertion in run_45vl.py
* Enhance process termination logic in run_ci_xpu.sh
* Set timeout for CI_XPU job to 60 minutes
* Remove extra newline in stop_processes function
This commit is contained in:
1
.github/workflows/ci_xpu.yml
vendored
1
.github/workflows/ci_xpu.yml
vendored
@@ -16,6 +16,7 @@ concurrency:
|
||||
|
||||
jobs:
|
||||
CI_XPU:
|
||||
timeout-minutes: 60
|
||||
runs-on: [self-hosted, XPU-P800-8Card]
|
||||
steps:
|
||||
- name: Print current runner name
|
||||
|
||||
@@ -14,6 +14,8 @@ function stop_processes() {
|
||||
for port in $(seq $((8188 + XPU_ID * 100 + 10)) $((8188 + XPU_ID * 100 + 40))); do
|
||||
lsof -t -i :${port} | xargs kill -9 || true
|
||||
done
|
||||
netstat -tunlp 2>/dev/null | grep $((8190 + XPU_ID * 100)) | awk '{print $NF}' | awk -F'/' '{print $1}' | xargs -r kill -9
|
||||
netstat -tunlp 2>/dev/null | grep $((8190 + XPU_ID * 100)) | awk '{print $(NF-1)}' | cut -d/ -f1 | grep -E '^[0-9]+$' | xargs -r kill -9
|
||||
}
|
||||
|
||||
stop_processes >kill.log 2>&1
|
||||
@@ -230,19 +232,18 @@ else
|
||||
fi
|
||||
export port_num=$((8188 + XPU_ID * 100))
|
||||
python -m fastdeploy.entrypoints.openai.api_server \
|
||||
--model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Paddle \
|
||||
--model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Thinking \
|
||||
--port $port_num \
|
||||
--engine-worker-queue-port $((port_num + 1)) \
|
||||
--metrics-port $((port_num + 2)) \
|
||||
--cache-queue-port $((port_num + 47873)) \
|
||||
--tensor-parallel-size 4 \
|
||||
--max-model-len 32768 \
|
||||
--max-num-seqs 10 \
|
||||
--max-num-seqs 32 \
|
||||
--quantization wint8 \
|
||||
--enable-mm \
|
||||
--mm-processor-kwargs '{"video_max_frames": 30}' \
|
||||
--limit-mm-per-prompt '{"image": 10, "video": 3}' \
|
||||
--reasoning-parser ernie-45-vl \
|
||||
--reasoning-parser ernie-45-vl-thinking \
|
||||
--tool-call-parser ernie-45-vl-thinking \
|
||||
--mm-processor-kwargs '{"image_max_pixels": 12845056 }' \
|
||||
--enable-chunked-prefill > server.log 2>&1 &
|
||||
|
||||
sleep 60
|
||||
|
||||
@@ -45,7 +45,7 @@ def test_45vl():
|
||||
)
|
||||
print(response.choices[0].message.content)
|
||||
# print(base_response)
|
||||
assert any(keyword in response.choices[0].message.content for keyword in ["北魏", "北齐", "释迦牟尼"])
|
||||
assert any(keyword in response.choices[0].message.content for keyword in ["北魏", "北齐", "释迦牟尼", "北朝"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user