[XPU] [CI]Change CI to multi-concurrency (#4866)

* Refactor GPU ID logic in CI workflow Updated GPU ID assignment logic and removed unused port calculations. * Refactor GPU device and port configuration * Update engine_worker_queue_port calculation logic * Refactor XPU_VISIBLE_DEVICES export logic * Adjust service port based on GPU ID * Adjust service HTTP port based on GPU ID * Adjust service_http_port based on GPU_ID * Add import for os module in run_45T.py * Update run_45vl.py * Import os module in run_w4a8.py Added import for os module to use environment variables. * Remove duplicate import of os module * Remove duplicate import of os module * Update run_45T.py * Update run_w4a8.py * fix bug * fix bug * Update run_w4a8.py * Fix directory change command in run_ci_xpu.sh
2025-12-24 13:28:13 +08:00 · 2025-11-10 21:09:48 +08:00
parent 59d2edde29
commit 3665c283b5
6 changed files with 79 additions and 29 deletions
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -9,13 +9,27 @@ apt install -y lsof
 function stop_processes() {
    ps -efww | grep -E 'cache_transfer_manager.py' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
    ps -efww | grep -E 'api_server' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
-    ps -efww | grep -E '8188' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
-    lsof -t -i :8188 | xargs kill -9 || true
+    ps -efww | grep -E "$((8188 + GPU_ID * 100))" | grep -v grep | awk '{print $2}' | xargs kill -9 || true
+    lsof -t -i :$((8188 + GPU_ID * 100)) | xargs kill -9 || true
 }
 stop_processes

-#设置模型路径
-export model_path=${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle
+# 由于机器原因，需重启使用的卡，以保障没有问题
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
+mkdir -p /workspace/deps
+cd /workspace/deps
+wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/5.0.21.21/xre-Linux-x86_64-5.0.21.21.tar.gz
+tar -zxf xre-Linux-x86_64-5.0.21.21.tar.gz && mv xre-Linux-x86_64-5.0.21.21 xre
+cd -
+export PATH=/workspace/deps/xre/bin:$PATH
+
+xpu-smi -r -i $XPU_VISIBLE_DEVICES
+xpu-smi

 echo "pip requirements"
 python -m pip install -r requirements.txt
@@ -51,11 +65,19 @@ rm -f core*
 #清空消息队列
 ipcrm --all=msg
 echo "============================开始V1模式测试!============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
-    --model ${model_path} \
-    --port 8188 \
-    --tensor-parallel-size 8 \
+    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
+    --tensor-parallel-size 4 \
    --num-gpu-blocks-override 16384 \
    --max-model-len 32768 \
    --max-num-seqs 128 \
@@ -119,10 +141,18 @@ rm -f core*
 #清空消息队列
 ipcrm --all=msg
 echo "============================开始W4A8测试!============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-W4A8C8-TP4-Paddle \
-    --port 8188 \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
    --tensor-parallel-size 4 \
    --num-gpu-blocks-override 16384 \
    --max-model-len 32768 \
@@ -187,10 +217,18 @@ rm -f core*
 #清空消息队列
 ipcrm --all=msg
 echo "============================开始vl模型测试!============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
    --model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Paddle \
-    --port 8188 \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
    --tensor-parallel-size 4 \
    --max-model-len 32768 \
    --max-num-seqs 10 \
@@ -257,7 +295,12 @@ rm -rf log/*
 rm -f core*
 ipcrm --all=msg
 xpu-smi
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
 export BKCL_TRACE_TOPO=1
@@ -301,7 +344,12 @@ rm -rf log/*
 rm -f core*
 ipcrm --all=msg
 xpu-smi
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
 export BKCL_TRACE_TOPO=1