From 3665c283b52ec547aef7ca7ef09f49e0fa745549 Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Mon, 10 Nov 2025 21:09:48 +0800
Subject: [PATCH] [XPU] [CI] Change CI to multi-concurrency (#4866)

* Refactor GPU ID logic in CI workflow

Updated GPU ID assignment logic and removed unused port calculations.

* Refactor GPU device and port configuration

* Update engine_worker_queue_port calculation logic

* Refactor XPU_VISIBLE_DEVICES export logic

* Adjust service port based on GPU ID

* Adjust service HTTP port based on GPU ID

* Adjust service_http_port based on GPU_ID

* Add import for os module in run_45T.py

* Update run_45vl.py

* Import os module in run_w4a8.py

Added import for os module to use environment variables.

* Remove duplicate import of os module

* Remove duplicate import of os module

* Update run_45T.py

* Update run_w4a8.py

* fix bug

* fix bug

* Update run_w4a8.py

* Fix directory change command in run_ci_xpu.sh
---
 .github/workflows/ci_xpu.yml     | 11 ++---
 scripts/run_ci_xpu.sh            | 76 ++++++++++++++++++++++++++------
 tests/ci_use/XPU_45T/run_45T.py  |  4 +-
 tests/ci_use/XPU_45T/run_45vl.py |  4 +-
 tests/ci_use/XPU_45T/run_ep.py   |  5 ++-
 tests/ci_use/XPU_45T/run_w4a8.py |  8 ++--
 6 files changed, 79 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/ci_xpu.yml b/.github/workflows/ci_xpu.yml
index 36371a243..a1775dc8e 100644
--- a/.github/workflows/ci_xpu.yml
+++ b/.github/workflows/ci_xpu.yml
@@ -60,14 +60,11 @@ jobs:
         runner_name="${{ runner.name }}"
         last_char="${runner_name: -1}"
 
-        if [[ "$last_char" =~ [0-3] ]]; then
-          gpu_id="$last_char"
+        if [[ "$last_char" == "1" ]]; then
+          gpu_id="4"
         else
           gpu_id="0"
         fi
-        FD_API_PORT=$((9180 + gpu_id * 100))
-        FD_ENGINE_QUEUE_PORT=$((9150 + gpu_id * 100))
-        FD_METRICS_PORT=$((9170 + gpu_id * 100))
         PARENT_DIR=$(dirname "$WORKSPACE")
         echo "PARENT_DIR:$PARENT_DIR"
         docker run --rm --net=host --cap-add=SYS_PTRACE --privileged --shm-size=64G \
@@ -77,9 +74,7 @@ jobs:
           -e "http_proxy=$(git config --global --get http.proxy)" \
           -e "https_proxy=$(git config --global --get https.proxy)" \
           -e "no_proxy=bcebos.com,mirrors.tuna.tsinghua.edu.cn,127.0.0.1,localhost" \
-          -e "FD_API_PORT=${FD_API_PORT}" \
-          -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
-          -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
+          -e "GPU_ID=${gpu_id}" \
           ${docker_image} /bin/bash -c "
             git config --global --add safe.directory /workspace/FastDeploy
             cd FastDeploy
diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
index d96d9db6a..01c027e14 100644
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -9,13 +9,27 @@ apt install -y lsof
 function stop_processes() {
     ps -efww | grep -E 'cache_transfer_manager.py' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
     ps -efww | grep -E 'api_server' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
-    ps -efww | grep -E '8188' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
-    lsof -t -i :8188 | xargs kill -9 || true
+    ps -efww | grep -E "$((8188 + GPU_ID * 100))" | grep -v grep | awk '{print $2}' | xargs kill -9 || true
+    lsof -t -i :$((8188 + GPU_ID * 100)) | xargs kill -9 || true
 }
 
 stop_processes
 
-# Set the model path
-export model_path=${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle
+# The machines can be flaky, so reset the XPUs in use to make sure they are healthy
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
+mkdir -p /workspace/deps
+cd /workspace/deps
+wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/5.0.21.21/xre-Linux-x86_64-5.0.21.21.tar.gz
+tar -zxf xre-Linux-x86_64-5.0.21.21.tar.gz && mv xre-Linux-x86_64-5.0.21.21 xre
+cd -
+export PATH=/workspace/deps/xre/bin:$PATH
+
+xpu-smi -r -i $XPU_VISIBLE_DEVICES
+xpu-smi
 
 echo "pip requirements"
 python -m pip install -r requirements.txt
@@ -51,11 +65,19 @@
 rm -f core*
 # Clear the message queues
 ipcrm --all=msg
 echo "============================Starting V1 mode test!============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
-    --model ${model_path} \
-    --port 8188 \
-    --tensor-parallel-size 8 \
+    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
+    --tensor-parallel-size 4 \
     --num-gpu-blocks-override 16384 \
     --max-model-len 32768 \
     --max-num-seqs 128 \
@@ -119,10 +141,18 @@
 rm -f core*
 # Clear the message queues
 ipcrm --all=msg
 echo "============================Starting W4A8 test!============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
     --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-W4A8C8-TP4-Paddle \
-    --port 8188 \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
     --tensor-parallel-size 4 \
     --num-gpu-blocks-override 16384 \
     --max-model-len 32768 \
@@ -187,10 +217,18 @@
 rm -f core*
 # Clear the message queues
 ipcrm --all=msg
 echo "============================Starting VL model test!============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
     --model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Paddle \
-    --port 8188 \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
     --tensor-parallel-size 4 \
     --max-model-len 32768 \
     --max-num-seqs 10 \
@@ -257,7 +295,12 @@
 rm -rf log/*
 rm -f core*
 ipcrm --all=msg
 xpu-smi
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
 export BKCL_TRACE_TOPO=1
@@ -301,7 +344,12 @@
 rm -rf log/*
 rm -f core*
 ipcrm --all=msg
 xpu-smi
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
 export BKCL_TRACE_TOPO=1
diff --git a/tests/ci_use/XPU_45T/run_45T.py b/tests/ci_use/XPU_45T/run_45T.py
index 0ca044344..3341cfc35 100644
--- a/tests/ci_use/XPU_45T/run_45T.py
+++ b/tests/ci_use/XPU_45T/run_45T.py
@@ -11,13 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import openai
 
 
 def test_45t():
     ip = "0.0.0.0"
-    service_http_port = "8188"  # configured for the service
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    service_http_port = 8188 + gpu_id * 100  # configured for the service
     client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
     # base_response_110 = "你好!我是一个基于人工智能技术开发的助手,可以帮你解答问题、提供建议、聊天交流或者完成一些任务。无论是学习、工作还是生活中的疑问,都可以随时告诉我哦~😊 你有什么想聊的吗?"
     # base_response_104 = "你好!我是一个基于人工智能技术打造的助手,可以帮你解答问题、提供建议、分享知识,或者陪你聊聊天~😊 无论是学习、工作、生活还是娱乐相关的问题,都可以随时告诉我哦!你今天有什么想聊的吗?"
diff --git a/tests/ci_use/XPU_45T/run_45vl.py b/tests/ci_use/XPU_45T/run_45vl.py
index d8afa6798..2c0b9acbd 100644
--- a/tests/ci_use/XPU_45T/run_45vl.py
+++ b/tests/ci_use/XPU_45T/run_45vl.py
@@ -11,13 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import openai
 
 
 def test_45vl():
     ip = "0.0.0.0"
-    service_http_port = "8188"  # configured for the service
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    service_http_port = 8188 + gpu_id * 100  # configured for the service
     client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
     base_response = "北魏时期"
     # Non-streaming chat
diff --git a/tests/ci_use/XPU_45T/run_ep.py b/tests/ci_use/XPU_45T/run_ep.py
index e8a1e7197..c7bddfc25 100644
--- a/tests/ci_use/XPU_45T/run_ep.py
+++ b/tests/ci_use/XPU_45T/run_ep.py
@@ -37,8 +37,9 @@ def test_fd_ep():
     else:
         tensor_parallel_size = xpu_device_num
         data_parallel_size = 1
-
-    engine_worker_queue_port = [str(8023 + i * 10) for i in range(data_parallel_size)]
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    base_port = 8023 + gpu_id * 100
+    engine_worker_queue_port = [str(base_port + i * 10) for i in range(data_parallel_size)]
     engine_worker_queue_port = ",".join(engine_worker_queue_port)
 
     llm = LLM(
diff --git a/tests/ci_use/XPU_45T/run_w4a8.py b/tests/ci_use/XPU_45T/run_w4a8.py
index 795b9ffe8..75857d936 100644
--- a/tests/ci_use/XPU_45T/run_w4a8.py
+++ b/tests/ci_use/XPU_45T/run_w4a8.py
@@ -11,16 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import openai
 
 
 def test_w4a8():
     ip = "0.0.0.0"
-    service_http_port = "8188"  # configured for the service
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    service_http_port = 8188 + gpu_id * 100  # configured for the service
     client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
-    #base_response_110 = "你好!我是一个基于人工智能技术的助手,可以帮你解答问题、提供建议、聊天或者协助完成各种任务。无论是学习、工作还是生活中的疑问,我都可以尽力提供帮助。😊 你有什么想聊的吗?"
-    #base_response_104 = "你好!我是一个人工智能助手,可以帮你解答问题、提供建议、聊天或者完成一些任务。无论是学习、工作还是生活中的疑问,我都可以尽力帮忙哦~有什么需要我做的吗?😊"
+    # base_response_110 = "你好!我是一个基于人工智能技术的助手,可以帮你解答问题、提供建议、聊天或者协助完成各种任务。无论是学习、工作还是生活中的疑问,我都可以尽力提供帮助。😊 你有什么想聊的吗?"
+    # base_response_104 = "你好!我是一个人工智能助手,可以帮你解答问题、提供建议、聊天或者完成一些任务。无论是学习、工作还是生活中的疑问,我都可以尽力帮忙哦~有什么需要我做的吗?😊"
     # Non-streaming chat
     response = client.chat.completions.create(
         model="default",
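
Reviewer note: the port layout introduced by this patch can be sanity-checked offline. The sketch below is not part of the patch (ports_for is a hypothetical helper); it simply recomputes every port the CI scripts now derive from GPU_ID and confirms that the two concurrent runners (GPU_ID=0 and GPU_ID=4) never share a port and stay inside the valid port range:

    # Hypothetical check, mirroring the arithmetic in run_ci_xpu.sh and the tests.
    def ports_for(gpu_id: int) -> set:
        """Return every port the CI scripts derive from GPU_ID."""
        base = 8188 + gpu_id * 100       # --port / service_http_port
        return {
            base,                        # HTTP API port
            base + 1,                    # --engine-worker-queue-port
            base + 2,                    # --metrics-port
            base + 47873,                # --cache-queue-port
            8023 + gpu_id * 100,         # first engine_worker_queue_port in run_ep.py
        }

    if __name__ == "__main__":
        a, b = ports_for(0), ports_for(4)
        assert a.isdisjoint(b), "concurrent runners would collide on a port"
        assert max(a | b) < 65536, "cache-queue offset must stay in port range"
        print(sorted(a))  # [8023, 8188, 8189, 8190, 56061]
        print(sorted(b))  # [8423, 8588, 8589, 8590, 56461]

run_ep.py actually allocates one queue port per data-parallel rank (base_port + i * 10), so the single base_port entry above stands in for that whole block.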