From 3665c283b52ec547aef7ca7ef09f49e0fa745549 Mon Sep 17 00:00:00 2001
From: plusNew001 <95567040+plusNew001@users.noreply.github.com>
Date: Mon, 10 Nov 2025 21:09:48 +0800
Subject: [PATCH] [XPU] [CI] Change CI to multi-concurrency (#4866)

* Refactor GPU ID logic in CI workflow

Updated GPU ID assignment logic and removed unused port calculations.

* Refactor GPU device and port configuration

* Update engine_worker_queue_port calculation logic

* Refactor XPU_VISIBLE_DEVICES export logic

* Adjust service port based on GPU ID

* Adjust service HTTP port based on GPU ID

* Adjust service_http_port based on GPU_ID

* Add import for os module in run_45T.py

* Update run_45vl.py

* Import os module in run_w4a8.py

Added import for os module to use environment variables.

* Remove duplicate import of os module

* Remove duplicate import of os module

* Update run_45T.py

* Update run_w4a8.py

* fix bug

* fix bug

* Update run_w4a8.py

* Fix directory change command in run_ci_xpu.sh
---
 .github/workflows/ci_xpu.yml     | 11 ++---
 scripts/run_ci_xpu.sh            | 76 ++++++++++++++++++++++++++------
 tests/ci_use/XPU_45T/run_45T.py  |  4 +-
 tests/ci_use/XPU_45T/run_45vl.py |  4 +-
 tests/ci_use/XPU_45T/run_ep.py   |  5 ++-
 tests/ci_use/XPU_45T/run_w4a8.py |  8 ++--
 6 files changed, 79 insertions(+), 29 deletions(-)

diff --git a/.github/workflows/ci_xpu.yml b/.github/workflows/ci_xpu.yml
index 36371a243..a1775dc8e 100644
--- a/.github/workflows/ci_xpu.yml
+++ b/.github/workflows/ci_xpu.yml
@@ -60,14 +60,11 @@ jobs:
         runner_name="${{ runner.name }}"
         last_char="${runner_name: -1}"
 
-        if [[ "$last_char" =~ [0-3] ]]; then
-          gpu_id="$last_char"
+        if [[ "$last_char" == "1" ]]; then
+          gpu_id="4"
         else
           gpu_id="0"
         fi
-        FD_API_PORT=$((9180 + gpu_id * 100))
-        FD_ENGINE_QUEUE_PORT=$((9150 + gpu_id * 100))
-        FD_METRICS_PORT=$((9170 + gpu_id * 100))
         PARENT_DIR=$(dirname "$WORKSPACE")
         echo "PARENT_DIR:$PARENT_DIR"
         docker run --rm --net=host --cap-add=SYS_PTRACE --privileged --shm-size=64G \
@@ -77,9 +74,7 @@ jobs:
           -e "http_proxy=$(git config --global --get http.proxy)" \
           -e "https_proxy=$(git config --global --get https.proxy)" \
           -e "no_proxy=bcebos.com,mirrors.tuna.tsinghua.edu.cn,127.0.0.1,localhost" \
-          -e "FD_API_PORT=${FD_API_PORT}" \
-          -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
-          -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
+          -e "GPU_ID=${gpu_id}" \
           ${docker_image} /bin/bash -c "
             git config --global --add safe.directory /workspace/FastDeploy
             cd FastDeploy
diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh
index d96d9db6a..01c027e14 100644
--- a/scripts/run_ci_xpu.sh
+++ b/scripts/run_ci_xpu.sh
@@ -9,13 +9,27 @@ apt install -y lsof
 function stop_processes() {
     ps -efww | grep -E 'cache_transfer_manager.py' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
     ps -efww | grep -E 'api_server' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
-    ps -efww | grep -E '8188' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
-    lsof -t -i :8188 | xargs kill -9 || true
+    ps -efww | grep -E "$((8188 + GPU_ID * 100))" | grep -v grep | awk '{print $2}' | xargs kill -9 || true
+    lsof -t -i :$((8188 + GPU_ID * 100)) | xargs kill -9 || true
 }
 
 stop_processes
 
-# Set the model path
-export model_path=${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle
+# The machines can be flaky, so reset the XPUs in use to make sure they are healthy
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
+mkdir -p /workspace/deps
+cd /workspace/deps
+wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/5.0.21.21/xre-Linux-x86_64-5.0.21.21.tar.gz
+tar -zxf xre-Linux-x86_64-5.0.21.21.tar.gz && mv xre-Linux-x86_64-5.0.21.21 xre
+cd -
+export PATH=/workspace/deps/xre/bin:$PATH
+
+xpu-smi -r -i $XPU_VISIBLE_DEVICES
+xpu-smi
 
 echo "pip requirements"
 python -m pip install -r requirements.txt
@@ -51,11 +65,19 @@
 rm -f core*
 # Clear the message queues
 ipcrm --all=msg
 echo "============================Starting V1 mode test!============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
-    --model ${model_path} \
-    --port 8188 \
-    --tensor-parallel-size 8 \
+    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
+    --tensor-parallel-size 4 \
     --num-gpu-blocks-override 16384 \
     --max-model-len 32768 \
     --max-num-seqs 128 \
@@ -119,10 +141,18 @@
 rm -f core*
 # Clear the message queues
 ipcrm --all=msg
 echo "============================Starting W4A8 test!============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
     --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-W4A8C8-TP4-Paddle \
-    --port 8188 \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
     --tensor-parallel-size 4 \
     --num-gpu-blocks-override 16384 \
     --max-model-len 32768 \
@@ -187,10 +217,18 @@
 rm -f core*
 # Clear the message queues
 ipcrm --all=msg
 echo "============================Starting VL model test!============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
     --model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Paddle \
-    --port 8188 \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
     --tensor-parallel-size 4 \
     --max-model-len 32768 \
     --max-num-seqs 10 \
@@ -257,7 +295,12 @@
 rm -rf log/*
 rm -f core*
 ipcrm --all=msg
 xpu-smi
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
 export BKCL_TRACE_TOPO=1
@@ -301,7 +344,12 @@
 rm -rf log/*
 rm -f core*
 ipcrm --all=msg
 xpu-smi
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
 export BKCL_TRACE_TOPO=1
diff --git a/tests/ci_use/XPU_45T/run_45T.py b/tests/ci_use/XPU_45T/run_45T.py
index 0ca044344..3341cfc35 100644
--- a/tests/ci_use/XPU_45T/run_45T.py
+++ b/tests/ci_use/XPU_45T/run_45T.py
@@ -11,13 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import openai
 
 
 def test_45t():
     ip = "0.0.0.0"
-    service_http_port = "8188"  # configured for the service
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    service_http_port = 8188 + gpu_id * 100  # configured for the service
     client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
     # base_response_110 = "你好!我是一个基于人工智能技术开发的助手,可以帮你解答问题、提供建议、聊天交流或者完成一些任务。无论是学习、工作还是生活中的疑问,都可以随时告诉我哦~😊 你有什么想聊的吗?"
     # base_response_104 = "你好!我是一个基于人工智能技术打造的助手,可以帮你解答问题、提供建议、分享知识,或者陪你聊聊天~😊 无论是学习、工作、生活还是娱乐相关的问题,都可以随时告诉我哦!你今天有什么想聊的吗?"
diff --git a/tests/ci_use/XPU_45T/run_45vl.py b/tests/ci_use/XPU_45T/run_45vl.py
index d8afa6798..2c0b9acbd 100644
--- a/tests/ci_use/XPU_45T/run_45vl.py
+++ b/tests/ci_use/XPU_45T/run_45vl.py
@@ -11,13 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import openai
 
 
 def test_45vl():
     ip = "0.0.0.0"
-    service_http_port = "8188"  # configured for the service
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    service_http_port = 8188 + gpu_id * 100  # configured for the service
     client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
     base_response = "北魏时期"
     # Non-streaming chat
diff --git a/tests/ci_use/XPU_45T/run_ep.py b/tests/ci_use/XPU_45T/run_ep.py
index e8a1e7197..c7bddfc25 100644
--- a/tests/ci_use/XPU_45T/run_ep.py
+++ b/tests/ci_use/XPU_45T/run_ep.py
@@ -37,8 +37,9 @@ def test_fd_ep():
     else:
         tensor_parallel_size = xpu_device_num
         data_parallel_size = 1
-
-    engine_worker_queue_port = [str(8023 + i * 10) for i in range(data_parallel_size)]
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    base_port = 8023 + gpu_id * 100
+    engine_worker_queue_port = [str(base_port + i * 10) for i in range(data_parallel_size)]
     engine_worker_queue_port = ",".join(engine_worker_queue_port)
 
     llm = LLM(
diff --git a/tests/ci_use/XPU_45T/run_w4a8.py b/tests/ci_use/XPU_45T/run_w4a8.py
index 795b9ffe8..75857d936 100644
--- a/tests/ci_use/XPU_45T/run_w4a8.py
+++ b/tests/ci_use/XPU_45T/run_w4a8.py
@@ -11,16 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os
 import openai
 
 
 def test_w4a8():
     ip = "0.0.0.0"
-    service_http_port = "8188"  # configured for the service
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    service_http_port = 8188 + gpu_id * 100  # configured for the service
     client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
-    #base_response_110 = "你好!我是一个基于人工智能技术的助手,可以帮你解答问题、提供建议、聊天或者协助完成各种任务。无论是学习、工作还是生活中的疑问,我都可以尽力提供帮助。😊 你有什么想聊的吗?"
-    #base_response_104 = "你好!我是一个人工智能助手,可以帮你解答问题、提供建议、聊天或者完成一些任务。无论是学习、工作还是生活中的疑问,我都可以尽力帮忙哦~有什么需要我做的吗?😊"
+    # base_response_110 = "你好!我是一个基于人工智能技术的助手,可以帮你解答问题、提供建议、聊天或者协助完成各种任务。无论是学习、工作还是生活中的疑问,我都可以尽力提供帮助。😊 你有什么想聊的吗?"
+    # base_response_104 = "你好!我是一个人工智能助手,可以帮你解答问题、提供建议、聊天或者完成一些任务。无论是学习、工作还是生活中的疑问,我都可以尽力帮忙哦~有什么需要我做的吗?😊"
     # Non-streaming chat
     response = client.chat.completions.create(
         model="default",
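
Reviewer note: the port layout introduced by this patch can be sanity-checked offline. The sketch below is not part of the patch (ports_for is a hypothetical helper); it simply recomputes every port the CI scripts now derive from GPU_ID and confirms that the two concurrent runners (GPU_ID=0 and GPU_ID=4) never share a port and stay inside the valid port range:

    # Hypothetical check, mirroring the arithmetic in run_ci_xpu.sh and the tests.
    def ports_for(gpu_id: int) -> set:
        """Return every port the CI scripts derive from GPU_ID."""
        base = 8188 + gpu_id * 100       # --port / service_http_port
        return {
            base,                        # HTTP API port
            base + 1,                    # --engine-worker-queue-port
            base + 2,                    # --metrics-port
            base + 47873,                # --cache-queue-port
            8023 + gpu_id * 100,         # first engine_worker_queue_port in run_ep.py
        }

    if __name__ == "__main__":
        a, b = ports_for(0), ports_for(4)
        assert a.isdisjoint(b), "concurrent runners would collide on a port"
        assert max(a | b) < 65536, "cache-queue offset must stay in port range"
        print(sorted(a))  # [8023, 8188, 8189, 8190, 56061]
        print(sorted(b))  # [8423, 8588, 8589, 8590, 56461]

run_ep.py actually allocates one queue port per data-parallel rank (base_port + i * 10), so the single base_port entry above stands in for that whole block.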