Files
FastDeploy/scripts/run_ci_xpu.sh
Jiaxin Sui bab01e9f85
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
[Cherry-pick][XPU][CI] Set pip index URL to Tsinghua mirror (#5277) (#5280)
* Update run_ci_xpu.sh

* Update Docker image tag in CI workflow
2025-11-28 10:14:14 +08:00

339 lines
9.5 KiB
Bash
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/bin/bash
# Resolve the directory that contains this script and print it for the CI log.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
echo "$DIR"
# Install lsof; it is used below to find whatever still holds the service port.
# apt-get is used instead of apt: apt is meant for interactive use, prints a
# "does not have a stable CLI interface" warning when run from a script, and
# may change behaviour between releases.
apt-get install -y lsof
# Kill leftover processes once up front (helper defined next, called right after).
#######################################
# Force-kill (SIGKILL) service processes left over from a previous run.
#
# Targets every process whose command line matches
# 'cache_transfer_manager.py', 'api_server' or the service port number, plus
# every process lsof reports on that port.
# Globals:
#   GPU_ID (read) - the port is 8188 + GPU_ID * 100; an unset GPU_ID
#                   evaluates to 0 inside the arithmetic expansion.
# Returns:
#   Always 0; the cleanup is best-effort.
#######################################
function stop_processes() {
  local port=$((8188 + GPU_ID * 100))
  local pattern pid

  for pattern in 'cache_transfer_manager.py' 'api_server' "$port"; do
    # pgrep -f tests the pattern against the command line only. Grepping the
    # full `ps -ef` output also matched the PID/PPID/time columns, so any
    # unrelated process whose PID merely contained the port digits was killed.
    for pid in $(pgrep -f "$pattern"); do
      # Never kill the script that is doing the cleanup.
      [[ "$pid" == "$$" ]] && continue
      kill -9 "$pid" 2>/dev/null || true
    done
  done

  # Whatever is still using the port, regardless of its command line.
  # Looping (instead of `xargs kill -9`) means kill is not invoked with an
  # empty argument list when nothing is found.
  for pid in $(lsof -t -i :"$port"); do
    kill -9 "$pid" 2>/dev/null || true
  done

  return 0
}
stop_processes

# Because of machine issues the cards used by this job are restarted before
# testing. GPU_ID "0" owns cards 0-3; any other value owns cards 4-7.
if [[ "$GPU_ID" == "0" ]]; then
  export XPU_VISIBLE_DEVICES="0,1,2,3"
else
  export XPU_VISIBLE_DEVICES="4,5,6,7"
fi

# Download XRE 5.0.21.21 and install it as /workspace/deps/xre (it provides
# the xpu-smi binary put on PATH below).
# The work runs in a subshell so the caller's working directory is untouched
# even if a step fails. It is written to be re-runnable:
#   * wget -O overwrites an old archive instead of saving "<name>.1" next to
#     it (which made tar unpack the stale file);
#   * the previous xre directory is removed only after a successful unpack,
#     so mv never nests the new tree inside an old xre/.
# A failure is reported but, as before, does not abort the script.
mkdir -p /workspace/deps
(
  cd /workspace/deps || exit 1
  xre_pkg="xre-Linux-x86_64-5.0.21.21"
  wget -q -O "${xre_pkg}.tar.gz" \
    "https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/5.0.21.21/${xre_pkg}.tar.gz" \
    && rm -rf "${xre_pkg}" \
    && tar -zxf "${xre_pkg}.tar.gz" \
    && rm -rf xre \
    && mv "${xre_pkg}" xre
) || echo "warning: failed to install XRE into /workspace/deps/xre" >&2
export PATH=/workspace/deps/xre/bin:$PATH
# NOTE(review): presumably `-r -i <ids>` resets the listed cards - confirm
# against the xpu-smi documentation. The second call prints the card status.
xpu-smi -r -i "$XPU_VISIBLE_DEVICES"
xpu-smi
# Use the Tsinghua mirror as pip's global index.
python -m pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple

echo "pip requirements"
python -m pip install -r requirements.txt

# Remove any previously installed wheels, then install the paddlepaddle-xpu
# wheel from the "latest" build URL.
echo "uninstall org"
for pkg in paddlepaddle-xpu fastdeploy-xpu; do
  python -m pip uninstall -y "$pkg"
done
python -m pip install https://paddle-qa.bj.bcebos.com/paddle-pipeline/Release-TagBuild-Training-Linux-Xpu-P800-SelfBuiltPypiUse/latest/paddlepaddle_xpu-0.0.0-cp310-cp310-linux_x86_64.whl

# Fetch the custom-op dependencies and build the wheel. CLANG_PATH and
# XVLLM_PATH point into the third_party directory under the current directory.
echo "build whl"
bash custom_ops/xpu_ops/download_dependencies.sh stable
export CLANG_PATH="${PWD}/custom_ops/xpu_ops/third_party/xtdk"
export XVLLM_PATH="${PWD}/custom_ops/xpu_ops/third_party/xvllm"
if ! bash build.sh; then
  # A failed build makes every later step meaningless.
  exit 1
fi

# Extra Python packages used by the tests; triton is pinned to 3.3.0.
echo "pip others"
python -m pip install -U openai
python -m pip uninstall -y triton
python -m pip install triton==3.3.0
for pkg in pytest pytest-timeout; do
  python -m pip install "$pkg"
done

# Drop the proxy settings before any service is started or probed.
unset http_proxy https_proxy no_proxy
stop_processes

# Start the service from a clean state: old logs and core dumps are removed.
rm -rf log/*
rm -f core*
# pkill -9 python  # not executed in the pipeline
# Clear the message queues.
ipcrm --all=msg
echo "============================开始V1模式测试!============================"
case "$GPU_ID" in
  0) export XPU_VISIBLE_DEVICES="0,1,2,3" ;;
  *) export XPU_VISIBLE_DEVICES="4,5,6,7" ;;
esac
export port_num=$((8188 + GPU_ID * 100))

# Launch the OpenAI-compatible API server in the background; stdout and
# stderr both go to server.log.
# NOTE(review): MODEL_PATH is expanded unquoted, so it must not contain
# whitespace or glob characters.
server_args=(
  --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle
  --port $port_num
  --engine-worker-queue-port $((port_num + 1))
  --metrics-port $((port_num + 2))
  --cache-queue-port $((port_num + 47873))
  --tensor-parallel-size 4
  --num-gpu-blocks-override 16384
  --max-model-len 32768
  --max-num-seqs 128
  --quantization wint4
  --load-choices default
)
python -m fastdeploy.entrypoints.openai.api_server "${server_args[@]}" > server.log 2>&1 &
sleep 60

# Liveness probe: poll /health until it answers 200 or TIMEOUT seconds pass.
TIMEOUT=$((15 * 60))
INTERVAL=10  # seconds between probes
ENDPOINT="http://0.0.0.0:${port_num}/health"
START_TIME=$(date +%s)  # timestamp at which probing started
echo "开始服务健康检查,最长等待时间:${TIMEOUT}"
while :; do
  # Seconds spent probing so far.
  ELAPSED=$(($(date +%s) - START_TIME))
  if ((ELAPSED >= TIMEOUT)); then
    # Timed out: stop everything, dump the logs and fail the job.
    echo -e "\n服务启动超时经过 $((TIMEOUT/60)) 分钟服务仍未启动!"
    stop_processes
    echo "server.log"
    cat server.log
    echo "log/workerlog.0"
    cat log/workerlog.0
    exit 1
  fi
  # `|| true` keeps a refused connection from counting as a command failure.
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
  if [ "$HTTP_CODE" != "200" ]; then
    sleep $INTERVAL
    continue
  fi
  echo -e "\n服务启动成功耗时 ${ELAPSED}"
  break
done
cat server.log

# Run the serving-inference test and remember its exit status.
kv_block_test_exit_code=0
python -m pytest tests/ci_use/XPU_45T/run_45T.py || kv_block_test_exit_code=$?
echo "kv_block_test_exit_code is ${kv_block_test_exit_code}"
stop_processes
if ((kv_block_test_exit_code != 0)); then
  echo "log/workerlog.0"
  cat log/workerlog.0
  echo "kv block相关测试失败请检查pr代码"
  exit 1
fi
sleep 5

# Start the service from a clean state: old logs and core dumps are removed.
rm -rf log/*
rm -f core*
# pkill -9 python  # not executed in the pipeline
# Clear the message queues.
ipcrm --all=msg
echo "============================开始W4A8测试!============================"
case "$GPU_ID" in
  0) export XPU_VISIBLE_DEVICES="0,1,2,3" ;;
  *) export XPU_VISIBLE_DEVICES="4,5,6,7" ;;
esac
export port_num=$((8188 + GPU_ID * 100))

# Launch the API server with the W4A8 model in the background; stdout and
# stderr both go to server.log.
# NOTE(review): MODEL_PATH is expanded unquoted, so it must not contain
# whitespace or glob characters.
server_args=(
  --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-W4A8C8-TP4-Paddle
  --port $port_num
  --engine-worker-queue-port $((port_num + 1))
  --metrics-port $((port_num + 2))
  --cache-queue-port $((port_num + 47873))
  --tensor-parallel-size 4
  --num-gpu-blocks-override 16384
  --max-model-len 32768
  --max-num-seqs 64
  --quantization "W4A8"
  --load-choices default
)
python -m fastdeploy.entrypoints.openai.api_server "${server_args[@]}" > server.log 2>&1 &
sleep 60

# Liveness probe: poll /health until it answers 200 or TIMEOUT seconds pass.
TIMEOUT=$((15 * 60))
INTERVAL=10  # seconds between probes
ENDPOINT="http://0.0.0.0:${port_num}/health"
START_TIME=$(date +%s)  # timestamp at which probing started
echo "开始服务健康检查,最长等待时间:${TIMEOUT}"
while :; do
  # Seconds spent probing so far.
  ELAPSED=$(($(date +%s) - START_TIME))
  if ((ELAPSED >= TIMEOUT)); then
    # Timed out: stop everything, dump the logs and fail the job.
    echo -e "\n服务启动超时经过 $((TIMEOUT/60)) 分钟服务仍未启动!"
    stop_processes
    echo "server.log"
    cat server.log
    echo "log/workerlog.0"
    cat log/workerlog.0
    exit 1
  fi
  # `|| true` keeps a refused connection from counting as a command failure.
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
  if [ "$HTTP_CODE" != "200" ]; then
    sleep $INTERVAL
    continue
  fi
  echo -e "\n服务启动成功耗时 ${ELAPSED}"
  break
done
cat server.log

# Run the serving-inference test and remember its exit status.
w4a8_test_exit_code=0
python -m pytest tests/ci_use/XPU_45T/run_w4a8.py || w4a8_test_exit_code=$?
echo "w4a8_test_exit_code is ${w4a8_test_exit_code}"
stop_processes
if ((w4a8_test_exit_code != 0)); then
  echo "log/workerlog.0"
  cat log/workerlog.0
  echo "w4a8 测试失败请检查pr代码"
  exit 1
fi
sleep 5

# Start the service from a clean state: old logs and core dumps are removed.
rm -rf log/*
rm -f core*
# pkill -9 python  # not executed in the pipeline
# Clear the message queues.
ipcrm --all=msg
echo "============================开始vl模型测试!============================"
case "$GPU_ID" in
  0) export XPU_VISIBLE_DEVICES="0,1,2,3" ;;
  *) export XPU_VISIBLE_DEVICES="4,5,6,7" ;;
esac
export port_num=$((8188 + GPU_ID * 100))

# Launch the API server with the VL model and multimodal options in the
# background; stdout and stderr both go to server.log.
# NOTE(review): MODEL_PATH is expanded unquoted, so it must not contain
# whitespace or glob characters.
server_args=(
  --model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Paddle
  --port $port_num
  --engine-worker-queue-port $((port_num + 1))
  --metrics-port $((port_num + 2))
  --cache-queue-port $((port_num + 47873))
  --tensor-parallel-size 4
  --max-model-len 32768
  --max-num-seqs 10
  --quantization wint8
  --enable-mm
  --mm-processor-kwargs '{"video_max_frames": 30}'
  --limit-mm-per-prompt '{"image": 10, "video": 3}'
  --reasoning-parser ernie-45-vl
  --load-choices default
)
python -m fastdeploy.entrypoints.openai.api_server "${server_args[@]}" > server.log 2>&1 &
sleep 60

# Liveness probe: poll /health until it answers 200 or TIMEOUT seconds pass.
TIMEOUT=$((15 * 60))
INTERVAL=10  # seconds between probes
ENDPOINT="http://0.0.0.0:${port_num}/health"
START_TIME=$(date +%s)  # timestamp at which probing started
echo "开始服务健康检查,最长等待时间:${TIMEOUT}"
while :; do
  # Seconds spent probing so far.
  ELAPSED=$(($(date +%s) - START_TIME))
  if ((ELAPSED >= TIMEOUT)); then
    # Timed out: stop everything, dump the logs and fail the job.
    echo -e "\n服务启动超时经过 $((TIMEOUT/60)) 分钟服务仍未启动!"
    stop_processes
    echo "server.log"
    cat server.log
    echo "log/workerlog.0"
    cat log/workerlog.0
    exit 1
  fi
  # `|| true` keeps a refused connection from counting as a command failure.
  HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" -m 2 "$ENDPOINT" || true)
  if [ "$HTTP_CODE" != "200" ]; then
    sleep $INTERVAL
    continue
  fi
  echo -e "\n服务启动成功耗时 ${ELAPSED}"
  break
done
cat server.log

# Run the serving-inference test and remember its exit status.
vl_test_exit_code=0
python -m pytest tests/ci_use/XPU_45T/run_45vl.py || vl_test_exit_code=$?
echo "vl_test_exit_code is ${vl_test_exit_code}"
stop_processes
if ((vl_test_exit_code != 0)); then
  echo "log/workerlog.0"
  cat log/workerlog.0
  echo " vl模型 测试失败请检查pr代码"
  exit 1
fi
echo "============================开始EP并行测试!============================"
sleep 5
# Start from a clean state: old logs, core dumps and message queues are removed.
rm -rf log/*
rm -f core*
ipcrm --all=msg
xpu-smi
if [[ "$GPU_ID" == "0" ]]; then
  export XPU_VISIBLE_DEVICES="0,1,2,3"
else
  export XPU_VISIBLE_DEVICES="4,5,6,7"
fi

# BKCL / XSHMEM settings exported for the EP test only; they are unset again
# once pytest has finished.
export BKCL_ENABLE_XDR=1
export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
export BKCL_TRACE_TOPO=1
export BKCL_PCIE_RING=1
export XSHMEM_MODE=1
export XSHMEM_QP_NUM_PER_RANK=32
export BKCL_RDMA_VERBS=1

# Download and build xDeepEP.
#   * -O overwrites an archive left by an earlier run; without it wget saves
#     "xDeepEP.tar.gz.1" and tar keeps unpacking the stale file.
#   * The build runs in a subshell guarded by `cd ... &&`. Previously a failed
#     `cd xDeepEP` let `bash build.sh` re-run the build.sh of the current
#     directory, and the following `cd -` then moved the script into whatever
#     directory it had visited last, so pytest ran from the wrong place.
# As before, a failure here does not abort the script; the EP test decides.
wget -q -O xDeepEP.tar.gz https://paddle-qa.bj.bcebos.com/xpu_third_party/xDeepEP.tar.gz
tar -xzf xDeepEP.tar.gz
(cd xDeepEP && bash build.sh) \
  || echo "warning: xDeepEP download or build failed" >&2

python -m pytest -s --timeout=600 tests/ci_use/XPU_45T/run_ep.py
ep_exit_code=$?

unset BKCL_ENABLE_XDR
unset BKCL_RDMA_NICS
unset BKCL_TRACE_TOPO
unset BKCL_PCIE_RING
unset XSHMEM_MODE
unset XSHMEM_QP_NUM_PER_RANK
unset BKCL_RDMA_VERBS

# Best-effort cleanup of everything the EP test may have left running.
# pgrep -f matches the command line only; grepping `ps -ef` output also hit
# the PID/PPID columns and could kill unrelated processes.
# NOTE(review): the port is fixed at 8188 here, while the earlier stages use
# 8188 + GPU_ID * 100 - confirm which port run_ep.py actually uses.
for ep_pattern in 'cache_transfer_manager.py' 'api_server' '8188'; do
  for ep_pid in $(pgrep -f "$ep_pattern"); do
    # Never kill the script that is doing the cleanup.
    [[ "$ep_pid" == "$$" ]] && continue
    kill -9 "$ep_pid" 2>/dev/null || true
  done
done
for ep_pid in $(lsof -t -i :8188); do
  kill -9 "$ep_pid" 2>/dev/null || true
done

if [ "${ep_exit_code}" -ne 0 ]; then
  echo "log/workerlog.0"
  cat log/workerlog.0
  echo "EP并行 相关测试失败请检查pr代码"
  exit 1
fi