Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-12-24 13:28:13 +08:00)
[XPU] [CI] Change CI to multi-concurrency (#4866)
* Refactor GPU ID logic in CI workflow: updated GPU ID assignment logic and removed unused port calculations
* Refactor GPU device and port configuration
* Update engine_worker_queue_port calculation logic
* Refactor XPU_VISIBLE_DEVICES export logic
* Adjust service port based on GPU ID
* Adjust service HTTP port based on GPU ID
* Adjust service_http_port based on GPU_ID
* Add import for os module in run_45T.py
* Update run_45vl.py
* Import os module in run_w4a8.py: added import for os module to use environment variables
* Remove duplicate import of os module
* Remove duplicate import of os module
* Update run_45T.py
* Update run_w4a8.py
* fix bug
* fix bug
* Update run_w4a8.py
* Fix directory change command in run_ci_xpu.sh
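Taken together, the commits make each CI concurrency slot derive its devices and ports from a single GPU_ID. A minimal sketch of the flow, condensed from the diff below (the runner name is hypothetical; the real value comes from ${{ runner.name }}):

# Host side (.github/workflows/ci_xpu.yml): pick a concurrency slot from the
# last character of the runner name; the slot id enters the container as GPU_ID.
runner_name="fd-xpu-runner-1"   # hypothetical runner name
last_char="${runner_name: -1}"
if [[ "$last_char" == "1" ]]; then
    gpu_id="4"                  # slot 2: XPUs 4-7, ports offset by 400
else
    gpu_id="0"                  # slot 1: XPUs 0-3, base ports
fi

# Container side (run_ci_xpu.sh): every service port is an offset from one base.
GPU_ID="$gpu_id"
port_num=$((8188 + GPU_ID * 100))   # API server port: 8188 or 8588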
.github/workflows/ci_xpu.yml
@@ -60,14 +60,11 @@ jobs:
       runner_name="${{ runner.name }}"
       last_char="${runner_name: -1}"

-      if [[ "$last_char" =~ [0-3] ]]; then
-        gpu_id="$last_char"
+      if [[ "$last_char" == "1" ]]; then
+        gpu_id="4"
       else
         gpu_id="0"
       fi
-      FD_API_PORT=$((9180 + gpu_id * 100))
-      FD_ENGINE_QUEUE_PORT=$((9150 + gpu_id * 100))
-      FD_METRICS_PORT=$((9170 + gpu_id * 100))
       PARENT_DIR=$(dirname "$WORKSPACE")
       echo "PARENT_DIR:$PARENT_DIR"
       docker run --rm --net=host --cap-add=SYS_PTRACE --privileged --shm-size=64G \
@@ -77,9 +74,7 @@ jobs:
         -e "http_proxy=$(git config --global --get http.proxy)" \
         -e "https_proxy=$(git config --global --get https.proxy)" \
         -e "no_proxy=bcebos.com,mirrors.tuna.tsinghua.edu.cn,127.0.0.1,localhost" \
-        -e "FD_API_PORT=${FD_API_PORT}" \
-        -e "FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT}" \
-        -e "FD_METRICS_PORT=${FD_METRICS_PORT}" \
+        -e "GPU_ID=${gpu_id}" \
         ${docker_image} /bin/bash -c "
           git config --global --add safe.directory /workspace/FastDeploy
           cd FastDeploy
run_ci_xpu.sh
@@ -9,13 +9,27 @@ apt install -y lsof
 function stop_processes() {
     ps -efww | grep -E 'cache_transfer_manager.py' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
     ps -efww | grep -E 'api_server' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
-    ps -efww | grep -E '8188' | grep -v grep | awk '{print $2}' | xargs kill -9 || true
-    lsof -t -i :8188 | xargs kill -9 || true
+    ps -efww | grep -E "$((8188 + GPU_ID * 100))" | grep -v grep | awk '{print $2}' | xargs kill -9 || true
+    lsof -t -i :$((8188 + GPU_ID * 100)) | xargs kill -9 || true
 }
 stop_processes

 # Set the model path
 export model_path=${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle
+# Due to machine issues, the cards in use need to be reset to make sure they are healthy
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+
+mkdir -p /workspace/deps
+cd /workspace/deps
+wget -q https://klx-sdk-release-public.su.bcebos.com/xre/kl3-release/5.0.21.21/xre-Linux-x86_64-5.0.21.21.tar.gz
+tar -zxf xre-Linux-x86_64-5.0.21.21.tar.gz && mv xre-Linux-x86_64-5.0.21.21 xre
+cd -
+export PATH=/workspace/deps/xre/bin:$PATH
+
+xpu-smi -r -i $XPU_VISIBLE_DEVICES
+xpu-smi

 echo "pip requirements"
 python -m pip install -r requirements.txt
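The cleanup above is now scoped per slot: matching only this slot's own port keeps one concurrency slot from killing the other slot's server. A hedged equivalent of the new kill line (adding xargs -r, the GNU flag that skips kill when nothing is listening):

# Kill only processes listening on this slot's API port;
# '|| true' keeps the script going when the port is already free.
lsof -t -i :$((8188 + GPU_ID * 100)) | xargs -r kill -9 || true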
@@ -51,11 +65,19 @@ rm -f core*
 # Clear the message queue
 ipcrm --all=msg
 echo "============================ Starting V1 mode test! ============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
-    --model ${model_path} \
-    --port 8188 \
-    --tensor-parallel-size 8 \
+    --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-Paddle \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
+    --tensor-parallel-size 4 \
     --num-gpu-blocks-override 16384 \
     --max-model-len 32768 \
     --max-num-seqs 128 \
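With both slots running concurrently, every listener lands on a distinct port. A quick check of the arithmetic used above:

# Print the full port layout for the two concurrency slots.
for GPU_ID in 0 4; do
    port_num=$((8188 + GPU_ID * 100))
    echo "GPU_ID=$GPU_ID api=$port_num queue=$((port_num + 1)) metrics=$((port_num + 2)) cache=$((port_num + 47873))"
done
# GPU_ID=0 api=8188 queue=8189 metrics=8190 cache=56061
# GPU_ID=4 api=8588 queue=8589 metrics=8590 cache=56461

Both cache-queue ports stay below 65535, and the two slots' port sets do not intersect.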
@@ -119,10 +141,18 @@ rm -f core*
 # Clear the message queue
 ipcrm --all=msg
 echo "============================ Starting W4A8 test! ============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
     --model ${MODEL_PATH}/ERNIE-4.5-300B-A47B-W4A8C8-TP4-Paddle \
-    --port 8188 \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
     --tensor-parallel-size 4 \
     --num-gpu-blocks-override 16384 \
     --max-model-len 32768 \
@@ -187,10 +217,18 @@ rm -f core*
 # Clear the message queue
 ipcrm --all=msg
 echo "============================ Starting VL model test! ============================"
-export XPU_VISIBLE_DEVICES="0,1,2,3"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi
+export port_num=$((8188 + GPU_ID * 100))
 python -m fastdeploy.entrypoints.openai.api_server \
     --model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Paddle \
-    --port 8188 \
+    --port $port_num \
+    --engine-worker-queue-port $((port_num + 1)) \
+    --metrics-port $((port_num + 2)) \
+    --cache-queue-port $((port_num + 47873)) \
     --tensor-parallel-size 4 \
     --max-model-len 32768 \
     --max-num-seqs 10 \
@@ -257,7 +295,12 @@ rm -rf log/*
 rm -f core*
 ipcrm --all=msg
 xpu-smi
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi

 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
 export BKCL_TRACE_TOPO=1
@@ -301,7 +344,12 @@ rm -rf log/*
 rm -f core*
 ipcrm --all=msg
 xpu-smi
-export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
+if [[ "$GPU_ID" == "0" ]]; then
+    export XPU_VISIBLE_DEVICES="0,1,2,3"
+else
+    export XPU_VISIBLE_DEVICES="4,5,6,7"
+fi

 export BKCL_ENABLE_XDR=1
 export BKCL_RDMA_NICS=xgbe1,xgbe2,xgbe3,xgbe4
 export BKCL_TRACE_TOPO=1
run_45T.py
@@ -11,13 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os

 import openai


 def test_45t():
     ip = "0.0.0.0"
-    service_http_port = "8188"  # as configured for the service
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    service_http_port = 8188 + gpu_id * 100  # as configured for the service
     client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
     # base_response_110 = "你好!我是一个基于人工智能技术开发的助手,可以帮你解答问题、提供建议、聊天交流或者完成一些任务。无论是学习、工作还是生活中的疑问,都可以随时告诉我哦~😊 你有什么想聊的吗?"
     # base_response_104 = "你好!我是一个基于人工智能技术打造的助手,可以帮你解答问题、提供建议、分享知识,或者陪你聊聊天~😊 无论是学习、工作、生活还是娱乐相关的问题,都可以随时告诉我哦!你今天有什么想聊的吗?"
run_45vl.py
@@ -11,13 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os

 import openai


 def test_45vl():
     ip = "0.0.0.0"
-    service_http_port = "8188"  # as configured for the service
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    service_http_port = 8188 + gpu_id * 100  # as configured for the service
     client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
     base_response = "北魏时期"
     # Non-streaming chat
@@ -37,8 +37,9 @@ def test_fd_ep():
     else:
         tensor_parallel_size = xpu_device_num
         data_parallel_size = 1

-    engine_worker_queue_port = [str(8023 + i * 10) for i in range(data_parallel_size)]
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    base_port = 8023 + gpu_id * 100
+    engine_worker_queue_port = [str(base_port + i * 10) for i in range(data_parallel_size)]
     engine_worker_queue_port = ",".join(engine_worker_queue_port)

     llm = LLM(
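The EP test applies the same 100-per-slot offset to its data-parallel queue ports, so the two slots' port lists stay disjoint as long as data_parallel_size is 40 or fewer. A bash rendering of the Python list comprehension (data_parallel_size=4 is a hypothetical value):

# Derive the per-rank engine worker queue ports for both slots.
for GPU_ID in 0 4; do
    base_port=$((8023 + GPU_ID * 100))
    ports=()
    for i in 0 1 2 3; do ports+=("$((base_port + i * 10))"); done
    echo "GPU_ID=$GPU_ID -> $(IFS=,; echo "${ports[*]}")"
done
# GPU_ID=0 -> 8023,8033,8043,8053
# GPU_ID=4 -> 8423,8433,8443,8453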
run_w4a8.py
@@ -11,16 +11,18 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import os

 import openai


 def test_w4a8():
     ip = "0.0.0.0"
-    service_http_port = "8188"  # as configured for the service
+    gpu_id = int(os.getenv("GPU_ID", "0"))
+    service_http_port = 8188 + gpu_id * 100  # as configured for the service
     client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
-    #base_response_110 = "你好!我是一个基于人工智能技术的助手,可以帮你解答问题、提供建议、聊天或者协助完成各种任务。无论是学习、工作还是生活中的疑问,我都可以尽力提供帮助。😊 你有什么想聊的吗?"
-    #base_response_104 = "你好!我是一个人工智能助手,可以帮你解答问题、提供建议、聊天或者完成一些任务。无论是学习、工作还是生活中的疑问,我都可以尽力帮忙哦~有什么需要我做的吗?😊"
+    # base_response_110 = "你好!我是一个基于人工智能技术的助手,可以帮你解答问题、提供建议、聊天或者协助完成各种任务。无论是学习、工作还是生活中的疑问,我都可以尽力提供帮助。😊 你有什么想聊的吗?"
+    # base_response_104 = "你好!我是一个人工智能助手,可以帮你解答问题、提供建议、聊天或者完成一些任务。无论是学习、工作还是生活中的疑问,我都可以尽力帮忙哦~有什么需要我做的吗?😊"
     # Non-streaming chat
     response = client.chat.completions.create(
         model="default",