mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00

* add stable ci * fix * update * fix * rename tests dir;fix stable ci bug * add timeout limit * update
60 lines
1.4 KiB
Bash
#!/bin/bash
#
# Start the FastDeploy OpenAI-compatible API server with tensor parallelism 2
# and wait until its API port accepts TCP connections.
#
# Usage:
#   <script> <model-root>
#     <model-root>  directory that contains a "TP2" sub-directory; the model is
#                   loaded from "<model-root>/TP2".
#
# Environment (all optional):
#   FD_API_PORT                 API port                    (default 8000)
#   FD_ENGINE_QUEUE_PORT        engine worker queue port    (default 8001)
#   FD_METRICS_PORT             metrics port                (default 8002)
#   FD_CACHE_QUEUE_PORT         cache queue port            (default 8003)
#   FD_INFERENCE_MSG_QUEUE_ID   exported as INFERENCE_MSG_QUEUE_ID (default 7679)
#
# Exit status:
#   0  the API port became reachable within the startup timeout
#   1  bad usage, missing model directory, server process exited during
#      startup, or the port did not open before the timeout
#
# The server is started in the background and is left running when this
# script exits; stopping it is the caller's responsibility.

set -euo pipefail

MODEL_ROOT="${1:-}"
FD_API_PORT=${FD_API_PORT:-8000}
FD_ENGINE_QUEUE_PORT=${FD_ENGINE_QUEUE_PORT:-8001}
FD_METRICS_PORT=${FD_METRICS_PORT:-8002}
FD_CACHE_QUEUE_PORT=${FD_CACHE_QUEUE_PORT:-8003}

# Maximum number of one-second polls while waiting for the API port.
readonly STARTUP_TIMEOUT_SECONDS=300

# Validate the raw argument BEFORE appending "/TP2": the combined path is never
# empty, so testing it could not detect a missing argument.
if [[ -z "$MODEL_ROOT" ]]; then
  echo "❌ 用法: $0 <模型路径>" >&2
  exit 1
fi

MODEL_PATH="${MODEL_ROOT}/TP2"

if [[ ! -d "$MODEL_PATH" ]]; then
  echo "❌ 错误:模型目录不存在: $MODEL_PATH" >&2
  exit 1
fi

echo "使用模型: $MODEL_PATH"

# Clear logs from a previous run (dotfiles in log/ are not matched by the glob).
mkdir -p log
rm -rf -- log/*

# Environment for the server process.
export CUDA_VISIBLE_DEVICES=0,1
export INFERENCE_MSG_QUEUE_ID=${FD_INFERENCE_MSG_QUEUE_ID:-7679}
export ENABLE_V1_KVCACHE_SCHEDULER=1

# Launch the server in the background and remember its PID so that an early
# crash can be detected instead of waiting for the whole timeout.
python -m fastdeploy.entrypoints.openai.api_server \
  --tensor-parallel-size 2 \
  --port "${FD_API_PORT}" \
  --engine-worker-queue-port "${FD_ENGINE_QUEUE_PORT}" \
  --metrics-port "${FD_METRICS_PORT}" \
  --cache-queue-port "${FD_CACHE_QUEUE_PORT}" \
  --quantization wint8 \
  --max-model-len 32768 \
  --max-num-seqs 256 \
  --gpu-memory-utilization 0.9 \
  --model "$MODEL_PATH" \
  --load-strategy ipc_snapshot \
  --dynamic-load-weight &
server_pid=$!

success=0

for ((i = 1; i <= STARTUP_TIMEOUT_SECONDS; i++)); do
  # Bash's /dev/tcp pseudo-device: the redirection succeeds only if a TCP
  # connection to the port can be opened. Run in a subshell so the socket is
  # closed again immediately and any error output is discarded.
  if (echo > "/dev/tcp/127.0.0.1/${FD_API_PORT}") >/dev/null 2>&1; then
    echo "API server is up on port $FD_API_PORT on iteration $i"
    success=1
    break
  fi

  # Signal 0 only checks that the process still exists; fail fast if the
  # server already exited rather than polling a port that will never open.
  if ! kill -0 "$server_pid" 2>/dev/null; then
    echo "错误: API 服务进程在启动期间退出 (PID ${server_pid})" >&2
    exit 1
  fi

  sleep 1
done

if ((success == 0)); then
  echo "超时: API 服务在 ${STARTUP_TIMEOUT_SECONDS} 秒内未启动 (端口 $FD_API_PORT)" >&2
  # Report the failure to the caller; previously the script exited 0 here.
  exit 1
fi