mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
xpu (#4969)
This commit is contained in:
@@ -81,8 +81,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
|
||||
--num-gpu-blocks-override 16384 \
|
||||
--max-model-len 32768 \
|
||||
--max-num-seqs 128 \
|
||||
--quantization wint4 \
|
||||
--load-choices default > server.log 2>&1 &
|
||||
--quantization wint4 > server.log 2>&1 &
|
||||
|
||||
sleep 60
|
||||
# 探活
|
||||
@@ -157,8 +156,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
|
||||
--num-gpu-blocks-override 16384 \
|
||||
--max-model-len 32768 \
|
||||
--max-num-seqs 64 \
|
||||
--quantization "W4A8" \
|
||||
--load-choices default > server.log 2>&1 &
|
||||
--quantization "W4A8" > server.log 2>&1 &
|
||||
|
||||
sleep 60
|
||||
# 探活
|
||||
@@ -236,8 +234,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
|
||||
--enable-mm \
|
||||
--mm-processor-kwargs '{"video_max_frames": 30}' \
|
||||
--limit-mm-per-prompt '{"image": 10, "video": 3}' \
|
||||
--reasoning-parser ernie-45-vl \
|
||||
--load-choices default > server.log 2>&1 &
|
||||
--reasoning-parser ernie-45-vl > server.log 2>&1 &
|
||||
|
||||
sleep 60
|
||||
# 探活
|
||||
|
||||
Reference in New Issue
Block a user