diff --git a/scripts/run_ci_xpu.sh b/scripts/run_ci_xpu.sh index a3140ee71..2672755d6 100644 --- a/scripts/run_ci_xpu.sh +++ b/scripts/run_ci_xpu.sh @@ -85,7 +85,9 @@ python -m fastdeploy.entrypoints.openai.api_server \ --num-gpu-blocks-override 16384 \ --max-model-len 32768 \ --max-num-seqs 128 \ - --quantization wint4 > server.log 2>&1 & + --quantization wint4 \ + --enable-prefix-caching \ + --enable-chunked-prefill > server.log 2>&1 & sleep 60 # 探活 @@ -240,7 +242,8 @@ python -m fastdeploy.entrypoints.openai.api_server \ --enable-mm \ --mm-processor-kwargs '{"video_max_frames": 30}' \ --limit-mm-per-prompt '{"image": 10, "video": 3}' \ - --reasoning-parser ernie-45-vl > server.log 2>&1 & + --reasoning-parser ernie-45-vl \ + --enable-chunked-prefill > server.log 2>&1 & sleep 60 # 探活