mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[XPU][CI] Change CI VL model to 28B (#4764)
* Update XPU_VISIBLE_DEVICES and model parameters
* Update base response and adjust max tokens
* Implement process cleanup in CI workflow
  Add process cleanup commands to prevent port conflicts
* Remove process cleanup commands from CI workflow
  Removed old process cleanup commands to prevent port conflicts.
This commit is contained in:
1
.github/workflows/ci_xpu.yml
vendored
1
.github/workflows/ci_xpu.yml
vendored
@@ -68,7 +68,6 @@ jobs:
|
||||
FD_API_PORT=$((9180 + gpu_id * 100))
|
||||
FD_ENGINE_QUEUE_PORT=$((9150 + gpu_id * 100))
|
||||
FD_METRICS_PORT=$((9170 + gpu_id * 100))
|
||||
|
||||
PARENT_DIR=$(dirname "$WORKSPACE")
|
||||
echo "PARENT_DIR:$PARENT_DIR"
|
||||
docker run --rm --net=host --cap-add=SYS_PTRACE --privileged --shm-size=64G \
|
||||
|
||||
@@ -187,11 +187,11 @@ rm -f core*
|
||||
#清空消息队列
|
||||
ipcrm --all=msg
|
||||
echo "============================开始vl模型测试!============================"
|
||||
export XPU_VISIBLE_DEVICES="0,1,2,3,4,5,6,7"
|
||||
export XPU_VISIBLE_DEVICES="0,1,2,3"
|
||||
python -m fastdeploy.entrypoints.openai.api_server \
|
||||
--model ${MODEL_PATH}/ERNIE-4.5-VL-424B-A47B-Paddle \
|
||||
--model ${MODEL_PATH}/ERNIE-4.5-VL-28B-A3B-Paddle \
|
||||
--port 8188 \
|
||||
--tensor-parallel-size 8 \
|
||||
--tensor-parallel-size 4 \
|
||||
--max-model-len 32768 \
|
||||
--max-num-seqs 10 \
|
||||
--quantization wint8 \
|
||||
|
||||
@@ -19,7 +19,7 @@ def test_45vl():
|
||||
ip = "0.0.0.0"
|
||||
service_http_port = "8188" # 服务配置的
|
||||
client = openai.Client(base_url=f"http://{ip}:{service_http_port}/v1", api_key="EMPTY_API_KEY")
|
||||
base_response = "\n这尊佛像是**北魏时期的释迦牟尼"
|
||||
base_response = "\n\n这尊佛像属于**北魏时期**"
|
||||
# 非流式对话
|
||||
response = client.chat.completions.create(
|
||||
model="default",
|
||||
@@ -39,7 +39,7 @@ def test_45vl():
|
||||
],
|
||||
temperature=1,
|
||||
top_p=0,
|
||||
max_tokens=75,
|
||||
max_tokens=70,
|
||||
stream=False,
|
||||
)
|
||||
print(response.choices[0].message.content)
|
||||
|
||||
Reference in New Issue
Block a user