From 08b96baa4ae74cf43edc85e30cd047278c6ac29f Mon Sep 17 00:00:00 2001 From: yzwu Date: Tue, 11 Nov 2025 19:15:19 +0800 Subject: [PATCH] [Iluvatar][Doc] Add ERNIE-4.5-VL-28B-A3B-Thinking doc (#4955) --- docs/get_started/installation/iluvatar_gpu.md | 39 +++++++++++++++++++ .../get_started/installation/iluvatar_gpu.md | 39 +++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/docs/get_started/installation/iluvatar_gpu.md b/docs/get_started/installation/iluvatar_gpu.md index f78a23b85..1a205323d 100644 --- a/docs/get_started/installation/iluvatar_gpu.md +++ b/docs/get_started/installation/iluvatar_gpu.md @@ -633,3 +633,42 @@ curl -X POST "http://0.0.0.0:8180/v1/chat/completions" \ "chat_template_kwargs":{"enable_thinking": true} }' ``` + +### ERNIE-4.5-VL-28B-A3B-Thinking +Refer to [gpu doc](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/get_started/ernie-4.5-vl-thinking.md), the command is below: + +server: +```bash +#!/bin/bash +export PADDLE_XCCL_BACKEND=iluvatar_gpu +export INFERENCE_MSG_QUEUE_ID=232132 +export LD_PRELOAD=/usr/local/corex/lib64/libcuda.so.1 +export FD_SAMPLING_CLASS=rejection +export FD_DEBUG=1 +python3 -m fastdeploy.entrypoints.openai.api_server \ + --model baidu/ERNIE-4.5-VL-28B-A3B-Thinking \ + --port 8180 \ + --tensor-parallel-size 2 \ + --max-model-len 32768 \ + --quantization wint8 \ + --block-size 16 \ + --limit-mm-per-prompt '{"image": 100, "video": 100}' \ + --reasoning-parser ernie-45-vl-thinking \ + --tool-call-parser ernie-45-vl-thinking \ + --mm-processor-kwargs '{"image_max_pixels": 12845056 }' \ + --max-num-seqs 8 +``` + +client: +```bash +curl -X POST "http://0.0.0.0:8180/v1/chat/completions" \ +-H "Content-Type: application/json" \ +-d '{ + "messages": [ + {"role": "user", "content": [ + {"type":"image_url", "image_url": {"url":"https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"}}, + {"type":"text", "text":"From which era does the artifact in the image originate?"} + ]} + ] 
+}' +``` diff --git a/docs/zh/get_started/installation/iluvatar_gpu.md b/docs/zh/get_started/installation/iluvatar_gpu.md index 95dc41873..7125dda6e 100644 --- a/docs/zh/get_started/installation/iluvatar_gpu.md +++ b/docs/zh/get_started/installation/iluvatar_gpu.md @@ -633,3 +633,42 @@ curl -X POST "http://0.0.0.0:8180/v1/chat/completions" \ "chat_template_kwargs":{"enable_thinking": true} }' ``` + +### ERNIE-4.5-VL-28B-A3B-Thinking +参考 [gpu doc](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/get_started/ernie-4.5-vl-thinking.md), 命令如下所示: + +server: +```bash +#!/bin/bash +export PADDLE_XCCL_BACKEND=iluvatar_gpu +export INFERENCE_MSG_QUEUE_ID=232132 +export LD_PRELOAD=/usr/local/corex/lib64/libcuda.so.1 +export FD_SAMPLING_CLASS=rejection +export FD_DEBUG=1 +python3 -m fastdeploy.entrypoints.openai.api_server \ + --model baidu/ERNIE-4.5-VL-28B-A3B-Thinking \ + --port 8180 \ + --tensor-parallel-size 2 \ + --max-model-len 32768 \ + --quantization wint8 \ + --block-size 16 \ + --limit-mm-per-prompt '{"image": 100, "video": 100}' \ + --reasoning-parser ernie-45-vl-thinking \ + --tool-call-parser ernie-45-vl-thinking \ + --mm-processor-kwargs '{"image_max_pixels": 12845056 }' \ + --max-num-seqs 8 +``` + +client: +```bash +curl -X POST "http://0.0.0.0:8180/v1/chat/completions" \ +-H "Content-Type: application/json" \ +-d '{ + "messages": [ + {"role": "user", "content": [ + {"type":"image_url", "image_url": {"url":"https://paddlenlp.bj.bcebos.com/datasets/paddlemix/demo_images/example2.jpg"}}, + {"type":"text", "text":"From which era does the artifact in the image originate?"} + ]} + ] +}' +```