From b7858c22d9941c7ddba210d3a47574d36a1aab45 Mon Sep 17 00:00:00 2001 From: AIbin <37361953+chang-wenbin@users.noreply.github.com> Date: Mon, 14 Jul 2025 16:01:34 +0800 Subject: [PATCH] =?UTF-8?q?=E3=80=90Update=20Docs=E3=80=91update=20support?= =?UTF-8?q?ed=5Fmodels=20doc=20(#2836)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update supported_models doc --- docs/supported_models.md | 2 +- docs/zh/supported_models.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/supported_models.md b/docs/supported_models.md index 5820ff098..0fac24f7e 100644 --- a/docs/supported_models.md +++ b/docs/supported_models.md @@ -14,11 +14,11 @@ export FD_MODEL_CACHE=/ssd1/download_models | Model Name | Context Length | Quantization | Minimum Deployment Resources | Notes | | :------------------------------------------ | :------------- | :----------- | :--------------------------- | :----------------------------------------------------------------------------------------- | -| baidu/ERNIE-4.5-VL-424B-A47B-Paddle | 32K/128K | WINT2 | 1*141G GPU VRAM/1T RAM | Chunked Prefill required for 128K | | baidu/ERNIE-4.5-VL-424B-A47B-Paddle | 32K/128K | WINT4 | 4*80G GPU VRAM/1T RAM | Chunked Prefill required for 128K | | baidu/ERNIE-4.5-VL-424B-A47B-Paddle | 32K/128K | WINT8 | 8*80G GPU VRAM/1T RAM | Chunked Prefill required for 128K | | baidu/ERNIE-4.5-300B-A47B-Paddle | 32K/128K | WINT4 | 4*64G GPU VRAM/600G RAM | Chunked Prefill required for 128K | | baidu/ERNIE-4.5-300B-A47B-Paddle | 32K/128K | WINT8 | 8*64G GPU VRAM/600G RAM | Chunked Prefill required for 128K | +| baidu/ERNIE-4.5-300B-A47B-2Bits-Paddle | 32K/128K | WINT2 | 1*141G GPU VRAM/600G RAM | Chunked Prefill required for 128K | | baidu/ERNIE-4.5-300B-A47B-W4A8C8-TP4-Paddle | 32K/128K | W4A8C8 | 4*64G GPU VRAM/160G RAM | Fixed 4-GPU setup, Chunked Prefill recommended | | baidu/ERNIE-4.5-300B-A47B-FP8-Paddle | 32K/128K | FP8 | 8*64G GPU VRAM/600G RAM | Chunked Prefill recommended, only supports PD Disaggragated Deployment with EP parallelism | | baidu/ERNIE-4.5-300B-A47B-Base-Paddle | 32K/128K | WINT4 | 4*64G GPU VRAM/600G RAM | Chunked Prefill recommended | diff --git a/docs/zh/supported_models.md b/docs/zh/supported_models.md index 5206b0f73..562640de9 100644 --- a/docs/zh/supported_models.md +++ b/docs/zh/supported_models.md @@ -14,11 +14,11 @@ export FD_MODEL_CACHE=/ssd1/download_models | 模型名 | 上下文长度 | 量化方式 | 最小部署资源 | 说明 | | :------------------------------------------ | :--------- | :------- | :-------------------- | :---------------------------------------------- | -| baidu/ERNIE-4.5-VL-424B-A47B-Paddle | 32K/128K | WINT2 | 1卡*141G显存/1T内存 | 128K需要开启Chunked Prefill | | baidu/ERNIE-4.5-VL-424B-A47B-Paddle | 32K/128K | WINT4 | 4卡*80G显存/1T内存 | 128K需要开启Chunked Prefill | | baidu/ERNIE-4.5-VL-424B-A47B-Paddle | 32K/128K | WINT8 | 8卡*80G显存/1T内存 | 128K需要开启Chunked Prefill | | baidu/ERNIE-4.5-300B-A47B-Paddle | 32K/128K | WINT4 | 4卡*64G显存/600G内存 | 128K需要开启Chunked Prefill | | baidu/ERNIE-4.5-300B-A47B-Paddle | 32K/128K | WINT8 | 8卡*64G显存/600G内存 | 128K需要开启Chunked Prefill | +| baidu/ERNIE-4.5-300B-A47B-2Bits-Paddle | 32K/128K | WINT2 | 1卡*141G显存/600G内存 | 128K需要开启Chunked Prefill | | baidu/ERNIE-4.5-300B-A47B-W4A8C8-TP4-Paddle | 32K/128K | W4A8C8 | 4卡*64G显存/160G内存 | 限定4卡,建议开启Chunked Prefill | | baidu/ERNIE-4.5-300B-A47B-FP8-Paddle | 32K/128K | FP8 | 8卡*64G显存/600G内存 | 建议开启Chunked Prefill,仅在PD分离EP并行下支持 | | baidu/ERNIE-4.5-300B-A47B-Base-Paddle | 32K/128K | WINT4 | 4卡*64G显存/600G内存 | 建议开启Chunked Prefill |