From c657f8d16adb3e8f92b2542132b14d2127bb03db Mon Sep 17 00:00:00 2001 From: ming1753 <61511741+ming1753@users.noreply.github.com> Date: Mon, 3 Nov 2025 12:12:14 +0800 Subject: [PATCH] [Docs] fix PaddleOCR-VL docs bug (#4702) --- docs/best_practices/PaddleOCR-VL-0.9B.md | 4 ++-- docs/zh/best_practices/PaddleOCR-VL-0.9B.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/best_practices/PaddleOCR-VL-0.9B.md b/docs/best_practices/PaddleOCR-VL-0.9B.md index ef52f79a0..707eec65a 100644 --- a/docs/best_practices/PaddleOCR-VL-0.9B.md +++ b/docs/best_practices/PaddleOCR-VL-0.9B.md @@ -24,7 +24,7 @@ python -m fastdeploy.entrypoints.openai.api_server \ --max-model-len 16384 \ --max-num-batched-tokens 16384 \ --gpu-memory-utilization 0.8 \ - --max-num-seqs 128 \ + --max-num-seqs 128 ``` **Example 2:** Deploying a 16K Context Service on a Single A100 GPU ```shell @@ -36,7 +36,7 @@ python -m fastdeploy.entrypoints.openai.api_server \ --max-model-len 16384 \ --max-num-batched-tokens 16384 \ --gpu-memory-utilization 0.8 \ - --max-num-seqs 256 \ + --max-num-seqs 256 ``` An example is a set of configurations that can run stably while also delivering relatively good performance. If you have further requirements for precision or performance, please continue reading the content below. diff --git a/docs/zh/best_practices/PaddleOCR-VL-0.9B.md b/docs/zh/best_practices/PaddleOCR-VL-0.9B.md index b8a49c7b1..d8f404a14 100644 --- a/docs/zh/best_practices/PaddleOCR-VL-0.9B.md +++ b/docs/zh/best_practices/PaddleOCR-VL-0.9B.md @@ -24,7 +24,7 @@ python -m fastdeploy.entrypoints.openai.api_server \ --max-model-len 16384 \ --max-num-batched-tokens 16384 \ --gpu-memory-utilization 0.8 \ - --max-num-seqs 128 \ + --max-num-seqs 128 ``` **示例2:** A100上单卡部署16K上下文的服务 @@ -37,7 +37,7 @@ python -m fastdeploy.entrypoints.openai.api_server \ --max-model-len 16384 \ --max-num-batched-tokens 16384 \ --gpu-memory-utilization 0.8 \ - --max-num-seqs 256 \ + --max-num-seqs 256 ``` 示例是可以稳定运行的一组配置,同时也能得到比较好的性能。