Update Unit Test for PaddleOCR-VL (#4802)

* fix paddleocr prefix cache bug

* add test for paddleocr_vl

* disable prefix-caching in ocr

* Fix top_p for rejection sampling

* add test for ocr processor; fix top_p for rejection sampling

---------

Co-authored-by: ming1753 <ideaminghp@163.com>
Co-authored-by: ming1753 <61511741+ming1753@users.noreply.github.com>
Author: Haonan Luo
Date: 2025-11-04 22:40:15 +08:00
Committed by: GitHub
Parent: 1b61d62ecf
Commit: 2c281e617c
4 changed files with 1160 additions and 14 deletions


@@ -26,6 +26,8 @@ from fastdeploy.utils import data_processor_logger
 
 from .process import DataProcessor
 
+_SAMPLING_EPS = 1e-5
+
 
 class Ernie4_5_VLProcessor(Ernie4_5Processor):
     """The processor class for ERNIE MoE VL models."""
@@ -268,6 +270,9 @@ class Ernie4_5_VLProcessor(Ernie4_5Processor):
             request["reasoning_max_tokens"] = max(int(request["max_tokens"] * 0.8), 1)
         data_processor_logger.info(f"Processed request {request}")
 
+        if request.get("top_p") is not None and request.get("top_p") < _SAMPLING_EPS:
+            request["top_p"] = _SAMPLING_EPS
+
         return request
 
     def append_completion_tokens(self, multimodal_inputs, completion_token_ids):
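Both processors receive the same guard (the PaddleOCRVLProcessor hunks follow below): a client-supplied top_p of 0, previously forwarded as-is, can leave the rejection sampler with an empty nucleus, so anything below 1e-5 is clamped up to the epsilon. A minimal standalone sketch of the clamp, assuming nothing beyond the hunks above; the helper name is illustrative:

_SAMPLING_EPS = 1e-5

def clamp_top_p(request: dict) -> dict:
    # A top_p below the epsilon leaves (almost) no probability mass inside
    # the nucleus, which the rejection sampler cannot handle; clamp it up.
    if request.get("top_p") is not None and request.get("top_p") < _SAMPLING_EPS:
        request["top_p"] = _SAMPLING_EPS
    return request

print(clamp_top_p({"top_p": 0.0}))  # {'top_p': 1e-05}
print(clamp_top_p({"top_p": 0.9}))  # {'top_p': 0.9} -- left untouched
print(clamp_top_p({}))              # {} -- unset top_p stays unset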


@@ -22,6 +22,8 @@ from fastdeploy.utils import data_processor_logger
 
 from .process import DataProcessor
 
+_SAMPLING_EPS = 1e-5
+
 
 class PaddleOCRVLProcessor(TextProcessor):
     """
@@ -61,7 +63,6 @@ class PaddleOCRVLProcessor(TextProcessor):
             tool_parser_obj: Tool parser instance
         """
         super().__init__(model_name_or_path, reasoning_parser_obj, tool_parser_obj)
-        data_processor_logger.info(f"model_name_or_path: {model_name_or_path}")
         processor_kwargs = self._parse_processor_kwargs(mm_processor_kwargs)
 
         self.processor = DataProcessor(
@@ -252,6 +253,9 @@ class PaddleOCRVLProcessor(TextProcessor):
         if request.get("max_tokens") is None:
             request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"]))  # Ensure at least 1 token
 
+        if request.get("top_p") is not None and request.get("top_p") < _SAMPLING_EPS:
+            request["top_p"] = _SAMPLING_EPS
+
         return request
 
     def append_generated_tokens(self, multimodal_inputs, generated_token_ids):
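The max_tokens fallback in the last hunk is a plain length budget: whatever part of max_model_len the prompt has not consumed, floored at 1 so even a full-context prompt keeps a token of generation budget. A sketch with illustrative numbers:

max_model_len = 16384            # illustrative; matches the new test config
prompt_token_ids = [0] * 16000   # a prompt that nearly fills the context

request = {"prompt_token_ids": prompt_token_ids, "max_tokens": None}
if request.get("max_tokens") is None:
    # Ensure at least 1 token of output, even when the prompt already
    # fills (or overflows) the context window.
    request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"]))
print(request["max_tokens"])  # 384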


@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import shutil
import signal
@@ -99,7 +98,6 @@ def setup_and_run_server():
model_path = "./PaddleOCR-VL-0.9B"
log_path = "server.log"
limit_mm_str = json.dumps({"image": 100, "video": 100})
cmd = [
sys.executable,
@@ -109,8 +107,6 @@ def setup_and_run_server():
model_path,
"--port",
str(FD_API_PORT),
"--tensor-parallel-size",
"2",
"--engine-worker-queue-port",
str(FD_ENGINE_QUEUE_PORT),
"--metrics-port",
@@ -119,18 +115,13 @@ def setup_and_run_server():
str(FD_CACHE_QUEUE_PORT),
"--enable-mm",
"--max-model-len",
"32768",
"16384",
"--max-num-batched-tokens",
"384",
"16384",
"--max-num-seqs",
"128",
"--limit-mm-per-prompt",
limit_mm_str,
"--enable-chunked-prefill",
"--kv-cache-ratio",
"0.71",
"--quantization",
"wint4",
"--gpu-memory-utilization",
"0.9",
"--graph-optimization-config",
'{"graph_opt_level":0, "use_cudagraph":true}',
]
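The cmd list above is what the test's setup_and_run_server fixture launches. Below is a plausible sketch of the launch and teardown around it; only model_path, log_path, and the flags come from the diff, while the Popen and process-group handling are assumptions consistent with the test's os and signal imports:

import os
import signal
import subprocess

def run_server(cmd, log_path="server.log"):
    # Assumed launch logic: redirect stdout/stderr to the log file and start
    # the server in its own process group (start_new_session=True) so the
    # whole process tree can be torn down at once.
    with open(log_path, "w") as logfile:
        return subprocess.Popen(cmd, stdout=logfile, stderr=subprocess.STDOUT, start_new_session=True)

def stop_server(proc):
    # SIGTERM the whole group, not just the parent, to reap engine workers.
    os.killpg(os.getpgid(proc.pid), signal.SIGTERM)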

File diff suppressed because it is too large (the new PaddleOCR-VL test file, which accounts for the bulk of the 1160 added lines).