mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
Update Unit Test for PaddleOCR-VL (#4802)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
* fix paddleocr prefix cache bug
* add test for paddleocr_vl
* disable prefix-caching in ocr
* add test for paddleocr_vl
* Fix top_p for rejection sampling
* add test for ocr processor; fix top_p for rejection sampling
* add test for ocr processor; fix top_p for rejection sampling
* add test for ocr processor; fix top_p for rejection sampling
* add test for ocr processor; fix top_p for rejection sampling
* add test for ocr processor; fix top_p for rejection sampling

---------

Co-authored-by: ming1753 <ideaminghp@163.com>
Co-authored-by: ming1753 <61511741+ming1753@users.noreply.github.com>
This commit is contained in:
@@ -26,6 +26,8 @@ from fastdeploy.utils import data_processor_logger
|
||||
|
||||
from .process import DataProcessor
|
||||
|
||||
_SAMPLING_EPS = 1e-5
|
||||
|
||||
|
||||
class Ernie4_5_VLProcessor(Ernie4_5Processor):
|
||||
"""The processor class for ERNIE MoE VL models."""
|
||||
@@ -268,6 +270,9 @@ class Ernie4_5_VLProcessor(Ernie4_5Processor):
|
||||
request["reasoning_max_tokens"] = max(int(request["max_tokens"] * 0.8), 1)
|
||||
data_processor_logger.info(f"Processed request {request}")
|
||||
|
||||
if request.get("top_p") is not None and request.get("top_p") < _SAMPLING_EPS:
|
||||
request["top_p"] = _SAMPLING_EPS
|
||||
|
||||
return request
|
||||
|
||||
def append_completion_tokens(self, multimodal_inputs, completion_token_ids):
|
||||
|
||||
@@ -22,6 +22,8 @@ from fastdeploy.utils import data_processor_logger
|
||||
|
||||
from .process import DataProcessor
|
||||
|
||||
_SAMPLING_EPS = 1e-5
|
||||
|
||||
|
||||
class PaddleOCRVLProcessor(TextProcessor):
|
||||
"""
|
||||
@@ -61,7 +63,6 @@ class PaddleOCRVLProcessor(TextProcessor):
|
||||
tool_parser_obj: Tool parser instance
|
||||
"""
|
||||
super().__init__(model_name_or_path, reasoning_parser_obj, tool_parser_obj)
|
||||
|
||||
data_processor_logger.info(f"model_name_or_path: {model_name_or_path}")
|
||||
processor_kwargs = self._parse_processor_kwargs(mm_processor_kwargs)
|
||||
self.processor = DataProcessor(
|
||||
@@ -252,6 +253,9 @@ class PaddleOCRVLProcessor(TextProcessor):
|
||||
if request.get("max_tokens") is None:
|
||||
request["max_tokens"] = max(1, max_model_len - len(request["prompt_token_ids"])) # Ensure at least 1 token
|
||||
|
||||
if request.get("top_p") is not None and request.get("top_p") < _SAMPLING_EPS:
|
||||
request["top_p"] = _SAMPLING_EPS
|
||||
|
||||
return request
|
||||
|
||||
def append_generated_tokens(self, multimodal_inputs, generated_token_ids):
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import signal
|
||||
@@ -99,7 +98,6 @@ def setup_and_run_server():
|
||||
model_path = "./PaddleOCR-VL-0.9B"
|
||||
|
||||
log_path = "server.log"
|
||||
limit_mm_str = json.dumps({"image": 100, "video": 100})
|
||||
|
||||
cmd = [
|
||||
sys.executable,
|
||||
@@ -109,8 +107,6 @@ def setup_and_run_server():
|
||||
model_path,
|
||||
"--port",
|
||||
str(FD_API_PORT),
|
||||
"--tensor-parallel-size",
|
||||
"2",
|
||||
"--engine-worker-queue-port",
|
||||
str(FD_ENGINE_QUEUE_PORT),
|
||||
"--metrics-port",
|
||||
@@ -119,18 +115,13 @@ def setup_and_run_server():
|
||||
str(FD_CACHE_QUEUE_PORT),
|
||||
"--enable-mm",
|
||||
"--max-model-len",
|
||||
"32768",
|
||||
"16384",
|
||||
"--max-num-batched-tokens",
|
||||
"384",
|
||||
"16384",
|
||||
"--max-num-seqs",
|
||||
"128",
|
||||
"--limit-mm-per-prompt",
|
||||
limit_mm_str,
|
||||
"--enable-chunked-prefill",
|
||||
"--kv-cache-ratio",
|
||||
"0.71",
|
||||
"--quantization",
|
||||
"wint4",
|
||||
"--gpu-memory-utilization",
|
||||
"0.9",
|
||||
"--graph-optimization-config",
|
||||
'{"graph_opt_level":0, "use_cudagraph":true}',
|
||||
]
|
||||
|
||||
1146
tests/input/test_paddleocr_vl_processor.py
Normal file
1146
tests/input/test_paddleocr_vl_processor.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user