Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-12-24 13:28:13 +08:00.
[Feature] add a new reasoning parser (#4571)

* add new reasoning_parser initial commit
* add parser file content
* add register
* ernie_test_reasoning_parser
* support <tool_call> token and add tool_parser
* add and fix unit tests
* modify reasoning_parser
* modify reasoning parser and tool parser
* modify unit tests
* modify reasoning_parser and tool_parser
* modify unit tests
* fix tool_parser
* modify the logic of reasoning_parser and tool_parser
* add and modify unit tests
* standardize code style
* simplify reasoning_parser and tool_parser
* modify unit test
* [BugFix] Fix finish reason in _create_chat_completion_choice (#4582)
* fix n_param in _create_chat_completion_choice
* fix unit test
* fix final_res
* modify unit tests
* [BugFix] fix offline llm chat "enable_thinking" is always "False" (#4686)
* fix enable_thinking
* recover ernie4_5_vl_processor
* fix parser register name (#4795)

Co-authored-by: luukunn <83932082+luukunn@users.noreply.github.com>
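For context on the "enable_thinking" fix (#4686): the sketch below shows how an offline chat call might toggle thinking, assuming FastDeploy's offline LLM.chat accepts a chat_template_kwargs argument as in its offline-inference examples; the model name, sampling values, and output access are illustrative assumptions, not taken from this PR.

# Hedged sketch: toggling reasoning for offline chat (assumed LLM.chat
# interface; model path and parameters are placeholders).
from fastdeploy import LLM, SamplingParams

messages = [{"role": "user", "content": "Why is the sky blue?"}]
sampling = SamplingParams(temperature=0.8, max_tokens=256)

llm = LLM(model="baidu/ERNIE-4.5-VL-28B-A3B-Paddle")
# Per the commit message, offline chat previously forced enable_thinking to
# False; after the fix the user-supplied value should take effect.
outputs = llm.chat(
    messages=messages,
    sampling_params=sampling,
    chat_template_kwargs={"enable_thinking": True},
)
print(outputs[0].outputs.text)  # assumed RequestOutput layout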
@@ -795,7 +795,7 @@ class Ernie4_5_MoePretrainedModel(PretrainedModel):
         """
         get_tensor_parallel_mappings
         """
-        logger.info("erine inference model _get_tensor_parallel_mappings")
+        logger.info("ernie inference model _get_tensor_parallel_mappings")
         from fastdeploy.model_executor.models.tp_utils import (
             build_expanded_keys,
             has_prefix,
@@ -63,7 +63,7 @@ class Ernie4_5_MTPPretrainedModel(PretrainedModel):
         """
         get_tensor_parallel_mappings
         """
-        logger.info("erine inference model _get_tensor_parallel_mappings")
+        logger.info("ernie inference model _get_tensor_parallel_mappings")

         from paddleformers.transformers.conversion_utils import split_or_merge_func

@@ -21,10 +21,10 @@ from fastdeploy.entrypoints.openai.protocol import ChatCompletionRequest, DeltaM
 from fastdeploy.reasoning import ReasoningParser, ReasoningParserManager


-@ReasoningParserManager.register_module("erine-45-vl-thinking")
+@ReasoningParserManager.register_module("ernie-45-vl-thinking")
 class Ernie45VLThinkingReasoningParser(ReasoningParser):
     """
-    Reasoning parser for ernir_vl model.
+    Reasoning parser for ernie_vl model.

     The ernie_vl model uses <think>...</think> tokens to denote reasoning text
     within its output. The model provides a strict switch to disable reasoning
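To make the registration above concrete, here is a minimal, self-contained sketch of how a parser registered under "ernie-45-vl-thinking" could split reasoning from the final answer on the closing </think> token. The local registry, the extract_reasoning_content method name, and the simple string split are illustrative assumptions; they stand in for, and do not reproduce, FastDeploy's ReasoningParserManager and ReasoningParser classes.

from typing import Optional, Tuple

PARSER_REGISTRY = {}  # local stand-in for ReasoningParserManager's registry

def register_module(name):
    # Minimal decorator mimicking the registration pattern shown in the diff.
    def wrap(cls):
        PARSER_REGISTRY[name] = cls
        return cls
    return wrap

@register_module("ernie-45-vl-thinking")
class ThinkingReasoningParserSketch:
    think_end_token = "</think>"

    def extract_reasoning_content(self, model_output: str) -> Tuple[Optional[str], str]:
        # Split "reasoning</think>answer" into (reasoning, answer); if no
        # end token is present, treat the whole output as the final answer.
        if self.think_end_token not in model_output:
            return None, model_output
        reasoning, _, content = model_output.partition(self.think_end_token)
        return reasoning.strip(), content.strip()

parser = PARSER_REGISTRY["ernie-45-vl-thinking"]()
print(parser.extract_reasoning_content("Let me look at the image.</think>It is a cat."))
# -> ('Let me look at the image.', 'It is a cat.')

Per the commit message, the real parser also handles streaming deltas and a <tool_call> token; the sketch covers only the non-streaming split.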
@@ -532,7 +532,7 @@ class TestMaxStreamingResponseTokens(IsolatedAsyncioTestCase):

         mock_processor_instance.process_response_chat = mock_process_response_chat
         mock_processor_instance.enable_multimodal_content = Mock(return_value=False)
-        mock_processor_instance.reasoning_parser = Mock(__class__.__name__ == "ErineTestReasoningParser")
+        mock_processor_instance.reasoning_parser = Mock(__class__.__name__ == "Ernie45VLThinkingReasoningParser")
         mock_processor_instance.data_processor = Mock(
             process_response_dict=lambda resp, stream, enable_thinking, include_stop_str_in_output: resp
         )
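An aside on the mocking pattern adjusted here: unittest.mock allows a test double to report an arbitrary class name by overriding __class__, which is useful when production code branches on the reasoning parser's class name. A minimal, self-contained sketch (the parser class below is a local stand-in, not the FastDeploy import):

from unittest.mock import Mock

class Ernie45VLThinkingReasoningParser:  # local stand-in for the real class
    pass

mock_parser = Mock()
mock_parser.__class__ = Ernie45VLThinkingReasoningParser  # override the reported type
assert mock_parser.__class__.__name__ == "Ernie45VLThinkingReasoningParser"
assert isinstance(mock_parser, Ernie45VLThinkingReasoningParser)  # isinstance checks also pass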