mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 00:06:38 +08:00
Release/2.1 (#3414)
* Pre ce modified (#3335) (#3360) * Pre ce modified (#3335) * update * update * fix * fix * update * update * update * fix * update * update * update * add ut fix pr(3367) * [Bug Fix] Fix V1 video bug (#3387) * fix stopseq error info (#3342) Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> * [BugFix] Fix default log level of paddleformers (#3377) Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> * [Polish Code] Remove useless notes * feat(log):add_request_and_response_log (#3392) * Optimize CI execution workflow. (#3371) (#3384) * fix * [BugFix] fix control signal release failed (#3374) * [BugFix] * [BugFix] * [BugFix] * [BugFix] * fix * fix --------- Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> * Revert "Merge branch 'feature/online/vs_think_20250813' into release/2.1" This reverts commit 02596fc537, reversing changes made to 03347626a6. * [XPU] Fixed the issue of performance degradation caused by enabling ENABLE_V1_KVCACHE_SCHEDULER (#3393) * fix v1 schedule oom bug * fix v1 schedule oom bug * [BugFix] fix ErnieProcessor not set raw_prediction (#3401) * [Doc]Release fastdeploy-xpu 2.1.0 (#3407) * fix v1 schedule oom bug * fix v1 schedule oom bug * update release note * [Doc]Release fastdeploy-xpu 2.0.3 (#3408) * fix v1 schedule oom bug * fix v1 schedule oom bug * update release note * update info --------- Co-authored-by: YUNSHEN XIE <1084314248@qq.com> Co-authored-by: ming1753 <61511741+ming1753@users.noreply.github.com> Co-authored-by: JYChen <zoooo0820@qq.com> Co-authored-by: YuBaoku <49938469+EmmonsCurse@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com> Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com> Co-authored-by: xiaolei373 <zley373@gmail.com> Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com> Co-authored-by: yinwei <yinwei_hust@163.com> Co-authored-by: memoryCoderC <1137889088@qq.com>
This commit is contained in:
@@ -20,6 +20,8 @@ from dataclasses import asdict, dataclass
|
||||
from dataclasses import fields as dataclass_fields
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import paddle
|
||||
|
||||
from fastdeploy.config import (
|
||||
CacheConfig,
|
||||
EarlyStopConfig,
|
||||
@@ -93,14 +95,6 @@ class EngineArgs:
|
||||
"""
|
||||
specifies the reasoning parser to use for extracting reasoning content from the model output
|
||||
"""
|
||||
tool_call_parser: str = None
|
||||
"""
|
||||
specifies the tool call parser to use for extracting tool call from the model output
|
||||
"""
|
||||
tool_parser_plugin: str = None
|
||||
"""
|
||||
tool parser plugin used to register user defined tool parsers
|
||||
"""
|
||||
enable_mm: bool = False
|
||||
"""
|
||||
Flags to enable multi-modal model
|
||||
@@ -429,18 +423,6 @@ class EngineArgs:
|
||||
help="Flag specifies the reasoning parser to use for extracting "
|
||||
"reasoning content from the model output",
|
||||
)
|
||||
model_group.add_argument(
|
||||
"--tool-call-parser",
|
||||
type=str,
|
||||
default=EngineArgs.tool_call_parser,
|
||||
help="Flag specifies the tool call parser to use for extracting" "tool call from the model output",
|
||||
)
|
||||
model_group.add_argument(
|
||||
"--tool-parser-plugin",
|
||||
type=str,
|
||||
default=EngineArgs.tool_parser_plugin,
|
||||
help="tool parser plugin used to register user defined tool parsers",
|
||||
)
|
||||
model_group.add_argument(
|
||||
"--speculative-config",
|
||||
type=json.loads,
|
||||
@@ -889,7 +871,10 @@ class EngineArgs:
|
||||
if not int(os.getenv("ENABLE_V1_KVCACHE_SCHEDULER", "0")):
|
||||
self.max_num_batched_tokens = self.max_model_len
|
||||
else:
|
||||
self.max_num_batched_tokens = 8192 # if set to max_model_len, it's easy to be OOM
|
||||
if paddle.is_compiled_with_xpu():
|
||||
self.max_num_batched_tokens = self.max_model_len
|
||||
else:
|
||||
self.max_num_batched_tokens = 8192
|
||||
|
||||
all_dict = asdict(self)
|
||||
all_dict["model_cfg"] = model_cfg
|
||||
@@ -928,7 +913,6 @@ class EngineArgs:
|
||||
mm_processor_kwargs=self.mm_processor_kwargs,
|
||||
enable_mm=self.enable_mm,
|
||||
reasoning_parser=self.reasoning_parser,
|
||||
tool_parser=self.tool_call_parser,
|
||||
splitwise_role=self.splitwise_role,
|
||||
innode_prefill_ports=self.innode_prefill_ports,
|
||||
max_num_partial_prefills=self.max_num_partial_prefills,
|
||||
|
Reference in New Issue
Block a user