mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-07 01:22:59 +08:00
[Feature] Support limit thinking len for text models (#3527)
* support limit thinking len * remove default think_end_id * remove reasoning_max_tokens * update think_end_id for ernie * update think_end_id for ernie. --------- Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com> Co-authored-by: luukunn <981429396@qq.com>
This commit is contained in:
@@ -123,6 +123,28 @@ def update_fd_config_for_mm(fd_config: FDConfig) -> None:
|
||||
fd_config.model_config.sequence_parallel = fd_config.parallel_config.sequence_parallel
|
||||
|
||||
|
||||
def update_think_end_id_for_ernie(fd_config: FDConfig) -> None:
    """
    Update ``fd_config.model_config.think_end_id`` in place for ERNIE models.

    Loads the ERNIE tokenizer for the configured model and looks up the
    '</think>' token in its vocabulary. Sets ``think_end_id`` to that token's
    ID when present, otherwise to ``None`` (the model then cannot delimit
    reasoning output). Non-ERNIE architectures are left untouched.

    Args:
        fd_config: The deployment config; ``model_config.think_end_id`` is
            mutated as a side effect.
    """
    is_ernie = ErnieArchitectures.contains_ernie_arch(fd_config.model_config.architectures)
    if is_ernie:
        # use_fast=False: the ERNIE tokenizer has no fast (Rust) implementation
        # — TODO confirm; mirrors how the tokenizer is loaded elsewhere.
        tokenizer = ErnieBotTokenizer.from_pretrained(
            fd_config.model_config.model,
            model_max_length=fd_config.parallel_config.max_model_len,
            padding_side="right",
            use_fast=False,
        )

        vocab = tokenizer.get_vocab()
        # dict.get already returns None for a missing key; no explicit default needed.
        fd_config.model_config.think_end_id = vocab.get("</think>")
        if fd_config.model_config.think_end_id is not None:
            logger.info(f"Get think_end_id {fd_config.model_config.think_end_id} from vocab.")
        else:
            logger.info("No </think> token found in vocabulary, The model can not do reasoning.")
|
||||
|
||||
|
||||
class PaddleDisWorkerProc:
|
||||
"""
|
||||
Paddle Distributed wrapper for fastdeploy.worker.Worker,
|
||||
@@ -710,7 +732,7 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
|
||||
cache_config=cache_config,
|
||||
)
|
||||
update_fd_config_for_mm(fd_config)
|
||||
|
||||
update_think_end_id_for_ernie(fd_config)
|
||||
return fd_config
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user