Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 16:48:03 +08:00
Support limiting thinking lengths (#4244)
Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com>
@@ -131,6 +131,28 @@ def update_fd_config_for_mm(fd_config: FDConfig) -> None:
     fd_config.model_config.sequence_parallel = fd_config.parallel_config.sequence_parallel
 
 
+def update_think_end_id_for_ernie(fd_config: FDConfig) -> None:
+    """
+    Updates the think_end_id in the model config. Uses the ID of '</think>'
+    if it exists, otherwise defaults to None.
+    """
+    is_ernie = ErnieArchitectures.contains_ernie_arch(fd_config.model_config.architectures)
+    if current_platform.is_cuda() and is_ernie:
+        tokenizer = Ernie4_5Tokenizer.from_pretrained(
+            fd_config.model_config.model,
+            model_max_length=fd_config.parallel_config.max_model_len,
+            padding_side="right",
+            use_fast=False,
+        )
+
+        vocab = tokenizer.get_vocab()
+        fd_config.model_config.think_end_id = vocab.get("</think>", None)
+        if fd_config.model_config.think_end_id is not None:
+            logger.info(f"Get think_end_id {fd_config.model_config.think_end_id} from vocab.")
+        else:
+            logger.info("No </think> token found in vocabulary; the model cannot do reasoning.")
+
+
 class PaddleDisWorkerProc:
     """
     Paddle Distributed wrapper for fastdeploy.worker.Worker,
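
The field wired up above is what makes the commit's thinking-length limit enforceable at decode time: once a request exhausts its reasoning budget, the runtime can force the `</think>` token and end the thinking phase. The hunk itself only resolves the token ID; the following is a minimal, hypothetical sketch of how a decoding loop might consume think_end_id. ThinkBudgetState, max_think_tokens, and next_token are illustrative names, not FastDeploy APIs.

    # Hedged sketch: force '</think>' once a per-request thinking budget is spent.
    # All names here are illustrative, not FastDeploy internals.
    from dataclasses import dataclass


    @dataclass
    class ThinkBudgetState:
        think_end_id: int         # ID of '</think>' resolved from the vocab
        max_think_tokens: int     # per-request cap on reasoning tokens
        in_thinking: bool = True  # generation starts inside the <think> block
        spent: int = 0


    def next_token(state: ThinkBudgetState, sampled_id: int) -> int:
        """Return the token to emit, overriding the sample when the budget is spent."""
        if not state.in_thinking:
            return sampled_id
        if sampled_id == state.think_end_id:
            state.in_thinking = False  # the model closed the block on its own
            return sampled_id
        if state.spent >= state.max_think_tokens:
            state.in_thinking = False  # budget exhausted: force the closing token
            return state.think_end_id
        state.spent += 1
        return sampled_id

Overriding the sampled ID, rather than truncating the output afterwards, keeps the emitted sequence well formed, so downstream parsers still see a closed think block.
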
@@ -798,6 +820,7 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
         moba_attention_config=moba_attention_config,
     )
     update_fd_config_for_mm(fd_config)
+    update_think_end_id_for_ernie(fd_config)
 
     return fd_config
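
For completeness, the lookup performed by update_think_end_id_for_ernie can be reproduced outside FastDeploy with any tokenizer that exposes get_vocab(). A standalone sketch, assuming a transformers-style tokenizer; the model name is a placeholder, not taken from the commit:

    # Standalone sketch of the same vocab lookup; the model name is a placeholder.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("baidu/ERNIE-4.5-0.3B-PT", use_fast=False)
    vocab = tokenizer.get_vocab()               # maps token string -> integer ID
    think_end_id = vocab.get("</think>", None)  # None when the token is absent

    if think_end_id is None:
        print("No </think> token in the vocabulary; thinking length cannot be limited.")
    else:
        print(f"think_end_id = {think_end_id}")
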