mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
Support limit thinking lengths (#4069)
Co-authored-by: K11OntheBoat <“ruianmaidanglao@163.com”>
This commit is contained in:
@@ -195,8 +195,9 @@ def post_process_normal(
|
||||
) -> ModelRunnerOutput:
|
||||
"""Post-processing steps after completing a single token generation."""
|
||||
# handle vl:
|
||||
if model_output.enable_thinking:
|
||||
exists_think_end = sampler_output.sampled_token_ids == model_output.think_end_id
|
||||
if model_output.think_end_id != -1:
|
||||
thinking_mask = model_output.enable_thinking
|
||||
exists_think_end = (sampler_output.sampled_token_ids == model_output.think_end_id) & thinking_mask
|
||||
paddle.assign(
|
||||
paddle.where(
|
||||
exists_think_end,
|
||||
@@ -206,9 +207,10 @@ def post_process_normal(
|
||||
model_output.need_think_end,
|
||||
)
|
||||
|
||||
reasoning_index_update_cond = model_output.need_think_end.cast("bool") & thinking_mask
|
||||
paddle.assign(
|
||||
paddle.where(
|
||||
model_output.need_think_end.cast("bool"),
|
||||
reasoning_index_update_cond,
|
||||
model_output.reasoning_index - 1,
|
||||
model_output.reasoning_index,
|
||||
),
|
||||
@@ -219,6 +221,8 @@ def post_process_normal(
|
||||
(sampler_output.sampled_token_ids == model_output.eos_token_id.T).any(axis=1, keepdim=True)
|
||||
| (model_output.reasoning_index == 0)
|
||||
) & (model_output.need_think_end > 0)
|
||||
|
||||
stop_wo_think = stop_wo_think & thinking_mask
|
||||
sampler_output.sampled_token_ids = paddle.where(
|
||||
stop_wo_think,
|
||||
model_output.think_end_id,
|
||||
|
Reference in New Issue
Block a user