Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)
Support limit thinking lengths (#4244)
Co-authored-by: K11OntheBoat <“ruianmaidanglao@163.com”>
@@ -193,7 +193,7 @@ def post_process_normal(
 ) -> ModelRunnerOutput:
     """Post-processing steps after completing a single token generation."""
     # handle vl:
-    if model_output.enable_thinking:
+    if model_output.enable_thinking and model_output.think_end_id is not None:
         exists_think_end = sampler_output.sampled_token_ids == model_output.think_end_id
         paddle.assign(
             paddle.where(
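For context, the added guard matters because this post-processing step compares every sampled token id against model_output.think_end_id; without the check, that comparison would run against None whenever no end-of-thinking token is configured. The sketch below is a minimal, self-contained illustration of the same paddle.where / paddle.assign pattern, not FastDeploy's actual implementation: the mark_think_end helper, the thinking_budget tensor, and the example token ids are assumptions introduced here, while enable_thinking, think_end_id, sampled_token_ids, and the assign/where idiom come from the diff above.

import paddle

def mark_think_end(sampled_token_ids, think_end_id, enable_thinking, thinking_budget):
    """Decrement a per-request thinking budget once the think-end token is sampled.

    Illustrative only: `thinking_budget` stands in for whatever per-request
    bookkeeping the model runner keeps about remaining thinking length.
    """
    # The guard introduced by the commit: skip the branch entirely when no
    # think-end token id is configured, so we never compare token ids to None.
    if enable_thinking and think_end_id is not None:
        # Boolean tensor, one flag per request in the batch.
        exists_think_end = sampled_token_ids == think_end_id
        # Same assign/where pattern as in the diff: overwrite the budget in
        # place, subtracting 1 only for requests that just emitted the
        # think-end token.
        paddle.assign(
            paddle.where(exists_think_end, thinking_budget - 1, thinking_budget),
            output=thinking_budget,
        )
    return thinking_budget

# Usage: two requests; request 0 samples the think-end token (id 7), request 1 does not.
sampled = paddle.to_tensor([[7], [42]], dtype="int64")
budget = paddle.to_tensor([[3], [3]], dtype="int64")
mark_think_end(sampled, think_end_id=7, enable_thinking=True, thinking_budget=budget)
print(budget.numpy())  # [[2], [3]]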