mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 00:06:38 +08:00
Merge vl execution path into normal execution path (#2829)
* merge vl model into gpu_model runner
  Change-Id: I9f4691a3d5f135e8d72b1d58abcd15ef3aa3f2a6
* fix Chinese
  Change-Id: Ic7405109b984c21e076fb3b01ff6feb571d0119a
* fix the parse parameter
  Change-Id: I4cd62ee87c06220af580d91e347145d4394917fe
* fix the bug in online_inference
  Change-Id: Idb111bb2114e83017c4050b2a68cf039c6d3c559
* polish code
  Change-Id: I7d4194102c2f1b0743b74fbd5fc284eb8ef4d17c
This commit is contained in:
@@ -129,6 +129,36 @@ def post_process_normal(sampler_output: SamplerOutput,
|
||||
save_each_rank: bool = False,
|
||||
skip_save_output: bool = False) -> ModelRunnerOutput:
|
||||
""" Post-processing steps after completing a single token generation. """
|
||||
# handle vl:
|
||||
if model_output.enable_thinking:
|
||||
exists_think_end = sampler_output.sampled_token_ids == model_output.think_end_id
|
||||
paddle.assign(
|
||||
paddle.where(
|
||||
exists_think_end,
|
||||
model_output.need_think_end - 1,
|
||||
model_output.need_think_end,
|
||||
), model_output.need_think_end)
|
||||
|
||||
paddle.assign(
|
||||
paddle.where(
|
||||
model_output.need_think_end.cast("bool"),
|
||||
model_output.reasoning_index - 1,
|
||||
model_output.reasoning_index,
|
||||
), model_output.reasoning_index)
|
||||
|
||||
stop_wo_think = (
|
||||
(sampler_output.sampled_token_ids == model_output.eos_token_id) |
|
||||
(model_output.reasoning_index == 0)) & (
|
||||
model_output.need_think_end > 0)
|
||||
sampler_output.sampled_token_ids = paddle.where(stop_wo_think,
|
||||
model_output.think_end_id,
|
||||
sampler_output.sampled_token_ids)
|
||||
paddle.assign(
|
||||
paddle.where(
|
||||
stop_wo_think,
|
||||
model_output.need_think_end - 1,
|
||||
model_output.need_think_end,
|
||||
), model_output.need_think_end)
|
||||
# 1. Set stop value
|
||||
paddle.assign(
|
||||
paddle.where(
|
||||
|
Reference in New Issue
Block a user