mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Serving] fix offline inference sampling parameters overwrite (#2654)
@@ -251,13 +251,15 @@ class LLM:
             )

             req_ids.append(request_id)
             if isinstance(sampling_params, list):
-                sampling_params = sampling_params[i]
+                current_sampling_params = sampling_params[i]
+            else:
+                current_sampling_params = sampling_params
             enable_thinking = None
             if chat_template_kwargs is not None:
                 enable_thinking = chat_template_kwargs.get(
                     "enable_thinking", None)
             self.llm_engine.add_requests(tasks,
-                                         sampling_params,
+                                         current_sampling_params,
                                          enable_thinking=enable_thinking)
         return req_ids
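The bug in the old code: `sampling_params = sampling_params[i]` rebinds the name that held the caller's list, so the list is lost after the first loop iteration; the `isinstance` check then fails for every later request, and each of them silently reuses request 0's sampling parameters. The patch reads the per-request value into a fresh name, `current_sampling_params`, and passes that to `add_requests` instead. A minimal standalone sketch of the before/after behavior, using plain dicts as hypothetical stand-ins for SamplingParams objects:

def add_requests_old(sampling_params, num_requests=2):
    """Pre-patch logic: rebinding clobbers the list on the first pass."""
    applied = []
    for i in range(num_requests):
        if isinstance(sampling_params, list):
            # rebinds the name; the list is unreachable from here on
            sampling_params = sampling_params[i]
        applied.append(sampling_params)
    return applied

def add_requests_fixed(sampling_params, num_requests=2):
    """Post-patch logic: per-request value goes into a fresh name."""
    applied = []
    for i in range(num_requests):
        if isinstance(sampling_params, list):
            current_sampling_params = sampling_params[i]
        else:
            current_sampling_params = sampling_params
        applied.append(current_sampling_params)
    return applied

params = [{"temperature": 0.1}, {"temperature": 0.9}]
print(add_requests_old(params))    # [{'temperature': 0.1}, {'temperature': 0.1}]
print(add_requests_fixed(params))  # [{'temperature': 0.1}, {'temperature': 0.9}]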