[Serving] fix offline inference sampling parameters overwrite (#2654)

Author:    ltd0924
Date:      2025-07-01 10:17:46 +08:00
Committer: GitHub
parent d5af78945b
commit 50aa4080c0


@@ -251,13 +251,15 @@ class LLM:
             )
             req_ids.append(request_id)
             if isinstance(sampling_params, list):
-                sampling_params = sampling_params[i]
+                current_sampling_params = sampling_params[i]
+            else:
+                current_sampling_params = sampling_params
             enable_thinking = None
             if chat_template_kwargs is not None:
                 enable_thinking = chat_template_kwargs.get(
                     "enable_thinking", None)
             self.llm_engine.add_requests(tasks,
-                                         sampling_params,
+                                         current_sampling_params,
                                          enable_thinking=enable_thinking)
         return req_ids
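
Why the overwrite happened: when sampling_params is a list (one entry per prompt), the first loop iteration rebound the name sampling_params to its first element. Every later request then reused that element, and the isinstance check stopped matching. Below is a minimal sketch of the buggy pattern and the fix; SamplingParams and the two helper functions are illustrative stand-ins, not FastDeploy's actual API.

    from dataclasses import dataclass

    @dataclass
    class SamplingParams:  # hypothetical stand-in, not the FastDeploy class
        temperature: float

    def add_requests_buggy(prompts, sampling_params):
        """Before the fix: rebinding `sampling_params` inside the loop
        destroys the per-request list after the first iteration."""
        used = []
        for i, _prompt in enumerate(prompts):
            if isinstance(sampling_params, list):
                sampling_params = sampling_params[i]  # overwrites the list
            used.append(sampling_params)
        return used

    def add_requests_fixed(prompts, sampling_params):
        """After the fix: a separate name leaves the original list intact."""
        used = []
        for i, _prompt in enumerate(prompts):
            if isinstance(sampling_params, list):
                current_sampling_params = sampling_params[i]
            else:
                current_sampling_params = sampling_params
            used.append(current_sampling_params)
        return used

    params = [SamplingParams(0.1), SamplingParams(0.7), SamplingParams(1.0)]
    prompts = ["a", "b", "c"]
    print(add_requests_buggy(prompts, params))  # first element reused for every request
    print(add_requests_fixed(prompts, params))  # one entry per request, as intended

Binding the per-request value to a fresh name (current_sampling_params) leaves the caller's list untouched, which is exactly what the diff above does.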