mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
* fix chat_template_args
* fix args
* add offline
* add offline
* fix
* fix
* fix default enable_thinking value
* fix default enable_thinking value
* modify condition
* Revert "modify condition"
This reverts commit 26430bdeb1.
* fix unit test
This commit is contained in:
@@ -238,7 +238,7 @@ class LLM:
|
||||
self,
|
||||
prompts,
|
||||
sampling_params,
|
||||
chat_template_kwargs: Optional[dict[str, Any]] = None,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
添加一个请求到 LLM Engine,并返回该请求的 ID。
|
||||
@@ -279,10 +279,7 @@ class LLM:
|
||||
current_sampling_params = sampling_params[i]
|
||||
else:
|
||||
current_sampling_params = sampling_params
|
||||
enable_thinking = None
|
||||
if chat_template_kwargs is not None:
|
||||
enable_thinking = chat_template_kwargs.get("enable_thinking", None)
|
||||
self.llm_engine.add_requests(tasks, current_sampling_params, enable_thinking=enable_thinking)
|
||||
self.llm_engine.add_requests(tasks, current_sampling_params, **kwargs)
|
||||
return req_ids
|
||||
|
||||
def _decode_token(self, token_id: int) -> str:
|
||||
|
Reference in New Issue
Block a user