[Feature] Pass through the chat_template_kwargs to the data processing module (#3421) (#3469)

* fix chat_template_args
* fix args
* add offline
* add offline
* fix
* fix
* fix default enable_thinking value
* fix default enable_thinking value
* modify condition
* Revert "modify condition"
  This reverts commit 26430bdeb1.
* fix unit test
2025-10-05 08:37:06 +08:00 · 2025-08-19 17:40:12 +08:00
parent 3ffbc98179
commit d07338f932
6 changed files with 50 additions and 13 deletions
--- a/fastdeploy/entrypoints/llm.py
+++ b/fastdeploy/entrypoints/llm.py
@@ -238,7 +238,7 @@ class LLM:
        self,
        prompts,
        sampling_params,
-        chat_template_kwargs: Optional[dict[str, Any]] = None,
+        **kwargs,
    ):
        """
            添加一个请求到 LLM Engine，并返回该请求的 ID。
@@ -279,10 +279,7 @@ class LLM:
                current_sampling_params = sampling_params[i]
            else:
                current_sampling_params = sampling_params
-            enable_thinking = None
-            if chat_template_kwargs is not None:
-                enable_thinking = chat_template_kwargs.get("enable_thinking", None)
-            self.llm_engine.add_requests(tasks, current_sampling_params, enable_thinking=enable_thinking)
+            self.llm_engine.add_requests(tasks, current_sampling_params, **kwargs)
        return req_ids

    def _decode_token(self, token_id: int) -> str: