fix request_output sampling_params (#3154) (#3464)

2025-10-06 00:57:33 +08:00 · 2025-08-19 13:52:50 +08:00
parent bca8905b40
commit 6735626014
1 changed file with 5 additions and 4 deletions
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -734,10 +734,6 @@ class LLMEngine:
        """
        Insert tasks to engine.
        """
-        for task in tasks:
-            start_span_request("DEQUEUE", task, trace.SpanKind.CONSUMER)
-            if task.sampling_params.bad_words is not None:
-                task.sampling_params.update_from_tokenizer(self.data_processor.tokenizer)
        # TODO 返回至 scheduler
        if allocated:
            current_tasks = []
@@ -764,6 +760,11 @@ class LLMEngine:
            self.engine_worker_queue.put_tasks((current_tasks, self.resource_manager.real_bsz))
            return True

+        for task in tasks:
+            start_span_request("DEQUEUE", task, trace.SpanKind.CONSUMER)
+            if task.sampling_params.bad_words is not None:
+                task.sampling_params.update_from_tokenizer(self.data_processor.tokenizer)
+
        self.resource_manager.check_and_free_block_tables()

        if not isinstance(tasks, list):