[Bug fix] Send first token in D instance (#5199)

* [Bug fix] Send first token in D instance
* fix
2025-12-24 13:28:13 +08:00 · 2025-11-24 23:42:20 +08:00
parent 95b39317a9
commit 09b47c7111
1 changed file with 5 additions and 1 deletion
--- a/fastdeploy/engine/common_engine.py
+++ b/fastdeploy/engine/common_engine.py
@@ -1125,7 +1125,7 @@ class EngineService:
                    # received the request sent by the client
                    waiting_request_outputs.append(req_output)
                    continue
-
+                req_output.finished = False
                ready_request_outputs.append(req_output)
                self.llm_logger.debug(f"there are enough resource for prefilled request: {req_output.request_id}")

@@ -1145,6 +1145,8 @@ class EngineService:
                        self.resource_manager.pre_recycle_resource(request_id)
                        if request_id in self.token_processor.tokens_counter:
                            del self.token_processor.tokens_counter[request_id]
+                        req_output.finished = True
+                        self.scheduler.put_results([req_output])
                        continue
                    if req_output.error_code != 200:
                        self.llm_logger.warning(
@@ -1156,6 +1158,8 @@ class EngineService:
                        self.scheduler.put_results([req_output])
                        continue
                    self.token_processor.tokens_counter[request_id] = 1
+                    if envs.FD_ENABLE_INTERNAL_ADAPTER:  # first token sent by D instance
+                        self.scheduler.put_results([req_output])
                    self.resource_manager.add_prefilled_request(req_output)
                    self.llm_logger.debug(f"add prefilled request success, {request_id}")