[Bug fix] Fix bug when running EP (#4245)

* fix bug for ep

* fix bug
This commit is contained in:
chenjian
2025-09-28 14:56:18 +08:00
committed by GitHub
parent 17e00d9f5d
commit 3cef851468
5 changed files with 54 additions and 18 deletions

View File

@@ -30,7 +30,7 @@ import paddle
import zmq
from opentelemetry import trace
from fastdeploy.engine.request import Request, RequestOutput
from fastdeploy.engine.request import Request, RequestOutput, RequestType
from fastdeploy.engine.resource_manager import ResourceManager
from fastdeploy.engine.sched.resource_manager_v1 import ResourceManagerV1
from fastdeploy.inter_communicator import (
@@ -77,6 +77,7 @@ class EngineService:
self.llm_logger = llm_logger
self.scheduler = cfg.scheduler_config.scheduler()
self.enable_decode_cache_task = envs.FD_ENABLE_CACHE_TASK == "1"
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
self.resource_manager = ResourceManagerV1(
@@ -623,7 +624,7 @@ class EngineService:
for tmp_task in need_delete_tasks:
tasks.remove(tmp_task)
# release resource in P
self.resource_manager.prerelease_resource(task)
self.resource_manager.prerelease_resource(tmp_task)
if self.cfg.scheduler_config.splitwise_role == "prefill":
# to send cache info to cache messager
if tasks:
@@ -673,6 +674,21 @@ class EngineService:
tasks = self.resource_manager.schedule()
# 3. Send to engine
if tasks:
if self.cfg.scheduler_config.splitwise_role == "decode":
for task in tasks:
if task.task_type == RequestType.PREEMPTED:
msg = f"{task.request_id} decode not enough blocks, need to be rescheduled."
self.llm_logger.error(msg)
self.scheduler.put_results(
[
RequestOutput(
request_id=task.request_id,
finished=True,
error_code=500,
error_msg=msg,
)
]
)
self.resource_manager.get_real_bsz()
self.engine_worker_queue.put_tasks((tasks, self.resource_manager.real_bsz))
else: