diff --git a/fastdeploy/engine/common_engine.py b/fastdeploy/engine/common_engine.py index 2f53788c2..9c8fe710e 100644 --- a/fastdeploy/engine/common_engine.py +++ b/fastdeploy/engine/common_engine.py @@ -77,6 +77,7 @@ class EngineService: self.llm_logger = llm_logger self.scheduler = cfg.scheduler_config.scheduler() + self.enable_decode_cache_task = envs.FD_ENABLE_CACHE_TASK == "1" if envs.ENABLE_V1_KVCACHE_SCHEDULER: self.resource_manager = ResourceManagerV1( @@ -623,7 +624,7 @@ class EngineService: for tmp_task in need_delete_tasks: tasks.remove(tmp_task) # release resource in P - self.resource_manager.prerelease_resource(task) + self.resource_manager.prerelease_resource(tmp_task) if self.cfg.scheduler_config.splitwise_role == "prefill": # to send cache info to cache messager if tasks: diff --git a/fastdeploy/splitwise/splitwise_connector.py b/fastdeploy/splitwise/splitwise_connector.py index e87520d0d..62d33f433 100644 --- a/fastdeploy/splitwise/splitwise_connector.py +++ b/fastdeploy/splitwise/splitwise_connector.py @@ -387,14 +387,20 @@ class SplitwiseConnector: f"{tasks[i].disaggregate_info['cache_info']['rdma']['ip']}:" + f"{tasks[i].disaggregate_info['cache_info']['rdma']['port']}" ) - cache_info = { - "request_id": tasks[i].request_id, - "device_ids": self.cfg.device_ids.split(","), - "ip": self.cfg.host_ip, - "rdma_ports": self.cfg.disaggregate_info["cache_info"]["rdma"]["rdma_port"], - "transfer_protocol": "rdma", - "dest_block_ids": tasks[i].disaggregate_info["block_tables"], - } + if tasks[i].get("error_msg", None) is not None: + cache_info = { + "request_id": tasks[i].request_id, + "error_msg": tasks[i].get("error_msg"), + } + else: + cache_info = { + "request_id": tasks[i].request_id, + "device_ids": self.cfg.device_ids.split(","), + "ip": self.cfg.host_ip, + "rdma_ports": self.cfg.disaggregate_info["cache_info"]["rdma"]["rdma_port"], + "transfer_protocol": "rdma", + "dest_block_ids": tasks[i].disaggregate_info["block_tables"], + } if addr not in temp_cache_info: temp_cache_info[addr] = []