mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
fix cache messager bug when d restart (#3386)
This commit is contained in:
@@ -61,18 +61,12 @@ class RDMACommManager:
|
||||
Connect to remote gpu and write cache.
|
||||
"""
|
||||
assert self.splitwise_role == "prefill", "only prefill can call this method"
|
||||
addr = f"{ip}:{port!s}"
|
||||
if addr in self.connected_rdma:
|
||||
return True
|
||||
ret = self.messager.is_connected(ip, str(port))
|
||||
if ret:
|
||||
self.connected_rdma.add(addr)
|
||||
return True
|
||||
|
||||
ret = self.messager.connect(ip, str(port))
|
||||
logger.info(f"connect to remote rdma address {ip}:{port} status is {ret}")
|
||||
if ret == 0:
|
||||
self.connected_rdma.add(addr)
|
||||
return ret == 0
|
||||
|
||||
def write_cache(self, ip, port, local_block_ids, remote_block_ids, layer_idx):
|
||||
|
@@ -265,13 +265,13 @@ class TokenProcessor:
|
||||
llm_logger.info(f"finished_task_id: {finished_task_id}")
|
||||
self.prefill_result_status[finished_task_id[0]] = finished_task_id[1]
|
||||
if task_id in self.prefill_result_status:
|
||||
self.split_connector.send_first_token(task.disaggregate_info, [result])
|
||||
self.resource_manager.stop_flags[index] = True
|
||||
self.resource_manager.tasks_list[index] = None
|
||||
self.resource_manager._recycle_block_tables(task)
|
||||
if self.prefill_result_status[task_id] != "finished":
|
||||
result.error_code = 400
|
||||
result.error_message = f"{task_id} failed to {self.prefill_result_status[task_id]}"
|
||||
self.split_connector.send_first_token(task.disaggregate_info, [result])
|
||||
del self.resource_manager.req_dict[task_id]
|
||||
break
|
||||
else:
|
||||
|
Reference in New Issue
Block a user