[BUG] Fix bug for pd in fd (#3034)

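* Fix bug for pd in fd: in `CacheMessager`, store `local_data_parallel_id` as `self.dp_rank_id` and use it instead of `self.rank` in the names of the `splitwise_complete_prefilled_step_*` and `splitwise_complete_prefilled_layer_*` IPC signals

* Fix bug for pd in fd: in `RDMACommManager`, pass `gpu_id` instead of `rank` to `rdma_comm.RDMACommunicator`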

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
chenjian
2025-07-31 20:17:27 +08:00
committed by GitHub
parent 1a543bca29
commit fe17410f9c
2 changed files with 6 additions and 5 deletions

View File

@@ -142,6 +142,7 @@ class CacheMessager:
self.gpu_id = gpu_id self.gpu_id = gpu_id
self.cache_info = dict() self.cache_info = dict()
self.dp_rank_id = local_data_parallel_id
layerwise_send_cache_thread = threading.Thread(target=self._prefill_layerwise_send_cache_thread) layerwise_send_cache_thread = threading.Thread(target=self._prefill_layerwise_send_cache_thread)
layerwise_send_cache_thread.daemon = True layerwise_send_cache_thread.daemon = True
@@ -159,14 +160,14 @@ class CacheMessager:
prefilled_layer_idx_data = np.zeros(shape=[1], dtype=np.int32) prefilled_layer_idx_data = np.zeros(shape=[1], dtype=np.int32)
try: try:
step_shm_value = IPCSignal( step_shm_value = IPCSignal(
name=f"splitwise_complete_prefilled_step_{self.rank}", name=f"splitwise_complete_prefilled_step_{self.dp_rank_id}",
array=prefilled_step_idx_data, array=prefilled_step_idx_data,
dtype=np.int32, dtype=np.int32,
suffix=self.gpu_id, suffix=self.gpu_id,
create=True, create=True,
) )
layer_shm_value = IPCSignal( layer_shm_value = IPCSignal(
name=f"splitwise_complete_prefilled_layer_{self.rank}", name=f"splitwise_complete_prefilled_layer_{self.dp_rank_id}",
array=prefilled_layer_idx_data, array=prefilled_layer_idx_data,
dtype=np.int32, dtype=np.int32,
suffix=self.gpu_id, suffix=self.gpu_id,
@@ -174,14 +175,14 @@ class CacheMessager:
) )
except: except:
step_shm_value = IPCSignal( step_shm_value = IPCSignal(
name=f"splitwise_complete_prefilled_step_{self.rank}", name=f"splitwise_complete_prefilled_step_{self.dp_rank_id}",
array=prefilled_step_idx_data, array=prefilled_step_idx_data,
dtype=np.int32, dtype=np.int32,
suffix=self.gpu_id, suffix=self.gpu_id,
create=False, create=False,
) )
layer_shm_value = IPCSignal( layer_shm_value = IPCSignal(
name=f"splitwise_complete_prefilled_layer_{self.rank}", name=f"splitwise_complete_prefilled_layer_{self.dp_rank_id}",
array=prefilled_layer_idx_data, array=prefilled_layer_idx_data,
dtype=np.int32, dtype=np.int32,
suffix=self.gpu_id, suffix=self.gpu_id,

View File

@@ -45,7 +45,7 @@ class RDMACommManager:
return return
self.messager = rdma_comm.RDMACommunicator( self.messager = rdma_comm.RDMACommunicator(
splitwise_role, splitwise_role,
rank, gpu_id,
str(rdma_port) if splitwise_role == "decode" else "0", str(rdma_port) if splitwise_role == "decode" else "0",
cache_k_ptr_list, cache_k_ptr_list,
cache_v_ptr_list, cache_v_ptr_list,