mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[BugFix] [PD Disaggregation] fix v1 scheduler prefill node profile run & ipc transfer protocol (#5132)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
* [fix] fix v1 scheduler profile run for append attention in prefill node
* [fix] skip send_signal if kv signal not inited for gpu and xpu
* [fix] extend fix to flash_attn & mla_attn
* [fix] fix v1 pd run in ipc transfer protocol
* [ci] add test for v1 pd profile run using ipc transfer protocol
* [style] fix code style check
* [style] fix code style again
* [fix] fix profile run
* [update] remove --num-gpu-blocks-override in example script
* [chore] rename forward_meta is_profiling to is_dummy_or_profile_run
This commit is contained in:
@@ -517,18 +517,6 @@ class EngineArgs:
|
||||
f"The number of rdma comm ports must be equal to number of ranks ({self.data_parallel_size=} * {self.tensor_parallel_size=} = {self.data_parallel_size * self.tensor_parallel_size}), but got {len(self.rdma_comm_ports)}."
|
||||
)
|
||||
|
||||
if envs.ENABLE_V1_KVCACHE_SCHEDULER == 1:
|
||||
if "ipc" in self.cache_transfer_protocol:
|
||||
# FIXME: support ipc cache transfer protocol
|
||||
raise NotImplementedError(
|
||||
"only support rdma cache transfer protocol " "when using ENABLE_V1_KVCACHE_SCHEDULER."
|
||||
)
|
||||
# FIXME: fix this bug
|
||||
if self.splitwise_role == "prefill" and self.num_gpu_blocks_override is None:
|
||||
raise NotImplementedError(
|
||||
"please set num_gpu_blocks_override for prefill " "instance using ENABLE_V1_KVCACHE_SCHEDULER."
|
||||
)
|
||||
|
||||
if not current_platform.is_cuda() and not current_platform.is_xpu():
|
||||
envs.ENABLE_V1_KVCACHE_SCHEDULER = 0
|
||||
if self.guided_decoding_backend != "off":
|
||||
|
||||
Reference in New Issue
Block a user