mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-25 01:20:43 +08:00
fix DP&&TP (#3872)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
This commit is contained in:
@@ -1284,10 +1284,6 @@ class FDConfig:
 f"be less than or equal to max_num_partial_prefills: {self.max_num_partial_prefills}"
 )
 assert self.splitwise_role in ["mixed", "prefill", "decode"]
-# TODO(@wufeisheng): TP and EP need to be supported simultaneously.
-assert (self.parallel_config.tensor_parallel_size == 1 and self.parallel_config.expert_parallel_size >= 1) or (
-self.parallel_config.tensor_parallel_size >= 1 and self.parallel_config.expert_parallel_size == 1
-), "TP and EP cannot be enabled at the same time"
 
 if not self.cache_config.enable_chunked_prefill:
 if not envs.ENABLE_V1_KVCACHE_SCHEDULER:
@@ -655,7 +655,9 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
 num_experts_per_rank = num_experts // parallel_config.expert_parallel_size
 num_experts_start_offset = expert_parallel_rank * num_experts_per_rank
 max_chips_per_node = 16 if current_platform.is_iluvatar() else 8
-parallel_config.local_data_parallel_id = expert_parallel_rank % max_chips_per_node
+parallel_config.local_data_parallel_id = parallel_config.data_parallel_rank % (
+max_chips_per_node // parallel_config.tensor_parallel_size
+)
 
 parallel_config.expert_parallel_rank = expert_parallel_rank
 parallel_config.num_experts_per_rank = num_experts_per_rank
Reference in New Issue
Block a user