diff --git a/fastdeploy/config.py b/fastdeploy/config.py
index 43dfe15f1..f95ea5831 100644
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -1284,10 +1284,6 @@ class FDConfig:
                 f"be less than or equal to max_num_partial_prefills: {self.max_num_partial_prefills}"
             )
         assert self.splitwise_role in ["mixed", "prefill", "decode"]
-        # TODO(@wufeisheng): TP and EP need to be supported simultaneously.
-        assert (self.parallel_config.tensor_parallel_size == 1 and self.parallel_config.expert_parallel_size >= 1) or (
-            self.parallel_config.tensor_parallel_size >= 1 and self.parallel_config.expert_parallel_size == 1
-        ), "TP and EP cannot be enabled at the same time"

         if not self.cache_config.enable_chunked_prefill:
             if not envs.ENABLE_V1_KVCACHE_SCHEDULER:
diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py
index 258614176..8ba805023 100644
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -655,7 +655,9 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
    num_experts_per_rank = num_experts // parallel_config.expert_parallel_size
    num_experts_start_offset = expert_parallel_rank * num_experts_per_rank
    max_chips_per_node = 16 if current_platform.is_iluvatar() else 8
-    parallel_config.local_data_parallel_id = expert_parallel_rank % max_chips_per_node
+    parallel_config.local_data_parallel_id = parallel_config.data_parallel_rank % (
+        max_chips_per_node // parallel_config.tensor_parallel_size
+    )
     parallel_config.expert_parallel_rank = expert_parallel_rank
     parallel_config.num_experts_per_rank = num_experts_per_rank
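
For context, a minimal standalone sketch (not part of the patch) of the arithmetic behind the new `local_data_parallel_id` mapping. The concrete rank values, and the assumption that `data_parallel_rank` advances together with `expert_parallel_rank`, are illustrative only:

```python
# Illustrative sketch of the local_data_parallel_id change; not part of the patch.
# Assumption: 8 chips per node (the non-Iluvatar default in the patch), TP=2,
# and data_parallel_rank tracking expert_parallel_rank across 16 ranks.

max_chips_per_node = 8
tensor_parallel_size = 2

for rank in range(16):
    expert_parallel_rank = rank
    data_parallel_rank = rank  # illustrative assumption, see lead-in

    old_id = expert_parallel_rank % max_chips_per_node
    new_id = data_parallel_rank % (max_chips_per_node // tensor_parallel_size)
    print(f"rank={rank:2d}  old local_dp_id={old_id}  new local_dp_id={new_id}")

# With TP=2 only max_chips_per_node // tensor_parallel_size == 4 data-parallel
# groups fit on one node, so the new formula keeps local_data_parallel_id in
# [0, 3], whereas the old expert-rank based formula could yield values up to 7.
```

The sketch shows why the old modulo by `max_chips_per_node` only made sense when TP and EP were mutually exclusive: once tensor parallelism shares the node's chips, the number of local data-parallel groups shrinks by a factor of `tensor_parallel_size`, which is exactly what the new divisor accounts for.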