[BugFix] Fix ep size (#3092)

* fix ep

* fix num_layer
Author: YuanRisheng
Date: 2025-07-30 21:03:12 +08:00
Committed by: GitHub
Parent: d17886de19
Commit: 7dfdd157ac

4 changed files with 10 additions and 1 deletion

@@ -168,7 +168,7 @@ class PrefixCacheManager:
                 + f" --device_id {int(device_ids[i])}"
                 + f" --rank {i}"
                 + f" --splitwise_role {self.splitwise_role}"
-                + f" --num_layers {cache_config.model_cfg.num_layers}"
+                + f" --num_layers {cache_config.model_cfg.num_hidden_layers}"
                 + f" --head_dim {cache_config.model_cfg.head_dim}"
                 + f" --kv_num_head {kv_num_head}"
                 + f" --mp_num {tensor_parallel_size}"

@@ -270,6 +270,8 @@ class ParallelConfig:
             if hasattr(self, key):
                 setattr(self, key, value)
+        # currently, the expert parallel size is equal data parallel size
+        self.expert_parallel_size = self.data_parallel_size
         self.use_ep = self.expert_parallel_size > 1
         if self.splitwise_role == "mixed":
             self.moe_phase = MoEPhase(phase="prefill")
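
Note on the ParallelConfig change above: expert_parallel_size is no longer taken from the incoming arguments on its own; it is overwritten with data_parallel_size, so use_ep switches on whenever the data parallel size is greater than 1. A minimal sketch of that behavior, assuming a trimmed-down config class with only the relevant fields:

    class ParallelConfigSketch:  # simplified stand-in, not the real ParallelConfig
        def __init__(self, **kwargs):
            self.data_parallel_size = 1
            self.expert_parallel_size = 1
            for key, value in kwargs.items():
                if hasattr(self, key):
                    setattr(self, key, value)
            # expert parallel size is tied to data parallel size
            self.expert_parallel_size = self.data_parallel_size
            self.use_ep = self.expert_parallel_size > 1

    cfg = ParallelConfigSketch(data_parallel_size=4, expert_parallel_size=1)
    assert cfg.expert_parallel_size == 4 and cfg.use_ep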

@@ -1082,6 +1082,7 @@ class LLMEngine:
             f" --splitwise_role {self.cfg.splitwise_role}"
             f" --kv_cache_ratio {self.cfg.cache_config.kv_cache_ratio}"
             f" --expert_parallel_size {self.cfg.parallel_config.expert_parallel_size}"
+            f" --data_parallel_size {self.cfg.parallel_config.data_parallel_size}"
             f" --quantization {self.cfg.model_config.quantization}"
             f" --ori_vocab_size {ori_vocab_size}"
             f" --speculative_config '{self.cfg.speculative_config.to_json_string()}'"

@@ -513,6 +513,12 @@ def parse_args():
         default=1,
         help="expert parallel size",
     )
+    parser.add_argument(
+        "--data_parallel_size",
+        type=int,
+        default=1,
+        help="data parallel size",
+    )
     parser.add_argument(
         "--enable_expert_parallel",
         action="store_true",