[BugFix]Fix ep size (#3092)

* fix ep

* fix num_layer
Author: YuanRisheng
Date: 2025-07-30 21:03:12 +08:00
Committed by: GitHub
Parent: d17886de19
Commit: 7dfdd157ac
4 changed files with 10 additions and 1 deletion

@@ -168,7 +168,7 @@ class PrefixCacheManager:
             + f" --device_id {int(device_ids[i])}"
             + f" --rank {i}"
             + f" --splitwise_role {self.splitwise_role}"
-            + f" --num_layers {cache_config.model_cfg.num_layers}"
+            + f" --num_layers {cache_config.model_cfg.num_hidden_layers}"
             + f" --head_dim {cache_config.model_cfg.head_dim}"
             + f" --kv_num_head {kv_num_head}"
             + f" --mp_num {tensor_parallel_size}"

@@ -270,6 +270,8 @@ class ParallelConfig:
             if hasattr(self, key):
                 setattr(self, key, value)
+        # currently, the expert parallel size is equal data parallel size
+        self.expert_parallel_size = self.data_parallel_size
         self.use_ep = self.expert_parallel_size > 1
         if self.splitwise_role == "mixed":
             self.moe_phase = MoEPhase(phase="prefill")
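
This is the ep fix itself: after user-supplied arguments are applied, ParallelConfig derives the expert parallel size from the data parallel size, and use_ep turns on whenever that size is greater than 1. A simplified, self-contained sketch of the coupling (field names mirror the diff; the class body is stripped down for illustration):

    class ParallelConfig:
        def __init__(self, args: dict):
            self.data_parallel_size = 1
            self.expert_parallel_size = 1
            for key, value in args.items():
                if hasattr(self, key):
                    setattr(self, key, value)
            # currently, the expert parallel size is equal data parallel size
            self.expert_parallel_size = self.data_parallel_size
            self.use_ep = self.expert_parallel_size > 1

    cfg = ParallelConfig({"data_parallel_size": 4})
    print(cfg.expert_parallel_size, cfg.use_ep)  # 4 True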

@@ -1082,6 +1082,7 @@ class LLMEngine:
             f" --splitwise_role {self.cfg.splitwise_role}"
             f" --kv_cache_ratio {self.cfg.cache_config.kv_cache_ratio}"
             f" --expert_parallel_size {self.cfg.parallel_config.expert_parallel_size}"
+            f" --data_parallel_size {self.cfg.parallel_config.data_parallel_size}"
             f" --quantization {self.cfg.model_config.quantization}"
             f" --ori_vocab_size {ori_vocab_size}"
             f" --speculative_config '{self.cfg.speculative_config.to_json_string()}'"

@@ -513,6 +513,12 @@ def parse_args():
         default=1,
         help="expert parallel size",
     )
+    parser.add_argument(
+        "--data_parallel_size",
+        type=int,
+        default=1,
+        help="data parallel size",
+    )
     parser.add_argument(
         "--enable_expert_parallel",
         action="store_true",