From 7dfdd157ac00b57a2527126fd66f56fb4340d8a0 Mon Sep 17 00:00:00 2001
From: YuanRisheng
Date: Wed, 30 Jul 2025 21:03:12 +0800
Subject: [PATCH] [BugFix]Fix ep size (#3092)

* fix ep

* fix num_layer
---
 fastdeploy/cache_manager/prefix_cache_manager.py | 2 +-
 fastdeploy/config.py                             | 2 ++
 fastdeploy/engine/engine.py                      | 1 +
 fastdeploy/worker/worker_process.py              | 6 ++++++
 4 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/cache_manager/prefix_cache_manager.py b/fastdeploy/cache_manager/prefix_cache_manager.py
index b403d3944..dd191c87f 100644
--- a/fastdeploy/cache_manager/prefix_cache_manager.py
+++ b/fastdeploy/cache_manager/prefix_cache_manager.py
@@ -168,7 +168,7 @@ class PrefixCacheManager:
                 + f" --device_id {int(device_ids[i])}"
                 + f" --rank {i}"
                 + f" --splitwise_role {self.splitwise_role}"
-                + f" --num_layers {cache_config.model_cfg.num_layers}"
+                + f" --num_layers {cache_config.model_cfg.num_hidden_layers}"
                 + f" --head_dim {cache_config.model_cfg.head_dim}"
                 + f" --kv_num_head {kv_num_head}"
                 + f" --mp_num {tensor_parallel_size}"
diff --git a/fastdeploy/config.py b/fastdeploy/config.py
index a4f4c307d..6e27196f6 100644
--- a/fastdeploy/config.py
+++ b/fastdeploy/config.py
@@ -270,6 +270,8 @@ class ParallelConfig:
             if hasattr(self, key):
                 setattr(self, key, value)
 
+        # currently, the expert parallel size is equal data parallel size
+        self.expert_parallel_size = self.data_parallel_size
         self.use_ep = self.expert_parallel_size > 1
         if self.splitwise_role == "mixed":
             self.moe_phase = MoEPhase(phase="prefill")
diff --git a/fastdeploy/engine/engine.py b/fastdeploy/engine/engine.py
index 2135196f0..9ddf0cbf7 100644
--- a/fastdeploy/engine/engine.py
+++ b/fastdeploy/engine/engine.py
@@ -1082,6 +1082,7 @@ class LLMEngine:
             f" --splitwise_role {self.cfg.splitwise_role}"
             f" --kv_cache_ratio {self.cfg.cache_config.kv_cache_ratio}"
             f" --expert_parallel_size {self.cfg.parallel_config.expert_parallel_size}"
+            f" --data_parallel_size {self.cfg.parallel_config.data_parallel_size}"
             f" --quantization {self.cfg.model_config.quantization}"
             f" --ori_vocab_size {ori_vocab_size}"
             f" --speculative_config '{self.cfg.speculative_config.to_json_string()}'"
diff --git a/fastdeploy/worker/worker_process.py b/fastdeploy/worker/worker_process.py
index f29611c8c..54f7019c8 100644
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -513,6 +513,12 @@ def parse_args():
         default=1,
         help="expert parallel size",
     )
+    parser.add_argument(
+        "--data_parallel_size",
+        type=int,
+        default=1,
+        help="data parallel size",
+    )
     parser.add_argument(
         "--enable_expert_parallel",
         action="store_true",