[Feature] support chunked moe (#4575)

* [Feature] support chunked moe

* update

* update

* fix and add test

* update

* fix conflict and modify test

* fix fused_moe

* fix fused_moe

* fix docstring

* fix

* fix typo

* fix test

* fix

* fix

* fix test

* fix test
This commit is contained in:
Longzhi Wang
2025-12-01 15:17:18 +08:00
committed by GitHub
parent 6f42c37359
commit add524d80c
10 changed files with 405 additions and 5 deletions

View File

@@ -544,6 +544,7 @@ class LLMEngine:
f" --splitwise_role {self.cfg.scheduler_config.splitwise_role}"
f" --kv_cache_ratio {self.cfg.cache_config.kv_cache_ratio}"
f" --expert_parallel_size {self.cfg.parallel_config.expert_parallel_size}"
f" --chunked_moe_size {self.cfg.parallel_config.chunked_moe_size}"
f" --data_parallel_size {self.cfg.parallel_config.data_parallel_size}"
f" --quantization '{json.dumps(self.cfg.model_config.quantization)}'"
f" --ori_vocab_size {ori_vocab_size}"
@@ -573,6 +574,7 @@ class LLMEngine:
worker_store_true_flag = {
"enable_expert_parallel": self.cfg.parallel_config.enable_expert_parallel,
"enable_chunked_moe": self.cfg.parallel_config.enable_chunked_moe,
"enable_prefix_caching": self.cfg.cache_config.enable_prefix_caching,
"enable_chunked_prefill": self.cfg.cache_config.enable_chunked_prefill,
"do_profile": self.do_profile,