[Feature] support chunked moe (#4575)

* [Feature] support chunked moe

* update

* update

* fix and add test

* update

* fix conflict and modify test

* fix fused_moe

* fix fused_moe

* fix docstring

* fix

* fix typo

* fix test

* fix

* fix

* fix test

* fix test
This commit is contained in:
Longzhi Wang
2025-12-01 15:17:18 +08:00
committed by GitHub
parent 6f42c37359
commit add524d80c
10 changed files with 405 additions and 5 deletions

View File

@@ -286,6 +286,16 @@ class EngineArgs:
Enable expert parallelism.
"""
enable_chunked_moe: bool = False
"""
Whether to use chunked moe.
"""
chunked_moe_size: int = 256
"""
Chunk size of moe input.
"""
cache_transfer_protocol: str = "ipc"
"""
Protocol to use for cache transfer.
@@ -870,6 +880,18 @@ class EngineArgs:
default=EngineArgs.eplb_config,
help="Config of eplb.",
)
parallel_group.add_argument(
"--enable-chunked-moe",
action="store_true",
default=EngineArgs.enable_chunked_moe,
help="Use chunked moe.",
)
parallel_group.add_argument(
"--chunked-moe-size",
type=int,
default=EngineArgs.chunked_moe_size,
help="Chunked size of moe input.",
)
# Load group
load_group = parser.add_argument_group("Load Configuration")