mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Feature] support chunked moe (#4575)

* [Feature] support chunked moe
* update
* update
* fix and add test
* update
* fix conflict and modify test
* fix fused_moe
* fix fused_moe
* fix docstring
* fix
* fix typo
* fix test
* fix
* fix
* fix test
* fix test
This commit is contained in:
@@ -286,6 +286,16 @@ class EngineArgs:
    Enable expert parallelism.
    """

    enable_chunked_moe: bool = False
    """
    Whether use chunked moe.
    """

    chunked_moe_size: int = 256
    """
    Chunk size of moe input.
    """

    cache_transfer_protocol: str = "ipc"
    """
    Protocol to use for cache transfer.
@@ -870,6 +880,18 @@ class EngineArgs:
            default=EngineArgs.eplb_config,
            help="Config of eplb.",
        )
        parallel_group.add_argument(
            "--enable-chunked-moe",
            action="store_true",
            default=EngineArgs.enable_chunked_moe,
            help="Use chunked moe.",
        )
        parallel_group.add_argument(
            "--chunked-moe-size",
            type=int,
            default=EngineArgs.chunked_moe_size,
            help="Chunked size of moe input.",
        )

        # Load group
        load_group = parser.add_argument_group("Load Configuration")
Reference in New Issue
Block a user