mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
supports internode_ll_two_stage (#4162)
* supports internode_ll_two_stage * supports internode_ll_two_stage * supports internode_ll_two_stage * supports internode_ll_two_stage * supports D internode_ll_two_stage * fix code style * fix XPU internode_ll_two_stage * fix XPU internode_ll_two_stage
This commit is contained in:
@@ -237,6 +237,11 @@ class EngineArgs:
|
||||
Flag to enable the custom all-reduce kernel.
|
||||
"""
|
||||
|
||||
use_internode_ll_two_stage: bool = False
|
||||
"""
|
||||
Flag to use the internode_ll_two_stage kernel.
|
||||
"""
|
||||
|
||||
engine_worker_queue_port: str = "0"
|
||||
"""
|
||||
Port for worker queue communication.
|
||||
@@ -721,6 +726,12 @@ class EngineArgs:
|
||||
default=EngineArgs.disable_custom_all_reduce,
|
||||
help="Flag to disable custom all-reduce.",
|
||||
)
|
||||
parallel_group.add_argument(
|
||||
"--use-internode-ll-two-stage",
|
||||
action="store_true",
|
||||
default=EngineArgs.use_internode_ll_two_stage,
|
||||
help="Flag to use the internode_ll_two_stage kernel.",
|
||||
)
|
||||
parallel_group.add_argument(
|
||||
"--max-num-seqs",
|
||||
type=int,
|
||||
|
||||
Reference in New Issue
Block a user