[Sync Code] develop to release/2.0.3 (#2873)

* [LLM] support send batch data and aggregate data (#2860)

* [LLM] support send batch data and aggregate data

* [LLM] fix ci bugs

* [LLM] fix ci bugs

* [LLM] fix ci bugs

* [LLM] fix ci bugs

* [LLM] update

* [LLM] Update Multinode Deployment (#2830)

* [LLM] fix multinode bugs

* [LLM] update multinode deployment

* [LLM] update multinode deployment

* [LLM] update multinode deployment

* [LLM] update multinode deployment

* [LLM] update multinode deployment

* [LLM] fix ci bugs

* Update fastdeploy/engine/args_utils.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* [LLM] update random port

* [LLM] update random port

* [LLM] fix ci bugs

* fix ci bugs

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

---------

Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
Jiang-Jia-Jun
2025-07-16 23:44:26 +08:00
committed by GitHub
parent 63d6e7ce06
commit 09d0073fdc
18 changed files with 375 additions and 264 deletions

View File

@@ -124,9 +124,19 @@ class EngineArgs:
Ratio of tokens to process in a block.
"""
pod_ips: Optional[List[str]] = None
dist_init_ip: Optional[str] = None
"""
List of IP addresses for nodes in the cluster.
The IP address of the master node in a multinode deployment.
"""
nnodes: int = 1
"""
The number of nodes in multinode deployment
"""
node_rank: int = 0
"""
The rank of the current node in multinode deployment
"""
swap_space: float = None
@@ -485,11 +495,25 @@ class EngineArgs:
# Cluster system parameters group
system_group = parser.add_argument_group("System Configuration")
system_group.add_argument(
"--pod-ips",
type=lambda s: s.split(",") if s else None,
default=EngineArgs.pod_ips,
"--dist-init-ip",
default=EngineArgs.dist_init_ip,
help=
"List of IP addresses for nodes in the cluster (comma-separated).")
"IP addresses of master node.")
system_group.add_argument(
"--nnodes",
type=int,
default=EngineArgs.nnodes,
help=
"The number of all nodes.")
system_group.add_argument(
"--node-rank",
type=int,
default=EngineArgs.node_rank,
help=
"node rank id (range [0, nnodes)).")
# Performance tuning parameters group
@@ -789,7 +813,9 @@ class EngineArgs:
max_num_seqs=self.max_num_seqs,
speculative_config=speculative_cfg,
max_num_batched_tokens=self.max_num_batched_tokens,
pod_ips=self.pod_ips,
dist_init_ip=self.dist_init_ip,
nnodes=self.nnodes,
node_rank=self.node_rank,
use_warmup=self.use_warmup,
engine_worker_queue_port=self.engine_worker_queue_port,
limit_mm_per_prompt=self.limit_mm_per_prompt,