[LLM] support multi node deploy (#2708)

* [LLM] support multi node deploy

* Update engine.py

* fix bugs

* fix

* [LLM] support multi node deploy

* [LLM] support multi node deploy

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Author: ltd0924
Date: 2025-07-06 10:33:51 +08:00
Committed by: GitHub
Parent: 04a8e1ef2b
Commit: 68b4755587
13 changed files with 157 additions and 87 deletions


@@ -37,6 +37,7 @@ class CacheMessager(object):
     def __init__(self,
                  splitwise_role,
                  transfer_protocol,
+                 pod_ip,
                  engine_worker_queue_port,
                  local_data_parallel_id,
                  gpu_cache_kvs,
@@ -69,7 +70,7 @@ class CacheMessager(object):
         self.gpu_cache_kvs = gpu_cache_kvs
         self.rank = rank
         self.nranks = nranks
-        address = ('0.0.0.0', engine_worker_queue_port)
+        address = (pod_ip, engine_worker_queue_port)
         self.engine_worker_queue = EngineWorkerQueue(
             address=address,
             is_server=False,
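
The point of this hunk: CacheMessager joins the engine worker queue as a client (is_server=False), and a client cannot dial the wildcard address '0.0.0.0'; that address is only meaningful when binding a listener. Passing the new pod_ip argument through the constructor lets workers running on other nodes reach the engine pod. Below is a minimal sketch of that bind-vs-connect distinction using Python's multiprocessing manager as a stand-in; the names serve, connect, and WorkerQueueManager are illustrative and not part of the project's EngineWorkerQueue API.

# Minimal sketch of the bind-vs-connect distinction behind this change.
# The queue manager below is a stand-in for illustration only, not the
# project's EngineWorkerQueue.
import queue
from multiprocessing.managers import BaseManager

_task_queue = queue.Queue()


class WorkerQueueManager(BaseManager):
    """Illustrative manager exposing one shared task queue."""


WorkerQueueManager.register("get_queue", callable=lambda: _task_queue)


def serve(port: int, authkey: bytes = b"demo") -> None:
    # Server side: binding to the wildcard address is fine, because
    # "0.0.0.0" means "listen on every local interface".
    manager = WorkerQueueManager(address=("0.0.0.0", port), authkey=authkey)
    manager.get_server().serve_forever()


def connect(pod_ip: str, port: int, authkey: bytes = b"demo"):
    # Client side (the is_server=False case above): "0.0.0.0" is not a
    # routable destination, so a worker on another node needs the engine
    # pod's real IP, which is the role of the new pod_ip argument.
    manager = WorkerQueueManager(address=(pod_ip, port), authkey=authkey)
    manager.connect()
    return manager.get_queue()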