[LLM] support multi-node deploy (#2708)
Some checks failed
Deploy GitHub Pages / deploy (push) has been cancelled

* [LLM] support multi node deploy

* Update engine.py

* fix bugs

* fix

* [LLM] support multi node deploy

* [LLM] support multi node deploy

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
ltd0924
2025-07-06 10:33:51 +08:00
committed by GitHub
parent 04a8e1ef2b
commit 68b4755587
13 changed files with 157 additions and 87 deletions

View File

@@ -85,10 +85,16 @@ class LLM:
self.mutex = threading.Lock()
self.req_output = dict()
self.master_node_ip = self.llm_engine.cfg.pod_ips[0]
self._receive_output_thread = threading.Thread(
target=self._receive_output, daemon=True)
self._receive_output_thread.start()
def _check_master(self):
"""
Check if the current node is the master node.
"""
return self.llm_engine.cfg._check_master()
def _receive_output(self):
"""
@@ -130,6 +136,10 @@ class LLM:
Union[str, list[str]]: The generated response.
"""
if not self._check_master():
err_msg = f"Only master node can accept completion request, please send request to master node: {self.master_node_ip}"
raise ValueError(err_msg)
if sampling_params is None:
sampling_params = self.default_sampling_params
@@ -182,6 +192,11 @@ class LLM:
Returns:
Union[str, list[str]]: The generated response.
"""
if not self._check_master():
err_msg = f"Only master node can accept completion request, please send request to master node: {self.master_node_ip}"
raise ValueError(err_msg)
if sampling_params is None:
sampling_params = self.default_sampling_params