mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[LLM] support multi node deploy (#2708)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* [LLM] support multi node deploy
* Update engine.py
* fix bugs
* fix
* [LLM] support multi node deploy
* [LLM] support multi node deploy

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -85,10 +85,16 @@ class LLM:
|
||||
|
||||
self.mutex = threading.Lock()
|
||||
self.req_output = dict()
|
||||
|
||||
self.master_node_ip = self.llm_engine.cfg.pod_ips[0]
|
||||
self._receive_output_thread = threading.Thread(
|
||||
target=self._receive_output, daemon=True)
|
||||
self._receive_output_thread.start()
|
||||
|
||||
def _check_master(self):
    """Return whether this node is the master node of the deployment.

    The decision is delegated to the engine configuration, which knows
    the pod/IP layout of the multi-node cluster.
    """
    config = self.llm_engine.cfg
    return config._check_master()
|
||||
|
||||
def _receive_output(self):
|
||||
"""
|
||||
@@ -130,6 +136,10 @@ class LLM:
|
||||
Union[str, list[str]]: The generated response.
|
||||
"""
|
||||
|
||||
if not self._check_master():
|
||||
err_msg = f"Only master node can accept completion request, please send request to master node: {self.master_node_ip}"
|
||||
raise ValueError(err_msg)
|
||||
|
||||
if sampling_params is None:
|
||||
sampling_params = self.default_sampling_params
|
||||
|
||||
@@ -182,6 +192,11 @@ class LLM:
|
||||
Returns:
|
||||
Union[str, list[str]]: The generated response.
|
||||
"""
|
||||
|
||||
if not self._check_master():
|
||||
err_msg = f"Only master node can accept completion request, please send request to master node: {self.master_node_ip}"
|
||||
raise ValueError(err_msg)
|
||||
|
||||
if sampling_params is None:
|
||||
sampling_params = self.default_sampling_params
|
||||
|
||||
|
Reference in New Issue
Block a user