mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Feature] support eplb in api_server (#4782)
* support eplb in api_server * update code * add eplb test case * update eplb * support tp+dp eplb * update test case * update code * update code * fix bug * update copilot review * update test case name
This commit is contained in:
@@ -64,6 +64,7 @@ class RolloutModelConfig:
|
||||
plas_attention_config: str = None,
|
||||
data_parallel_size: int = 1,
|
||||
num_nextn_predict_layers: int = 0,
|
||||
eplb_config: str = {},
|
||||
):
|
||||
# Required parameters
|
||||
self.model = model_name_or_path
|
||||
@@ -111,6 +112,7 @@ class RolloutModelConfig:
|
||||
self.ips = None
|
||||
self.plas_attention_config = plas_attention_config
|
||||
self.num_nextn_predict_layers = num_nextn_predict_layers
|
||||
self.eplb_config = eplb_config
|
||||
|
||||
# Build a human-readable dump of this object: one "name: value" line per entry
# in self.__dict__, joined with newlines, in the order __dict__ yields them.
def __str__(self):
|
||||
return "\n".join(f"{k}: {v}" for k, v in self.__dict__.items())
|
||||
|
||||
Reference in New Issue
Block a user