mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[NewFeature] add ep rollout model init and update/clear ep buffer (#4039)
* fix gid * merge * fix test * fix bug * fix * fix ci
This commit is contained in:
@@ -26,13 +26,13 @@ class RolloutModelConfig:
|
||||
max_model_len: int = 32768,
|
||||
tensor_parallel_size: int = 4,
|
||||
dynamic_load_weight: bool = True,
|
||||
load_strategy: str = "ipc_snapshot",
|
||||
load_strategy: str = "meta",
|
||||
enable_mm: bool = False,
|
||||
# Default values for all other parameters
|
||||
max_num_seqs: int = 34,
|
||||
total_block_num: int = 2000,
|
||||
block_size: int = 64,
|
||||
engine_worker_queue_port: int = 9923,
|
||||
engine_worker_queue_port: str = "8002",
|
||||
device_ids: str = "0",
|
||||
dtype: str = "bfloat16",
|
||||
enc_dec_block_num: int = 1,
|
||||
|
Reference in New Issue
Block a user