mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-08 01:50:27 +08:00
[NewFeature] add ep rollout model init and update/clear ep buffer (#3927)
* add ep rollout model init && add deep update/clear
* fix test
This commit is contained in:
@@ -24,13 +24,13 @@ class RolloutModelConfig:
|
||||
max_model_len: int = 32768,
|
||||
tensor_parallel_size: int = 4,
|
||||
dynamic_load_weight: bool = True,
|
||||
load_strategy: str = "ipc_snapshot",
|
||||
load_strategy: str = "meta",
|
||||
enable_mm: bool = False,
|
||||
# Default values for all other parameters
|
||||
max_num_seqs: int = 34,
|
||||
total_block_num: int = 2000,
|
||||
block_size: int = 64,
|
||||
engine_worker_queue_port: int = 9923,
|
||||
engine_worker_queue_port: str = "8002",
|
||||
device_ids: str = "0",
|
||||
dtype: str = "bfloat16",
|
||||
enc_dec_block_num: int = 1,
|
||||
|
Reference in New Issue
Block a user