Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[FDConfig] Remove max_num_batched_tokens/max_num_seqs in parallel config (#4116)
* Remove max_num_batched_tokens from the parallel config
* Remove max_num_seqs
* Update test cases
* Fix tests

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
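The commit moves the batching limits out of ParallelConfig. Below is a minimal, self-contained sketch of the resulting layout, assuming max_num_batched_tokens and max_num_seqs now live on SchedulerConfig; the field placement and defaults are assumptions based on the commit title and the scheduler_config wiring in the diff, not FastDeploy's actual classes:

from dataclasses import dataclass

@dataclass
class ParallelConfig:
    # After the change, ParallelConfig only describes the parallel layout.
    tensor_parallel_size: int = 1
    data_parallel_size: int = 1

@dataclass
class SchedulerConfig:
    # Assumed new owner of the batching limits removed from ParallelConfig.
    max_num_batched_tokens: int = 8192
    max_num_seqs: int = 256

@dataclass
class FDConfig:
    parallel_config: ParallelConfig
    scheduler_config: SchedulerConfig

cfg = FDConfig(ParallelConfig(), SchedulerConfig())
# Callers read the limits from scheduler_config rather than parallel_config:
print(cfg.scheduler_config.max_num_batched_tokens, cfg.scheduler_config.max_num_seqs)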
@@ -44,6 +44,7 @@ from fastdeploy.inter_communicator import EngineWorkerQueue as TaskQueue
 from fastdeploy.inter_communicator import IPCSignal
 from fastdeploy.model_executor.layers.quantization import parse_quant_config
 from fastdeploy.platforms import current_platform
+from fastdeploy.scheduler import SchedulerConfig
 from fastdeploy.utils import get_logger
 from fastdeploy.worker.worker_base import WorkerBase

@@ -662,6 +663,7 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
     speculative_config = SpeculativeConfig(args.speculative_config)
     parallel_config = ParallelConfig(vars(args))
     cache_config = CacheConfig(vars(args))
+    scheduler_config = SchedulerConfig(vars(args))
     parallel_config.tensor_parallel_rank = local_rank % parallel_config.tensor_parallel_size
     parallel_config.data_parallel_rank = local_rank // parallel_config.tensor_parallel_size
     # config for EP
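The hunk above builds the per-section configs from vars(args) and then maps the flat local_rank onto tensor-parallel and data-parallel coordinates. A standalone illustration of that rank arithmetic; the 2 x 4 layout is made up for the example and is not taken from the diff:

# tensor_parallel_rank = local_rank % tensor_parallel_size
# data_parallel_rank   = local_rank // tensor_parallel_size
tensor_parallel_size = 4   # illustrative size
data_parallel_size = 2     # illustrative size

for local_rank in range(data_parallel_size * tensor_parallel_size):
    tp_rank = local_rank % tensor_parallel_size
    dp_rank = local_rank // tensor_parallel_size
    print(f"local_rank={local_rank} -> dp_rank={dp_rank}, tp_rank={tp_rank}")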
@@ -758,6 +760,7 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
         graph_opt_config=graph_opt_config,
         early_stop_config=early_stop_config,
         cache_config=cache_config,
+        scheduler_config=scheduler_config,
         engine_worker_queue_port=args.engine_worker_queue_port,
         ips=args.ips,
         moba_attention_config=moba_attention_config,
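This last hunk threads the new scheduler_config keyword into the FDConfig constructor. A rough sketch of the SchedulerConfig(vars(args)) construction pattern seen above, driven by a plain argparse namespace; the class body and defaults are illustrative, not FastDeploy's implementation:

import argparse

class SchedulerConfig:
    def __init__(self, args: dict):
        # Pick out the fields this config owns from the full argument dict,
        # falling back to defaults when a key is absent (assumed behaviour).
        self.max_num_batched_tokens = args.get("max_num_batched_tokens", 8192)
        self.max_num_seqs = args.get("max_num_seqs", 256)

parser = argparse.ArgumentParser()
parser.add_argument("--max_num_batched_tokens", type=int, default=2048)
parser.add_argument("--max_num_seqs", type=int, default=64)
ns = parser.parse_args([])

scheduler_config = SchedulerConfig(vars(ns))
print(scheduler_config.max_num_batched_tokens, scheduler_config.max_num_seqs)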