Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-11-01 12:22:53 +08:00)
[BugFix] fix config bugs (#4370)
* Update expert_service.py
* Update common_engine.py
* Update expert_service.py
* Update expert_service.py
* Update expert_service.py

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
This commit is contained in:
@@ -68,12 +68,13 @@ class EngineService:
|
||||
cfg (Config): Config object containing all the configuration parameters.
|
||||
"""
|
||||
self.cfg = cfg
|
||||
if isinstance(self.cfg.cache_config.cache_queue_port, str):
|
||||
self.cfg.cache_config.cache_queue_port = self.cfg.cache_config.cache_queue_port.split(",")
|
||||
if isinstance(self.cfg.cache_config.cache_queue_port, list):
|
||||
self.cfg.cache_config.cache_queue_port = int(
|
||||
self.cfg.cache_config.cache_queue_port[self.cfg.parallel_config.local_data_parallel_id]
|
||||
)
|
||||
if cfg.scheduler_config.splitwise_role != "mixed" or cfg.cache_config.enable_prefix_caching:
|
||||
if isinstance(self.cfg.cache_config.cache_queue_port, str):
|
||||
self.cfg.cache_config.cache_queue_port = self.cfg.cache_config.cache_queue_port.split(",")
|
||||
if isinstance(self.cfg.cache_config.cache_queue_port, list):
|
||||
self.cfg.cache_config.cache_queue_port = int(
|
||||
self.cfg.cache_config.cache_queue_port[self.cfg.parallel_config.local_data_parallel_id]
|
||||
)
|
||||
|
||||
if self.cfg.parallel_config.enable_expert_parallel:
|
||||
self.llm_logger = get_logger(
|
||||
|
||||
@@ -103,29 +103,6 @@ class ExpertService:
|
||||
|
||||
llm_logger.info(f"start expert service {local_data_parallel_id}")
|
||||
|
||||
if self.cfg.scheduler_config.splitwise_role != "mixed" or self.cfg.cache_config.enable_prefix_caching:
|
||||
if self.do_profile:
|
||||
get_profile_block_num = np.zeros([1], dtype=np.int32)
|
||||
while True:
|
||||
try:
|
||||
self.get_profile_block_num_signal = IPCSignal(
|
||||
name="get_profile_block_num",
|
||||
array=get_profile_block_num,
|
||||
dtype=np.int32,
|
||||
suffix=int(self.cfg.engine_worker_queue_port[0]),
|
||||
create=False,
|
||||
)
|
||||
break
|
||||
except:
|
||||
time.sleep(1)
|
||||
self.reset_kvcache_blocks()
|
||||
ipc_signal_suffix_cache = self.cfg.parallel_config.engine_worker_queue_port[local_data_parallel_id]
|
||||
self.cache_manager_processes = self.engine.start_cache_service(
|
||||
self.cfg.local_device_ids, ipc_signal_suffix_cache
|
||||
)
|
||||
if self.cfg.scheduler_config.splitwise_role != "mixed":
|
||||
self.engine.split_mode_get_tasks()
|
||||
|
||||
if self.cfg.scheduler_config.name == "splitwise":
|
||||
self.cfg.init_cache_info()
|
||||
role = self.cfg.scheduler_config.splitwise_role
|
||||
@@ -155,6 +132,29 @@ class ExpertService:
|
||||
)
|
||||
self.launched_expert_service_signal.value[local_rank] = 1
|
||||
|
||||
if self.cfg.scheduler_config.splitwise_role != "mixed" or self.cfg.cache_config.enable_prefix_caching:
|
||||
if self.do_profile:
|
||||
get_profile_block_num = np.zeros([1], dtype=np.int32)
|
||||
while True:
|
||||
try:
|
||||
self.get_profile_block_num_signal = IPCSignal(
|
||||
name="get_profile_block_num",
|
||||
array=get_profile_block_num,
|
||||
dtype=np.int32,
|
||||
suffix=int(self.cfg.parallel_config.engine_worker_queue_port[0]),
|
||||
create=False,
|
||||
)
|
||||
break
|
||||
except:
|
||||
time.sleep(1)
|
||||
self.reset_kvcache_blocks()
|
||||
ipc_signal_suffix_cache = self.cfg.parallel_config.engine_worker_queue_port[local_data_parallel_id]
|
||||
self.cache_manager_processes = self.engine.start_cache_service(
|
||||
self.cfg.local_device_ids,
|
||||
ipc_signal_suffix_cache,
|
||||
create_cache_tensor=(self.cfg.scheduler_config.splitwise_role != "mixed"),
|
||||
)
|
||||
|
||||
console_logger.info(
|
||||
f"Worker processes(rank {local_rank}) are launched with {time.time() - start_time} seconds."
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user