[Cherry-Pick] Launch expert_service before kv_cache initialization in worker_process (#3558)

* Launch expert_service before kv_cache initialization; each expert service now reports readiness through a shared launched_expert_service_signal

* update code

---------

Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
Author: Zero Rains
Date: 2025-08-23 13:08:34 +08:00
Committed by: GitHub
Parent: e8af92aab7
Commit: f8d3255520
3 changed files with 149 additions and 75 deletions


@@ -26,7 +26,7 @@ import weakref
 import numpy as np
 from fastdeploy.engine.resource_manager import ResourceManager
-from fastdeploy.inter_communicator import EngineWorkerQueue
+from fastdeploy.inter_communicator import EngineWorkerQueue, IPCSignal
 from fastdeploy.metrics.metrics import main_process_metrics
 from fastdeploy.output.token_processor import TokenProcessor
 from fastdeploy.splitwise.splitwise_connector import SplitwiseConnector
@@ -127,7 +127,7 @@ class ExpertService:
             cache_config=self.cfg.cache_config,
             tensor_parallel_size=self.cfg.tensor_parallel_size,
             device_ids=self.cfg.local_device_ids,
-            pod_ip=self.cfg.pod_ips[0],
+            pod_ip=self.cfg.master_ip,
             engine_worker_queue_port=self.cfg.engine_worker_queue_port,
             pid_suffix=f"{local_data_parallel_id}_{ipc_signal_suffix}",
         )
@@ -150,7 +150,22 @@ class ExpertService:
         self.scheduler.start(role, host_ip, disaggregate)
         self.cfg.print()
-        console_logger.info(f"Worker processes are launched with {time.time() - start_time} seconds.")
+        launched_expert_service_signal_data = np.zeros(
+            shape=[self.cfg.parallel_config.data_parallel_size // self.cfg.nnode], dtype=np.int32
+        )
+        self.launched_expert_service_signal = IPCSignal(
+            name="launched_expert_service_signal",
+            array=launched_expert_service_signal_data,
+            dtype=np.int32,
+            suffix=ipc_signal_suffix,
+            create=False,
+        )
+        local_rank = local_data_parallel_id % self.cfg.worker_num_per_node
+        self.launched_expert_service_signal.value[local_rank] = 1
+        console_logger.info(
+            f"Worker processes(rank {local_rank}) are launched with {time.time() - start_time} seconds."
+        )
         return True

     def _insert_task_to_worker(self):
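
The hunks above show only the expert_service side of the startup handshake: each local data-parallel rank attaches to an existing launched_expert_service_signal shared segment (create=False) and flips its slot to 1 once the service is up. A minimal sketch of the counterpart in worker_process follows; the function name wait_for_expert_services, the timeout handling, and the assumption that worker_process owns the segment (create=True) are illustrative, inferred from this diff rather than quoted from the other changed files in the commit.

# Sketch only: the worker creates the shared signal, launches the expert
# services, and blocks until every local rank reports ready before it
# proceeds to kv_cache initialization.
import time

import numpy as np

from fastdeploy.inter_communicator import IPCSignal


def wait_for_expert_services(cfg, ipc_signal_suffix, timeout_s=300.0):
    """Block until all local expert_service ranks set their slot to 1."""
    num_local_ranks = cfg.parallel_config.data_parallel_size // cfg.nnode
    signal_data = np.zeros(shape=[num_local_ranks], dtype=np.int32)
    # create=True is an assumption here: worker_process would own the
    # shared-memory segment that the expert services attach to with
    # create=False in the hunk above.
    launched = IPCSignal(
        name="launched_expert_service_signal",
        array=signal_data,
        dtype=np.int32,
        suffix=ipc_signal_suffix,
        create=True,
    )
    deadline = time.time() + timeout_s
    while not np.all(launched.value == 1):
        if time.time() > deadline:
            raise TimeoutError("expert_service processes did not launch in time")
        time.sleep(0.1)
    # Only after this returns is it safe to start kv_cache initialization.

The point of the commit is this ordering: per the title, worker_process must not begin kv_cache initialization until every local expert service has signaled that it is launched.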