Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[BUGFIX] fix ep mixed bug (#3513)
* Update expert_service.py
* Update engine.py
* Update engine.py
* Update engine.py
* Update expert_service.py
* Update engine.py
engine.py
@@ -124,8 +124,9 @@ class LLMEngine:
             cfg.max_num_seqs, cfg, cfg.tensor_parallel_size, cfg.splitwise_role
         )
 
-        os.environ["INFERENCE_MSG_QUEUE_ID"] = str(self.cfg.engine_worker_queue_port)
-
+        os.environ["INFERENCE_MSG_QUEUE_ID"] = str(
+            self.cfg.engine_worker_queue_port + self.cfg.worker_num_per_node * self.cfg.node_rank
+        )
         self.split_connector = SplitwiseConnector(cfg, self.scheduler, self.engine_worker_queue, self.resource_manager)
 
         self.token_processor = TokenProcessor(
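For reference, a minimal sketch of the new queue-id arithmetic. Only the field names follow the diff above; the concrete numbers are made up for illustration. The formula gives each node its own id offset, so workers on different nodes no longer end up with the same INFERENCE_MSG_QUEUE_ID.

# Hypothetical values; only the field names come from the diff above.
engine_worker_queue_port = 6677
worker_num_per_node = 8

for node_rank in range(3):
    # Same formula as the new engine.py line.
    queue_id = engine_worker_queue_port + worker_num_per_node * node_rank
    print(f"node {node_rank}: INFERENCE_MSG_QUEUE_ID={queue_id}")
# node 0: 6677, node 1: 6685, node 2: 6693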
expert_service.py
@@ -59,8 +59,8 @@ class ExpertService:
         self.cfg.disaggregate_info = None
 
         self.scheduler = cfg.scheduler_config.scheduler()
-
-        self.scheduler.reset_nodeid(f"{self.scheduler.infer.nodeid}_{local_data_parallel_id!s}")
+        if cfg.scheduler_config.name == "splitwise":
+            self.scheduler.reset_nodeid(f"{self.scheduler.infer.nodeid}_{local_data_parallel_id!s}")
 
         self.cfg.parallel_config.local_data_parallel_id = local_data_parallel_id
@@ -143,11 +143,11 @@ class ExpertService:
         self.token_processor.run()
 
         self.cfg.init_cache_info()
-
-        role = self.cfg.splitwise_role
-        host_ip = self.cfg.host_ip
-        disaggregate = self.cfg.disaggregate_info
-        self.scheduler.start(role, host_ip, disaggregate)
+        if self.cfg.scheduler_config.name == "splitwise":
+            role = self.cfg.splitwise_role
+            host_ip = self.cfg.host_ip
+            disaggregate = self.cfg.disaggregate_info
+            self.scheduler.start(role, host_ip, disaggregate)
         self.cfg.print()
 
         console_logger.info(f"Worker processes are launched with {time.time() - start_time} seconds.")
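The two ExpertService hunks apply the same pattern: calls that belong to the splitwise scheduler (reset_nodeid at construction, scheduler.start at launch) are now guarded by the scheduler name, presumably because other scheduler types used in mixed runs do not expose that node-id API. A minimal sketch of the guard, using stand-in classes rather than the real FastDeploy schedulers:

class SplitwiseLikeScheduler:
    # Stand-in: only this scheduler type exposes node-id handling.
    def __init__(self):
        self.nodeid = "node-0"

    def reset_nodeid(self, nodeid):
        self.nodeid = nodeid

class PlainScheduler:
    # Stand-in for a scheduler without any node-id API.
    pass

def configure(scheduler, scheduler_name, dp_id):
    # Mirrors the guarded call in the diff: only "splitwise" touches nodeid.
    if scheduler_name == "splitwise":
        scheduler.reset_nodeid(f"{scheduler.nodeid}_{dp_id}")

configure(PlainScheduler(), "local", 0)              # guard skips the call
configure(SplitwiseLikeScheduler(), "splitwise", 1)  # nodeid becomes "node-0_1"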
@@ -363,6 +363,10 @@ def start_expert_service(cfg, local_data_parallel_id, ipc_signal_suffix):
     expert_service = ExpertService(cfg, local_data_parallel_id)
     try:
         expert_service.start(ipc_signal_suffix, local_data_parallel_id)
-        expert_service.split_connector.start_receiver()
+        if cfg.splitwise_role != "mixed":
+            expert_service.split_connector.start_receiver()
+        else:
+            while True:
+                time.sleep(100)
     except Exception as e:
         llm_logger.exception(f"Expert service failed to start: {e}")
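A minimal sketch of the new control flow at the end of start_expert_service, assuming (as the diff suggests) that a mixed-role service has no splitwise receiver to block on and therefore needs an explicit sleep loop to keep the process alive; the function below is a stand-in, not the real entry point:

import time

def keep_service_alive(splitwise_role, start_receiver):
    # Mirrors the diff: non-mixed roles block on the splitwise receiver,
    # while a mixed role just parks the main thread so workers keep serving.
    if splitwise_role != "mixed":
        start_receiver()  # blocking in the real service
    else:
        while True:       # never returns; keeps the mixed-role process up
            time.sleep(100)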