mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[BUGFIX] fix ep mixed bug (#3513)
* Update expert_service.py
* Update engine.py
* Update engine.py
* Update engine.py
* Update expert_service.py
* Update engine.py
This commit is contained in:
@@ -124,8 +124,9 @@ class LLMEngine:
             cfg.max_num_seqs, cfg, cfg.tensor_parallel_size, cfg.splitwise_role
         )
 
-        os.environ["INFERENCE_MSG_QUEUE_ID"] = str(self.cfg.engine_worker_queue_port)
-
+        os.environ["INFERENCE_MSG_QUEUE_ID"] = str(
+            self.cfg.engine_worker_queue_port + self.cfg.worker_num_per_node * self.cfg.node_rank
+        )
         self.split_connector = SplitwiseConnector(cfg, self.scheduler, self.engine_worker_queue, self.resource_manager)
 
         self.token_processor = TokenProcessor(
Reference in New Issue
Block a user