add w4afp8 offline script (#3636)

This commit is contained in:
Yuan Xiaolan
2025-08-29 17:56:05 +08:00
committed by GitHub
parent f677c032c0
commit c71ee0831c
12 changed files with 163 additions and 37 deletions

View File

@@ -52,7 +52,7 @@ class EngineSevice:
Base class containing common engine functionality
"""
def __init__(self, cfg):
def __init__(self, cfg, start_queue=True):
"""
Initializes the LLMEngine with the provided configuration.
@@ -84,7 +84,7 @@ class EngineSevice:
cfg.parallel_config.local_data_parallel_id,
)
self.start_worker_queue_service()
self.start_worker_queue_service(start_queue)
os.environ["INFERENCE_MSG_QUEUE_ID"] = self.cfg.engine_worker_queue_port[
self.cfg.parallel_config.local_data_parallel_id
@@ -181,7 +181,7 @@ class EngineSevice:
create=True,
)
def start_worker_queue_service(self):
def start_worker_queue_service(self, start_queue):
"""
start queue service for engine worker communication
"""
@@ -189,7 +189,8 @@ class EngineSevice:
self.cfg.master_ip,
int(self.cfg.engine_worker_queue_port[self.cfg.parallel_config.local_data_parallel_id]),
)
if self.cfg.host_ip == self.cfg.master_ip or self.cfg.master_ip == "0.0.0.0":
if start_queue and (self.cfg.host_ip == self.cfg.master_ip or self.cfg.master_ip == "0.0.0.0"):
llm_logger.info(f"Starting engine worker queue server service at {address}")
self.engine_worker_queue_server = EngineWorkerQueue(
address=address,