[FDConfig] Remove splitwise_role and engine_worker_queue_port in FDConfig (#4147)

* remove splitwise_role and engine_worker_queue_port

* fix xpu

* fix xpu

* fix xpu

* fix unittest

* resolve conflict
Author: YuanRisheng
Date: 2025-09-19 17:01:52 +08:00 (committed by GitHub)
Parent: ee9d8a840a
Commit: 24180fba0a
23 changed files with 129 additions and 89 deletions
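At its core, this PR relocates two fields off the top-level FDConfig: splitwise_role now lives on cfg.scheduler_config, and engine_worker_queue_port on cfg.parallel_config. Below is a minimal sketch of the new access paths; the dataclasses are hypothetical stand-ins that model only the fields visible in this diff, while the real config classes carry many more options.

from dataclasses import dataclass, field
from typing import List

# Hypothetical stand-ins for the real config classes; only the fields
# touched by this PR are modeled here.
@dataclass
class SchedulerConfig:
    splitwise_role: str = "mixed"  # "mixed" and "prefill" appear in this diff

@dataclass
class ParallelConfig:
    # One queue port per data-parallel rank (assumed from the
    # engine_worker_queue_port[i] indexing in launch_components).
    engine_worker_queue_port: List[str] = field(default_factory=lambda: ["8002"])

@dataclass
class FDConfig:
    scheduler_config: SchedulerConfig = field(default_factory=SchedulerConfig)
    parallel_config: ParallelConfig = field(default_factory=ParallelConfig)

cfg = FDConfig()
role = cfg.scheduler_config.splitwise_role                # was: cfg.splitwise_role
suffix = cfg.parallel_config.engine_worker_queue_port[0]  # was: cfg.engine_worker_queue_port[0]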


@@ -115,7 +115,7 @@ class LLMEngine:
         start_time = time.time()

         self.api_server_pid = api_server_pid
-        self.ipc_signal_suffix = self.cfg.engine_worker_queue_port[0]
+        self.ipc_signal_suffix = self.cfg.parallel_config.engine_worker_queue_port[0]
         self._init_worker_signals()

         self.data_processor = self.input_processor.create_processor()
@@ -127,7 +127,7 @@ class LLMEngine:
         self.engine.start_zmq_service(api_server_pid)

         if self.do_profile == 0 and (
-            self.cfg.cache_config.enable_prefix_caching or self.cfg.splitwise_role != "mixed"
+            self.cfg.cache_config.enable_prefix_caching or self.cfg.scheduler_config.splitwise_role != "mixed"
         ):
             device_ids = self.cfg.device_ids.split(",")
             self.cache_manager_processes = self.engine.start_cache_service(device_ids, self.ipc_signal_suffix)
@@ -161,7 +161,7 @@ class LLMEngine:
         self._stop_profile()
         # Launch components: scheduler, cache_manager, expert_service et.al.
         self.launch_components()
-        if self.cfg.cache_config.enable_prefix_caching or self.cfg.splitwise_role != "mixed":
+        if self.cfg.cache_config.enable_prefix_caching or self.cfg.scheduler_config.splitwise_role != "mixed":
             self.launched_cache_manager_signal.value[0] = 1

         # Worker launched
@@ -311,7 +311,7 @@ class LLMEngine:
         )
         # launched_cache_manager_signal is used to check whether the engine has started the cache_manager
-        if self.cfg.cache_config.enable_prefix_caching or self.cfg.splitwise_role != "mixed":
+        if self.cfg.cache_config.enable_prefix_caching or self.cfg.scheduler_config.splitwise_role != "mixed":
             launched_cache_manager_signal_data = np.zeros([1], dtype=np.int32)
             self.launched_cache_manager_signal = IPCSignal(
                 name="launched_cache_manager_signal",
@@ -426,10 +426,10 @@ class LLMEngine:
             }
         )
-        if self.cfg.splitwise_role != "mixed":
+        if self.cfg.scheduler_config.splitwise_role != "mixed":
             variables["FLAGS_use_pd_disaggregation"] = 1
             # TODO dynamic load environment variable
-            if self.cfg.splitwise_role == "prefill":
+            if self.cfg.scheduler_config.splitwise_role == "prefill":
                 variables["FLAGS_fmt_write_cache_completed_signal"] = 1

         if self.cfg.model_config.enable_mm:
@@ -463,7 +463,7 @@ class LLMEngine:
             else len(self.data_processor.tokenizer.vocab)
         )
-        ports = ",".join(self.cfg.engine_worker_queue_port)
+        ports = ",".join(self.cfg.parallel_config.engine_worker_queue_port)
         ips = None
         if self.cfg.ips is not None:
             ips = ",".join(self.cfg.ips)
@@ -481,9 +481,9 @@ class LLMEngine:
             f" --enc_dec_block_num {self.cfg.cache_config.enc_dec_block_num}"
             f" --eos_tokens_lens {self.data_processor.eos_token_id_len}"
             f" --pad_token_id {self.data_processor.pad_token_id}"
-            f" --engine_pid {self.cfg.engine_worker_queue_port[0]}"
+            f" --engine_pid {self.cfg.parallel_config.engine_worker_queue_port[0]}"
             f" --max_num_batched_tokens {self.cfg.scheduler_config.max_num_batched_tokens}"
-            f" --splitwise_role {self.cfg.splitwise_role}"
+            f" --splitwise_role {self.cfg.scheduler_config.splitwise_role}"
             f" --kv_cache_ratio {self.cfg.cache_config.kv_cache_ratio}"
             f" --expert_parallel_size {self.cfg.parallel_config.expert_parallel_size}"
             f" --data_parallel_size {self.cfg.parallel_config.data_parallel_size}"
@@ -602,7 +602,7 @@ class LLMEngine:
         num_gpu_blocks = self.get_profile_block_num_signal.value[0]
         self.cfg.cache_config.reset(num_gpu_blocks)
         self.engine.resource_manager.reset_cache_config(self.cfg.cache_config)
-        if self.cfg.cache_config.enable_prefix_caching or self.cfg.splitwise_role != "mixed":
+        if self.cfg.cache_config.enable_prefix_caching or self.cfg.scheduler_config.splitwise_role != "mixed":
             device_ids = self.cfg.device_ids.split(",")
             self.cache_manager_processes = self.engine.start_cache_service(device_ids, self.ipc_signal_suffix)
@@ -619,7 +619,7 @@ class LLMEngine:
         return True, ""

     def launch_components(self):
-        if self.cfg.splitwise_role != "mixed":
+        if self.cfg.scheduler_config.splitwise_role != "mixed":
             # single-machine logic
             self.engine.engine_worker_queue.available_prefill_instances.put(1)
             self.engine.split_mode_get_tasks()
@@ -632,7 +632,7 @@ class LLMEngine:
             self.cfg.init_cache_info()

-            role = self.cfg.splitwise_role
+            role = self.cfg.scheduler_config.splitwise_role
             host_ip = self.cfg.host_ip
             disaggregate = self.cfg.disaggregate_info
             if self.cfg.scheduler_config.name == "splitwise":
@@ -649,7 +649,7 @@ class LLMEngine:
                 ):
                     address = (
                         self.cfg.master_ip,
-                        int(self.cfg.engine_worker_queue_port[i]),
+                        int(self.cfg.parallel_config.engine_worker_queue_port[i]),
                     )
                     llm_logger.info(f"dp start queue service {address}")
                     self.dp_engine_worker_queue_server.append(
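The last hunk builds one (ip, port) address per data-parallel rank from the relocated port list. A hedged sketch of that indexing, with placeholder values standing in for cfg.master_ip and cfg.parallel_config.engine_worker_queue_port:

master_ip = "127.0.0.1"   # placeholder; the real value comes from cfg.master_ip
ports = ["8002", "8003"]  # placeholder for cfg.parallel_config.engine_worker_queue_port
addresses = [(master_ip, int(p)) for p in ports]
# [('127.0.0.1', 8002), ('127.0.0.1', 8003)]: one queue service per DP rank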