[FDConfig]Remove splitwise_role and engine_worker_queue_port in FDConfig (#4147)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled

* remove splitwise_role and engine_worker_queue_port

* fix xpu

* fix xpu

* fix xpu

* fix unittest

* resolve conflct
This commit is contained in:
YuanRisheng
2025-09-19 17:01:52 +08:00
committed by GitHub
parent ee9d8a840a
commit 24180fba0a
23 changed files with 129 additions and 89 deletions

View File

@@ -296,8 +296,6 @@ class ParallelConfig:
# Do profile or not
self.do_profile: bool = False
# splitwise role
self.splitwise_role: str = "mixed"
# guided decoding backend
self.guided_decoding_backend: str = None
# disable any whitespace for guided decoding
@@ -319,14 +317,6 @@ class ParallelConfig:
else:
self.expert_parallel_size = 1
self.use_ep = self.expert_parallel_size > 1
if self.splitwise_role == "mixed":
self.moe_phase = MoEPhase(phase="prefill")
elif self.splitwise_role == "prefill":
self.moe_phase = MoEPhase(phase="prefill")
elif self.splitwise_role == "decode":
self.moe_phase = MoEPhase(phase="decode")
else:
raise NotImplementedError
# pd_disaggregation
use_pd_disaggregation: int = int(os.getenv("FLAGS_use_pd_disaggregation", 0))
@@ -1116,10 +1106,8 @@ class FDConfig:
max_model_len: int = 8192,
ips: str = None,
use_warmup: bool = False,
engine_worker_queue_port: str = "8002",
limit_mm_per_prompt: Optional[Dict[str, Any]] = None,
mm_processor_kwargs: Optional[Dict[str, Any]] = None,
splitwise_role: str = "mixed",
innode_prefill_ports: Optional[List[int]] = None,
max_num_partial_prefills: int = 1,
max_long_partial_prefills: int = 1,
@@ -1182,7 +1170,6 @@ class FDConfig:
self.limit_mm_per_prompt = limit_mm_per_prompt
self.mm_processor_kwargs = mm_processor_kwargs
self.use_warmup = use_warmup
self.splitwise_role = splitwise_role
self.innode_prefill_ports = innode_prefill_ports
self.max_num_partial_prefills = max_num_partial_prefills
self.max_long_partial_prefills = max_long_partial_prefills
@@ -1190,11 +1177,7 @@ class FDConfig:
self.reasoning_parser = reasoning_parser
self.guided_decoding_backend = guided_decoding_backend
self.disable_any_whitespace = disable_any_whitespace
self.engine_worker_queue_port = engine_worker_queue_port
self._str_to_list("innode_prefill_ports", int)
if isinstance(engine_worker_queue_port, int):
self.engine_worker_queue_port = str(engine_worker_queue_port)
self._str_to_list("engine_worker_queue_port", str)
if envs.FD_FOR_TORCH_MODEL_FORMAT:
self.model_config.model_format = "torch"
@@ -1267,6 +1250,15 @@ class FDConfig:
else:
self.guided_decoding_backend = "xgrammar"
if self.scheduler_config.splitwise_role == "mixed":
self.model_config.moe_phase = MoEPhase(phase="prefill")
elif self.scheduler_config.splitwise_role == "prefill":
self.model_config.moe_phase = MoEPhase(phase="prefill")
elif self.scheduler_config.splitwise_role == "decode":
self.model_config.moe_phase = MoEPhase(phase="decode")
else:
raise NotImplementedError
def check(self):
"""
check the legality of config
@@ -1301,7 +1293,7 @@ class FDConfig:
f"max_long_partial_prefills: {self.max_long_partial_prefills} should "
f"be less than or equal to max_num_partial_prefills: {self.max_num_partial_prefills}"
)
assert self.splitwise_role in ["mixed", "prefill", "decode"]
assert self.scheduler_config.splitwise_role in ["mixed", "prefill", "decode"]
# TODO(@wufeisheng): TP and EP need to be supported simultaneously.
assert (self.parallel_config.tensor_parallel_size == 1 and self.parallel_config.expert_parallel_size >= 1) or (
self.parallel_config.tensor_parallel_size >= 1 and self.parallel_config.expert_parallel_size == 1
@@ -1387,8 +1379,8 @@ class FDConfig:
initialize cache info
"""
disaggregate_info = {}
if self.splitwise_role != "mixed":
disaggregate_info["role"] = self.splitwise_role
if self.scheduler_config.splitwise_role != "mixed":
disaggregate_info["role"] = self.scheduler_config.splitwise_role
disaggregate_info["cache_info"] = dict()
current_protocol = self.cache_config.cache_transfer_protocol.split(",")
disaggregate_info["transfer_protocol"] = current_protocol
@@ -1396,7 +1388,9 @@ class FDConfig:
if protocol == "ipc":
disaggregate_info["cache_info"][protocol] = {
"ip": self.host_ip,
"port": self.engine_worker_queue_port[self.parallel_config.local_data_parallel_id],
"port": self.parallel_config.engine_worker_queue_port[
self.parallel_config.local_data_parallel_id
],
"device_ids": self.local_device_ids,
}
elif protocol == "rdma":