[PD Disaggregation] support different tp_size for prefill and decode (#5296)

* up

* up

* up

* fix
This commit is contained in:
Juncai
2025-12-01 17:50:20 +08:00
committed by GitHub
parent 54119cf07e
commit 0925d44f18
13 changed files with 584 additions and 36 deletions

View File

@@ -150,6 +150,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
"FD_ENABLE_PDL": lambda: int(os.getenv("FD_ENABLE_PDL", "1")),
# "Number of tokens in the group for Mixture of Experts (MoE) computation processing on HPU"
"FD_HPU_CHUNK_SIZE": lambda: int(os.getenv("FD_HPU_CHUNK_SIZE", "64")),
"FD_PREFILL_WAIT_DECODE_RESOURCE_SECONDS": lambda: int(os.getenv("FD_PREFILL_WAIT_DECODE_RESOURCE_SECONDS", "30")),
}