[PD Disaggregation] support different tp_size for prefill and decode (#5296)

* up * up * up * fix
2025-12-24 13:28:13 +08:00 · 2025-12-01 17:50:20 +08:00
parent 54119cf07e
commit 0925d44f18
13 changed files with 584 additions and 36 deletions
--- a/fastdeploy/envs.py
+++ b/fastdeploy/envs.py
@@ -150,6 +150,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "FD_ENABLE_PDL": lambda: int(os.getenv("FD_ENABLE_PDL", "1")),
    # "Number of tokens in the group for Mixture of Experts (MoE) computation processing on HPU"
    "FD_HPU_CHUNK_SIZE": lambda: int(os.getenv("FD_HPU_CHUNK_SIZE", "64")),
+    "FD_PREFILL_WAIT_DECODE_RESOURCE_SECONDS": lambda: int(os.getenv("FD_PREFILL_WAIT_DECODE_RESOURCE_SECONDS", "30")),
 }