[Feature] [PD] add simple router and refine splitwise deployment (#4709)

* add simple router and refine splitwise deployment * fix
2025-12-24 13:28:13 +08:00 · 2025-11-06 14:56:02 +08:00
parent 831266da7a
commit 08ca0f6aea
39 changed files with 2397 additions and 171 deletions
--- a/fastdeploy/worker/worker_process.py
+++ b/fastdeploy/worker/worker_process.py
@@ -475,8 +475,10 @@ class PaddleDisWorkerProc:

            # Execute model to generate token. The generated token will be written to the buffer.
            # These generated tokens can be obtained through get_output op.
+            start_execute_time = time.time()
            self.worker.execute_model(req_dicts, num_running_requests)
            self.exist_prefill_task_signal.value[0] = self.worker.exist_prefill()
+            logger.debug(f"execute model cost: {time.time()-start_execute_time:.5f} s")

    def initialize_kv_cache(self) -> None:
        """Profiles the peak memory usage of the model to determine how many