[Feature] Support batched tokens for EP (#3415)

* Support batched tokens for EP

* Support batched tokens for EP and fix bug
Author: chenjian
Date: 2025-08-18 11:43:36 +08:00
Committed by: GitHub
Parent: 3f86ae0007
Commit: aba94169dc
9 changed files with 235 additions and 97 deletions

@@ -1274,7 +1274,7 @@ class GPUModelRunner(ModelRunnerBase):
         if not self.not_need_stop():
             self._execute_empty_input()
             return None
-
+        start_time = time.time()
         # 1. Prepare inputs of model and sampler.
         skip_idx_list = self._get_skip_idx(model_forward_batch)
         self._prepare_inputs()
@@ -1409,6 +1409,8 @@ class GPUModelRunner(ModelRunnerBase):
         self._update_chunked_prefill(model_forward_batch)
         self._add_cache(model_forward_batch)
+        end_time = time.time()
+        logger.debug(f"execute one step cost time: {end_time-start_time:.3f} s")
         return None

     def _add_cache(self, model_forward_batch) -> None:
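Taken together, the two hunks bracket one model-execution step with wall-clock timing: record start_time before input preparation, then log the elapsed time just before returning. Below is a minimal, self-contained sketch of that pattern. The ModelRunnerSketch class and its placeholder methods are illustrative stand-ins, not the real GPUModelRunner implementation; only the start_time/end_time bracketing mirrors the diff above.

    import logging
    import time

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("model_runner")


    class ModelRunnerSketch:
        """Illustrative stand-in; only the timing pattern mirrors the diff."""

        def execute_model(self, model_forward_batch):
            # First hunk: record wall-clock time before preparing inputs.
            start_time = time.time()

            self._prepare_inputs(model_forward_batch)  # placeholder step
            self._run_forward(model_forward_batch)     # placeholder step

            # Second hunk: log the elapsed time for one full step.
            end_time = time.time()
            logger.debug(f"execute one step cost time: {end_time - start_time:.3f} s")
            return None

        def _prepare_inputs(self, batch):
            pass  # stand-in for the real input preparation

        def _run_forward(self, batch):
            pass  # stand-in for the real forward pass


    if __name__ == "__main__":
        ModelRunnerSketch().execute_model(model_forward_batch=None)

One caveat worth noting: time.time() measures host wall-clock time only, so if GPU kernels are launched asynchronously the logged figure reflects the CPU-side duration of the step unless the runner synchronizes with the device before end_time is taken.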