[Feature] Support batched tokens for EP (#3415)

* Support batched tokens for EP

* Support batched tokens for EP and fix bug
Author: chenjian
Date: 2025-08-18 11:43:36 +08:00
Committed by: GitHub
Parent: 3f86ae0007
Commit: aba94169dc
9 changed files with 235 additions and 97 deletions

@@ -1274,7 +1274,7 @@ class GPUModelRunner(ModelRunnerBase):
         if not self.not_need_stop():
             self._execute_empty_input()
             return None
-
+        start_time = time.time()
         # 1. Prepare inputs of model and sampler.
         skip_idx_list = self._get_skip_idx(model_forward_batch)
         self._prepare_inputs()
@@ -1409,6 +1409,8 @@ class GPUModelRunner(ModelRunnerBase):
         self._update_chunked_prefill(model_forward_batch)
         self._add_cache(model_forward_batch)
+        end_time = time.time()
+        logger.debug(f"execute one step cost time: {end_time-start_time:.3f} s")
         return None

     def _add_cache(self, model_forward_batch) -> None:
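Taken together, the two hunks bracket one model-execution step with wall-clock timing: record start_time before input preparation, then log the elapsed time just before returning. Below is a minimal, self-contained sketch of that pattern. The ModelRunnerSketch class and its placeholder methods are illustrative stand-ins, not the real GPUModelRunner implementation; only the start_time/end_time bracketing mirrors the diff above.

    import logging
    import time

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("model_runner")


    class ModelRunnerSketch:
        """Illustrative stand-in; only the timing pattern mirrors the diff."""

        def execute_model(self, model_forward_batch):
            # First hunk: record wall-clock time before preparing inputs.
            start_time = time.time()

            self._prepare_inputs(model_forward_batch)  # placeholder step
            self._run_forward(model_forward_batch)     # placeholder step

            # Second hunk: log the elapsed time for one full step.
            end_time = time.time()
            logger.debug(f"execute one step cost time: {end_time - start_time:.3f} s")
            return None

        def _prepare_inputs(self, batch):
            pass  # stand-in for the real input preparation

        def _run_forward(self, batch):
            pass  # stand-in for the real forward pass


    if __name__ == "__main__":
        ModelRunnerSketch().execute_model(model_forward_batch=None)

One caveat worth noting: time.time() measures host wall-clock time only, so if GPU kernels are launched asynchronously the logged figure reflects the CPU-side duration of the step unless the runner synchronizes with the device before end_time is taken.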