Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Feature] Support batched tokens for EP (#3415)
* Support batched tokens for EP
* Support batched tokens for EP
* Support batched tokens for EP
* Support batched tokens for EP
* Support batched tokens for EP and fix bug
* Support batched tokens for EP and fix bug
* Support batched tokens for EP and fix bug
* Support batched tokens for EP and fix bug
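The title suggests batching routed tokens across the whole forward batch before dispatching them to experts under expert parallelism (EP), instead of dispatching per request. Purely as an illustration of per-expert grouping (a hypothetical sketch; dispatch_batched and its parameters are not FastDeploy APIs):

    from collections import defaultdict

    def dispatch_batched(token_ids, expert_ids, num_experts):
        """Group token ids by their routed expert so each expert receives
        one contiguous batch instead of many per-token sends."""
        buckets = defaultdict(list)
        for tok, exp in zip(token_ids, expert_ids):
            buckets[exp].append(tok)
        # One (possibly empty) batch per expert.
        return [buckets[e] for e in range(num_experts)]

    # Example: six tokens routed across three experts.
    print(dispatch_batched([10, 11, 12, 13, 14, 15], [0, 2, 0, 1, 2, 2], 3))
    # -> [[10, 12], [13], [11, 14, 15]]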
@@ -1274,7 +1274,7 @@ class GPUModelRunner(ModelRunnerBase):
        if not self.not_need_stop():
            self._execute_empty_input()
            return None

        start_time = time.time()
        # 1. Prepare inputs of model and sampler.
        skip_idx_list = self._get_skip_idx(model_forward_batch)
        self._prepare_inputs()

@@ -1409,6 +1409,8 @@ class GPUModelRunner(ModelRunnerBase):

        self._update_chunked_prefill(model_forward_batch)
        self._add_cache(model_forward_batch)
        end_time = time.time()
        logger.debug(f"execute one step cost time: {end_time-start_time:.3f} s")
        return None

    def _add_cache(self, model_forward_batch) -> None:
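Taken together, the two hunks wrap one execute step in wall-clock timing: start_time is captured before input preparation, and after _update_chunked_prefill and the new _add_cache call the elapsed time is logged at debug level. A minimal standalone sketch of the same pattern, assuming only the standard library (the logger name and step body here are placeholders, not the FastDeploy implementation):

    import logging
    import time

    logging.basicConfig(level=logging.DEBUG)
    logger = logging.getLogger("model_runner")

    def execute_one_step():
        start_time = time.time()
        # ... prepare inputs, run the model, update chunked prefill, add cache ...
        time.sleep(0.01)  # stand-in for the real forward pass
        end_time = time.time()
        logger.debug(f"execute one step cost time: {end_time-start_time:.3f} s")

    execute_one_step()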