[Perf] Remove unnecessary operations in non-cuda_graph (#3010)

* [Perf] Remove unnecessary operations in non-cuda_graph

* fix code logic

* use suggestion comment

* reduce function call

* reduce function call

* reduce function call

* reduce function call
This commit is contained in:
begin2023
2025-07-28 11:38:29 +08:00
committed by GitHub
parent 247010d298
commit dd877f38b1
2 changed files with 15 additions and 9 deletions

View File

@@ -576,8 +576,9 @@ class GCUModelRunner(ModelRunnerBase):
)
# Update Batch type for cuda graph
-is_decode_batch = not ((self.share_inputs["seq_lens_this_time"] > 1).sum() > 0)
-self.forward_meta.step_use_cudagraph = self.use_cudagraph and is_decode_batch
+self.forward_meta.step_use_cudagraph = self.use_cudagraph and (
+not ((self.share_inputs["seq_lens_this_time"] > 1).sum() > 0)
+)
# Initialize attention meta data
for attn_backend in self.attn_backends: