[Perf] Remove unnecessary operations in non-cuda_graph (#3010)

* [Perf] Remove unnecessary operations in non-cuda_graph * fix code logic * use suggestion comment * reduce function call * reduce function call * reduce function call * reduce function call
2025-10-04 08:16:42 +08:00 · 2025-07-28 11:38:29 +08:00
parent 247010d298
commit dd877f38b1
2 changed files with 15 additions and 9 deletions
--- a/fastdeploy/worker/gcu_model_runner.py
+++ b/fastdeploy/worker/gcu_model_runner.py
@@ -576,8 +576,9 @@ class GCUModelRunner(ModelRunnerBase):
        )

        # Update Batch type for cuda graph
-        is_decode_batch = not ((self.share_inputs["seq_lens_this_time"] > 1).sum() > 0)
-        self.forward_meta.step_use_cudagraph = self.use_cudagraph and is_decode_batch
+        self.forward_meta.step_use_cudagraph = self.use_cudagraph and (
+            not ((self.share_inputs["seq_lens_this_time"] > 1).sum() > 0)
+        )

        # Initialize attention meta data
        for attn_backend in self.attn_backends: