diff --git a/fastdeploy/spec_decode/mtp.py b/fastdeploy/spec_decode/mtp.py
index 58b9a4632..4fb79e0fc 100644
--- a/fastdeploy/spec_decode/mtp.py
+++ b/fastdeploy/spec_decode/mtp.py
@@ -659,7 +659,7 @@ class MTPProposer(Proposer):
         """
         check whether prefill stage exist
         """
-        if int(paddle.max(self.model_inputs["seq_lens_encoder"])) != 0:
+        if np.any(self.share_inputs["seq_lens_encoder"].numpy() > 0):
             return 1
         else:
             return 0
diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
index 02a1d865c..d47066011 100644
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -222,13 +222,13 @@ class GPUModelRunner(ModelRunnerBase):
         """
         check whether prefill stage exist
         """
-        return int(paddle.max(self.share_inputs["seq_lens_encoder"])) > 0
+        return np.any(self.share_inputs["seq_lens_encoder"].numpy() > 0)
 
     def exist_decode(self):
         """
         check whether decode stage exist
         """
-        return int(paddle.max(self.share_inputs["seq_lens_decoder"])) > 0
+        return np.any(self.share_inputs["seq_lens_decoder"].numpy() > 0)
 
     def only_prefill(self):
         """
@@ -1272,7 +1272,7 @@ class GPUModelRunner(ModelRunnerBase):
         self.share_inputs["output_padding_offset"].copy_(output_padding_offset, False)
 
         # Update bad tokens len
-        max_bad_tokens_len = paddle.max(self.share_inputs["bad_tokens_len"])
+        max_bad_tokens_len = np.max(self.share_inputs["bad_tokens_len"].numpy())
 
         # Initialize forward meta data
         self.initialize_forward_meta()