diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py index c53d1c2a3..7eed4b662 100644 --- a/fastdeploy/output/token_processor.py +++ b/fastdeploy/output/token_processor.py @@ -622,11 +622,11 @@ class TokenProcessor: + i * MAX_DRAFT_TOKENS + accept_num[i] ].tolist() - if (not recovery_stop) and (len(token_ids) == 0 or token_ids[-1] <= 0): - if envs.ENABLE_V1_KVCACHE_SCHEDULER: - if task_id in self.resource_manager.to_be_rescheduled_request_id_set: - self.resource_manager.reschedule_preempt_task(task_id) - continue + if (not recovery_stop) and (len(token_ids) == 0 or token_ids[-1] <= 0): + if envs.ENABLE_V1_KVCACHE_SCHEDULER: + if task_id in self.resource_manager.to_be_rescheduled_request_id_set: + self.resource_manager.reschedule_preempt_task(task_id) + continue else: token_id = int(tokens[i, 0]) token_ids = [token_id]