diff --git a/fastdeploy/output/token_processor.py b/fastdeploy/output/token_processor.py index 341b88d90..46f298d0b 100644 --- a/fastdeploy/output/token_processor.py +++ b/fastdeploy/output/token_processor.py @@ -409,6 +409,9 @@ class TokenProcessor: + accept_num[i] ].tolist() if (not recovery_stop) and (len(token_ids) == 0 or token_ids[-1] <= 0): + if envs.ENABLE_V1_KVCACHE_SCHEDULER: + if task_id in self.resource_manager.to_be_rescheduled_request_id_set: + self.resource_manager.reschedule_preempt_task(task_id) continue else: token_id = int(tokens[i, 0])