Revert "[BugFix] reschedule_preempt_task append waiting & PREEMPTED blocksize…" (#5575)

This reverts commit dbedb0797b.
Author: Jiang-Jia-Jun (committed by GitHub)
Date: 2025-12-16 11:12:57 +08:00
Parent: 9058cc712d
Commit: 8b6395478a

@@ -261,7 +261,6 @@ class ResourceManagerV1(ResourceManager):
                 self.running.insert(0, preempted_req)
                 continue
             preempted_req.status = RequestStatus.PREEMPTED
-            preempted_req.last_preempted_blocksize = len(preempted_req.block_tables)
             preempted_req.num_computed_tokens = 0
             if self.config.scheduler_config.splitwise_role == "decode":
                 self.tasks_list[preempted_req.idx] = None
@@ -736,14 +735,6 @@ class ResourceManagerV1(ResourceManager):
                     break
                 num_new_tokens = self._get_num_new_tokens(request, token_budget)
                 num_new_block = self.get_new_block_nums(request, num_new_tokens)
-                # If num_new_block is less than the last preempted block size, use the last preempted block size instead.
-                # For normal requests, when allocating blocks, we reserve two extra blocks for decoding.
-                # In the request rescheduling scenario, we currently only consider the number of tokens already generated,
-                # which might lead to allocating fewer blocks than the previous allocation, causing repeated rescheduling.
-                # This adjustment ensures we at least allocate as many blocks as before to avoid this issue.
-                last_preempted_blocksize = getattr(request, "last_preempted_blocksize", 0)
-                if num_new_block < last_preempted_blocksize:
-                    num_new_block = last_preempted_blocksize
                 # Allocate blocks to prefill
                 if self.cache_manager.can_allocate_gpu_blocks(num_new_block):
                     if not request.get("skip_allocate", False):
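
For reference, the two hunks above undo a single mechanism: the reverted commit recorded how many KV-cache blocks a request held at the moment it was preempted, and later used that count as a floor when sizing the allocation for the rescheduled request. The sketch below restates that logic in isolation, assuming the names from the diff (last_preempted_blocksize, block_tables, num_computed_tokens); the toy request object, the freeing of block_tables inside preempt(), and the concrete numbers are illustrative assumptions, not the actual ResourceManagerV1 implementation.

    # Minimal, self-contained sketch of the behavior removed by this revert.
    # Only the attribute names follow the diff; everything else is hypothetical.

    class ToyRequest:
        def __init__(self, request_id):
            self.request_id = request_id
            self.block_tables = []           # GPU KV-cache blocks currently held
            self.num_computed_tokens = 0
            self.last_preempted_blocksize = 0

    def preempt(request):
        """Bookkeeping at preemption time (first hunk)."""
        # Remember how many blocks the request held before its cache is released,
        # so a later reschedule can allocate at least that many again.
        request.last_preempted_blocksize = len(request.block_tables)
        request.num_computed_tokens = 0
        request.block_tables = []            # toy stand-in for freeing the cache

    def blocks_for_reschedule(request, num_new_block):
        """Clamp applied when the request is scheduled again (second hunk).

        Without the clamp, the reschedule path sizes the allocation only from
        the tokens generated so far, which can come out smaller than the
        pre-preemption allocation (normal scheduling reserves extra decode
        blocks). A smaller allocation can run out of blocks again almost
        immediately and trigger another preemption, repeating the cycle.
        """
        last = getattr(request, "last_preempted_blocksize", 0)
        return max(num_new_block, last)

    if __name__ == "__main__":
        req = ToyRequest("req-0")
        req.block_tables = list(range(10))   # held 10 blocks before preemption
        preempt(req)
        # Suppose the reschedule estimate, based only on generated tokens,
        # asks for 8 blocks: the clamp bumps it back up to the previous 10.
        print(blocks_for_reschedule(req, 8))  # -> 10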