[BugFix] fix num_requests_running after clear_data (#4989)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled

* [BugFix] fix num_requests_running after clear_data

* [fix] fix tasks_list and stop flags not cleared when _free_blocks failed
This commit is contained in:
Yonghua Li
2025-11-13 13:50:38 +08:00
committed by GitHub
parent 8749ca2fb6
commit 3da9f01e19
2 changed files with 18 additions and 10 deletions

View File

@@ -473,14 +473,7 @@ class ResourceManagerV1(ResourceManager):
if scheduled_reqs:
llm_logger.debug(f"schedued_reqs: {scheduled_reqs}")
# Update metrics
num_tasks = sum([1 if task else 0 for task in self.tasks_list])
num_blocks_used_by_tasks = sum([len(task.block_tables) if task else 0 for task in self.tasks_list])
main_process_metrics.available_gpu_block_num.set(self.total_block_number() - num_blocks_used_by_tasks)
main_process_metrics.batch_size.set(self.max_num_seqs - self.available_batch())
main_process_metrics.gpu_cache_usage_perc.set(self.get_gpu_cache_usage_perc())
main_process_metrics.num_requests_running.set(len(self.running))
main_process_metrics.num_requests_waiting.set(num_tasks - len(self.running))
self.update_metrics()
return scheduled_reqs
@@ -570,7 +563,10 @@ class ResourceManagerV1(ResourceManager):
if request in self.running: # normally run and finished
self.running.remove(request)
request.status = RequestStatus.FINISHED
self._free_blocks(request)
try:
self._free_blocks(request)
except Exception as e:
llm_logger.warning(f"release block failed {req_id}: {e}")
if (
request.request_id in self.to_be_rescheduled_request_id_set
): # finished after preempted, blocks have been recycled.
@@ -587,7 +583,19 @@ class ResourceManagerV1(ResourceManager):
del self.requests[req_id]
except Exception as e:
llm_logger.error(f"finish_request err: {e}, {str(traceback.format_exc())}")
finally:
self.update_metrics()
def clear_data(self):
self.waiting: deque[Request] = deque()
self.to_be_rescheduled_request_id_set = set()
def update_metrics(self):
    """Refresh the scheduler-related values on ``main_process_metrics``.

    Reads the current state of this resource manager (``tasks_list``,
    ``running``, ``max_num_seqs`` and the block/batch helper methods) and
    writes five metrics: available GPU blocks, batch size, GPU cache usage
    percentage, number of running requests and number of waiting requests.
    """
    # Falsy entries in tasks_list are skipped (presumably unused slots that
    # hold None -- confirm against where tasks_list is populated).
    occupied_slots = [slot for slot in self.tasks_list if slot]
    blocks_in_use = sum(len(slot.block_tables) for slot in occupied_slots)

    # Blocks still free = every block known to the manager minus the ones
    # referenced by an occupied slot.
    free_blocks = self.total_block_number() - blocks_in_use
    main_process_metrics.available_gpu_block_num.set(free_blocks)

    # Batch size is reported as the number of sequence slots in use.
    slots_in_use = self.max_num_seqs - self.available_batch()
    main_process_metrics.batch_size.set(slots_in_use)

    cache_usage = self.get_gpu_cache_usage_perc()
    main_process_metrics.gpu_cache_usage_perc.set(cache_usage)

    # Waiting requests are the occupied slots that are not currently running.
    running_count = len(self.running)
    main_process_metrics.num_requests_running.set(running_count)
    main_process_metrics.num_requests_waiting.set(len(occupied_slots) - running_count)

View File

@@ -626,7 +626,7 @@ class TokenProcessor:
def clear_data(self):
if envs.ENABLE_V1_KVCACHE_SCHEDULER:
self.resource_manager.clear_data()
for i in range(self.cfg.max_num_seqs):
for i in range(self.resource_manager.max_num_seqs):
if self.resource_manager.stop_flags[i]:
continue
task = self.resource_manager.tasks_list[i]