mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[fix] fix requests & block metrics (#4404)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
* [fix] fix requests & block metrics * [chore] rename variables
This commit is contained in:
@@ -630,8 +630,6 @@ class ResourceManagerV1(ResourceManager):
|
||||
request, self.config.cache_config.block_size, request.num_computed_tokens
|
||||
)
|
||||
request.status = RequestStatus.RUNNING
|
||||
main_process_metrics.num_requests_waiting.dec(1)
|
||||
main_process_metrics.num_requests_running.inc(1)
|
||||
else:
|
||||
if self.config.cache_config.enable_prefix_caching:
|
||||
self._free_blocks(request)
|
||||
@@ -640,11 +638,17 @@ class ResourceManagerV1(ResourceManager):
|
||||
llm_logger.error("Unknown request status type")
|
||||
|
||||
if scheduled_reqs:
|
||||
task_used_block_num = sum([len(task.block_tables) if task else 0 for task in self.tasks_list])
|
||||
main_process_metrics.available_gpu_block_num.set(self.total_block_number() - task_used_block_num)
|
||||
main_process_metrics.batch_size.set(self.max_num_seqs - self.available_batch())
|
||||
main_process_metrics.gpu_cache_usage_perc.set(self.get_gpu_cache_usage_perc())
|
||||
llm_logger.debug(f"schedued_reqs: {scheduled_reqs}")
|
||||
|
||||
# Update metrics
|
||||
num_tasks = sum([1 if task else 0 for task in self.tasks_list])
|
||||
num_blocks_used_by_tasks = sum([len(task.block_tables) if task else 0 for task in self.tasks_list])
|
||||
main_process_metrics.available_gpu_block_num.set(self.total_block_number() - num_blocks_used_by_tasks)
|
||||
main_process_metrics.batch_size.set(self.max_num_seqs - self.available_batch())
|
||||
main_process_metrics.gpu_cache_usage_perc.set(self.get_gpu_cache_usage_perc())
|
||||
main_process_metrics.num_requests_running.set(len(self.running))
|
||||
main_process_metrics.num_requests_waiting.set(num_tasks - len(self.running))
|
||||
|
||||
return scheduled_reqs
|
||||
|
||||
def get_available_position(self) -> int:
|
||||
|
||||
Reference in New Issue
Block a user