polish code with new pre-commit rule (#2923)

This commit is contained in:
Zero Rains
2025-07-19 23:19:27 +08:00
committed by GitHub
parent b8676d71a8
commit 25698d56d1
424 changed files with 14307 additions and 13518 deletions

View File

@@ -23,7 +23,7 @@ from fastdeploy.scheduler.data import ScheduledRequest, ScheduledResponse
from fastdeploy.utils import scheduler_logger
class LocalScheduler(object):
class LocalScheduler:
"""
A local in-memory task scheduler for request/response management.
@@ -142,7 +142,7 @@ class LocalScheduler(object):
expired_ids = []
for request_id in self.ids:
request = self.requests[request_id]
if (now - request.schedule_time < self.ttl):
if now - request.schedule_time < self.ttl:
break
expired_ids.append(request.request_id)
@@ -157,8 +157,7 @@ class LocalScheduler(object):
else:
self.ids_read_cursor -= len(expired_ids)
def put_requests(
self, requests: List[Request]) -> List[Tuple[str, Optional[str]]]:
def put_requests(self, requests: List[Request]) -> List[Tuple[str, Optional[str]]]:
"""
Add new requests to the scheduler queue.
@@ -171,8 +170,7 @@ class LocalScheduler(object):
"""
with self.mutex:
self._recycle()
if self.max_size > 0 and len(
self.requests) + len(requests) > self.max_size:
if self.max_size > 0 and len(self.requests) + len(requests) > self.max_size:
msg = f"Exceeding the max length of the local scheduler (max_size={self.max_size})"
return [(request.request_id, msg) for request in requests]
@@ -183,8 +181,7 @@ class LocalScheduler(object):
duplicated_ids.append(request.request_id)
else:
scheduled_request = ScheduledRequest(request)
self.requests[
scheduled_request.request_id] = scheduled_request
self.requests[scheduled_request.request_id] = scheduled_request
valid_ids.append(scheduled_request.request_id)
self.ids += valid_ids
@@ -192,13 +189,10 @@ class LocalScheduler(object):
scheduler_logger.info(f"Scheduler has enqueued some requests: {valid_ids}")
if len(duplicated_ids) > 0:
scheduler_logger.warning(
f"Scheduler has received some duplicated requests: {duplicated_ids}"
)
scheduler_logger.warning(f"Scheduler has received some duplicated requests: {duplicated_ids}")
results = [(request_id, None) for request_id in valid_ids]
results += [(request_id, "duplicated request_id")
for request_id in duplicated_ids]
results += [(request_id, "duplicated request_id") for request_id in duplicated_ids]
return results
def calc_required_blocks(self, token_num, block_size):
@@ -214,12 +208,14 @@ class LocalScheduler(object):
"""
return (token_num + block_size - 1) // block_size
def get_requests(self,
available_blocks,
block_size,
reserved_output_blocks,
max_num_batched_tokens,
batch=1) -> List[Request]:
def get_requests(
self,
available_blocks,
block_size,
reserved_output_blocks,
max_num_batched_tokens,
batch=1,
) -> List[Request]:
"""
Retrieve requests from the scheduler based on available resources.
@@ -237,13 +233,15 @@ class LocalScheduler(object):
scheduler_logger.debug(
f"Scheduler's resource are insufficient: available_blocks={available_blocks} "
f"reserved_output_blocks={reserved_output_blocks} batch={batch} "
f"max_num_batched_tokens={max_num_batched_tokens}")
f"max_num_batched_tokens={max_num_batched_tokens}"
)
return []
with self.requests_not_empty:
batch_ids = self.requests_not_empty.wait_for(
lambda: self.ids[self.ids_read_cursor:self.ids_read_cursor +
batch], self.wait_request_timeout)
lambda: self.ids[self.ids_read_cursor : self.ids_read_cursor + batch],
self.wait_request_timeout,
)
required_total_blocks = 0
current_prefill_tokens = 0
@@ -251,8 +249,7 @@ class LocalScheduler(object):
long_partial_requests, short_partial_requests = 0, 0
for request_id in batch_ids:
request = self.requests[request_id]
required_input_blocks = self.calc_required_blocks(
request.prompt_tokens_ids_len, block_size)
required_input_blocks = self.calc_required_blocks(request.prompt_tokens_ids_len, block_size)
current_prefill_tokens += request.prompt_tokens_ids_len
required_total_blocks += required_input_blocks + reserved_output_blocks
if required_total_blocks > available_blocks:
@@ -277,14 +274,10 @@ class LocalScheduler(object):
self.ids_read_cursor += len(requests)
if len(batch_ids) > 0 and len(requests) == 0:
scheduler_logger.debug(
f"Scheduler has put all just-pulled request into the queue: {len(batch_ids)}"
)
scheduler_logger.debug(f"Scheduler has put all just-pulled request into the queue: {len(batch_ids)}")
if len(requests) > 0:
scheduler_logger.info(
f"Scheduler has pulled some request: {[request.request_id for request in requests]}"
)
scheduler_logger.info(f"Scheduler has pulled some request: {[request.request_id for request in requests]}")
return requests
@@ -295,24 +288,16 @@ class LocalScheduler(object):
Args:
results: List of RequestOutput objects containing results
"""
responses: List[ScheduledResponse] = [
ScheduledResponse(result) for result in results
]
responses: List[ScheduledResponse] = [ScheduledResponse(result) for result in results]
finished_responses = [
response.request_id for response in responses if response.finished
]
finished_responses = [response.request_id for response in responses if response.finished]
if len(finished_responses) > 0:
scheduler_logger.info(
f"Scheduler has received some finished responses: {finished_responses}"
)
scheduler_logger.info(f"Scheduler has received some finished responses: {finished_responses}")
with self.mutex:
for response in responses:
if response.request_id not in self.requests:
scheduler_logger.warning(
f"Scheduler has received a expired response: {[response.request_id]}"
)
scheduler_logger.warning(f"Scheduler has received a expired response: {[response.request_id]}")
continue
if response.request_id not in self.responses:
@@ -351,8 +336,7 @@ class LocalScheduler(object):
return responses
with self.responses_not_empty:
responses = self.responses_not_empty.wait_for(
_get_results, self.wait_response_timeout)
responses = self.responses_not_empty.wait_for(_get_results, self.wait_response_timeout)
results = dict()
for request_id, resps in responses.items():
@@ -364,7 +348,5 @@ class LocalScheduler(object):
if finished:
self._recycle(request_id)
scheduler_logger.info(
f"Scheduler has pulled a finished response: {[request_id]}"
)
scheduler_logger.info(f"Scheduler has pulled a finished response: {[request_id]}")
return results