mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-07 09:31:35 +08:00
polish code with new pre-commit rule (#2923)
This commit is contained in:
@@ -23,7 +23,7 @@ from fastdeploy.scheduler.data import ScheduledRequest, ScheduledResponse
|
||||
from fastdeploy.utils import scheduler_logger
|
||||
|
||||
|
||||
class LocalScheduler(object):
|
||||
class LocalScheduler:
|
||||
"""
|
||||
A local in-memory task scheduler for request/response management.
|
||||
|
||||
@@ -142,7 +142,7 @@ class LocalScheduler(object):
|
||||
expired_ids = []
|
||||
for request_id in self.ids:
|
||||
request = self.requests[request_id]
|
||||
if (now - request.schedule_time < self.ttl):
|
||||
if now - request.schedule_time < self.ttl:
|
||||
break
|
||||
expired_ids.append(request.request_id)
|
||||
|
||||
@@ -157,8 +157,7 @@ class LocalScheduler(object):
|
||||
else:
|
||||
self.ids_read_cursor -= len(expired_ids)
|
||||
|
||||
def put_requests(
|
||||
self, requests: List[Request]) -> List[Tuple[str, Optional[str]]]:
|
||||
def put_requests(self, requests: List[Request]) -> List[Tuple[str, Optional[str]]]:
|
||||
"""
|
||||
Add new requests to the scheduler queue.
|
||||
|
||||
@@ -171,8 +170,7 @@ class LocalScheduler(object):
|
||||
"""
|
||||
with self.mutex:
|
||||
self._recycle()
|
||||
if self.max_size > 0 and len(
|
||||
self.requests) + len(requests) > self.max_size:
|
||||
if self.max_size > 0 and len(self.requests) + len(requests) > self.max_size:
|
||||
msg = f"Exceeding the max length of the local scheduler (max_size={self.max_size})"
|
||||
return [(request.request_id, msg) for request in requests]
|
||||
|
||||
@@ -183,8 +181,7 @@ class LocalScheduler(object):
|
||||
duplicated_ids.append(request.request_id)
|
||||
else:
|
||||
scheduled_request = ScheduledRequest(request)
|
||||
self.requests[
|
||||
scheduled_request.request_id] = scheduled_request
|
||||
self.requests[scheduled_request.request_id] = scheduled_request
|
||||
valid_ids.append(scheduled_request.request_id)
|
||||
|
||||
self.ids += valid_ids
|
||||
@@ -192,13 +189,10 @@ class LocalScheduler(object):
|
||||
scheduler_logger.info(f"Scheduler has enqueued some requests: {valid_ids}")
|
||||
|
||||
if len(duplicated_ids) > 0:
|
||||
scheduler_logger.warning(
|
||||
f"Scheduler has received some duplicated requests: {duplicated_ids}"
|
||||
)
|
||||
scheduler_logger.warning(f"Scheduler has received some duplicated requests: {duplicated_ids}")
|
||||
|
||||
results = [(request_id, None) for request_id in valid_ids]
|
||||
results += [(request_id, "duplicated request_id")
|
||||
for request_id in duplicated_ids]
|
||||
results += [(request_id, "duplicated request_id") for request_id in duplicated_ids]
|
||||
return results
|
||||
|
||||
def calc_required_blocks(self, token_num, block_size):
|
||||
@@ -214,12 +208,14 @@ class LocalScheduler(object):
|
||||
"""
|
||||
return (token_num + block_size - 1) // block_size
|
||||
|
||||
def get_requests(self,
|
||||
available_blocks,
|
||||
block_size,
|
||||
reserved_output_blocks,
|
||||
max_num_batched_tokens,
|
||||
batch=1) -> List[Request]:
|
||||
def get_requests(
|
||||
self,
|
||||
available_blocks,
|
||||
block_size,
|
||||
reserved_output_blocks,
|
||||
max_num_batched_tokens,
|
||||
batch=1,
|
||||
) -> List[Request]:
|
||||
"""
|
||||
Retrieve requests from the scheduler based on available resources.
|
||||
|
||||
@@ -237,13 +233,15 @@ class LocalScheduler(object):
|
||||
scheduler_logger.debug(
|
||||
f"Scheduler's resource are insufficient: available_blocks={available_blocks} "
|
||||
f"reserved_output_blocks={reserved_output_blocks} batch={batch} "
|
||||
f"max_num_batched_tokens={max_num_batched_tokens}")
|
||||
f"max_num_batched_tokens={max_num_batched_tokens}"
|
||||
)
|
||||
return []
|
||||
|
||||
with self.requests_not_empty:
|
||||
batch_ids = self.requests_not_empty.wait_for(
|
||||
lambda: self.ids[self.ids_read_cursor:self.ids_read_cursor +
|
||||
batch], self.wait_request_timeout)
|
||||
lambda: self.ids[self.ids_read_cursor : self.ids_read_cursor + batch],
|
||||
self.wait_request_timeout,
|
||||
)
|
||||
|
||||
required_total_blocks = 0
|
||||
current_prefill_tokens = 0
|
||||
@@ -251,8 +249,7 @@ class LocalScheduler(object):
|
||||
long_partial_requests, short_partial_requests = 0, 0
|
||||
for request_id in batch_ids:
|
||||
request = self.requests[request_id]
|
||||
required_input_blocks = self.calc_required_blocks(
|
||||
request.prompt_tokens_ids_len, block_size)
|
||||
required_input_blocks = self.calc_required_blocks(request.prompt_tokens_ids_len, block_size)
|
||||
current_prefill_tokens += request.prompt_tokens_ids_len
|
||||
required_total_blocks += required_input_blocks + reserved_output_blocks
|
||||
if required_total_blocks > available_blocks:
|
||||
@@ -277,14 +274,10 @@ class LocalScheduler(object):
|
||||
self.ids_read_cursor += len(requests)
|
||||
|
||||
if len(batch_ids) > 0 and len(requests) == 0:
|
||||
scheduler_logger.debug(
|
||||
f"Scheduler has put all just-pulled request into the queue: {len(batch_ids)}"
|
||||
)
|
||||
scheduler_logger.debug(f"Scheduler has put all just-pulled request into the queue: {len(batch_ids)}")
|
||||
|
||||
if len(requests) > 0:
|
||||
scheduler_logger.info(
|
||||
f"Scheduler has pulled some request: {[request.request_id for request in requests]}"
|
||||
)
|
||||
scheduler_logger.info(f"Scheduler has pulled some request: {[request.request_id for request in requests]}")
|
||||
|
||||
return requests
|
||||
|
||||
@@ -295,24 +288,16 @@ class LocalScheduler(object):
|
||||
Args:
|
||||
results: List of RequestOutput objects containing results
|
||||
"""
|
||||
responses: List[ScheduledResponse] = [
|
||||
ScheduledResponse(result) for result in results
|
||||
]
|
||||
responses: List[ScheduledResponse] = [ScheduledResponse(result) for result in results]
|
||||
|
||||
finished_responses = [
|
||||
response.request_id for response in responses if response.finished
|
||||
]
|
||||
finished_responses = [response.request_id for response in responses if response.finished]
|
||||
if len(finished_responses) > 0:
|
||||
scheduler_logger.info(
|
||||
f"Scheduler has received some finished responses: {finished_responses}"
|
||||
)
|
||||
scheduler_logger.info(f"Scheduler has received some finished responses: {finished_responses}")
|
||||
|
||||
with self.mutex:
|
||||
for response in responses:
|
||||
if response.request_id not in self.requests:
|
||||
scheduler_logger.warning(
|
||||
f"Scheduler has received a expired response: {[response.request_id]}"
|
||||
)
|
||||
scheduler_logger.warning(f"Scheduler has received a expired response: {[response.request_id]}")
|
||||
continue
|
||||
|
||||
if response.request_id not in self.responses:
|
||||
@@ -351,8 +336,7 @@ class LocalScheduler(object):
|
||||
return responses
|
||||
|
||||
with self.responses_not_empty:
|
||||
responses = self.responses_not_empty.wait_for(
|
||||
_get_results, self.wait_response_timeout)
|
||||
responses = self.responses_not_empty.wait_for(_get_results, self.wait_response_timeout)
|
||||
|
||||
results = dict()
|
||||
for request_id, resps in responses.items():
|
||||
@@ -364,7 +348,5 @@ class LocalScheduler(object):
|
||||
|
||||
if finished:
|
||||
self._recycle(request_id)
|
||||
scheduler_logger.info(
|
||||
f"Scheduler has pulled a finished response: {[request_id]}"
|
||||
)
|
||||
scheduler_logger.info(f"Scheduler has pulled a finished response: {[request_id]}")
|
||||
return results
|
||||
|
Reference in New Issue
Block a user