mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-09-26 20:41:53 +08:00
@@ -159,7 +159,6 @@ class DPLocalScheduler(LocalScheduler):
|
||||
required_total_blocks += required_input_blocks + reserved_output_blocks
|
||||
if required_total_blocks > available_blocks:
|
||||
break
|
||||
|
||||
requests.append(request.raw)
|
||||
self.ids_read_cursor += 1
|
||||
start_batch_time = time.time()
|
||||
@@ -174,6 +173,7 @@ class DPLocalScheduler(LocalScheduler):
|
||||
):
|
||||
break
|
||||
else:
|
||||
required_total_blocks = 0
|
||||
batch_ids = self.requests_not_empty.wait_for(
|
||||
lambda: self.ids[self.ids_read_cursor : self.ids_read_cursor + batch],
|
||||
0.005,
|
||||
@@ -181,6 +181,10 @@ class DPLocalScheduler(LocalScheduler):
|
||||
if batch_ids:
|
||||
for request_id in batch_ids:
|
||||
request = self.requests[request_id]
|
||||
required_input_blocks = self.calc_required_blocks(request.prompt_tokens_ids_len, block_size)
|
||||
required_total_blocks += required_input_blocks + reserved_output_blocks
|
||||
if required_total_blocks > available_blocks:
|
||||
break
|
||||
requests.append(request.raw)
|
||||
self.ids_read_cursor += 1
|
||||
|
||||
|
Reference in New Issue
Block a user