Fix fetch request exceed max block num (#4257)

* fix fetch

* fix fetch
This commit is contained in:
chenjian
2025-09-25 00:57:45 +08:00
committed by GitHub
parent 4aa057f28d
commit db653644ad

View File

@@ -159,7 +159,6 @@ class DPLocalScheduler(LocalScheduler):
required_total_blocks += required_input_blocks + reserved_output_blocks
if required_total_blocks > available_blocks:
break
requests.append(request.raw)
self.ids_read_cursor += 1
start_batch_time = time.time()
@@ -174,6 +173,7 @@ class DPLocalScheduler(LocalScheduler):
):
break
else:
required_total_blocks = 0
batch_ids = self.requests_not_empty.wait_for(
lambda: self.ids[self.ids_read_cursor : self.ids_read_cursor + batch],
0.005,
@@ -181,6 +181,10 @@ class DPLocalScheduler(LocalScheduler):
if batch_ids:
for request_id in batch_ids:
request = self.requests[request_id]
required_input_blocks = self.calc_required_blocks(request.prompt_tokens_ids_len, block_size)
required_total_blocks += required_input_blocks + reserved_output_blocks
if required_total_blocks > available_blocks:
break
requests.append(request.raw)
self.ids_read_cursor += 1