fix scheduler bug in release2.1 (#3295)

This commit is contained in:
chenjian
2025-08-10 13:55:22 +08:00
committed by GitHub
parent 86ff68be4b
commit c000cff744
3 changed files with 32 additions and 8 deletions

View File

@@ -206,11 +206,11 @@ class GPUModelRunner(ModelRunnerBase):
req_len = len(req_dicts)
has_prefill_task = False
has_decode_task = False
for i in range(req_len):
request = req_dicts[i]
idx = request.idx
if request.task_type.value == RequestType.PREFILL.value: # prefill task
logger.debug(f"Handle prefill request {request} at idx {idx}")
prefill_start_index = request.prefill_start_index
prefill_end_index = request.prefill_end_index
length = prefill_end_index - prefill_start_index
@@ -256,6 +256,7 @@ class GPUModelRunner(ModelRunnerBase):
)
input_ids = request.prompt_token_ids + request.output_token_ids
logger.debug(f"Handle prefill request {request} at idx {idx} prefill_start_index {prefill_start_index} prefill_end_index {prefill_end_index} need_prefilled_token_num {len(input_ids)}")
self.share_inputs["input_ids"][idx : idx + 1, :length] = np.array(
input_ids[prefill_start_index:prefill_end_index]
)
@@ -284,6 +285,8 @@ class GPUModelRunner(ModelRunnerBase):
self.share_inputs["block_tables"][idx : idx + 1, :encoder_block_num] = np.array(
request.block_tables, dtype="int32"
)
if self.share_inputs["is_block_step"][idx]: # has tasks to continue to decode
has_decode_task = True
continue
else: # preempted task
logger.debug(f"Handle preempted request {request} at idx {idx}")
@@ -329,7 +332,7 @@ class GPUModelRunner(ModelRunnerBase):
else:
self.share_inputs["stop_seqs_len"][idx : idx + 1, :] = 0
if has_prefill_task:
if has_prefill_task or has_decode_task:
self.share_inputs["not_need_stop"][0] = True
self.share_inputs["seq_lens_this_time"] = self.seq_lens_this_time_buffer[:num_running_requests]