mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[Bug fix] Fix block num in scheduler v1 for release2.0.4 (#3314)
* fix bug for scheduler v0 * fix block num setting in scheduler v1 * fix block num setting in scheduler v1 * fix block num setting in scheduler v1 * fix block num setting in scheduler v1 * fix block num setting in scheduler v1
This commit is contained in:
@@ -208,11 +208,15 @@ class GPUModelRunner(ModelRunnerBase):
|
||||
request = req_dicts[i]
|
||||
idx = request.idx
|
||||
if request.task_type.value == RequestType.PREFILL.value: # prefill task
|
||||
logger.debug(f"Handle prefill request {request} at idx {idx}")
|
||||
prefill_start_index = request.prefill_start_index
|
||||
prefill_end_index = request.prefill_end_index
|
||||
length = prefill_end_index - prefill_start_index
|
||||
input_ids = request.prompt_token_ids + request.output_token_ids
|
||||
logger.debug(
|
||||
f"Handle prefill request {request} at idx {idx}, "
|
||||
f"{prefill_start_index=}, {prefill_end_index=}, "
|
||||
f"need_prefilled_token_num={len(input_ids)}"
|
||||
)
|
||||
self.share_inputs["input_ids"][idx : idx + 1, :length] = np.array(
|
||||
input_ids[prefill_start_index:prefill_end_index]
|
||||
)
|
||||
|
Reference in New Issue
Block a user