Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Bug fix] Fix prompt token ids dtype in v1 (#3860)
@@ -290,10 +290,11 @@ class GPUModelRunner(ModelRunnerBase):
                 position_ids, request.get("max_tokens", 2048)
             )

-            if len(request.output_token_ids) == 0:
-                input_ids = request.prompt_token_ids
+            if isinstance(request.prompt_token_ids, np.ndarray):
+                prompt_token_ids = request.prompt_token_ids.tolist()
             else:
-                input_ids = request.prompt_token_ids + request.output_token_ids
+                prompt_token_ids = request.prompt_token_ids
+            input_ids = prompt_token_ids + request.output_token_ids
             logger.debug(
                 f"Handle prefill request {request} at idx {idx}, "
                 f"{prefill_start_index=}, {prefill_end_index=}, "
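Why the conversion matters: when request.prompt_token_ids arrives as a NumPy array, the "+" operator performs elementwise arithmetic (or raises a shape error) rather than list concatenation, and the resulting ids keep NumPy integer dtypes. The sketch below is a minimal, self-contained illustration of that behaviour and of the normalize-then-concatenate pattern used in the fix; the token-id values are made up for illustration.

import numpy as np

# Hypothetical token ids, for illustration only.
prompt_token_ids = np.array([101, 2023, 2003])  # ndarray with NumPy integer dtype
output_token_ids = [1037, 3231]                 # plain Python list

# Naive "+" would be NumPy broadcasting, not concatenation:
#   np.array([101, 2023, 2003]) + [1037, 3231] -> ValueError (shape mismatch),
#   and with equal lengths it would silently add elementwise instead.

# Pattern applied in the fix: normalize to a Python list, then concatenate.
if isinstance(prompt_token_ids, np.ndarray):
    prompt_token_ids = prompt_token_ids.tolist()
input_ids = prompt_token_ids + output_token_ids
print(input_ids)  # [101, 2023, 2003, 1037, 3231] -- plain Python ints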