[Bug fix] Fix prompt token ids dtype in v1 (#3861)

This commit is contained in:
chenjian
2025-09-04 11:02:37 +08:00
committed by GitHub
parent fb1e0d6a87
commit a0c03510c0

View File

@@ -270,10 +270,11 @@ class GPUModelRunner(ModelRunnerBase):
 position_ids, request.get("max_tokens", 2048)
 )
-if len(request.output_token_ids) == 0:
-input_ids = request.prompt_token_ids
+if isinstance(request.prompt_token_ids, np.ndarray):
+prompt_token_ids = request.prompt_token_ids.tolist()
 else:
-input_ids = request.prompt_token_ids + request.output_token_ids
+prompt_token_ids = request.prompt_token_ids
+input_ids = prompt_token_ids + request.output_token_ids
 logger.debug(
 f"Handle prefill request {request} at idx {idx}, "
 f"{prefill_start_index=}, {prefill_end_index=}, "