[Bug fix] Fix prompt token ids dtype in v1 (#3860)

This commit is contained in:
chenjian
2025-09-08 11:34:13 +08:00
committed by GitHub
parent 7833f2f6cb
commit b5e20e3015

View File

@@ -290,10 +290,11 @@ class GPUModelRunner(ModelRunnerBase):
     position_ids, request.get("max_tokens", 2048)
 )
-if len(request.output_token_ids) == 0:
-    input_ids = request.prompt_token_ids
+if isinstance(request.prompt_token_ids, np.ndarray):
+    prompt_token_ids = request.prompt_token_ids.tolist()
 else:
-    input_ids = request.prompt_token_ids + request.output_token_ids
+    prompt_token_ids = request.prompt_token_ids
+input_ids = prompt_token_ids + request.output_token_ids
 logger.debug(
     f"Handle prefill request {request} at idx {idx}, "
     f"{prefill_start_index=}, {prefill_end_index=}, "