From a0c03510c0d970a62e68de3771c5184dbe20468a Mon Sep 17 00:00:00 2001
From: chenjian <1435317881@qq.com>
Date: Thu, 4 Sep 2025 11:02:37 +0800
Subject: [PATCH] [Bug fix] Fix prompt token ids dtype in v1 (#3861)

---
 fastdeploy/worker/gpu_model_runner.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
index 45bb6261a..b79620821 100644
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -270,10 +270,11 @@ class GPUModelRunner(ModelRunnerBase):
                     position_ids, request.get("max_tokens", 2048)
                 )
 
-                if len(request.output_token_ids) == 0:
-                    input_ids = request.prompt_token_ids
+                if isinstance(request.prompt_token_ids, np.ndarray):
+                    prompt_token_ids = request.prompt_token_ids.tolist()
                 else:
-                    input_ids = request.prompt_token_ids + request.output_token_ids
+                    prompt_token_ids = request.prompt_token_ids
+                input_ids = prompt_token_ids + request.output_token_ids
                 logger.debug(
                     f"Handle prefill request {request} at idx {idx}, "
                     f"{prefill_start_index=}, {prefill_end_index=}, "