From b5e20e30154e8947581db9f18f8e3a23e14bb774 Mon Sep 17 00:00:00 2001
From: chenjian <1435317881@qq.com>
Date: Mon, 8 Sep 2025 11:34:13 +0800
Subject: [PATCH] [Bug fix] Fix prompt token ids dtype in v1 (#3860)

---
 fastdeploy/worker/gpu_model_runner.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py
index 2b85123ac..50044b4e8 100644
--- a/fastdeploy/worker/gpu_model_runner.py
+++ b/fastdeploy/worker/gpu_model_runner.py
@@ -290,10 +290,11 @@ class GPUModelRunner(ModelRunnerBase):
                         position_ids, request.get("max_tokens", 2048)
                     )
 
-                if len(request.output_token_ids) == 0:
-                    input_ids = request.prompt_token_ids
+                if isinstance(request.prompt_token_ids, np.ndarray):
+                    prompt_token_ids = request.prompt_token_ids.tolist()
                 else:
-                    input_ids = request.prompt_token_ids + request.output_token_ids
+                    prompt_token_ids = request.prompt_token_ids
+                input_ids = prompt_token_ids + request.output_token_ids
                 logger.debug(
                     f"Handle prefill request {request} at idx {idx}, "
                     f"{prefill_start_index=}, {prefill_end_index=}, "
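
Note (not part of the patch): a minimal standalone sketch of the pitfall this change addresses, assuming prompt_token_ids can arrive as a numpy.ndarray while output_token_ids is a plain Python list; the token values below are made up for illustration. Applying "+" to an ndarray and a list broadcasts element-wise instead of concatenating, which is why the patch normalizes the prompt ids to a list before building input_ids.

import numpy as np

prompt_token_ids = np.array([101, 102, 103])  # prompt ids arriving as an ndarray (assumed example values)
output_token_ids = [7, 8, 9]                  # already-generated ids kept as a Python list

# ndarray + list performs element-wise addition, not concatenation:
print(prompt_token_ids + output_token_ids)    # [108 110 112]

# Converting to a plain list, as the patch does, restores list concatenation:
if isinstance(prompt_token_ids, np.ndarray):
    prompt_token_ids = prompt_token_ids.tolist()
print(prompt_token_ids + output_token_ids)    # [101, 102, 103, 7, 8, 9]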