diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py index e62fb6e56..a96db10ad 100644 --- a/fastdeploy/worker/gpu_model_runner.py +++ b/fastdeploy/worker/gpu_model_runner.py @@ -166,6 +166,7 @@ class GPUModelRunner(ModelRunnerBase): request = req_dicts[i] idx = request.idx length = len(request.prompt_token_ids) + assert length > 0, "The prompt requested must not be empty." prefill_tokens = [] if (request.guided_json is not None