diff --git a/fastdeploy/config.py b/fastdeploy/config.py index de6d50722..e575c8936 100644 --- a/fastdeploy/config.py +++ b/fastdeploy/config.py @@ -88,7 +88,7 @@ class ModelConfig: self.stop_seqs_max_len = 8 # NOTE(gongshaotain): form _load_model_init_val() - self.top_p = 0.0 + self.top_p = 1.0 self.temperature = 1.0 self.rope_theta = 10000.0 self.penalty_score = 1.0 diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py index 20d572a34..8a2f09e2e 100644 --- a/fastdeploy/worker/gpu_model_runner.py +++ b/fastdeploy/worker/gpu_model_runner.py @@ -408,7 +408,6 @@ class GPUModelRunner(ModelRunnerBase): self.share_inputs["max_dec_len"][idx:idx + 1] = max_dec_len self.share_inputs["min_dec_len"][idx:idx + 1] = max_dec_len self.share_inputs["stop_flags"][idx:idx + 1] = False - self.share_inputs["top_p"][idx:idx + 1] = 0.0 self.share_inputs["temperature"][idx:idx + 1] = 1 self.share_inputs["first_token_ids"][