From f6ad26fc08fda686df307a997d1a8f72c95535da Mon Sep 17 00:00:00 2001 From: Sunny-bot1 <68891411+Sunny-bot1@users.noreply.github.com> Date: Fri, 11 Jul 2025 17:10:21 +0800 Subject: [PATCH] fix topp default value (#2814) --- fastdeploy/engine/sampling_params.py | 4 ++-- fastdeploy/worker/gcu_model_runner.py | 4 ++-- fastdeploy/worker/gpu_model_runner.py | 2 +- fastdeploy/worker/iluvatar_model_runner.py | 4 ++-- fastdeploy/worker/xpu_model_runner.py | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/fastdeploy/engine/sampling_params.py b/fastdeploy/engine/sampling_params.py index a7912407a..d81f9f999 100644 --- a/fastdeploy/engine/sampling_params.py +++ b/fastdeploy/engine/sampling_params.py @@ -82,7 +82,7 @@ class SamplingParams: frequency_penalty: float = None repetition_penalty: float = None temperature: float = None - top_p: float = 1.0 + top_p: float = None top_k: int = 0 seed: Optional[int] = None stop: Optional[Union[str, List[str]]] = None @@ -132,7 +132,7 @@ class SamplingParams: repetition_penalty=repetition_penalty if repetition_penalty is not None else 1.0, temperature=temperature if temperature is not None else 1.0, - top_p=top_p if top_p is not None else 1.0, + top_p=top_p, top_k=top_k if top_k is not None else 0, seed=seed, stop=stop, diff --git a/fastdeploy/worker/gcu_model_runner.py b/fastdeploy/worker/gcu_model_runner.py index 5ad0cec76..eee5dbf8e 100644 --- a/fastdeploy/worker/gcu_model_runner.py +++ b/fastdeploy/worker/gcu_model_runner.py @@ -24,6 +24,7 @@ from paddleformers.utils.log import logger from fastdeploy.config import FDConfig from fastdeploy.engine.request import Request +from fastdeploy.model_executor.forward_meta import ForwardMeta from fastdeploy.model_executor.guided_decoding import get_guided_backend from fastdeploy.model_executor.guided_decoding.base_guided_decoding import \ LogitsProcessorBase @@ -39,7 +40,6 @@ from fastdeploy.model_executor.ops.gcu import set_value_by_flags_and_idx from fastdeploy.model_executor.pre_and_post_process import (post_process, pre_process, rebuild_padding) -from fastdeploy.model_executor.forward_meta import ForwardMeta from fastdeploy.worker.model_runner_base import ModelRunnerBase from fastdeploy.worker.output import ModelOutputData, ModelRunnerOutput @@ -235,7 +235,7 @@ class GCUModelRunner(ModelRunnerBase): request.eos_token_ids.append(request.eos_token_ids[0]) self.share_inputs["eos_token_id"][:] = np.array( request.eos_token_ids, dtype="int64").reshape(-1, 1) - self.share_inputs["top_p"][idx:idx + 1] = request.get("top_p", 1.0) + self.share_inputs["top_p"][idx:idx + 1] = request.get("top_p", 0.7) self.share_inputs["top_k"][idx:idx + 1] = request.get("top_k", 0) self.share_inputs["temperature"][idx:idx + 1] = request.get( "temperature", 0.95) diff --git a/fastdeploy/worker/gpu_model_runner.py b/fastdeploy/worker/gpu_model_runner.py index 3a61cbde1..f22d7cc6e 100644 --- a/fastdeploy/worker/gpu_model_runner.py +++ b/fastdeploy/worker/gpu_model_runner.py @@ -245,7 +245,7 @@ class GPUModelRunner(ModelRunnerBase): request.eos_token_ids.append(request.eos_token_ids[0]) self.share_inputs["eos_token_id"][:] = np.array( request.eos_token_ids, dtype="int64").reshape(-1, 1) - self.share_inputs["top_p"][idx:idx + 1] = request.get("top_p", 1.0) + self.share_inputs["top_p"][idx:idx + 1] = request.get("top_p", 0.7) self.share_inputs["top_k"][idx:idx + 1] = request.get("top_k", 0) self.share_inputs["temperature"][idx:idx + 1] = request.get( "temperature", 0.95) diff --git a/fastdeploy/worker/iluvatar_model_runner.py b/fastdeploy/worker/iluvatar_model_runner.py index 069f7b37c..8d9477b78 100644 --- a/fastdeploy/worker/iluvatar_model_runner.py +++ b/fastdeploy/worker/iluvatar_model_runner.py @@ -24,6 +24,7 @@ from paddleformers.utils.log import logger from fastdeploy.config import FDConfig from fastdeploy.engine.request import Request +from fastdeploy.model_executor.forward_meta import ForwardMeta from fastdeploy.model_executor.layers.attention import get_attention_backend from fastdeploy.model_executor.layers.attention.base_attention_backend import \ AttentionBackend @@ -37,7 +38,6 @@ from fastdeploy.model_executor.pre_and_post_process import (post_process, pre_process, rebuild_padding, step_cuda) -from fastdeploy.model_executor.forward_meta import ForwardMeta from fastdeploy.worker.model_runner_base import ModelRunnerBase from fastdeploy.worker.output import ModelOutputData, ModelRunnerOutput @@ -224,7 +224,7 @@ class IluvatarModelRunner(ModelRunnerBase): request.eos_token_ids.append(request.eos_token_ids[0]) self.share_inputs["eos_token_id"][:] = np.array( request.eos_token_ids, dtype="int64").reshape(-1, 1) - self.share_inputs["top_p"][idx:idx + 1] = request.get("top_p", 1.0) + self.share_inputs["top_p"][idx:idx + 1] = request.get("top_p", 0.7) self.share_inputs["top_k"][idx:idx + 1] = request.get("top_k", 0) self.share_inputs["temperature"][idx:idx + 1] = request.get( "temperature", 0.95) diff --git a/fastdeploy/worker/xpu_model_runner.py b/fastdeploy/worker/xpu_model_runner.py index 7fb585f8a..909933976 100644 --- a/fastdeploy/worker/xpu_model_runner.py +++ b/fastdeploy/worker/xpu_model_runner.py @@ -23,6 +23,7 @@ import paddle.nn as nn from fastdeploy.config import FDConfig from fastdeploy.engine.request import Request +from fastdeploy.model_executor.forward_meta import ForwardMeta, XPUForwardMeta from fastdeploy.model_executor.layers.attention import get_attention_backend from fastdeploy.model_executor.layers.attention.base_attention_backend import \ AttentionBackend @@ -31,7 +32,6 @@ from fastdeploy.model_executor.layers.sample.meta_data import SamplingMetadata from fastdeploy.model_executor.layers.sample.sampler import Sampler from fastdeploy.model_executor.model_loader import get_model_from_loader from fastdeploy.utils import get_logger -from fastdeploy.model_executor.forward_meta import ForwardMeta, XPUForwardMeta from fastdeploy.worker.model_runner_base import ModelRunnerBase from fastdeploy.worker.output import ModelOutputData, ModelRunnerOutput @@ -295,7 +295,7 @@ class XPUModelRunner(ModelRunnerBase): self.share_inputs["eos_token_id"][:] = np.array( request.eos_token_ids, dtype="int64").reshape(-1, 1) self.share_inputs["pre_ids"][idx:idx + 1] = -1 - self.share_inputs["top_p"][idx:idx + 1] = request.get("top_p", 1.0) + self.share_inputs["top_p"][idx:idx + 1] = request.get("top_p", 0.7) self.share_inputs["top_k"][idx:idx + 1] = request.get("top_k", 0) self.share_inputs["temperature"][idx:idx + 1] = request.get( "temperature", 0.95)