""" # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License" # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ from copy import deepcopy from typing import TYPE_CHECKING, Annotated, Any, Optional import msgspec from fastdeploy.engine.sampling_params import RequestOutputKind from fastdeploy.engine.tasks import PoolingTask if TYPE_CHECKING: from fastdeploy.config import ModelConfig class PoolingParams: """API parameters for pooling models. Attributes: normalize: Whether to normalize the embeddings outputs. dimensions: Reduce the dimensions of embeddings if model support matryoshka representation. activation: Whether to apply activation function to the classification outputs. softmax: Whether to apply softmax to the reward outputs. step_tag_id: Step tag ID for process reward models to identify specific steps in multi-step reasoning tasks. returned_token_ids: List of token IDs to return rewards for, used for fine-grained reward calculation. task: Internal use only. Specifies the pooling task type ("embed" for embeddings, "encode" for reward models). requires_token_ids: Internal use only. Whether token ID information is required for processing. extra_kwargs: Internal use only. Dictionary for storing additional custom parameters for extended functionality. output_kind: Output type specification, fixed to FINAL_ONLY (only final outputs are returned). """ truncate_prompt_tokens: Optional[Annotated[int, msgspec.Meta(ge=-1)]] = None """If set to -1, will use the truncation size supported by the model. If set to an integer k, will use only the last k tokens from the prompt (i.e., left truncation). If set to `None`, truncation is disabled.""" # for embeddings models dimensions: Optional[int] = None normalize: Optional[bool] = None # for reward models softmax: Optional[bool] = None step_tag_id: Optional[int] = None returned_token_ids: Optional[list[int]] = None task: Optional[PoolingTask] = None """Internal use only.""" requires_token_ids: bool = False """Internal use only.""" extra_kwargs: Optional[dict[str, Any]] = None """Internal use only.""" output_kind: RequestOutputKind = RequestOutputKind.FINAL_ONLY @property def _all_parameters(self) -> list[str]: return ["dimensions", "normalize", "softmax", "step_tag_id", "returned_token_ids"] @property def valid_parameters(self): return { "embed": ["dimensions", "normalize"], "encode": ["softmax", "step_tag_id", "returned_token_ids"], } def clone(self) -> "PoolingParams": """Returns a deep copy of the PoolingParams instance.""" return deepcopy(self) def verify(self, task: PoolingTask, model_config: Optional["ModelConfig"] = None) -> None: if self.task is None: self.task = task elif self.task != task: msg = f"You cannot overwrite {self.task=!r} with {task=!r}!" raise ValueError(msg) # NOTE: Task validation needs to done against the model instance, # which is not available in model config. So, it's not included # in this method self._merge_default_parameters(model_config) self._set_default_parameters(model_config) self._verify_valid_parameters() def _merge_default_parameters(self, model_config: Optional["ModelConfig"] = None) -> None: if model_config is None: return pooler_config = model_config.pooler_config if pooler_config is None: return assert self.task is not None, "task must be set" valid_parameters = self.valid_parameters[self.task] for k in valid_parameters: if getattr(pooler_config, k, None) is None: continue if getattr(self, k, None) is None: setattr(self, k, getattr(pooler_config, k)) def _set_default_parameters(self, model_config: Optional["ModelConfig"]): if self.task == "embed": if self.normalize is None: self.normalize = True elif self.task == "encode": if self.softmax is None: self.softmax = True else: raise ValueError(f"Unknown pooling task: {self.task}") def _verify_valid_parameters(self): assert self.task is not None, "task must be set" valid_parameters = self.valid_parameters[self.task] invalid_parameters = [] for k in self._all_parameters: if k in valid_parameters: continue if getattr(self, k, None) is not None: invalid_parameters.append(k) if invalid_parameters: raise ValueError( f"Task {self.task} only supports {valid_parameters} " f"parameters, does not support " f"{invalid_parameters} parameters" ) def __repr__(self) -> str: return ( f"PoolingParams(" f"task={self.task}, " f"normalize={self.normalize}, " f"dimensions={self.dimensions}, " f"softmax={self.softmax}, " f"step_tag_id={self.step_tag_id}, " f"returned_token_ids={self.returned_token_ids}, " f"requires_token_ids={self.requires_token_ids}, " f"extra_kwargs={self.extra_kwargs})" ) def __post_init__(self) -> None: assert self.output_kind == RequestOutputKind.FINAL_ONLY, "For pooling output_kind has to be FINAL_ONLY"