| """
 | |
| # Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
 | |
| #
 | |
| # Licensed under the Apache License, Version 2.0 (the "License"
 | |
| # you may not use this file except in compliance with the License.
 | |
| # You may obtain a copy of the License at
 | |
| #
 | |
| #     http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| """
 | |
| 
 | |
| from __future__ import annotations
 | |
| 
 | |
| import json
 | |
| import time
 | |
| from typing import Any, Dict, List, Literal, Optional, Union
 | |
| 
 | |
| from pydantic import BaseModel, Field, model_validator
 | |
| 
 | |
| # from openai.types.chat import ChatCompletionMessageParam
 | |
| # from fastdeploy.entrypoints.chat_utils import ChatCompletionMessageParam
 | |
| 
 | |
| 
 | |
class ErrorResponse(BaseModel):
    """
    Error response from OpenAI API.
    """

    object: str = "error"
    message: str
    code: int


class PromptTokenUsageInfo(BaseModel):
    """
    Prompt-related token usage info.
    """

    cached_tokens: Optional[int] = None


class UsageInfo(BaseModel):
    """
    Usage info for a single request.
    """

    prompt_tokens: int = 0
    total_tokens: int = 0
    completion_tokens: Optional[int] = 0
    prompt_tokens_details: Optional[PromptTokenUsageInfo] = None


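# Illustrative accounting (hypothetical values, not produced by this module): by
# OpenAI convention total_tokens = prompt_tokens + completion_tokens, and
# cached_tokens reports how much of the prompt was served from a prefix cache.
#
#     usage = UsageInfo(
#         prompt_tokens=12,
#         completion_tokens=8,
#         total_tokens=20,
#         prompt_tokens_details=PromptTokenUsageInfo(cached_tokens=4),
#     )

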
class FunctionCall(BaseModel):
    """
    Function call.
    """

    name: str
    arguments: str


class ToolCall(BaseModel):
    """
    Tool call.
    """

    id: Optional[str] = None
    type: Literal["function"] = "function"
    function: FunctionCall
    index: int


class DeltaFunctionCall(BaseModel):
    """
    Delta function call.
    """

    name: Optional[str] = None
    arguments: Optional[str] = None


# a tool call delta where everything is optional
class DeltaToolCall(BaseModel):
    """
    Delta tool call.
    """

    id: Optional[str] = None
    type: Optional[Literal["function"]] = None
    index: int
    function: Optional[DeltaFunctionCall] = None


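# Illustrative client-side sketch (values hypothetical, not part of this module):
# streamed tool calls arrive as DeltaToolCall fragments grouped by `index`, and a
# consumer concatenates the `function.arguments` pieces to recover one ToolCall.
#
#     fragments = [
#         DeltaToolCall(
#             index=0,
#             id="call_0",
#             type="function",
#             function=DeltaFunctionCall(name="get_weather", arguments='{"city": "Bo'),
#         ),
#         DeltaToolCall(index=0, function=DeltaFunctionCall(arguments='ston"}')),
#     ]
#     arguments = "".join(f.function.arguments or "" for f in fragments if f.function)
#     call = ToolCall(id="call_0", index=0, function=FunctionCall(name="get_weather", arguments=arguments))

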
class FunctionDefinition(BaseModel):
    """
    Function definition.
    """

    name: str
    description: Optional[str] = None
    parameters: Optional[dict[str, Any]] = None


class ChatCompletionToolsParam(BaseModel):
    """
    Chat completion tools parameter.
    """

    type: Literal["function"] = "function"
    function: FunctionDefinition


class ChatMessage(BaseModel):
    """
    Chat message.
    """

    role: str
    content: str
    reasoning_content: Optional[str] = None
    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None
    prompt_token_ids: Optional[List[int]] = None
    completion_token_ids: Optional[List[int]] = None


class ChatCompletionResponseChoice(BaseModel):
    """
    Chat completion response choice.
    """

    index: int
    message: ChatMessage
    logprobs: Optional[LogProbs] = None
    finish_reason: Optional[Literal["stop", "length", "tool_calls", "recover_stop"]]


class ChatCompletionResponse(BaseModel):
    """
    Chat completion response.
    """

    id: str
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseChoice]
    usage: UsageInfo


class LogProbEntry(BaseModel):
    """
    Log probability entry.
    """

    token: str
    logprob: float
    bytes: Optional[List[int]] = None
    top_logprobs: Optional[List[LogProbEntry]] = None


class LogProbs(BaseModel):
    """
    LogProbs.
    """

    content: Optional[List[LogProbEntry]] = None
    refusal: Optional[str] = None


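# Illustrative construction (hypothetical values): the chat-style logprobs format
# nests the per-position alternatives inside each sampled token's entry.
#
#     entry = LogProbEntry(
#         token="Hello",
#         logprob=-0.12,
#         top_logprobs=[
#             LogProbEntry(token="Hello", logprob=-0.12),
#             LogProbEntry(token="Hi", logprob=-2.31),
#         ],
#     )
#     logprobs = LogProbs(content=[entry])

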
class DeltaMessage(BaseModel):
    """
    Delta message for chat completion stream response.
    """

    role: Optional[str] = None
    content: Optional[str] = None
    prompt_token_ids: Optional[List[int]] = None
    completion_token_ids: Optional[List[int]] = None
    reasoning_content: Optional[str] = None
    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None


class ChatCompletionResponseStreamChoice(BaseModel):
    """
    Chat completion response choice for stream response.
    """

    index: int
    delta: DeltaMessage
    logprobs: Optional[LogProbs] = None
    finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
    arrival_time: Optional[float] = None


class ChatCompletionStreamResponse(BaseModel):
    """
    Chat completion response for stream response.
    """

    id: str
    object: str = "chat.completion.chunk"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseStreamChoice]
    usage: Optional[UsageInfo] = None


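# Illustrative streamed chunk (hypothetical values): each server-sent event wraps
# one DeltaMessage; a final chunk may carry `usage` when the client opts in via
# StreamOptions(include_usage=True), defined further below.
#
#     chunk = ChatCompletionStreamResponse(
#         id="chatcmpl-123",
#         model="default",
#         choices=[
#             ChatCompletionResponseStreamChoice(
#                 index=0,
#                 delta=DeltaMessage(role="assistant", content="Hello"),
#             )
#         ],
#     )
#     data = chunk.model_dump_json(exclude_none=True)  # payload of one SSE `data:` line

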
class CompletionResponseChoice(BaseModel):
    """
    Completion response choice.
    """

    index: int
    text: str
    prompt_token_ids: Optional[List[int]] = None
    completion_token_ids: Optional[List[int]] = None
    arrival_time: Optional[float] = None
    logprobs: Optional[CompletionLogprobs] = None
    reasoning_content: Optional[str] = None
    finish_reason: Optional[Literal["stop", "length", "tool_calls"]]
    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None


class CompletionResponse(BaseModel):
    """
    Completion response.
    """

    id: str
    object: str = "text_completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[CompletionResponseChoice]
    usage: UsageInfo


class CompletionLogprobs(BaseModel):
    """
    Completion logprobs.
    """

    tokens: Optional[List[str]] = None
    token_logprobs: Optional[List[float]] = None
    top_logprobs: Optional[List[Dict]] = None
    text_offset: Optional[List[int]] = None


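# Illustrative layout (hypothetical values): in the legacy Completions format the
# four lists are parallel arrays, so position i of each list describes the i-th
# sampled token; text_offset holds each token's character offset into the text.
#
#     lp = CompletionLogprobs(
#         tokens=["Hello", ","],
#         token_logprobs=[-0.10, -1.52],
#         top_logprobs=[{"Hello": -0.10, "Hi": -2.05}, {",": -1.52}],
#         text_offset=[0, 5],
#     )

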
class CompletionResponseStreamChoice(BaseModel):
    """
    Completion response choice for stream response.
    """

    index: int
    text: str
    arrival_time: Optional[float] = None
    logprobs: Optional[CompletionLogprobs] = None
    prompt_token_ids: Optional[List[int]] = None
    completion_token_ids: Optional[List[int]] = None
    reasoning_content: Optional[str] = None
    finish_reason: Optional[Literal["stop", "length", "tool_calls"]] = None
    tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None


class CompletionStreamResponse(BaseModel):
    """
    Completion response for stream response.
    """

    id: str
    object: str = "text_completion"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[CompletionResponseStreamChoice]
    usage: Optional[UsageInfo] = None


class StreamOptions(BaseModel):
    """
    Stream options.
    """

    include_usage: Optional[bool] = True
    continuous_usage_stats: Optional[bool] = False


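# Illustrative usage (hypothetical; semantics as commonly implemented by
# OpenAI-compatible servers): include_usage appends a final chunk whose `usage`
# field is populated, while continuous_usage_stats asks for usage on every chunk.
# Both are only meaningful with stream=True, which the request validators below
# enforce for stream_options as a whole.
#
#     opts = StreamOptions(include_usage=True, continuous_usage_stats=False)

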
class StructuralTag(BaseModel):
    """
    Structural tag.
    """

    begin: str
    structural_tag_schema: Optional[dict[str, Any]] = Field(default=None, alias="schema")
    end: str


class JsonSchemaResponseFormat(BaseModel):
    """
    Json schema for ResponseFormat.
    """

    name: str
    description: Optional[str] = None
    json_schema: Optional[dict[str, Any]] = Field(default=None, alias="schema")
    strict: Optional[bool] = None


class StructuralTagResponseFormat(BaseModel):
    """
    Structural tag for ResponseFormat.
    """

    type: Literal["structural_tag"]
    structures: list[StructuralTag]
    triggers: list[str]


class ResponseFormat(BaseModel):
    """
    response_format type.
    """

    type: Literal["text", "json_object", "json_schema"]
    json_schema: Optional[JsonSchemaResponseFormat] = None


AnyResponseFormat = Union[ResponseFormat, StructuralTagResponseFormat]


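# Illustrative payloads (hypothetical values): on the wire the key is `schema`,
# which pydantic maps onto the aliased json_schema / structural_tag_schema fields
# defined above.
#
#     rf = ResponseFormat.model_validate(
#         {"type": "json_schema", "json_schema": {"name": "answer", "schema": {"type": "object"}}}
#     )
#     assert rf.json_schema.json_schema == {"type": "object"}
#
#     st = StructuralTagResponseFormat.model_validate(
#         {
#             "type": "structural_tag",
#             "structures": [{"begin": "<a>", "schema": {"type": "object"}, "end": "</a>"}],
#             "triggers": ["<a>"],
#         }
#     )

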
class CompletionRequest(BaseModel):
    """
    Completion request to the engine.
    """

    # Ordered by official OpenAI API documentation
    # https://platform.openai.com/docs/api-reference/completions/create
    model: Optional[str] = "default"
    prompt: Union[List[int], List[List[int]], str, List[str]]
    best_of: Optional[int] = None
    echo: Optional[bool] = False
    frequency_penalty: Optional[float] = None
    logprobs: Optional[int] = None
    max_tokens: Optional[int] = None
    n: int = 1
    presence_penalty: Optional[float] = None
    seed: Optional[int] = None
    stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
    stream: Optional[bool] = False
    stream_options: Optional[StreamOptions] = None
    suffix: Optional[dict] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    user: Optional[str] = None

    # doc: begin-completion-sampling-params
    top_k: Optional[int] = None
    min_p: Optional[float] = None
    repetition_penalty: Optional[float] = None
    stop_token_ids: Optional[List[int]] = Field(default_factory=list)
    min_tokens: Optional[int] = None
    include_stop_str_in_output: Optional[bool] = False
    bad_words: Optional[List[str]] = None
    # doc: end-completion-sampling-params

    # doc: begin-completion-extra-params
    response_format: Optional[AnyResponseFormat] = None
    guided_json: Optional[Union[str, dict, BaseModel]] = None
    guided_regex: Optional[str] = None
    guided_choice: Optional[list[str]] = None
    guided_grammar: Optional[str] = None

    max_streaming_response_tokens: Optional[int] = None
    return_token_ids: Optional[bool] = None
    prompt_token_ids: Optional[List[int]] = None
    # doc: end-completion-extra-params

    def to_dict_for_infer(self, request_id=None, prompt=None):
        """
        Convert the request parameters into a dictionary

        Returns:
            dict: request parameters in dict format
        """
        req_dict = {}
        if request_id is not None:
            req_dict["request_id"] = request_id

        # parse request model into dict, priority: request params > suffix params
        if self.suffix is not None:
            for key, value in self.suffix.items():
                req_dict[key] = value
        for key, value in self.dict().items():
            if value is not None:
                req_dict[key] = value

        if prompt is not None:
            req_dict["prompt"] = prompt

        if "prompt_token_ids" in req_dict:
            if "prompt" in req_dict:
                del req_dict["prompt"]
        else:
            # without token ids, a non-empty prompt is required
            assert len(req_dict["prompt"]) > 0

        guided_json_object = None
        if self.response_format is not None:
            if self.response_format.type == "json_object":
                guided_json_object = True
            elif self.response_format.type == "json_schema":
                json_schema = self.response_format.json_schema.json_schema
                assert json_schema is not None, "response_format.json_schema can not be None"
                if isinstance(json_schema, (BaseModel, type(BaseModel))):
                    self.guided_json = json_schema.model_json_schema()
                else:
                    self.guided_json = json_schema

        if guided_json_object:
            req_dict["guided_json_object"] = guided_json_object

        guided_schema = [
            "guided_json",
            "guided_regex",
            "guided_choice",
            "guided_grammar",
            "structural_tag",
        ]
        for key in guided_schema:
            item = getattr(self, key, None)
            if item is not None:
                req_dict[key] = item

        return req_dict

    @model_validator(mode="before")
    @classmethod
    def validate_stream_options(cls, data):
        """
        Validate stream options
        """
        if data.get("stream_options") and not data.get("stream"):
            raise ValueError("Stream options can only be defined when `stream=True`.")

        guided_count = sum(
            [
                "guided_json" in data and data["guided_json"] is not None,
                "guided_regex" in data and data["guided_regex"] is not None,
                "guided_choice" in data and data["guided_choice"] is not None,
                "guided_grammar" in data and data["guided_grammar"] is not None,
            ]
        )

        if guided_count > 1:
            raise ValueError(
                "You can only use one kind of guided decoding "
                "('guided_json', 'guided_regex', 'guided_choice', 'guided_grammar')."
            )

        return data


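# Illustrative round trips (hypothetical values):
# 1) Extra engine parameters can ride along in `suffix`; to_dict_for_infer merges
#    them into the flat dict before the model's own fields:
#
#     req = CompletionRequest(prompt="Hello", max_tokens=16, suffix={"custom_key": 1})
#     d = req.to_dict_for_infer(request_id="req-1")
#     assert d["request_id"] == "req-1" and d["custom_key"] == 1 and d["prompt"] == "Hello"
#
# 2) validate_stream_options rejects combining guided-decoding strategies:
#
#     CompletionRequest(prompt="hi", guided_json={"type": "object"}, guided_regex=".*")
#     # -> raises a validation error: only one kind of guided decoding may be set

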
class ChatCompletionRequest(BaseModel):
    """
    Chat completion request to the engine.
    """

    # Ordered by official OpenAI API documentation
    # https://platform.openai.com/docs/api-reference/chat/create
    messages: Union[List[Any], List[int]]
    tools: Optional[List[ChatCompletionToolsParam]] = None
    model: Optional[str] = "default"
    frequency_penalty: Optional[float] = None
    logprobs: Optional[bool] = False
    top_logprobs: Optional[int] = 0
    # remove max_tokens when the field is removed from the OpenAI API
    max_tokens: Optional[int] = Field(
        default=None,
        deprecated="max_tokens is deprecated in favor of the max_completion_tokens field",
    )
    max_completion_tokens: Optional[int] = None
    n: Optional[int] = 1
    presence_penalty: Optional[float] = None
    seed: Optional[int] = None
    stop: Optional[Union[str, List[str]]] = Field(default_factory=list)
    stream: Optional[bool] = False
    stream_options: Optional[StreamOptions] = None
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    user: Optional[str] = None
    metadata: Optional[dict] = None
    response_format: Optional[AnyResponseFormat] = None

    # doc: begin-chat-completion-sampling-params
    top_k: Optional[int] = None
    min_p: Optional[float] = None
    min_tokens: Optional[int] = None
    include_stop_str_in_output: Optional[bool] = False
    bad_words: Optional[List[str]] = None
    repetition_penalty: Optional[float] = None
    stop_token_ids: Optional[List[int]] = Field(default_factory=list)
    # doc: end-chat-completion-sampling-params

    # doc: begin-chat-completion-extra-params
    chat_template_kwargs: Optional[dict] = None
    reasoning_max_tokens: Optional[int] = None
    structural_tag: Optional[str] = None
    guided_json: Optional[Union[str, dict, BaseModel]] = None
    guided_regex: Optional[str] = None
    guided_choice: Optional[list[str]] = None
    guided_grammar: Optional[str] = None

    return_token_ids: Optional[bool] = None
    prompt_token_ids: Optional[List[int]] = None
    max_streaming_response_tokens: Optional[int] = None
    disable_chat_template: Optional[bool] = False
    # doc: end-chat-completion-extra-params

    def to_dict_for_infer(self, request_id=None):
        """
        Convert the request parameters into a dictionary

        Returns:
            dict: request parameters in dict format
        """
        req_dict = {}
        if request_id is not None:
            req_dict["request_id"] = request_id

        # parse request model into dict, priority: request params > metadata params
        if self.metadata is not None:
            assert (
                "raw_request" not in self.metadata
            ), "The parameter `raw_request` is not supported now, please use completion api instead."
            for key, value in self.metadata.items():
                req_dict[key] = value
        for key, value in self.dict().items():
            if value is not None:
                req_dict[key] = value

        # set the computed values last so the raw `max_tokens`/`logprobs` fields
        # copied above cannot clobber them
        req_dict["max_tokens"] = self.max_completion_tokens or self.max_tokens
        req_dict["logprobs"] = self.top_logprobs if self.logprobs else None

        if "prompt_token_ids" in req_dict:
            if "messages" in req_dict:
                del req_dict["messages"]
        else:
            assert len(self.messages) > 0

        # If disable_chat_template is set, then the first message in messages will be used as the prompt.
        if self.disable_chat_template:
            req_dict["prompt"] = req_dict["messages"][0]["content"]
            del req_dict["messages"]

        guided_json_object = None
        if self.response_format is not None:
            if self.response_format.type == "json_object":
                guided_json_object = True
            elif self.response_format.type == "json_schema":
                json_schema = self.response_format.json_schema.json_schema
                assert json_schema is not None, "response_format.json_schema can not be None"
                if isinstance(json_schema, (BaseModel, type(BaseModel))):
                    self.guided_json = json_schema.model_json_schema()
                else:
                    self.guided_json = json_schema
            elif self.response_format.type == "structural_tag":
                structural_tag = self.response_format
                assert structural_tag is not None and isinstance(structural_tag, StructuralTagResponseFormat)
                self.structural_tag = json.dumps(structural_tag.model_dump(by_alias=True))

        if guided_json_object:
            req_dict["guided_json_object"] = guided_json_object

        guided_schema = [
            "guided_json",
            "guided_regex",
            "guided_choice",
            "guided_grammar",
            "structural_tag",
        ]
        for key in guided_schema:
            item = getattr(self, key, None)
            if item is not None:
                req_dict[key] = item

        return req_dict

    @model_validator(mode="before")
    @classmethod
    def validate_stream_options(cls, data):
        """
        Validate stream options
        """
        if data.get("stream_options") and not data.get("stream"):
            raise ValueError("Stream options can only be defined when `stream=True`.")

        guided_count = sum(
            [
                "guided_json" in data and data["guided_json"] is not None,
                "guided_regex" in data and data["guided_regex"] is not None,
                "guided_choice" in data and data["guided_choice"] is not None,
                "guided_grammar" in data and data["guided_grammar"] is not None,
                "structural_tag" in data and data["structural_tag"] is not None,
            ]
        )

        if guided_count > 1:
            raise ValueError(
                "You can only use one kind of guided decoding "
                "('guided_json', 'guided_regex', 'guided_choice', 'guided_grammar', 'structural_tag')."
            )

        return data

    @model_validator(mode="before")
    @classmethod
    def check_logprobs(cls, data):
        """
        Validate logprobs parameters
        """
        if (top_logprobs := data.get("top_logprobs")) is not None:
            if top_logprobs < 0:
                raise ValueError("`top_logprobs` must be a non-negative value.")

            if top_logprobs > 0 and not data.get("logprobs"):
                raise ValueError("when using `top_logprobs`, `logprobs` must be set to true.")

        return data


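# Illustrative validation interplay (hypothetical values): top_logprobs is only
# accepted alongside logprobs=True, and to_dict_for_infer forwards the requested
# count as the engine-facing `logprobs` value.
#
#     ok = ChatCompletionRequest(
#         messages=[{"role": "user", "content": "hi"}],
#         logprobs=True,
#         top_logprobs=5,
#     )
#     assert ok.to_dict_for_infer("req-1")["logprobs"] == 5
#
#     ChatCompletionRequest(messages=[{"role": "user", "content": "hi"}], top_logprobs=5)
#     # -> raises: `logprobs` must be set to true when `top_logprobs` > 0

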
class ControlSchedulerRequest(BaseModel):
    """
    Control scheduler request to the engine.
    """

    reset: Optional[bool] = False
    load_shards_num: Optional[int] = None
    reallocate_shard: Optional[bool] = False