diff --git a/docs/offline_inference.md b/docs/offline_inference.md index 5e8446f01..0c5b10de3 100644 --- a/docs/offline_inference.md +++ b/docs/offline_inference.md @@ -198,6 +198,8 @@ For ``LLM`` configuration, refer to [Parameter Documentation](parameters.md). * finished(bool): Completion status * metrics(fastdeploy.engine.request.RequestMetrics): Performance metrics * num_cached_tokens(int): Cached token count (only valid when ``enable_prefix_caching`` is enabled) +* num_input_image_tokens(int): Number of input image tokens. +* num_input_video_tokens(int): Number of input video tokens. * error_code(int): Error code * error_msg(str): Error message diff --git a/docs/online_serving/README.md b/docs/online_serving/README.md index 8423530ff..526573a63 100644 --- a/docs/online_serving/README.md +++ b/docs/online_serving/README.md @@ -238,6 +238,19 @@ ChatMessage: completion_token_ids: Optional[List[int]] = None prompt_tokens: Optional[str] = None completion_tokens: Optional[str] = None +UsageInfo: + prompt_tokens: int = 0 + total_tokens: int = 0 + completion_tokens: Optional[int] = 0 + prompt_tokens_details: Optional[PromptTokenUsageInfo] = None + completion_tokens_details: Optional[CompletionTokenUsageInfo] = None +PromptTokenUsageInfo: + cached_tokens: Optional[int] = None + image_tokens: Optional[int] = None + video_tokens: Optional[int] = None +CompletionTokenUsageInfo: + reasoning_tokens: Optional[int] = None + image_tokens: Optional[int] = None ToolCall: id: str = None type: Literal["function"] = "function" @@ -414,6 +427,19 @@ CompletionResponseChoice: reasoning_content: Optional[str] = None finish_reason: Optional[Literal["stop", "length", "tool_calls"]] tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None +UsageInfo: + prompt_tokens: int = 0 + total_tokens: int = 0 + completion_tokens: Optional[int] = 0 + prompt_tokens_details: Optional[PromptTokenUsageInfo] = None + completion_tokens_details: Optional[CompletionTokenUsageInfo] = None 
+PromptTokenUsageInfo: + cached_tokens: Optional[int] = None + image_tokens: Optional[int] = None + video_tokens: Optional[int] = None +CompletionTokenUsageInfo: + reasoning_tokens: Optional[int] = None + image_tokens: Optional[int] = None ToolCall: id: str = None type: Literal["function"] = "function" diff --git a/docs/zh/offline_inference.md b/docs/zh/offline_inference.md index 45e28efb1..bf5296b2d 100644 --- a/docs/zh/offline_inference.md +++ b/docs/zh/offline_inference.md @@ -198,6 +198,8 @@ for output in outputs: * finished(bool):标识当前query 是否推理结束 * metrics(fastdeploy.engine.request.RequestMetrics):记录推理耗时指标 * num_cached_tokens(int): 缓存的token数量, 仅在开启 ``enable_prefix_caching``时有效 +* num_input_image_tokens(int): 输入图片token的数量 +* num_input_video_tokens(int): 输入视频token的数量 * error_code(int): 错误码 * error_msg(str): 错误信息 diff --git a/docs/zh/online_serving/README.md b/docs/zh/online_serving/README.md index 587a0a9fa..04c5a6868 100644 --- a/docs/zh/online_serving/README.md +++ b/docs/zh/online_serving/README.md @@ -237,6 +237,19 @@ ChatMessage: completion_token_ids: Optional[List[int]] = None prompt_tokens: Optional[str] = None completion_tokens: Optional[str] = None +UsageInfo: + prompt_tokens: int = 0 + total_tokens: int = 0 + completion_tokens: Optional[int] = 0 + prompt_tokens_details: Optional[PromptTokenUsageInfo] = None + completion_tokens_details: Optional[CompletionTokenUsageInfo] = None +PromptTokenUsageInfo: + cached_tokens: Optional[int] = None + image_tokens: Optional[int] = None + video_tokens: Optional[int] = None +CompletionTokenUsageInfo: + reasoning_tokens: Optional[int] = None + image_tokens: Optional[int] = None ToolCall: id: str = None type: Literal["function"] = "function" @@ -410,6 +423,19 @@ CompletionResponseChoice: reasoning_content: Optional[str] = None finish_reason: Optional[Literal["stop", "length", "tool_calls"]] tool_calls: Optional[List[DeltaToolCall | ToolCall]] = None +UsageInfo: + prompt_tokens: int = 0 + total_tokens: int = 0 + 
completion_tokens: Optional[int] = 0 + prompt_tokens_details: Optional[PromptTokenUsageInfo] = None + completion_tokens_details: Optional[CompletionTokenUsageInfo] = None +PromptTokenUsageInfo: + cached_tokens: Optional[int] = None + image_tokens: Optional[int] = None + video_tokens: Optional[int] = None +CompletionTokenUsageInfo: + reasoning_tokens: Optional[int] = None + image_tokens: Optional[int] = None ToolCall: id: str = None type: Literal["function"] = "function"