"""
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import uuid
from copy import deepcopy
from typing import List, Literal, Union
from urllib.parse import urlparse

import requests
from openai.types.chat import (
    ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam,
)
from openai.types.chat import (
    ChatCompletionMessageParam as OpenAIChatCompletionMessageParam,
)
from typing_extensions import Required, TypeAlias, TypedDict

from fastdeploy.input.multimodal.image import ImageMediaIO
from fastdeploy.input.multimodal.video import VideoMediaIO

# Default timeout (seconds) for fetching remote media. Without one,
# ``requests`` waits forever on an unresponsive server.
DEFAULT_HTTP_TIMEOUT = 30


class VideoURL(TypedDict, total=False):
    """Video URL object"""

    url: Required[str]
    """Either a URL of the video or the base64 encoded video data"""


class CustomChatCompletionContentPartVideoParam(TypedDict, total=False):
    """Custom Video URL object"""

    video_url: Required[VideoURL]

    type: Required[Literal["video_url"]]
    """The type of the content type."""


CustomChatCompletionContentPartParam: TypeAlias = Union[
    OpenAIChatCompletionContentPartParam,
    CustomChatCompletionContentPartVideoParam,
]


class CustomChatCompletionMessageParam(TypedDict, total=False):
    """Custom User chat message parameter."""

    content: Required[Union[str, List[CustomChatCompletionContentPartParam]]]
    """The contents of the user message"""

    role: Required[str]
    """The role of the messages author, in this case `user`."""

    name: str
    """An optional name for the participant.

    Provides the model information to differentiate between participants of
    the same role.
    """


ChatCompletionMessageParam = Union[OpenAIChatCompletionMessageParam, CustomChatCompletionMessageParam]


class MultiModalPartParser:
    """Multi Modal Part parser.

    Holds one ``ImageMediaIO`` and one ``VideoMediaIO`` and uses them to turn
    a media URL (``http(s)``, ``data`` or ``file`` scheme) into a loaded media
    object.
    """

    def __init__(self):
        self.image_io = ImageMediaIO()
        self.video_io = VideoMediaIO()

    def parse_image(self, image_url):
        """Parse Image: load the image referenced by ``image_url``."""
        return self.load_from_url(image_url, self.image_io)

    def parse_video(self, video_url):
        """Parse Video: load the video referenced by ``video_url``."""
        return self.load_from_url(video_url, self.video_io)

    def load_from_url(self, url, media_io, timeout=DEFAULT_HTTP_TIMEOUT):
        """Load media from URL.

        Args:
            url: Media location. Schemes starting with ``http``, ``data`` or
                ``file`` are handled.
            media_io: Object providing ``load_bytes``, ``load_base64`` and
                ``load_file``; the matching one is called for the scheme.
            timeout: Timeout forwarded to ``requests.get`` for HTTP(S) URLs.

        Returns:
            Whatever the selected ``media_io`` loader returns.

        Raises:
            ValueError: If ``url`` is missing/not a string, is a malformed
                data URL, or uses an unsupported scheme.
            requests.RequestException: If the HTTP download fails, times out
                or returns an error status.
        """
        if not isinstance(url, str) or not url:
            raise ValueError("Media URL must be a non-empty string.")

        parsed = urlparse(url)
        scheme = parsed.scheme

        if scheme.startswith("http"):
            response = requests.get(url, timeout=timeout)
            # Fail loudly on 4xx/5xx instead of feeding an error page to the
            # media decoder.
            response.raise_for_status()
            return media_io.load_bytes(response.content)

        if scheme.startswith("data"):
            # Expected layout: data:<media type>;<encoding>,<payload>
            data_spec, comma, data = parsed.path.partition(",")
            if not comma:
                raise ValueError("Invalid data URL: missing ',' before the payload.")
            media_type, semicolon, _ = data_spec.partition(";")
            if not semicolon:
                raise ValueError("Invalid data URL: expected '<media type>;base64,<data>'.")
            return media_io.load_base64(media_type, data)

        if scheme.startswith("file"):
            # NOTE(review): the path comes straight from the request; confirm
            # that callers are trusted or restrict the readable locations.
            return media_io.load_file(parsed.path)

        # Previously fell through and returned None, which surfaced later as
        # a confusing failure far from the cause.
        raise ValueError(f"Unsupported media URL scheme: {scheme!r}")


def _parse_media_part(part, url_key, media_key, loader):
    """Load the media referenced by ``part[url_key]["url"]`` into a new part.

    The result is a deep copy of ``part`` with the ``url`` entry removed, the
    loaded media stored under ``media_key`` and ``type`` set to ``media_key``.
    """
    url = (part.get(url_key) or {}).get("url")
    media = loader(url)
    parsed = deepcopy(part)
    parsed[url_key].pop("url", None)
    parsed[media_key] = media
    parsed["type"] = media_key
    return parsed


def parse_content_part(mm_parser, part):
    """Parse one content part; only the OpenAI-compatible format is supported.

    Args:
        mm_parser: Parser providing ``parse_image`` and ``parse_video``.
        part: Content part dict with a ``type`` of ``text``, ``image_url`` or
            ``video_url``.

    Returns:
        ``part`` itself for text parts; otherwise a copy in which the URL has
        been replaced by the loaded media (``type`` becomes ``image`` or
        ``video``).

    Raises:
        ValueError: If the part type is not one of the supported types.
    """
    part_type = part.get("type", None)

    if part_type == "text":
        return part
    if part_type == "image_url":
        return _parse_media_part(part, "image_url", "image", mm_parser.parse_image)
    if part_type == "video_url":
        return _parse_media_part(part, "video_url", "video", mm_parser.parse_video)

    raise ValueError(f"Unknown content part type: {part_type}")


# TODO async
def parse_chat_messages(messages: List[ChatCompletionMessageParam]) -> List[dict]:
    """Parse chat messages to a list of ``{"role", "content"}`` dicts.

    ``content`` is always normalised to a list of parts: empty when the
    message has no content, a single text part for a plain string, and the
    individually parsed parts otherwise.
    """
    mm_parser = MultiModalPartParser()

    conversation = []
    for message in messages:
        role = message["role"]
        # Some messages (e.g. assistant tool calls) may omit ``content``.
        content = message.get("content")

        if content is None:
            parsed_content = []
        elif isinstance(content, str):
            parsed_content = [{"type": "text", "text": content}]
        else:
            parsed_content = [parse_content_part(mm_parser, part) for part in content]

        conversation.append({"role": role, "content": parsed_content})
    return conversation


def random_tool_call_id() -> str:
    """Return a fresh tool-call id of the form ``chatcmpl-tool-<32 hex chars>``."""
    return f"chatcmpl-tool-{uuid.uuid4().hex}"