[Feature] remove dependency on enable_mm and refine multimodal's code (#3014)

* remove dependency on enable_mm * fix codestyle check error * fix codestyle check error * update docs * resolve conflicts on model config * fix unit test error * fix code style check error --------- Co-authored-by: shige <1021937542@qq.com> Co-authored-by: Jiang-Jia-Jun <163579578+Jiang-Jia-Jun@users.noreply.github.com>
2025-09-29 22:02:30 +08:00 · 2025-08-01 20:01:18 +08:00
parent 243394044d
commit b71cbb466d
24 changed files with 118 additions and 29 deletions
--- a/fastdeploy/multimodal/utils.py
+++ b/fastdeploy/multimodal/utils.py
@@ -0,0 +1,219 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import base64
+import io
+import ipaddress
+import mimetypes
+import os
+import random
+import socket
+import subprocess
+import tempfile
+from urllib.parse import urlparse
+
+import cairosvg
+import pyheif
+import requests
+from pdf2image import convert_from_path
+from PIL import Image, ImageOps
+
+from fastdeploy.utils import data_processor_logger
+
+
+def process_image_data(image_data, mime_type, url):
+    """Decode a downloaded payload into a PIL image.
+
+    The decoder is selected from the MIME type first and from the URL's file
+    extension second. HEIF/HEIC, PDF, SVG and Adobe Illustrator payloads get
+    dedicated handling; everything else (GIF included) is opened by Pillow.
+
+    Args:
+        image_data: ``io.BytesIO`` holding the raw payload. ``getvalue()`` is
+            used for the formats that must be written to disk or passed as bytes.
+        mime_type: Content type reported for the payload. May be ``None`` and
+            may carry parameters such as ``"image/svg+xml; charset=utf-8"``.
+        url: Source URL, used only for its extension. May be ``None``.
+
+    Returns:
+        The decoded PIL image. For PDF and Illustrator input only the first
+        page is returned.
+
+    Raises:
+        subprocess.CalledProcessError: If the ``inkscape`` conversion fails.
+        IndexError: If a PDF conversion yields no pages.
+    """
+    # Content-Type values are case-insensitive and may carry parameters, so
+    # compare on the bare, lower-cased media type only.
+    media_type = (mime_type or "").split(";", 1)[0].strip().lower()
+    lowered_url = (url or "").lower()
+
+    if media_type in ("image/heif", "image/heic") or lowered_url.endswith((".heif", ".heic")):
+        heif_file = pyheif.read(image_data)
+        return Image.frombytes(
+            heif_file.mode,
+            heif_file.size,
+            heif_file.data,
+            "raw",
+            heif_file.mode,
+            heif_file.stride,
+        )
+
+    if media_type == "application/pdf" or lowered_url.endswith(".pdf"):
+        # A temporary directory guarantees cleanup even when conversion raises.
+        with tempfile.TemporaryDirectory() as work_dir:
+            pdf_path = os.path.join(work_dir, "input.pdf")
+            with open(pdf_path, "wb") as pdf_file:
+                pdf_file.write(image_data.getvalue())
+            return convert_from_path(pdf_path)[0]
+
+    if media_type == "image/svg+xml" or lowered_url.endswith(".svg"):
+        png_data = cairosvg.svg2png(bytestring=image_data.getvalue())
+        return Image.open(io.BytesIO(png_data))
+
+    if media_type in ("application/postscript", "application/illustrator") or lowered_url.endswith(".ai"):
+        with tempfile.TemporaryDirectory() as work_dir:
+            ai_path = os.path.join(work_dir, "input.ai")
+            pdf_path = os.path.join(work_dir, "output.pdf")
+            # Close the input file before inkscape reads it.
+            with open(ai_path, "wb") as ai_file:
+                ai_file.write(image_data.getvalue())
+            subprocess.run(
+                ["inkscape", ai_path, "--export-pdf=" + pdf_path],
+                check=True,
+            )
+            return convert_from_path(pdf_path)[0]
+
+    # GIF and every remaining format are decoded by Pillow directly.
+    return Image.open(image_data)
+
+
+def http_to_pil_image(url, timeout=60):
+    """Download an image over HTTP(S) and decode it into a PIL image.
+
+    When the ``DOWNLOAD_WITH_TP_SERVER`` environment variable is a non-zero
+    integer and ``url`` resolves to a public address, the download is delegated
+    to ``http_to_pil_image_with_tp_server``.
+
+    Args:
+        url: Address of the image to fetch.
+        timeout: Timeout in seconds handed to ``requests.get``. Without one a
+            stalled server would block the caller indefinitely.
+
+    Returns:
+        The decoded PIL image.
+
+    Raises:
+        Exception: If the server answers with a status code other than 200.
+    """
+    # Read the switch first so the DNS lookup done by is_public_url() is
+    # skipped entirely when proxy download is disabled (the default).
+    if int(os.getenv("DOWNLOAD_WITH_TP_SERVER", "0")) and is_public_url(url):
+        return http_to_pil_image_with_tp_server(url)
+
+    response = requests.get(url, timeout=timeout)
+    if response.status_code != 200:
+        raise Exception(f"Failed to download the image from URL: {url}, status code: {response.status_code}")
+    image_data = io.BytesIO(response.content)
+
+    mime_type = response.headers.get("Content-Type")
+    if mime_type is None:
+        mime_type, _ = mimetypes.guess_type(url)
+    if mime_type:
+        # Drop parameters such as "; charset=utf-8" so the type can be matched exactly.
+        mime_type = mime_type.split(";", 1)[0].strip().lower()
+
+    data_processor_logger.info(f"Detected MIME type: {mime_type}")  # debug information
+    return process_image_data(image_data, mime_type, url)
+
+
+def http_to_pil_image_with_tp_server(url, retry_time=6, timeout=60):
+    """Download an image through the TP proxy service and decode it.
+
+    The CNAP platform has no external network access, so public images have to
+    be fetched through one of the TP proxy servers. A proxy is picked at random
+    for every attempt because individual proxies may be unstable.
+
+    Args:
+        url: Address of the image to fetch.
+        retry_time: Maximum number of download attempts.
+        timeout: Timeout in seconds handed to ``requests.get`` for each attempt.
+
+    Returns:
+        The decoded PIL image.
+
+    Raises:
+        Exception: If no attempt succeeds; the last underlying error, if any,
+            is chained as the cause.
+    """
+    # NOTE(review): the proxy endpoints and the X-Tp-Authorization credential
+    # are hard-coded in source; consider moving them to configuration.
+    proxies = [
+        {"http": "http://10.229.197.142:8807"},
+        {"http": "http://10.229.197.161:8804"},
+        {"http": "http://10.229.198.143:8804"},
+        {"http": "http://10.122.108.164:8807"},
+        {"http": "http://10.122.108.165:8807"},
+        {"http": "http://10.122.108.166:8807"},
+        {"http": "http://10.122.108.168:8801"},
+        {"http": "http://10.122.150.146:8802"},
+        {"http": "http://10.122.150.158:8802"},
+        {"http": "http://10.122.150.164:8801"},
+        {"http": "http://10.143.51.38:8813"},
+        {"http": "http://10.143.103.42:8810"},
+        {"http": "http://10.143.194.45:8804"},
+        {"http": "http://10.143.226.25:8801"},
+        {"http": "http://10.143.236.12:8807"},
+        {"http": "http://10.143.238.36:8807"},
+        {"http": "http://10.144.71.30:8807"},
+        {"http": "http://10.144.73.16:8804"},
+        {"http": "http://10.144.138.36:8801"},
+        {"http": "http://10.144.152.40:8810"},
+        {"http": "http://10.144.199.29:8810"},
+        {"http": "http://10.144.251.29:8813"},
+    ]
+    headers = {
+        "X-Tp-Authorization": "Basic RVJOSUVMaXRlVjpFUk5JRUxpdGVWXzFxYXo0cmZ2M2VkYzV0Z2Iyd3N4LWJmZS10cA==",
+        "scheme": "https",
+    }
+
+    # Rewrite only the leading scheme; str.replace would also rewrite any
+    # "https://" embedded later in the URL (for example in a query parameter).
+    new_url = "http://" + url[len("https://") :] if url.startswith("https://") else url
+
+    last_error = None
+    # The proxies may be unstable, so retry.
+    for idx in range(retry_time):
+        try:
+            response = requests.get(new_url, headers=headers, proxies=random.choice(proxies), timeout=timeout)
+            if response.status_code != 200:
+                status = response.status_code
+                data_processor_logger.error(
+                    f"Failed to download the image, idx: {idx}, URL: {url}, status code: {status}"
+                )
+                continue
+
+            image_data = io.BytesIO(response.content)
+
+            mime_type = response.headers.get("Content-Type")
+            if mime_type is None:
+                mime_type, _ = mimetypes.guess_type(url)
+
+            data_processor_logger.info(f"Detected MIME type: {mime_type}")  # debug information
+            return process_image_data(image_data, mime_type, url)
+        except Exception as e:
+            last_error = e
+            data_processor_logger.error(f"Failed to download the image, idx: {idx}, URL: {url}, error: {e}")
+
+    raise Exception(f"Failed to download the image from URL: {url}") from last_error
+
+
+def base64_to_pil_image(base64_string):
+    """Decode a base64-encoded image and open it as a PIL image.
+
+    Args:
+        base64_string: Base64 text (or bytes) of the encoded image file.
+
+    Returns:
+        The PIL image opened from the decoded bytes.
+    """
+    decoded_stream = io.BytesIO(base64.b64decode(base64_string))
+    return Image.open(decoded_stream)
+
+
+def is_public_url(url):
+    """Tell whether ``url`` points at a public (internet-routable) host.
+
+    The host name is resolved with ``socket.gethostbyname`` and the resulting
+    address is rejected when it is private, loopback, link-local, reserved or
+    multicast.
+
+    Args:
+        url: The URL to inspect.
+
+    Returns:
+        ``True`` when the host resolves to a public address. ``False`` when it
+        does not, when the URL has no host name, or when parsing or resolution
+        fails for any reason.
+    """
+    try:
+        hostname = urlparse(url).hostname
+        if hostname is None:
+            return False
+        # Resolve the host name, then classify the resulting address.
+        ip_obj = ipaddress.ip_address(socket.gethostbyname(hostname))
+        return not (
+            ip_obj.is_private
+            or ip_obj.is_loopback
+            or ip_obj.is_link_local
+            or ip_obj.is_reserved
+            or ip_obj.is_multicast
+        )
+    except Exception as e:
+        # Best effort: any parsing or resolution failure means "not public".
+        data_processor_logger.error(f"Error checking URL: {e}")
+        return False
+
+
+def process_transparency(image):
+    """Apply the EXIF orientation and flatten transparency onto white.
+
+    Args:
+        image: The PIL image to normalise.
+
+    Returns:
+        The image with its EXIF orientation applied. If it contains any pixel
+        that is not fully opaque, an RGB image composited over a white
+        background is returned; otherwise the mode is unchanged. When
+        flattening fails, the oriented image is returned as is.
+    """
+
+    def _has_transparent_pixels(img):
+        # Only these modes can carry alpha, so skip the conversion for the rest.
+        if img.mode in ("RGBA", "LA") or (img.mode == "P" and "transparency" in img.info):
+            alpha = img.convert("RGBA").split()[-1]
+            # A minimum alpha below 255 means at least one pixel is not fully opaque.
+            return alpha.getextrema()[0] < 255
+        return False
+
+    def _flatten_on_white(img):
+        # Convert to RGBA first: a palette ("P") image is not accepted as a
+        # paste mask, so it could never be flattened directly.
+        rgba = img.convert("RGBA")
+        canvas = Image.new("RGB", rgba.size, (255, 255, 255))  # white background
+        canvas.paste(rgba, (0, 0), rgba)
+        return canvas
+
+    # Orient before flattening: the flattened canvas is a brand-new image with
+    # no EXIF data, so transposing afterwards would silently do nothing.
+    image = ImageOps.exif_transpose(image)
+
+    try:
+        if _has_transparent_pixels(image):
+            data_processor_logger.info("Image has transparent background, adding white background.")
+            image = _flatten_on_white(image)
+    except Exception as e:
+        # Flattening is best effort; keep the oriented image but record the failure.
+        data_processor_logger.warning(f"Failed to process image transparency, keeping the image as is: {e}")
+
+    return image