[Feature] support async download features (#5003)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled

* support async download features

* add test case

* update code
This commit is contained in:
kevin
2025-11-19 22:23:36 +08:00
committed by GitHub
parent bde97e09f7
commit 109d48e456
10 changed files with 433 additions and 75 deletions

View File

@@ -21,6 +21,7 @@ import importlib
import json
import logging
import os
import pickle
import random
import re
import socket
@@ -975,6 +976,36 @@ def init_bos_client():
return BosClient(cfg)
def download_from_bos(bos_client, bos_links):
    """
    Download pickled objects from Baidu Object Storage (BOS).

    Args:
        bos_client: BOS client instance exposing ``get_object_as_string(bucket, key)``.
        bos_links: Single link or list of BOS links in the format
            "bos://bucket-name/path/to/object".

    Yields:
        tuple: (success: bool, data | error_msg: str)
            - On success: (True, deserialized_data)
            - On failure: (False, error_message), and processing of any
              remaining links stops after the first failure.

    Security Note:
        Uses pickle deserialization. Only use with trusted data sources.
    """
    if not isinstance(bos_links, list):
        bos_links = [bos_links]
    for link in bos_links:
        try:
            # Strip the scheme; removeprefix is a no-op for links without it.
            path = link.removeprefix("bos://")
            # Per the documented link format, the first path segment is the
            # bucket and everything after it is the object key. The previous
            # parsing ('/'.join(parts[1:-1]) as the bucket, last segment as
            # the key) dropped the bucket name and truncated multi-segment
            # object keys.
            bucket_name, _, object_key = path.partition("/")
            response = bos_client.get_object_as_string(bucket_name, object_key)
            # SECURITY: pickle.loads executes arbitrary code if the stored
            # object is attacker-controlled — trusted buckets only.
            yield True, pickle.loads(response)
        except Exception as e:
            # Report the failure to the caller and stop: later links are not
            # attempted once one download/deserialization fails.
            yield False, f"link {link} download error: {str(e)}"
            break
# Module-level loggers shared across the package, created through the
# project's get_logger helper; the second argument is presumably the log
# file name each logger writes to — confirm against get_logger's signature.
llm_logger = get_logger("fastdeploy", "fastdeploy.log")
data_processor_logger = get_logger("data_processor", "data_processor.log")
scheduler_logger = get_logger("scheduler", "scheduler.log")