From dac0a00d0fc2d0ba8d206cbb1a3c2888f737404c Mon Sep 17 00:00:00 2001
From: ApplEOFDiscord <31272106+ApplEOFDiscord@users.noreply.github.com>
Date: Wed, 3 Sep 2025 17:50:29 +0800
Subject: [PATCH] [BugFix] fix max streaming tokens invalid (#3774) (#3856)

* Update serving_chat.py

* Update serving_completion.py

Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
---
Review note: http_get_with_retry now passes a request timeout, rejects
max_retries < 1 instead of silently returning None, and retries only on
requests.RequestException. The blob ids on the "index" line below are those
of the patch as originally submitted.

 fastdeploy/entrypoints/chat_utils.py | 39 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/fastdeploy/entrypoints/chat_utils.py b/fastdeploy/entrypoints/chat_utils.py
index 4f7357e11..ee2517e26 100644
--- a/fastdeploy/entrypoints/chat_utils.py
+++ b/fastdeploy/entrypoints/chat_utils.py
@@ -14,6 +14,7 @@
 # limitations under the License.
 """
 
+import time
 from copy import deepcopy
 from typing import List, Literal, Union
 from urllib.parse import urlparse
@@ -29,6 +30,7 @@ from typing_extensions import Required, TypeAlias, TypedDict
 
 from fastdeploy.input.multimodal.image import ImageMediaIO
 from fastdeploy.input.multimodal.video import VideoMediaIO
+from fastdeploy.utils import api_server_logger
 
 
 class VideoURL(TypedDict, total=False):
@@ -87,12 +89,47 @@ class MultiModalPartParser:
         """Parse Video"""
         return self.load_from_url(video_url, self.video_io)
 
+    def http_get_with_retry(self, url, max_retries=3, retry_delay=1, backoff_factor=2, timeout=30):
+        """
+        Fetch ``url`` with HTTP GET, retrying with exponential backoff on failure.
+
+        Args:
+            url: Address to fetch.
+            max_retries: Total number of attempts, the first request included; must be >= 1.
+            retry_delay: Seconds to wait before the second attempt.
+            backoff_factor: Multiplier applied to the wait after every failed attempt.
+            timeout: Timeout in seconds passed to ``requests.get`` so a stalled server cannot block forever.
+
+        Returns:
+            The response body as bytes.
+
+        Raises:
+            ValueError: If ``max_retries`` is smaller than 1.
+            requests.RequestException: The error of the last attempt once every attempt has failed.
+        """
+        if max_retries < 1:
+            raise ValueError(f"max_retries must be >= 1, got {max_retries}")
+
+        delay = retry_delay
+        for attempt in range(1, max_retries + 1):
+            try:
+                response = requests.get(url, timeout=timeout)
+                response.raise_for_status()
+                return response.content
+            except requests.RequestException as e:
+                if attempt >= max_retries:
+                    api_server_logger.error(f"HTTP GET failed: {e}. Max retries reached")
+                    raise
+                api_server_logger.info(f"HTTP GET failed: {e}. Start retry {attempt}")
+                time.sleep(delay)
+                delay *= backoff_factor
+
     def load_from_url(self, url, media_io):
         """Load media from URL"""
 
         parsed = urlparse(url)
         if parsed.scheme.startswith("http"):
-            media_bytes = requests.get(url).content
+            media_bytes = self.http_get_with_retry(url)
             return media_io.load_bytes(media_bytes)
 
         if parsed.scheme.startswith("data"):