[BugFix] fix max streaming tokens invalid (#3774) (#3856)

* Update serving_chat.py

* Update serving_completion.py

Co-authored-by: ltd0924 <32387785+ltd0924@users.noreply.github.com>
This commit is contained in:
ApplEOFDiscord
2025-09-03 17:50:29 +08:00
committed by GitHub
parent c5591c45df
commit dac0a00d0f

View File

@@ -14,6 +14,7 @@
# limitations under the License. # limitations under the License.
""" """
import time
from copy import deepcopy from copy import deepcopy
from typing import List, Literal, Union from typing import List, Literal, Union
from urllib.parse import urlparse from urllib.parse import urlparse
@@ -29,6 +30,7 @@ from typing_extensions import Required, TypeAlias, TypedDict
from fastdeploy.input.multimodal.image import ImageMediaIO from fastdeploy.input.multimodal.image import ImageMediaIO
from fastdeploy.input.multimodal.video import VideoMediaIO from fastdeploy.input.multimodal.video import VideoMediaIO
from fastdeploy.utils import api_server_logger
class VideoURL(TypedDict, total=False): class VideoURL(TypedDict, total=False):
@@ -87,12 +89,32 @@ class MultiModalPartParser:
"""Parse Video""" """Parse Video"""
return self.load_from_url(video_url, self.video_io) return self.load_from_url(video_url, self.video_io)
def http_get_with_retry(self, url, max_retries=3, retry_delay=1, backoff_factor=2, timeout=30):
    """Fetch ``url`` with HTTP GET, retrying failed attempts with exponential backoff.

    Args:
        url: Address to download.
        max_retries: Total number of attempts, including the first one.
            Must be at least 1.
        retry_delay: Seconds to sleep before the second attempt.
        backoff_factor: Multiplier applied to the sleep time after each
            failed attempt.
        timeout: Value forwarded to ``requests.get(..., timeout=...)`` so
            that an unresponsive server cannot block the call indefinitely.

    Returns:
        The body of the successful response (``response.content``).

    Raises:
        ValueError: If ``max_retries`` is smaller than 1.
        Exception: The error raised by the final attempt is re-raised
            unchanged once all attempts are used up.
    """
    # Without this guard the loop below would never run and the method
    # would silently return None instead of the downloaded payload.
    if max_retries < 1:
        raise ValueError(f"max_retries must be >= 1, got {max_retries}")

    delay = retry_delay
    for attempt in range(1, max_retries + 1):
        try:
            # A timeout is required for the retry logic to be effective:
            # without one a stalled connection never raises.
            response = requests.get(url, timeout=timeout)
            # Treat 4xx/5xx status codes as failures too.
            response.raise_for_status()
            return response.content
        except Exception as e:
            if attempt >= max_retries:
                api_server_logger.error(f"HTTP GET failed: {e}. Max retries reached")
                raise
            api_server_logger.info(f"HTTP GET failed: {e}. Start retry {attempt}")
            time.sleep(delay)
            delay *= backoff_factor
def load_from_url(self, url, media_io): def load_from_url(self, url, media_io):
"""Load media from URL""" """Load media from URL"""
parsed = urlparse(url) parsed = urlparse(url)
if parsed.scheme.startswith("http"): if parsed.scheme.startswith("http"):
media_bytes = requests.get(url).content media_bytes = self.http_get_with_retry(url)
return media_io.load_bytes(media_bytes) return media_io.load_bytes(media_bytes)
if parsed.scheme.startswith("data"): if parsed.scheme.startswith("data"):