[Feature] support async download features (#5003)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled

* support async download features

* add test case

* update code
This commit is contained in:
kevin
2025-11-19 22:23:36 +08:00
committed by GitHub
parent bde97e09f7
commit 109d48e456
10 changed files with 433 additions and 75 deletions

View File

@@ -51,14 +51,7 @@ from fastdeploy.splitwise.internal_adapter_utils import InternalAdapter
from fastdeploy.splitwise.splitwise_connector import SplitwiseConnector
from fastdeploy.trace.constants import LoggingEventName
from fastdeploy.trace.trace_logger import print as trace_print
from fastdeploy.utils import (
EngineError,
check_download_links,
envs,
get_logger,
init_bos_client,
llm_logger,
)
from fastdeploy.utils import EngineError, envs, get_logger, llm_logger
try:
TokenProcessor = load_token_processor_plugins()
@@ -808,7 +801,7 @@ class EngineService:
else:
raise
# 2. Schedule requests
tasks = self.resource_manager.schedule()
tasks, error_tasks = self.resource_manager.schedule()
# 3. Send to engine
if tasks:
@@ -833,7 +826,16 @@ class EngineService:
trace_print(LoggingEventName.REQUEST_SCHEDULE_END, task.request_id, getattr(task, "user", ""))
trace_print(LoggingEventName.INFERENCE_START, task.request_id, getattr(task, "user", ""))
self.engine_worker_queue.put_tasks((tasks, self.resource_manager.real_bsz))
else:
# 4. Response error tasks
if error_tasks:
for request_id, failed in error_tasks:
if failed is None:
llm_logger.warning(f"Request {request_id} has no error, skip sending error response.")
continue
self._send_error_response(request_id, failed)
if not tasks and not error_tasks:
time.sleep(0.005)
except RuntimeError as e:
@@ -909,24 +911,6 @@ class EngineService:
self.llm_logger.error(f"Receive request error: {err_msg}")
results.append((request.request_id, err_msg))
if self._has_features_info(request) and err_msg is None:
if self.bos_client is None:
self.bos_client = init_bos_client()
download_urls = []
inputs = request.multimodal_inputs
if inputs.get("video_feature_urls") is not None:
download_urls.extend(inputs.get("video_feature_urls"))
if inputs.get("image_feature_urls") is not None:
download_urls.extend(inputs.get("image_feature_urls"))
if inputs.get("audio_feature_urls") is not None:
download_urls.extend(inputs.get("audio_feature_urls"))
err_msg = check_download_links(self.bos_client, download_urls)
if err_msg:
llm_logger.error(f"Receive request {request.request_id} download error: {err_msg}")
results.append((request.request_id, err_msg))
if err_msg is None:
insert_task.append(request)
@@ -948,21 +932,27 @@ class EngineService:
main_process_metrics.num_requests_waiting.inc(1)
continue
error_result = RequestOutput(
request_id=request_id,
finished=True,
error_code=500,
error_msg=failed,
)
# Since the request is not in scheduler
# Send result by zmq directly
self.send_response_server.send_response(request_id, [error_result])
self._send_error_response(request_id, failed)
except Exception as e:
self.llm_logger.error(
f"Error happened while receiving new request from zmq, details={e}, "
f"traceback={traceback.format_exc()}"
)
def _send_error_response(self, request_id, error_msg, error_code: int = 500):
llm_logger.error(
f"Send error response to client, request_id: {request_id}, error_msg: {error_msg}, error_code: {error_code}"
)
error_result = RequestOutput(
request_id=request_id,
finished=True,
error_code=error_code,
error_msg=error_msg,
)
# Since the request is not in scheduler
# Send result by zmq directly
self.send_response_server.send_response(request_id, [error_result])
def _decode_token(self, token_ids, req_id, is_end):
delta_text = ""
if envs.FD_ENABLE_RETURN_TEXT:
@@ -977,19 +967,6 @@ class EngineService:
del self.data_processor.decode_status[req_id]
return delta_text, token_ids
def _has_features_info(self, task):
inputs = task.multimodal_inputs
if inputs is None or len(inputs) == 0:
return False
if (
(inputs.get("video_feature_urls") is not None and len(inputs["video_feature_urls"]) > 0)
or (inputs.get("image_feature_urls") is not None and len(inputs["image_feature_urls"]) > 0)
or (inputs.get("audio_feature_urls") is not None and len(inputs["audio_feature_urls"]) > 0)
):
return True
return False
def _zmq_send_generated_tokens(self):
"""
Recieve output for zmq