mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Feature] support async download features (#5003)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
* support async download features
* add test case
* update code
This commit is contained in:
@@ -51,14 +51,7 @@ from fastdeploy.splitwise.internal_adapter_utils import InternalAdapter
|
||||
from fastdeploy.splitwise.splitwise_connector import SplitwiseConnector
|
||||
from fastdeploy.trace.constants import LoggingEventName
|
||||
from fastdeploy.trace.trace_logger import print as trace_print
|
||||
from fastdeploy.utils import (
|
||||
EngineError,
|
||||
check_download_links,
|
||||
envs,
|
||||
get_logger,
|
||||
init_bos_client,
|
||||
llm_logger,
|
||||
)
|
||||
from fastdeploy.utils import EngineError, envs, get_logger, llm_logger
|
||||
|
||||
try:
|
||||
TokenProcessor = load_token_processor_plugins()
|
||||
@@ -808,7 +801,7 @@ class EngineService:
|
||||
else:
|
||||
raise
|
||||
# 2. Schedule requests
|
||||
tasks = self.resource_manager.schedule()
|
||||
tasks, error_tasks = self.resource_manager.schedule()
|
||||
|
||||
# 3. Send to engine
|
||||
if tasks:
|
||||
@@ -833,7 +826,16 @@ class EngineService:
|
||||
trace_print(LoggingEventName.REQUEST_SCHEDULE_END, task.request_id, getattr(task, "user", ""))
|
||||
trace_print(LoggingEventName.INFERENCE_START, task.request_id, getattr(task, "user", ""))
|
||||
self.engine_worker_queue.put_tasks((tasks, self.resource_manager.real_bsz))
|
||||
else:
|
||||
|
||||
# 4. Send responses for error tasks
|
||||
if error_tasks:
|
||||
for request_id, failed in error_tasks:
|
||||
if failed is None:
|
||||
llm_logger.warning(f"Request {request_id} has no error, skip sending error response.")
|
||||
continue
|
||||
self._send_error_response(request_id, failed)
|
||||
|
||||
if not tasks and not error_tasks:
|
||||
time.sleep(0.005)
|
||||
|
||||
except RuntimeError as e:
|
||||
@@ -909,24 +911,6 @@ class EngineService:
|
||||
self.llm_logger.error(f"Receive request error: {err_msg}")
|
||||
results.append((request.request_id, err_msg))
|
||||
|
||||
if self._has_features_info(request) and err_msg is None:
|
||||
if self.bos_client is None:
|
||||
self.bos_client = init_bos_client()
|
||||
|
||||
download_urls = []
|
||||
inputs = request.multimodal_inputs
|
||||
if inputs.get("video_feature_urls") is not None:
|
||||
download_urls.extend(inputs.get("video_feature_urls"))
|
||||
if inputs.get("image_feature_urls") is not None:
|
||||
download_urls.extend(inputs.get("image_feature_urls"))
|
||||
if inputs.get("audio_feature_urls") is not None:
|
||||
download_urls.extend(inputs.get("audio_feature_urls"))
|
||||
|
||||
err_msg = check_download_links(self.bos_client, download_urls)
|
||||
if err_msg:
|
||||
llm_logger.error(f"Receive request {request.request_id} download error: {err_msg}")
|
||||
results.append((request.request_id, err_msg))
|
||||
|
||||
if err_msg is None:
|
||||
insert_task.append(request)
|
||||
|
||||
@@ -948,21 +932,27 @@ class EngineService:
|
||||
main_process_metrics.num_requests_waiting.inc(1)
|
||||
continue
|
||||
|
||||
error_result = RequestOutput(
|
||||
request_id=request_id,
|
||||
finished=True,
|
||||
error_code=500,
|
||||
error_msg=failed,
|
||||
)
|
||||
# Since the request is not in scheduler
|
||||
# Send result by zmq directly
|
||||
self.send_response_server.send_response(request_id, [error_result])
|
||||
self._send_error_response(request_id, failed)
|
||||
except Exception as e:
|
||||
self.llm_logger.error(
|
||||
f"Error happened while receiving new request from zmq, details={e}, "
|
||||
f"traceback={traceback.format_exc()}"
|
||||
)
|
||||
|
||||
def _send_error_response(self, request_id, error_msg, error_code: int = 500):
|
||||
llm_logger.error(
|
||||
f"Send error response to client, request_id: {request_id}, error_msg: {error_msg}, error_code: {error_code}"
|
||||
)
|
||||
error_result = RequestOutput(
|
||||
request_id=request_id,
|
||||
finished=True,
|
||||
error_code=error_code,
|
||||
error_msg=error_msg,
|
||||
)
|
||||
# Since the request is not in scheduler
|
||||
# Send result by zmq directly
|
||||
self.send_response_server.send_response(request_id, [error_result])
|
||||
|
||||
def _decode_token(self, token_ids, req_id, is_end):
|
||||
delta_text = ""
|
||||
if envs.FD_ENABLE_RETURN_TEXT:
|
||||
@@ -977,19 +967,6 @@ class EngineService:
|
||||
del self.data_processor.decode_status[req_id]
|
||||
return delta_text, token_ids
|
||||
|
||||
def _has_features_info(self, task):
|
||||
inputs = task.multimodal_inputs
|
||||
if inputs is None or len(inputs) == 0:
|
||||
return False
|
||||
|
||||
if (
|
||||
(inputs.get("video_feature_urls") is not None and len(inputs["video_feature_urls"]) > 0)
|
||||
or (inputs.get("image_feature_urls") is not None and len(inputs["image_feature_urls"]) > 0)
|
||||
or (inputs.get("audio_feature_urls") is not None and len(inputs["audio_feature_urls"]) > 0)
|
||||
):
|
||||
return True
|
||||
return False
|
||||
|
||||
def _zmq_send_generated_tokens(self):
|
||||
"""
|
||||
Receive output for zmq
|
||||
|
||||
Reference in New Issue
Block a user