mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[Optimization] Improve perf for fd response token with internal adapter (#4992)
* [Optimize] Improve perf for fd response token with internal adapter * fix * fix bug * fix ci * fix ci * fix ci * fix ci
This commit is contained in:
@@ -102,6 +102,8 @@ class Request:
|
||||
prefill_start_index: int = 0,
|
||||
prefill_end_index: int = 0,
|
||||
num_computed_tokens: int = 0,
|
||||
# for internal adapter
|
||||
ic_req_data: Optional[dict] = None,
|
||||
) -> None:
|
||||
self.request_id = request_id
|
||||
self.prompt = prompt
|
||||
@@ -172,6 +174,8 @@ class Request:
|
||||
self.extend_block_tables = []
|
||||
# dp
|
||||
self.dp_rank = dp_rank
|
||||
self.llm_engine_recv_req_timestamp = time.time()
|
||||
self.ic_req_data = ic_req_data
|
||||
|
||||
self.async_process_futures = []
|
||||
self.error_message = None
|
||||
@@ -226,6 +230,7 @@ class Request:
|
||||
video_end=d.get("video_end", 0),
|
||||
audio_end=d.get("audio_end", 0),
|
||||
dp_rank=d.get("dp_rank", None),
|
||||
ic_req_data=d.get("ic_req_data", None),
|
||||
inference_start_time=d.get("inference_start_time"),
|
||||
llm_engine_recv_req_timestamp=d.get("llm_engine_recv_req_timestamp"),
|
||||
)
|
||||
@@ -278,6 +283,7 @@ class Request:
|
||||
"image_end": self.image_end,
|
||||
"video_end": self.video_end,
|
||||
"audio_end": self.audio_end,
|
||||
"ic_req_data": self.ic_req_data,
|
||||
}
|
||||
add_params = [
|
||||
"guided_json",
|
||||
@@ -478,6 +484,9 @@ class RequestOutput:
|
||||
num_input_video_tokens: Optional[int] = 0,
|
||||
error_code: Optional[int] = 200,
|
||||
error_msg: Optional[str] = None,
|
||||
# for internal adapter
|
||||
ic_req_data: Optional[dict] = None,
|
||||
prompt_token_ids_len: Optional[int] = 0,
|
||||
) -> None:
|
||||
self.request_id = request_id
|
||||
self.prompt = prompt
|
||||
@@ -493,6 +502,8 @@ class RequestOutput:
|
||||
self.num_input_video_tokens = num_input_video_tokens
|
||||
self.error_code = error_code
|
||||
self.error_msg = error_msg
|
||||
self.ic_req_data = ic_req_data
|
||||
self.prompt_token_ids_len = prompt_token_ids_len
|
||||
|
||||
if prompt_token_ids is None:
|
||||
self.prompt_token_ids = []
|
||||
@@ -565,6 +576,8 @@ class RequestOutput:
|
||||
"num_input_video_tokens": self.num_input_video_tokens,
|
||||
"error_code": self.error_code,
|
||||
"error_msg": self.error_msg,
|
||||
"ic_req_data": self.ic_req_data,
|
||||
"prompt_token_ids_len": self.prompt_token_ids_len,
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user