[Optimization] Improve perf for fd response token with internal adapter (#4992)

* [Optimize] Improve perf for fd response token with internal adapter

* fix

* fix bug

* fix ci

* fix ci

* fix ci

* fix ci
This commit is contained in:
chenjian
2025-11-21 19:02:03 +08:00
committed by GitHub
parent 5bcf79d780
commit 3ea1b44a58
15 changed files with 202 additions and 67 deletions

View File

@@ -102,6 +102,8 @@ class Request:
prefill_start_index: int = 0,
prefill_end_index: int = 0,
num_computed_tokens: int = 0,
# for internal adapter
ic_req_data: Optional[dict] = None,
) -> None:
self.request_id = request_id
self.prompt = prompt
@@ -172,6 +174,8 @@ class Request:
self.extend_block_tables = []
# dp
self.dp_rank = dp_rank
self.llm_engine_recv_req_timestamp = time.time()
self.ic_req_data = ic_req_data
self.async_process_futures = []
self.error_message = None
@@ -226,6 +230,7 @@ class Request:
video_end=d.get("video_end", 0),
audio_end=d.get("audio_end", 0),
dp_rank=d.get("dp_rank", None),
ic_req_data=d.get("ic_req_data", None),
inference_start_time=d.get("inference_start_time"),
llm_engine_recv_req_timestamp=d.get("llm_engine_recv_req_timestamp"),
)
@@ -278,6 +283,7 @@ class Request:
"image_end": self.image_end,
"video_end": self.video_end,
"audio_end": self.audio_end,
"ic_req_data": self.ic_req_data,
}
add_params = [
"guided_json",
@@ -478,6 +484,9 @@ class RequestOutput:
num_input_video_tokens: Optional[int] = 0,
error_code: Optional[int] = 200,
error_msg: Optional[str] = None,
# for internal adapter
ic_req_data: Optional[dict] = None,
prompt_token_ids_len: Optional[int] = 0,
) -> None:
self.request_id = request_id
self.prompt = prompt
@@ -493,6 +502,8 @@ class RequestOutput:
self.num_input_video_tokens = num_input_video_tokens
self.error_code = error_code
self.error_msg = error_msg
self.ic_req_data = ic_req_data
self.prompt_token_ids_len = prompt_token_ids_len
if prompt_token_ids is None:
self.prompt_token_ids = []
@@ -565,6 +576,8 @@ class RequestOutput:
"num_input_video_tokens": self.num_input_video_tokens,
"error_code": self.error_code,
"error_msg": self.error_msg,
"ic_req_data": self.ic_req_data,
"prompt_token_ids_len": self.prompt_token_ids_len,
}