[Feature] Add return_token_ids and prompt_token_ids; remove training and raw_request from the request body (#2940)

* [feat] add return_token_ids and prompt_token_ids; delete raw_request from the request body

* [fix] return_token_ids not working in curl request

* [test] improve test cases for return_token_ids and prompt_token_ids

* [fix] the server responded OK even when request.messages was an empty list
This commit is contained in:
李泳桦
2025-07-21 19:31:14 +08:00
committed by GitHub
parent 2845bde964
commit 8a619e9db5
8 changed files with 506 additions and 59 deletions

View File

@@ -46,7 +46,6 @@ class Request:
preprocess_end_time: Optional[float] = None,
multimodal_inputs: Optional[dict] = None,
multimodal_data: Optional[dict] = None,
raw_request: bool = True,
disaggregate_info: Optional[dict] = None,
draft_token_ids: Optional[list[int]] = None,
guided_json: Optional[Any] = None,
@@ -74,7 +73,6 @@ class Request:
self.arrival_time = arrival_time
self.preprocess_start_time = preprocess_start_time
self.preprocess_end_time = preprocess_end_time
self.raw_request = raw_request
self.disaggregate_info = disaggregate_info
# speculative method in disaggregate-mode
@@ -117,7 +115,6 @@ class Request:
multimodal_data=d.get("multimodal_data"),
disaggregate_info=d.get("disaggregate_info"),
draft_token_ids=d.get("draft_token_ids"),
raw_request=d.get("raw_request", True),
guided_json=d.get("guided_json", None),
guided_regex=d.get("guided_regex", None),
guided_choice=d.get("guided_choice", None),
@@ -145,7 +142,6 @@ class Request:
"preprocess_end_time": self.preprocess_end_time,
"multimodal_inputs": self.multimodal_inputs,
"multimodal_data": self.multimodal_data,
"raw_request": self.raw_request,
"disaggregate_info": self.disaggregate_info,
"draft_token_ids": self.draft_token_ids,
"enable_thinking": self.enable_thinking,