Support for async processor added. (#3869)

* Support for async processor added.

* Remove yappi profiling code

---------

Co-authored-by: Yuanle Liu <yuanlehome@163.com>

Authored by SunLei on 2025-09-04 19:58:53 +08:00, committed by GitHub
parent ed97cf8396, commit 29628de6a7
4 changed files with 12 additions and 8 deletions

@@ -14,6 +14,7 @@
 # limitations under the License.
 """
 
+import inspect
 import os
 import time
 import traceback
@@ -112,7 +113,7 @@ class EngineClient:
         self.zmq_client = ZmqClient(model, mode)
         self.zmq_client.connect()
 
-    def format_and_add_data(self, prompts: dict):
+    async def format_and_add_data(self, prompts: dict):
         """
         Format the request data and send the request to the server.
         """
@@ -123,10 +124,10 @@ class EngineClient:
         if "max_tokens" not in prompts:
             prompts["max_tokens"] = self.max_model_len - 1
 
-        self.add_requests(prompts)
+        await self.add_requests(prompts)
         return prompts["prompt_token_ids"]
 
-    def add_requests(self, task):
+    async def add_requests(self, task):
         """
         Add a new request to the queue.
 
@@ -140,7 +141,10 @@
         task["preprocess_start_time"] = time.time()
         try:
-            self.data_processor.process_request_dict(task, self.max_model_len)
+            if inspect.iscoroutinefunction(self.data_processor.process_request_dict):
+                await self.data_processor.process_request_dict(task, self.max_model_len)
+            else:
+                self.data_processor.process_request_dict(task, self.max_model_len)
             task["prompt_token_ids_len"] = len(task["prompt_token_ids"])
             input_ids_len = task["prompt_token_ids_len"]
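
For reference, the last hunk dispatches on whether the configured data processor exposes a synchronous or an asynchronous process_request_dict. Below is a minimal, self-contained sketch of that pattern; SyncProcessor, AsyncProcessor, and the standalone add_requests helper are hypothetical illustrations, not code from this repository — only the inspect.iscoroutinefunction check and the conditional await mirror the diff.

# Sketch of the sync/async processor dispatch used in add_requests above.
# SyncProcessor and AsyncProcessor are hypothetical stand-ins for a data processor.
import asyncio
import inspect


class SyncProcessor:
    def process_request_dict(self, task, max_model_len):
        # Blocking preprocessing: tokenize the prompt synchronously.
        task["prompt_token_ids"] = [1, 2, 3]
        return task


class AsyncProcessor:
    async def process_request_dict(self, task, max_model_len):
        # Async preprocessing, e.g. when tokenization awaits a remote service.
        await asyncio.sleep(0)
        task["prompt_token_ids"] = [1, 2, 3]
        return task


async def add_requests(processor, task, max_model_len=8192):
    # Await the processor only when it exposes a coroutine function,
    # mirroring the inspect.iscoroutinefunction check in the diff.
    if inspect.iscoroutinefunction(processor.process_request_dict):
        await processor.process_request_dict(task, max_model_len)
    else:
        processor.process_request_dict(task, max_model_len)
    task["prompt_token_ids_len"] = len(task["prompt_token_ids"])
    return task


if __name__ == "__main__":
    # Both processor flavours go through the same code path.
    print(asyncio.run(add_requests(SyncProcessor(), {"prompt": "hi"})))
    print(asyncio.run(add_requests(AsyncProcessor(), {"prompt": "hi"})))

Because format_and_add_data and add_requests become coroutines in this change, any caller that previously invoked them directly now has to await them from an async context (or drive them with asyncio.run, as the sketch above does).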