Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 08:37:06 +08:00)
[Feature] optimize expert parallel (#3196)
* optimize
* Update expert_service.py
* Update worker_process.py
* optimize
This commit is contained in:
@@ -177,6 +177,8 @@ class OpenAIServingChat:
|
||||
for res in response:
|
||||
if res.get("error_code", 200) != 200:
|
||||
raise ValueError("{}".format(res["error_msg"]))
|
||||
if res["finished"]:
|
||||
api_server_logger.info(f"chat completion finished: {request_id}")
|
||||
|
||||
self.engine_client.data_processor.process_response_dict(
|
||||
res,
|
||||
|
Reference in New Issue
Block a user