mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
[feature] support reward api (#4518)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled
Co-authored-by: SunLei <sunlei5788@gmail.com>
This commit is contained in:
@@ -41,6 +41,7 @@ from fastdeploy.entrypoints.engine_client import EngineClient
|
||||
from fastdeploy.entrypoints.openai.protocol import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponse,
|
||||
ChatRewardRequest,
|
||||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
ControlSchedulerRequest,
|
||||
@@ -53,6 +54,7 @@ from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
|
||||
from fastdeploy.entrypoints.openai.serving_completion import OpenAIServingCompletion
|
||||
from fastdeploy.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
|
||||
from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels
|
||||
from fastdeploy.entrypoints.openai.serving_reward import OpenAIServingReward
|
||||
from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
|
||||
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser
|
||||
from fastdeploy.envs import environment_variables
|
||||
@@ -232,12 +234,16 @@ async def lifespan(app: FastAPI):
|
||||
args.max_waiting_time,
|
||||
chat_template,
|
||||
)
|
||||
reward_handler = OpenAIServingReward(
|
||||
engine_client, app.state.model_handler, config, pid, args.ips, args.max_waiting_time, chat_template
|
||||
)
|
||||
engine_client.create_zmq_client(model=pid, mode=zmq.PUSH)
|
||||
engine_client.pid = pid
|
||||
app.state.engine_client = engine_client
|
||||
app.state.chat_handler = chat_handler
|
||||
app.state.completion_handler = completion_handler
|
||||
app.state.embedding_handler = embedding_handler
|
||||
app.state.reward_handler = reward_handler
|
||||
global llm_engine
|
||||
if llm_engine is not None:
|
||||
llm_engine.engine.data_processor = engine_client.data_processor
|
||||
@@ -447,6 +453,20 @@ async def list_models() -> Response:
|
||||
return JSONResponse(content=models.model_dump())
|
||||
|
||||
|
||||
@app.post("/v1/reward")
async def create_reward(request: ChatRewardRequest):
    """
    Compute reward scores for the input texts in *request* and return them
    as a JSON response.
    """
    # Same health gate as the sibling serving endpoints: when dynamic weight
    # loading is enabled, refuse to serve while worker processes are down.
    if app.state.dynamic_load_weight:
        workers_alive, _detail = app.state.engine_client.is_workers_alive()
        if not workers_alive:
            # NOTE(review): 304 (Not Modified) is an unusual status for an
            # unhealthy backend and normally carries no body — presumably kept
            # for consistency with the other endpoints in this service; the
            # detail message from is_workers_alive() is discarded. Confirm.
            return JSONResponse(content={"error": "Worker Service Not Healthy"}, status_code=304)

    # Delegate scoring to the reward handler and serialize its pydantic result.
    reward_result = await app.state.reward_handler.create_reward(request)
    return JSONResponse(content=reward_result.model_dump())
|
||||
|
||||
|
||||
@app.post("/v1/embeddings")
|
||||
async def create_embedding(request: EmbeddingRequest):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user