[feature] support reward api (#4518)
Some checks failed
CE Compile Job / ce_job_pre_check (push) Has been cancelled
CE Compile Job / print_ce_job_pre_check_outputs (push) Has been cancelled
CE Compile Job / FD-Clone-Linux (push) Has been cancelled
CE Compile Job / Show Code Archive Output (push) Has been cancelled
CE Compile Job / BUILD_SM8090 (push) Has been cancelled
CE Compile Job / BUILD_SM8689 (push) Has been cancelled
CE Compile Job / CE_UPLOAD (push) Has been cancelled
Deploy GitHub Pages / deploy (push) Has been cancelled

Co-authored-by: SunLei <sunlei5788@gmail.com>
This commit is contained in:
xiaolei373
2025-10-29 00:20:28 +08:00
committed by GitHub
parent a012e3608b
commit 14e7d88ea4
9 changed files with 362 additions and 17 deletions

View File

@@ -41,6 +41,7 @@ from fastdeploy.entrypoints.engine_client import EngineClient
from fastdeploy.entrypoints.openai.protocol import (
ChatCompletionRequest,
ChatCompletionResponse,
ChatRewardRequest,
CompletionRequest,
CompletionResponse,
ControlSchedulerRequest,
@@ -53,6 +54,7 @@ from fastdeploy.entrypoints.openai.serving_chat import OpenAIServingChat
from fastdeploy.entrypoints.openai.serving_completion import OpenAIServingCompletion
from fastdeploy.entrypoints.openai.serving_embedding import OpenAIServingEmbedding
from fastdeploy.entrypoints.openai.serving_models import ModelPath, OpenAIServingModels
from fastdeploy.entrypoints.openai.serving_reward import OpenAIServingReward
from fastdeploy.entrypoints.openai.tool_parsers import ToolParserManager
from fastdeploy.entrypoints.openai.utils import UVICORN_CONFIG, make_arg_parser
from fastdeploy.envs import environment_variables
@@ -232,12 +234,16 @@ async def lifespan(app: FastAPI):
args.max_waiting_time,
chat_template,
)
reward_handler = OpenAIServingReward(
engine_client, app.state.model_handler, config, pid, args.ips, args.max_waiting_time, chat_template
)
engine_client.create_zmq_client(model=pid, mode=zmq.PUSH)
engine_client.pid = pid
app.state.engine_client = engine_client
app.state.chat_handler = chat_handler
app.state.completion_handler = completion_handler
app.state.embedding_handler = embedding_handler
app.state.reward_handler = reward_handler
global llm_engine
if llm_engine is not None:
llm_engine.engine.data_processor = engine_client.data_processor
@@ -447,6 +453,20 @@ async def list_models() -> Response:
return JSONResponse(content=models.model_dump())
@app.post("/v1/reward")
async def create_reward(request: ChatRewardRequest):
    """
    Compute reward scores for the given chat input.

    When dynamic weight loading is enabled, first verify that the worker
    processes are alive; if they are not, short-circuit with an error
    response instead of dispatching the request.
    """
    if app.state.dynamic_load_weight:
        alive, _msg = app.state.engine_client.is_workers_alive()
        if not alive:
            # NOTE(review): 304 is an odd status for an unhealthy backend
            # (503 would be conventional) — kept as-is to match the other
            # endpoints in this server; confirm before changing.
            return JSONResponse(content={"error": "Worker Service Not Healthy"}, status_code=304)
    reward_response = await app.state.reward_handler.create_reward(request)
    return JSONResponse(content=reward_response.model_dump())
@app.post("/v1/embeddings")
async def create_embedding(request: EmbeddingRequest):
"""