Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-12-24 13:28:13 +08:00.
[FDConfig] remove engine client args, use fd_config instead (#5217)
* [refactor] remove engine client args, use fd_config instead
* [chore] update
* [fix] fix
* [fix] fix
* [chore] rename config to fd_config
* [fix] fix run_batch
* [ci] add ci case for engine client

---------

Co-authored-by: Jiaxin Sui <95567040+plusNew001@users.noreply.github.com>
This commit is contained in:
@@ -175,25 +175,12 @@ async def lifespan(app: FastAPI):
|
||||
model_paths = [ModelPath(name=served_model_names, model_path=args.model, verification=verification)]
|
||||
|
||||
engine_args = EngineArgs.from_cli_args(args)
|
||||
config = engine_args.create_engine_config(port_availability_check=False)
|
||||
fd_config = engine_args.create_engine_config(port_availability_check=False)
|
||||
engine_client = EngineClient(
|
||||
model_name_or_path=args.model,
|
||||
tokenizer=args.tokenizer,
|
||||
max_model_len=args.max_model_len,
|
||||
tensor_parallel_size=args.tensor_parallel_size,
|
||||
pid=pid,
|
||||
port=int(os.environ.get("INFERENCE_MSG_QUEUE_ID", "0")),
|
||||
limit_mm_per_prompt=args.limit_mm_per_prompt,
|
||||
mm_processor_kwargs=args.mm_processor_kwargs,
|
||||
reasoning_parser=args.reasoning_parser,
|
||||
data_parallel_size=args.data_parallel_size,
|
||||
enable_logprob=args.enable_logprob,
|
||||
fd_config=fd_config,
|
||||
workers=args.workers,
|
||||
tool_parser=args.tool_call_parser,
|
||||
enable_prefix_caching=args.enable_prefix_caching,
|
||||
splitwise_role=args.splitwise_role,
|
||||
max_processor_cache=args.max_processor_cache,
|
||||
config=config,
|
||||
)
|
||||
await engine_client.connection_manager.initialize()
|
||||
app.state.dynamic_load_weight = args.dynamic_load_weight
|
||||
@@ -224,14 +211,14 @@ async def lifespan(app: FastAPI):
|
||||
embedding_handler = OpenAIServingEmbedding(
|
||||
engine_client,
|
||||
app.state.model_handler,
|
||||
config,
|
||||
fd_config,
|
||||
pid,
|
||||
args.ips,
|
||||
args.max_waiting_time,
|
||||
chat_template,
|
||||
)
|
||||
reward_handler = OpenAIServingReward(
|
||||
engine_client, app.state.model_handler, config, pid, args.ips, args.max_waiting_time, chat_template
|
||||
engine_client, app.state.model_handler, fd_config, pid, args.ips, args.max_waiting_time, chat_template
|
||||
)
|
||||
engine_client.create_zmq_client(model=pid, mode=zmq.PUSH)
|
||||
engine_client.pid = pid
|
||||
|
||||
Reference in New Issue
Block a user