Feature: Add support for Pooling Model Embedding and provide an OpenAI-compatible API. (#4344)

* feat: add OpenAIServing

* feat: add ZmqOpenAIServing & OpenAIServingEmbedding

* feat: Refine the basic ServingEngine class and introduce ServingContext

* fix: code style

* fix: request

* fix: pooling_params

* feat: _process_chat_template_kwargs

* feat: support batch request

* feat: pooling_params verify & default parameters

---------

Co-authored-by: sunlei1024 <sunlei1024@example.com>
This commit is contained in:
SunLei
2025-10-15 19:42:59 +08:00
committed by GitHub
parent 744287e1a9
commit b4b579a7ed
12 changed files with 971 additions and 55 deletions

View File

@@ -1014,7 +1014,7 @@ class EngineArgs:
early_stop_args[k] = v
return EarlyStopConfig(early_stop_args)
def create_engine_config(self) -> FDConfig:
def create_engine_config(self, port_availability_check=True) -> FDConfig:
"""
Create and return a Config object based on the current settings.
"""
@@ -1064,9 +1064,10 @@ class EngineArgs:
early_stop_cfg = self.create_early_stop_config()
early_stop_cfg.update_enable_early_stop(self.enable_early_stop)
assert is_port_available(
"0.0.0.0", int(self.engine_worker_queue_port[parallel_cfg.local_data_parallel_id])
), f"The parameter `engine_worker_queue_port`:{self.engine_worker_queue_port} is already in use."
if port_availability_check:
assert is_port_available(
"0.0.0.0", int(self.engine_worker_queue_port[parallel_cfg.local_data_parallel_id])
), f"The parameter `engine_worker_queue_port`:{self.engine_worker_queue_port} is already in use."
return FDConfig(
model_config=model_cfg,