[Feature] support pool (#3827)

* support pool * update pooling * add pooler_config and check * update * support AutoWeightsLoader load weight * fix * update * delete print * update pre-commit * fix * fix xpu * fix ModelRegistry->model_registry * fix Copilot review * fix pooler.py * delete StepPooler * fix abstract * fix default_loader_v1 * fix Pre Commit * support torch qwen3 dense * add test and fix torch-qwen * fix * fix * adapter ci: * fix review * fix pooling_params.py * fix * fix tasks.py 2025 * fix print and logger * Modefy ModelRegistry and delete AutoWeightsLoader * fix logger * fix test_embedding * fix ci bug * ernie4_5 model_registry * fix test * support Qwen3-Embedding-0.6B tp=1 load * fix extra code * fix * delete fix vocab_size * delete prepare_params_dict * fix:
2025-10-05 08:37:06 +08:00 · 2025-09-22 14:09:09 +08:00
parent da74a5f0b3
commit c86945ef49
36 changed files with 2371 additions and 51 deletions
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -18,13 +18,14 @@ import argparse
 import json
 from dataclasses import asdict, dataclass
 from dataclasses import fields as dataclass_fields
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union

 import paddle

 from fastdeploy import envs
 from fastdeploy.config import (
    CacheConfig,
+    ConvertOption,
    EarlyStopConfig,
    FDConfig,
    GraphOptimizationConfig,
@@ -32,6 +33,8 @@ from fastdeploy.config import (
    MobaAttentionConfig,
    ModelConfig,
    ParallelConfig,
+    PoolerConfig,
+    RunnerOption,
    SpeculativeConfig,
    TaskOption,
 )
@@ -95,6 +98,20 @@ class EngineArgs:
    """
    The task to be executed by the model.
    """
+    runner: RunnerOption = "auto"
+    """
+    The type of model runner to use. Each FD instance only supports one model runner,
+    even if the same model can be used for multiple types.
+    """
+    convert: ConvertOption = "auto"
+    """
+    Convert the model using adapters. The most common use case is to
+    adapt a text generation model to be used for pooling tasks.
+    """
+    override_pooler_config: Optional[Union[dict, PoolerConfig]] = None
+    """
+    Override configuration for the pooler.
+    """
    max_num_seqs: int = 8
    """
    Maximum number of sequences per iteration.
@@ -473,6 +490,21 @@ class EngineArgs:
            default=EngineArgs.task,
            help="Task to be executed by the model.",
        )
+        model_group.add_argument(
+            "--runner",
+            type=str,
+            default=EngineArgs.runner,
+            help="The type of model runner to use",
+        )
+        model_group.add_argument(
+            "--convert", type=str, default=EngineArgs.convert, help="Convert the model using adapters"
+        )
+        model_group.add_argument(
+            "--override-pooler-config",
+            type=json.loads,
+            default=EngineArgs.override_pooler_config,
+            help="Override the pooler configuration with a JSON string.",
+        )
        model_group.add_argument(
            "--use-warmup",
            type=int,