mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[Feature] support pool (#3827)
* support pool * update pooling * add pooler_config and check * update * support AutoWeightsLoader load weight * fix * update * delete print * update pre-commit * fix * fix xpu * fix ModelRegistry->model_registry * fix Copilot review * fix pooler.py * delete StepPooler * fix abstract * fix default_loader_v1 * fix Pre Commit * support torch qwen3 dense * add test and fix torch-qwen * fix * fix * adapter ci: * fix review * fix pooling_params.py * fix * fix tasks.py 2025 * fix print and logger * Modify ModelRegistry and delete AutoWeightsLoader * fix logger * fix test_embedding * fix ci bug * ernie4_5 model_registry * fix test * support Qwen3-Embedding-0.6B tp=1 load * fix extra code * fix * delete fix vocab_size * delete prepare_params_dict * fix:
This commit is contained in:
@@ -18,13 +18,14 @@ import argparse
|
||||
import json
|
||||
from dataclasses import asdict, dataclass
|
||||
from dataclasses import fields as dataclass_fields
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
import paddle
|
||||
|
||||
from fastdeploy import envs
|
||||
from fastdeploy.config import (
|
||||
CacheConfig,
|
||||
ConvertOption,
|
||||
EarlyStopConfig,
|
||||
FDConfig,
|
||||
GraphOptimizationConfig,
|
||||
@@ -32,6 +33,8 @@ from fastdeploy.config import (
|
||||
MobaAttentionConfig,
|
||||
ModelConfig,
|
||||
ParallelConfig,
|
||||
PoolerConfig,
|
||||
RunnerOption,
|
||||
SpeculativeConfig,
|
||||
TaskOption,
|
||||
)
|
||||
@@ -95,6 +98,20 @@ class EngineArgs:
|
||||
"""
|
||||
The task to be executed by the model.
|
||||
"""
|
||||
runner: RunnerOption = "auto"
|
||||
"""
|
||||
The type of model runner to use. Each FD instance only supports one model runner,
|
||||
even if the same model can be used for multiple types.
|
||||
"""
|
||||
convert: ConvertOption = "auto"
|
||||
"""
|
||||
Convert the model using adapters. The most common use case is to
|
||||
adapt a text generation model to be used for pooling tasks.
|
||||
"""
|
||||
override_pooler_config: Optional[Union[dict, PoolerConfig]] = None
|
||||
"""
|
||||
Override configuration for the pooler.
|
||||
"""
|
||||
max_num_seqs: int = 8
|
||||
"""
|
||||
Maximum number of sequences per iteration.
|
||||
@@ -473,6 +490,21 @@ class EngineArgs:
|
||||
default=EngineArgs.task,
|
||||
help="Task to be executed by the model.",
|
||||
)
|
||||
model_group.add_argument(
|
||||
"--runner",
|
||||
type=str,
|
||||
default=EngineArgs.runner,
|
||||
help="The type of model runner to use",
|
||||
)
|
||||
model_group.add_argument(
|
||||
"--convert", type=str, default=EngineArgs.convert, help="Convert the model using adapters"
|
||||
)
|
||||
model_group.add_argument(
|
||||
"--override-pooler-config",
|
||||
type=json.loads,
|
||||
default=EngineArgs.override_pooler_config,
|
||||
help="Override the pooler configuration with a JSON string.",
|
||||
)
|
||||
model_group.add_argument(
|
||||
"--use-warmup",
|
||||
type=int,
|
||||
|
Reference in New Issue
Block a user