mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[Feature] support pool (#3827)
* support pool * update pooling * add pooler_config and check * update * support AutoWeightsLoader load weight * fix * update * delete print * update pre-commit * fix * fix xpu * fix ModelRegistry->model_registry * fix Copilot review * fix pooler.py * delete StepPooler * fix abstract * fix default_loader_v1 * fix Pre Commit * support torch qwen3 dense * add test and fix torch-qwen * fix * fix * adapter ci: * fix review * fix pooling_params.py * fix * fix tasks.py 2025 * fix print and logger * Modify ModelRegistry and delete AutoWeightsLoader * fix logger * fix test_embedding * fix ci bug * ernie4_5 model_registry * fix test * support Qwen3-Embedding-0.6B tp=1 load * fix extra code * fix * delete fix vocab_size * delete prepare_params_dict * fix:
This commit is contained in:
@@ -45,7 +45,7 @@ from fastdeploy.inter_communicator import IPCSignal
|
||||
from fastdeploy.model_executor.layers.quantization import parse_quant_config
|
||||
from fastdeploy.platforms import current_platform
|
||||
from fastdeploy.scheduler import SchedulerConfig
|
||||
from fastdeploy.utils import get_logger
|
||||
from fastdeploy.utils import get_logger, optional_type
|
||||
from fastdeploy.worker.worker_base import WorkerBase
|
||||
|
||||
logger = get_logger("worker_process", "worker_process.log")
|
||||
@@ -643,6 +643,27 @@ def parse_args():
|
||||
help="Flag to specify dtype of lm_head as FP32",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--runner",
|
||||
type=str,
|
||||
default="auto",
|
||||
help="The type of model runner to use.Each FD instance only supports one model runner.even if the same model can be used for multiple types.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--convert",
|
||||
type=str,
|
||||
default="auto",
|
||||
help="Convert the model using adapters. The most common use case is to adapt a text generation model to be used for pooling tasks.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
"--override-pooler-config",
|
||||
type=optional_type(json.loads),
|
||||
default=None,
|
||||
help="Override configuration for the pooler.",
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
Reference in New Issue
Block a user