mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 00:06:38 +08:00
Merge vl execution path into normal execution path (#2829)
* merge vl model into gpu_model runner Change-Id: I9f4691a3d5f135e8d72b1d58abcd15ef3aa3f2a6 * fix chinese Change-Id: Ic7405109b984c21e076fb3b01ff6feb571d0119a * fix the parse parameter Change-Id: I4cd62ee87c06220af580d91e347145d4394917fe * fix the bug in online_inference Change-Id: Idb111bb2114e83017c4050b2a68cf039c6d3c559 * polish code Change-Id: I7d4194102c2f1b0743b74fbd5fc284eb8ef4d17c
This commit is contained in:
@@ -549,6 +549,10 @@ def parse_args():
|
||||
"'ipc_snapshot': load from disk snapshot of IPC weights, "
|
||||
"'meta': provide RL traing worker, no_weights_load"
|
||||
"'normal':normal load weight")
|
||||
parser.add_argument("--enable_mm",
|
||||
type=str,
|
||||
default="false",
|
||||
help="Whether to use vl")
|
||||
parser.add_argument("--enable_logprob",
|
||||
action='store_true',
|
||||
help="Enable output of token-level log probabilities.")
|
||||
@@ -650,6 +654,8 @@ def initialize_fd_config(args, ranks: int = 1, local_rank: int = 0) -> FDConfig:
|
||||
"No quantization config found and use original weight and act dtype."
|
||||
)
|
||||
|
||||
# Set VL tag
|
||||
model_config.enable_mm = getattr(args, 'enable_mm', 'false').lower() == 'true'
|
||||
logger.info(f"- Dynamic load weight: {load_config.dynamic_load_weight}")
|
||||
logger.info(f"- Load strategy: {load_config.load_strategy}")
|
||||
|
||||
|
Reference in New Issue
Block a user