qwen loader (#3057)

2025-10-06 17:17:14 +08:00 · 2025-07-30 19:09:38 +08:00
parent 28fff1b035
commit db698bda01
22 changed files with 494 additions and 92 deletions
--- a/fastdeploy/engine/args_utils.py
+++ b/fastdeploy/engine/args_utils.py
@@ -326,6 +326,13 @@ class EngineArgs:
    Configuration for early stop.
    """

+    load_choices: str = "default"
+    """The format of the model weights to load.
+    Options include:
+        - "default": the default loader.
+        - "new_loader": the new loader.
+    """
+
    def __post_init__(self):
        """
        Post-initialization processing to set default tokenizer if not provided.
@@ -543,6 +550,16 @@ class EngineArgs:
            help="Enable expert parallelism.",
        )

+        # Load group
+        load_group = parser.add_argument_group("Load Configuration")
+        load_group.add_argument(
+            "--load_choices",
+            type=str,
+            default=EngineArgs.load_choices,
+            help="The format of the model weights to load.\
+                 default/new_loader.",
+        )
+
        # CacheConfig parameters group
        cache_group = parser.add_argument_group("Cache Configuration")

@@ -897,4 +914,5 @@ class EngineArgs:
            disable_any_whitespace=self.guided_decoding_disable_any_whitespace,
            enable_logprob=self.enable_logprob,
            early_stop_config=early_stop_cfg,
+            load_choices=self.load_choices,
        )