qwen loader (#3057)

2025-10-17 06:00:59 +08:00 · 2025-07-30 19:09:38 +08:00
parent 28fff1b035
commit db698bda01
22 changed files with 494 additions and 92 deletions
--- a/fastdeploy/model_executor/model_loader/__init__.py
+++ b/fastdeploy/model_executor/model_loader/__init__.py
@@ -0,0 +1,32 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from fastdeploy.config import LoadChoices, LoadConfig
+from fastdeploy.model_executor.model_loader.base_loader import BaseModelLoader
+from fastdeploy.model_executor.model_loader.default_loader import DefaultModelLoader
+from fastdeploy.model_executor.model_loader.new_loader import NewModelLoader
+
+
+def get_model_loader(load_config: LoadConfig) -> BaseModelLoader:
+    """get_model_loader"""
+
+    if load_config.load_choices == LoadChoices.NEW_LOADER:
+        return NewModelLoader(load_config)
+
+    return DefaultModelLoader(load_config)
+
+
+# Names exported by ``from ... import *`` on this module.
+__all__ = ["get_model_loader"]
--- a/fastdeploy/model_executor/model_loader/base_loader.py
+++ b/fastdeploy/model_executor/model_loader/base_loader.py
@@ -0,0 +1,38 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from abc import ABC, abstractmethod
+
+from paddle import nn
+
+from fastdeploy.config import FDConfig, LoadConfig, ModelConfig
+
+
+class BaseModelLoader(ABC):
+    """Base class for model loaders."""
+
+    def __init__(self, load_config: LoadConfig):
+        self.load_config = load_config
+
+    @abstractmethod
+    def download_model(self, load_config: ModelConfig) -> None:
+        """Download a model so that it can be immediately loaded."""
+        raise NotImplementedError
+
+    @abstractmethod
+    def load_model(self, fd_config: FDConfig) -> nn.Layer:
+        """Load a model with the given configurations."""
+        raise NotImplementedError
--- a/fastdeploy/model_executor/model_loader/default_loader.py
+++ b/fastdeploy/model_executor/model_loader/default_loader.py
@@ -0,0 +1,88 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import paddle
+from paddle import nn
+from paddleformers.utils.log import logger
+
+from fastdeploy.config import FDConfig, LoadConfig, ModelConfig
+from fastdeploy.model_executor.load_weight_utils import (
+    load_composite_checkpoint,
+    measure_time,
+)
+from fastdeploy.model_executor.model_loader.base_loader import BaseModelLoader
+from fastdeploy.model_executor.model_loader.utils import get_pretrain_cls
+from fastdeploy.model_executor.models.model_base import ModelRegistry
+from fastdeploy.platforms import current_platform
+
+
+class DefaultModelLoader(BaseModelLoader):
+    """ModelLoader that can load registered models"""
+
+    def __init__(self, load_config: LoadConfig):
+        super().__init__(load_config)
+        logger.info("Load the model and weights using DefaultModelLoader")
+
+    def download_model(self, model_config: ModelConfig) -> None:
+        """download_model"""
+        pass
+
+    def clean_memory_fragments(self, state_dict: dict) -> None:
+        """clean_memory_fragments"""
+        if current_platform.is_cuda():
+            if state_dict:
+                for k, v in state_dict.items():
+                    if isinstance(v, paddle.Tensor):
+                        v.value().get_tensor()._clear()
+            paddle.device.cuda.empty_cache()
+            paddle.device.synchronize()
+
+    @measure_time
+    def load_weights(self, model, fd_config: FDConfig, architectures: str) -> None:
+        model_class = get_pretrain_cls(architectures)
+        state_dict = load_composite_checkpoint(
+            fd_config.model_config.model,
+            model_class,
+            fd_config,
+            return_numpy=True,
+        )
+        model.set_state_dict(state_dict)
+        self.clean_memory_fragments(state_dict)
+
+    def load_model(self, fd_config: FDConfig) -> nn.Layer:
+        context = paddle.LazyGuard()
+        architectures = fd_config.model_config.architectures[0]
+        logger.info(f"Starting to load model {architectures}")
+
+        if fd_config.load_config.dynamic_load_weight:
+            # register rl model
+            import fastdeploy.rl  # noqa
+
+            architectures = architectures + "RL"
+
+        with context:
+            model_cls = ModelRegistry.get_class(architectures)
+            model = model_cls(fd_config)
+
+        model.eval()
+
+        # RL model not need set_state_dict
+        if fd_config.load_config.dynamic_load_weight:
+            return model
+
+        # TODO(gongshaotian): Now, only support safetensor
+        self.load_weights(model, fd_config, architectures)
+        return model
--- a/fastdeploy/model_executor/model_loader/new_loader.py
+++ b/fastdeploy/model_executor/model_loader/new_loader.py
@@ -0,0 +1,74 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+import paddle
+from paddle import nn
+from paddleformers.utils.log import logger
+
+from fastdeploy.config import FDConfig, LoadConfig, ModelConfig
+from fastdeploy.model_executor.load_weight_utils import (
+    get_all_safetensors,
+    measure_time,
+    safetensors_weights_iterator,
+)
+from fastdeploy.model_executor.model_loader.base_loader import BaseModelLoader
+from fastdeploy.model_executor.models.model_base import ModelRegistry
+from fastdeploy.platforms import current_platform
+
+
+class NewModelLoader(BaseModelLoader):
+    """ModelLoader that can load registered models"""
+
+    def __init__(self, load_config: LoadConfig):
+        super().__init__(load_config)
+
+    def download_model(self, model_config: ModelConfig) -> None:
+        pass
+
+    def clean_memory_fragments(self) -> None:
+        """clean_memory_fragments"""
+        if current_platform.is_cuda():
+            paddle.device.cuda.empty_cache()
+            paddle.device.synchronize()
+
+    @measure_time
+    def load_weights(self, model, fd_config: FDConfig) -> None:
+        _, safetensor_files = get_all_safetensors(fd_config.model_config.model)
+        weights_iterator = safetensors_weights_iterator(safetensor_files)
+        model.load_weights(weights_iterator)
+        self.clean_memory_fragments()
+
+    def load_model(self, fd_config: FDConfig) -> nn.Layer:
+        architectures = fd_config.model_config.architectures[0]
+        logger.info(f"Starting to load model {architectures}")
+
+        if fd_config.load_config.dynamic_load_weight:
+            # register rl model
+            import fastdeploy.rl  # noqa
+
+            architectures = architectures + "RL"
+
+        model_cls = ModelRegistry.get_class(architectures)
+        model = model_cls(fd_config)
+
+        model.eval()
+
+        # RL model not need set_state_dict
+        if fd_config.load_config.dynamic_load_weight:
+            return model
+
+        self.load_weights(model, fd_config)
+        return model
--- a/fastdeploy/model_executor/model_loader/utils.py
+++ b/fastdeploy/model_executor/model_loader/utils.py
@@ -0,0 +1,43 @@
+"""
+# Copyright (c) 2025  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+
+from paddleformers.transformers import PretrainedModel
+
+from fastdeploy.model_executor.models.deepseek_v3 import DeepSeekV3PretrainedModel
+from fastdeploy.model_executor.models.ernie4_5_moe import Ernie4_5_PretrainedModel
+from fastdeploy.model_executor.models.ernie4_5_mtp import Ernie4_5_MTPPretrainedModel
+from fastdeploy.model_executor.models.ernie4_5_vl.ernie4_5_vl_moe import (
+    Ernie4_5_VLPretrainedModel,
+)
+from fastdeploy.model_executor.models.qwen2 import Qwen2PretrainedModel
+from fastdeploy.model_executor.models.qwen3 import Qwen3PretrainedModel
+from fastdeploy.model_executor.models.qwen3moe import Qwen3MoePretrainedModel
+
+# Maps an architecture name to its pretrained-model class; get_pretrain_cls
+# looks names up here. Both Ernie4_5_MoeForCausalLM and Ernie4_5_ForCausalLM
+# map to Ernie4_5_PretrainedModel.
+MODEL_CLASSES = {
+    "Ernie4_5_MoeForCausalLM": Ernie4_5_PretrainedModel,
+    "Ernie4_5_MTPForCausalLM": Ernie4_5_MTPPretrainedModel,
+    "Qwen2ForCausalLM": Qwen2PretrainedModel,
+    "Qwen3ForCausalLM": Qwen3PretrainedModel,
+    "Qwen3MoeForCausalLM": Qwen3MoePretrainedModel,
+    "Ernie4_5_ForCausalLM": Ernie4_5_PretrainedModel,
+    "DeepseekV3ForCausalLM": DeepSeekV3PretrainedModel,
+    "Ernie4_5_VLMoeForConditionalGeneration": Ernie4_5_VLPretrainedModel,
+}
+
+
+def get_pretrain_cls(architectures: str) -> PretrainedModel:
+    """get_pretrain_cls"""
+    return MODEL_CLASSES[architectures]