From b455fd39f322a802ec5969e6f754d7fc617523df Mon Sep 17 00:00:00 2001 From: Yuanle Liu Date: Wed, 24 Sep 2025 11:17:12 +0800 Subject: [PATCH] register_model_class compatible with plugins (#4236) --- .../model_executor/models/deepseek_v3.py | 2 +- .../model_executor/models/ernie4_5_moe.py | 6 ++-- .../model_executor/models/ernie4_5_mtp.py | 2 +- .../models/ernie4_5_vl/ernie4_5_vl_moe.py | 2 +- fastdeploy/model_executor/models/glm4_moe.py | 2 +- .../model_executor/models/model_base.py | 31 +++++++++---------- fastdeploy/model_executor/models/qwen2.py | 2 +- .../models/qwen2_5_vl/qwen2_5_vl.py | 2 +- fastdeploy/model_executor/models/qwen3.py | 2 +- fastdeploy/model_executor/models/qwen3moe.py | 2 +- 10 files changed, 25 insertions(+), 28 deletions(-) diff --git a/fastdeploy/model_executor/models/deepseek_v3.py b/fastdeploy/model_executor/models/deepseek_v3.py index c2045cb82..faa76be8d 100644 --- a/fastdeploy/model_executor/models/deepseek_v3.py +++ b/fastdeploy/model_executor/models/deepseek_v3.py @@ -596,7 +596,7 @@ class DeepSeekV3Model(nn.Layer): @ModelRegistry.register_model_class( architecture="DeepseekV3ForCausalLM", - module_path="deepseek_v3", + module_name="deepseek_v3", category=ModelCategory.TEXT_GENERATION, primary_use=ModelCategory.TEXT_GENERATION, ) diff --git a/fastdeploy/model_executor/models/ernie4_5_moe.py b/fastdeploy/model_executor/models/ernie4_5_moe.py index 6b5a89164..c2baeb910 100644 --- a/fastdeploy/model_executor/models/ernie4_5_moe.py +++ b/fastdeploy/model_executor/models/ernie4_5_moe.py @@ -484,7 +484,7 @@ class Ernie4_5_Model(nn.Layer): @ModelRegistry.register_model_class( architecture="Ernie4_5_MoeForCausalLM", - module_path="ernie4_5_moe", + module_name="ernie4_5_moe", category=ModelCategory.TEXT_GENERATION, primary_use=ModelCategory.TEXT_GENERATION, ) @@ -665,7 +665,7 @@ class Ernie4_5_MoeForCausalLM(ModelForCasualLM): @ModelRegistry.register_model_class( architecture="Ernie4_5_ForCausalLM", - module_path="ernie4_5_moe", + 
module_name="ernie4_5_moe", category=ModelCategory.TEXT_GENERATION, primary_use=ModelCategory.TEXT_GENERATION, ) @@ -684,7 +684,7 @@ class Ernie4_5_ForCausalLM(Ernie4_5_MoeForCausalLM): @ModelRegistry.register_model_class( architecture="Ernie4_5ForCausalLM", - module_path="ernie4_5_moe", + module_name="ernie4_5_moe", category=ModelCategory.TEXT_GENERATION, primary_use=ModelCategory.TEXT_GENERATION, ) diff --git a/fastdeploy/model_executor/models/ernie4_5_mtp.py b/fastdeploy/model_executor/models/ernie4_5_mtp.py index f5b237274..c9bef3700 100644 --- a/fastdeploy/model_executor/models/ernie4_5_mtp.py +++ b/fastdeploy/model_executor/models/ernie4_5_mtp.py @@ -331,7 +331,7 @@ class Ernie4_5_MTPModel(nn.Layer): @ModelRegistry.register_model_class( architecture="Ernie4_5_MTPForCausalLM", - module_path="ernie4_5_mtp", + module_name="ernie4_5_mtp", category=ModelCategory.TEXT_GENERATION, primary_use=ModelCategory.TEXT_GENERATION, ) diff --git a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py index fc71b9daf..de1c405af 100644 --- a/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py +++ b/fastdeploy/model_executor/models/ernie4_5_vl/ernie4_5_vl_moe.py @@ -798,7 +798,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(ModelForCasualLM): @ModelRegistry.register_model_class( architecture="Ernie4_5_VLMoeForConditionalGeneration", - module_path="ernie4_5_vl.ernie4_5_vl_moe", + module_name="ernie4_5_vl.ernie4_5_vl_moe", category=ModelCategory.MULTIMODAL, primary_use=ModelCategory.MULTIMODAL, ) diff --git a/fastdeploy/model_executor/models/glm4_moe.py b/fastdeploy/model_executor/models/glm4_moe.py index 40a492126..22e07a3c3 100644 --- a/fastdeploy/model_executor/models/glm4_moe.py +++ b/fastdeploy/model_executor/models/glm4_moe.py @@ -375,7 +375,7 @@ class Glm4MoeModel(nn.Layer): @ModelRegistry.register_model_class( architecture="Glm4MoeForCausalLM", - module_path="glm4_moe", + 
module_name="glm4_moe", category=ModelCategory.TEXT_GENERATION, primary_use=ModelCategory.TEXT_GENERATION, ) diff --git a/fastdeploy/model_executor/models/model_base.py b/fastdeploy/model_executor/models/model_base.py index 627d9050b..fddfb4de5 100644 --- a/fastdeploy/model_executor/models/model_base.py +++ b/fastdeploy/model_executor/models/model_base.py @@ -81,11 +81,12 @@ class LazyRegisteredModel(BaseRegisteredModel): """Lazy loaded model""" module_name: str + module_path: str class_name: str def load_model_cls(self) -> Type[nn.Layer]: try: - full_module = f"fastdeploy.model_executor.models.{self.module_name}" + full_module = f"{self.module_path}.{self.module_name}" module = importlib.import_module(full_module) return getattr(module, self.class_name) except (ImportError, AttributeError) as e: @@ -96,18 +97,6 @@ class LazyRegisteredModel(BaseRegisteredModel): return ModelInfo.from_model_cls(model_cls, self.module_name) -@dataclass(frozen=True) -class RegisteredModel(BaseRegisteredModel): - - model_cls: Type[nn.Layer] - - def load_model_cls(self) -> Type[nn.Layer]: - return self.model_cls - - def inspect_model_cls(self) -> ModelInfo: - return ModelInfo.from_model_cls(self.model_cls) - - @lru_cache(maxsize=128) def _try_inspect_model_cls( model_arch: str, @@ -133,7 +122,11 @@ class ModelRegistry: def _register_enhanced_models(self): for arch, model_info in self._enhanced_models.items(): - model = LazyRegisteredModel(module_name=model_info["module_path"], class_name=model_info["class_name"]) + model = LazyRegisteredModel( + module_name=model_info["module_name"], + module_path=model_info["module_path"], + class_name=model_info["class_name"], + ) self.models[arch] = model self._registered_models[arch] = model @@ -212,7 +205,8 @@ class ModelRegistry: model_class=None, *, architecture: str = None, - module_path: str = None, + module_name: str = None, + module_path: str = "fastdeploy.model_executor.models", category: Union[ModelCategory, List[ModelCategory]] = 
ModelCategory.TEXT_GENERATION, primary_use: ModelCategory = None, ): @@ -226,7 +220,8 @@ class ModelRegistry: Args: model_class: The model class (when used as simple decorator) architecture (str): Unique identifier for the model architecture - module_path (str): Relative path to the module containing the model + module_name (str): Dotted module name, relative to module_path, that defines the model + module_path (str): Dotted import path of the package that contains module_name category: Model category or list of categories primary_use: Primary category for multi-category models """ @@ -237,13 +232,14 @@ class ModelRegistry: cls._arch_to_model_cls[model_cls.name()] = model_cls # Enhanced decorator-style registration - if architecture and module_path: + if architecture and module_name: categories = category if isinstance(category, list) else [category] # Register main entry arch_key = architecture cls._enhanced_models[arch_key] = { "class_name": model_cls.__name__, + "module_name": module_name, "module_path": module_path, "category": primary_use or categories[0], "class": model_cls, @@ -255,6 +251,7 @@ class ModelRegistry: key = f"{arch_key}_{cat.value}" cls._enhanced_models[key] = { "class_name": model_cls.__name__, + "module_name": module_name, "module_path": module_path, "category": cat, "primary_use": primary_use or categories[0], diff --git a/fastdeploy/model_executor/models/qwen2.py b/fastdeploy/model_executor/models/qwen2.py index 1f95bdc4a..fd51358c5 100644 --- a/fastdeploy/model_executor/models/qwen2.py +++ b/fastdeploy/model_executor/models/qwen2.py @@ -288,7 +288,7 @@ class Qwen2Model(nn.Layer): @ModelRegistry.register_model_class( architecture="Qwen2ForCausalLM", - module_path="qwen2", + module_name="qwen2", category=[ModelCategory.TEXT_GENERATION, ModelCategory.EMBEDDING], primary_use=ModelCategory.TEXT_GENERATION, ) diff --git a/fastdeploy/model_executor/models/qwen2_5_vl/qwen2_5_vl.py b/fastdeploy/model_executor/models/qwen2_5_vl/qwen2_5_vl.py index 8992b1c18..1ec312fd6 100644 --- 
a/fastdeploy/model_executor/models/qwen2_5_vl/qwen2_5_vl.py +++ b/fastdeploy/model_executor/models/qwen2_5_vl/qwen2_5_vl.py @@ -163,7 +163,7 @@ class Qwen2_5_VLModel(nn.Layer): @ModelRegistry.register_model_class( architecture="Qwen2_5_VLForConditionalGeneration", - module_path="qwen2_5_vl.qwen2_5_vl", + module_name="qwen2_5_vl.qwen2_5_vl", category=ModelCategory.MULTIMODAL, primary_use=ModelCategory.MULTIMODAL, ) diff --git a/fastdeploy/model_executor/models/qwen3.py b/fastdeploy/model_executor/models/qwen3.py index 47ed104ba..32af35279 100644 --- a/fastdeploy/model_executor/models/qwen3.py +++ b/fastdeploy/model_executor/models/qwen3.py @@ -225,7 +225,7 @@ class Qwen3Model(nn.Layer): @ModelRegistry.register_model_class( architecture="Qwen3ForCausalLM", - module_path="qwen3", + module_name="qwen3", category=[ModelCategory.TEXT_GENERATION], primary_use=ModelCategory.TEXT_GENERATION, ) diff --git a/fastdeploy/model_executor/models/qwen3moe.py b/fastdeploy/model_executor/models/qwen3moe.py index bc270e126..8e47a919b 100644 --- a/fastdeploy/model_executor/models/qwen3moe.py +++ b/fastdeploy/model_executor/models/qwen3moe.py @@ -322,7 +322,7 @@ class Qwen3MoeModel(nn.Layer): @ModelRegistry.register_model_class( architecture="Qwen3MoeForCausalLM", - module_path="qwen3moe", + module_name="qwen3moe", category=ModelCategory.TEXT_GENERATION, primary_use=ModelCategory.TEXT_GENERATION, )