mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-06 00:57:33 +08:00
[Feature] support pool (#3827)
* support pool * update pooling * add pooler_config and check * update * support AutoWeightsLoader load weight * fix * update * delete print * update pre-commit * fix * fix xpu * fix ModelRegistry->model_registry * fix Copilot review * fix pooler.py * delete StepPooler * fix abstract * fix default_loader_v1 * fix Pre Commit * support torch qwen3 dense * add test and fix torch-qwen * fix * fix * adapter ci: * fix review * fix pooling_params.py * fix * fix tasks.py 2025 * fix print and logger * Modify ModelRegistry and delete AutoWeightsLoader * fix logger * fix test_embedding * fix ci bug * ernie4_5 model_registry * fix test * support Qwen3-Embedding-0.6B tp=1 load * fix extra code * fix * delete fix vocab_size * delete prepare_params_dict * fix:
This commit is contained in:
@@ -39,7 +39,11 @@ from fastdeploy.model_executor.layers.linear import (
|
||||
from fastdeploy.model_executor.layers.lm_head import ParallelLMHead
|
||||
from fastdeploy.model_executor.layers.moe.moe import FusedMoE
|
||||
from fastdeploy.model_executor.layers.normalization import RMSNorm
|
||||
from fastdeploy.model_executor.models.model_base import ModelForCasualLM
|
||||
from fastdeploy.model_executor.models.model_base import (
|
||||
ModelCategory,
|
||||
ModelForCasualLM,
|
||||
ModelRegistry,
|
||||
)
|
||||
|
||||
|
||||
class Glm4MoeMLP(nn.Layer):
|
||||
@@ -363,6 +367,12 @@ class Glm4MoeModel(nn.Layer):
|
||||
return out
|
||||
|
||||
|
||||
@ModelRegistry.register_model_class(
|
||||
architecture="Glm4MoeForCausalLM",
|
||||
module_path="glm4_moe",
|
||||
category=ModelCategory.TEXT_GENERATION,
|
||||
primary_use=ModelCategory.TEXT_GENERATION,
|
||||
)
|
||||
class Glm4MoeForCausalLM(ModelForCasualLM):
|
||||
"""
|
||||
Glm4MoeForCausalLM
|
||||
|
Reference in New Issue
Block a user