[BugFix][V1 Loader] fix the bug in creating weights for block_wise_fp8 (#3486)
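The bug: the per-block quantization scale tensors for the MoE weights were shaped with floor division (`//`), which under-allocates scale entries whenever `hidden_size` or `moe_intermediate_size * 2` is not an exact multiple of `weight_block_size`. The fix replaces `//` with `ceil_div`, so the scale grid always covers a trailing partial block. A minimal sketch of the intended semantics (assuming `fastdeploy.utils.ceil_div` is ordinary integer ceiling division):

def ceil_div(x: int, y: int) -> int:
    # Smallest n such that n * y >= x; for positive ints this equals (x + y - 1) // y.
    return -(-x // y)

# Exact multiple: floor and ceil agree, so the old code happened to work.
assert 7168 // 128 == ceil_div(7168, 128) == 56
# Non-multiple: floor division silently drops the trailing partial block.
assert 7000 // 128 == 54
assert ceil_div(7000, 128) == 55  # one extra scale entry covers the tail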
@@ -22,6 +22,7 @@ import fastdeploy
 from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce
 from fastdeploy.model_executor.layers.utils import get_tensor
 from fastdeploy.model_executor.ops.gpu import count_tokens_per_expert_func, deep_gemm
+from fastdeploy.utils import ceil_div

 from .fused_moe_backend_base import MoEMethodBase
@@ -73,8 +74,8 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
         layer.create_parameter(
             shape=[
                 layer.num_local_experts,
-                layer.moe_intermediate_size * 2 // self.quant_config.weight_block_size[0],
-                layer.hidden_size // self.quant_config.weight_block_size[1],
+                ceil_div(layer.moe_intermediate_size * 2, self.quant_config.weight_block_size[0]),
+                ceil_div(layer.hidden_size, self.quant_config.weight_block_size[1]),
             ],
             dtype="float32",
             default_initializer=paddle.nn.initializer.Constant(0),
@@ -86,8 +87,8 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
         layer.create_parameter(
             shape=[
                 layer.num_local_experts,
-                layer.hidden_size // self.quant_config.weight_block_size[0],
-                layer.moe_intermediate_size // self.quant_config.weight_block_size[1],
+                ceil_div(layer.hidden_size, self.quant_config.weight_block_size[0]),
+                ceil_div(layer.moe_intermediate_size, self.quant_config.weight_block_size[1]),
             ],
             dtype="float32",
             default_initializer=paddle.nn.initializer.Constant(0),
@@ -642,8 +642,8 @@ class BlockWiseFP8MoEMethod(QuantMethodBase):
         layer.create_parameter(
             shape=[
                 layer.num_local_experts,
-                layer.moe_intermediate_size * 2 // self.quant_config.weight_block_size[0],
-                layer.hidden_size // self.quant_config.weight_block_size[1],
+                ceil_div(layer.moe_intermediate_size * 2, self.quant_config.weight_block_size[0]),
+                ceil_div(layer.hidden_size, self.quant_config.weight_block_size[1]),
             ],
             dtype="float32",
             default_initializer=paddle.nn.initializer.Constant(0),
@@ -655,8 +655,8 @@ class BlockWiseFP8MoEMethod(QuantMethodBase):
         layer.create_parameter(
             shape=[
                 layer.num_local_experts,
-                layer.hidden_size // self.quant_config.weight_block_size[0],
-                layer.moe_intermediate_size // self.quant_config.weight_block_size[1],
+                ceil_div(layer.hidden_size, self.quant_config.weight_block_size[0]),
+                ceil_div(layer.moe_intermediate_size, self.quant_config.weight_block_size[1]),
             ],
             dtype="float32",
             default_initializer=paddle.nn.initializer.Constant(0),
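To make the shape change concrete, a worked example with illustrative dimensions (the numbers below are hypothetical, not taken from the patch):

from fastdeploy.utils import ceil_div  # the helper the patch imports

num_local_experts = 8
moe_intermediate_size = 1792
hidden_size = 7000                     # deliberately not a multiple of the block size
weight_block_size = [128, 128]

# up_gate_proj weight-scale shape after the fix:
scale_shape = [
    num_local_experts,
    ceil_div(moe_intermediate_size * 2, weight_block_size[0]),  # 3584 / 128 -> 28
    ceil_div(hidden_size, weight_block_size[1]),                # 7000 / 128 -> 55 (// gave 54)
]
assert scale_shape == [8, 28, 55]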