Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 16:48:03 +08:00)
[BugFix][V1 Loader] fix the bug in creating weights for block_wise_fp8 (#3486)
Some checks failed: Deploy GitHub Pages / deploy (push) has been cancelled
@@ -22,6 +22,7 @@ import fastdeploy
 from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce
 from fastdeploy.model_executor.layers.utils import get_tensor
 from fastdeploy.model_executor.ops.gpu import count_tokens_per_expert_func, deep_gemm
+from fastdeploy.utils import ceil_div

 from .fused_moe_backend_base import MoEMethodBase

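The hunks below replace floor division with ceil_div when sizing the per-block FP8 weight-scale tensors. A minimal sketch of the difference, assuming ceil_div in fastdeploy.utils is the usual rounding-up integer division (its exact implementation is not shown in this diff) and using a hypothetical hidden size that is not a multiple of the 128-element quantization block:

# Minimal sketch: why floor division under-sizes block-wise FP8 scale tensors.
# Assumptions: ceil_div behaves like rounding-up integer division; the block
# size and hidden size below are hypothetical, for illustration only.

def ceil_div(a: int, b: int) -> int:
    # Round a / b up to the next integer.
    return (a + b - 1) // b

weight_block_size = [128, 128]   # assumed block-wise FP8 granularity
hidden_size = 5184               # hypothetical: 40 * 128 + 64, not block-aligned

floor_blocks = hidden_size // weight_block_size[1]         # 40 -> last 64 columns get no scale slot
ceil_blocks = ceil_div(hidden_size, weight_block_size[1])  # 41 -> every column is covered

print(floor_blocks, ceil_blocks)  # 40 41

With floor division the scale tensor is one block short whenever a dimension is not an exact multiple of the block size, which breaks weight loading for such shapes; rounding up allocates a scale entry for the final partial block.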
@@ -73,8 +74,8 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
             layer.create_parameter(
                 shape=[
                     layer.num_local_experts,
-                    layer.moe_intermediate_size * 2 // self.quant_config.weight_block_size[0],
-                    layer.hidden_size // self.quant_config.weight_block_size[1],
+                    ceil_div(layer.moe_intermediate_size * 2, self.quant_config.weight_block_size[0]),
+                    ceil_div(layer.hidden_size, self.quant_config.weight_block_size[1]),
                 ],
                 dtype="float32",
                 default_initializer=paddle.nn.initializer.Constant(0),
@@ -86,8 +87,8 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
             layer.create_parameter(
                 shape=[
                     layer.num_local_experts,
-                    layer.hidden_size // self.quant_config.weight_block_size[0],
-                    layer.moe_intermediate_size // self.quant_config.weight_block_size[1],
+                    ceil_div(layer.hidden_size, self.quant_config.weight_block_size[0]),
+                    ceil_div(layer.moe_intermediate_size, self.quant_config.weight_block_size[1]),
                 ],
                 dtype="float32",
                 default_initializer=paddle.nn.initializer.Constant(0),
|
@@ -642,8 +642,8 @@ class BlockWiseFP8MoEMethod(QuantMethodBase):
             layer.create_parameter(
                 shape=[
                     layer.num_local_experts,
-                    layer.moe_intermediate_size * 2 // self.quant_config.weight_block_size[0],
-                    layer.hidden_size // self.quant_config.weight_block_size[1],
+                    ceil_div(layer.moe_intermediate_size * 2, self.quant_config.weight_block_size[0]),
+                    ceil_div(layer.hidden_size, self.quant_config.weight_block_size[1]),
                 ],
                 dtype="float32",
                 default_initializer=paddle.nn.initializer.Constant(0),
@@ -655,8 +655,8 @@ class BlockWiseFP8MoEMethod(QuantMethodBase):
             layer.create_parameter(
                 shape=[
                     layer.num_local_experts,
-                    layer.hidden_size // self.quant_config.weight_block_size[0],
-                    layer.moe_intermediate_size // self.quant_config.weight_block_size[1],
+                    ceil_div(layer.hidden_size, self.quant_config.weight_block_size[0]),
+                    ceil_div(layer.moe_intermediate_size, self.quant_config.weight_block_size[1]),
                 ],
                 dtype="float32",
                 default_initializer=paddle.nn.initializer.Constant(0),
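Putting the corrected expressions together, here is a hedged sanity check of the two scale-tensor shapes created above, using hypothetical dimensions (the real values come from the model config). Judging by the shapes, the first tensor belongs to the fused gate/up projection (hence the "* 2") and the second to the down projection; weight_block_size is assumed to be [128, 128].

# Sketch of the corrected scale-tensor shapes from the hunks above.
# Hypothetical layer dimensions; weight_block_size assumed to be [128, 128].

def ceil_div(a: int, b: int) -> int:
    return (a + b - 1) // b

num_local_experts = 8
moe_intermediate_size = 1472     # hypothetical, not a multiple of 128
hidden_size = 2048
weight_block_size = [128, 128]

up_gate_proj_scale_shape = [
    num_local_experts,
    ceil_div(moe_intermediate_size * 2, weight_block_size[0]),
    ceil_div(hidden_size, weight_block_size[1]),
]
down_proj_scale_shape = [
    num_local_experts,
    ceil_div(hidden_size, weight_block_size[0]),
    ceil_div(moe_intermediate_size, weight_block_size[1]),
]

print(up_gate_proj_scale_shape)  # [8, 23, 16]
print(down_proj_scale_shape)     # [8, 16, 12]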
|