[BugFix][V1 Loader] fix the bug in create weight for block_wise_fp8 (#3486)

Author: Zero Rains
Date: 2025-08-20 20:52:54 +08:00
Committed by: GitHub
Parent: bcdfc1d6b9
Commit: 30b3f2dc07
2 changed files with 9 additions and 8 deletions
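
The bug: both MoE backends sized their block-wise FP8 weight-scale parameters with floor division, so whenever hidden_size or moe_intermediate_size is not an exact multiple of weight_block_size, the scale tensor came out one block short along that dimension. The fix swaps the divisions for fastdeploy.utils.ceil_div. Its exact definition is not shown in this diff, but a ceiling-division helper is conventionally implemented as:

    def ceil_div(a: int, b: int) -> int:
        # Smallest n such that n * b >= a; rounds partial blocks up.
        return (a + b - 1) // b

    # E.g. with 128-wide blocks: a 7168-wide dim needs 56 scales either way,
    # but a 7200-wide dim needs 57 -- floor division allocates only 56.
    assert ceil_div(7168, 128) == 56
    assert ceil_div(7200, 128) == 57
    assert 7200 // 128 == 56  # old, under-sized result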

@@ -22,6 +22,7 @@ import fastdeploy
 from fastdeploy.distributed.communication import tensor_model_parallel_all_reduce
 from fastdeploy.model_executor.layers.utils import get_tensor
 from fastdeploy.model_executor.ops.gpu import count_tokens_per_expert_func, deep_gemm
+from fastdeploy.utils import ceil_div
 
 from .fused_moe_backend_base import MoEMethodBase
@@ -73,8 +74,8 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
         layer.create_parameter(
             shape=[
                 layer.num_local_experts,
-                layer.moe_intermediate_size * 2 // self.quant_config.weight_block_size[0],
-                layer.hidden_size // self.quant_config.weight_block_size[1],
+                ceil_div(layer.moe_intermediate_size * 2, self.quant_config.weight_block_size[0]),
+                ceil_div(layer.hidden_size, self.quant_config.weight_block_size[1]),
             ],
             dtype="float32",
             default_initializer=paddle.nn.initializer.Constant(0),
@@ -86,8 +87,8 @@ class DeepGemmFusedMoeMethod(MoEMethodBase):
         layer.create_parameter(
             shape=[
                 layer.num_local_experts,
-                layer.hidden_size // self.quant_config.weight_block_size[0],
-                layer.moe_intermediate_size // self.quant_config.weight_block_size[1],
+                ceil_div(layer.hidden_size, self.quant_config.weight_block_size[0]),
+                ceil_div(layer.moe_intermediate_size, self.quant_config.weight_block_size[1]),
             ],
             dtype="float32",
             default_initializer=paddle.nn.initializer.Constant(0),
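
For context on why the shape must round up: block-wise FP8 stores one float32 scale per (block_m, block_n) tile of each expert's weight, and a partial tile at the edge still needs its own scale. A minimal NumPy sketch of that relationship (illustrative only, not FastDeploy's quantization kernel; 448.0 is the largest finite float8_e4m3 value):

    import numpy as np

    def blockwise_scales(w: np.ndarray, block: int = 128) -> np.ndarray:
        # Pad so partial edge tiles become full tiles, then take one
        # max-abs scale per tile -- hence the ceil-divided shape.
        n, k = w.shape
        pn, pk = -(-n // block) * block, -(-k // block) * block
        padded = np.zeros((pn, pk), dtype=w.dtype)
        padded[:n, :k] = w
        tiles = padded.reshape(pn // block, block, pk // block, block)
        return np.abs(tiles).max(axis=(1, 3)) / 448.0

    scales = blockwise_scales(np.random.randn(7200, 5120).astype(np.float32))
    print(scales.shape)  # (57, 40) -- ceil(7200/128), ceil(5120/128)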

@@ -642,8 +642,8 @@ class BlockWiseFP8MoEMethod(QuantMethodBase):
         layer.create_parameter(
             shape=[
                 layer.num_local_experts,
-                layer.moe_intermediate_size * 2 // self.quant_config.weight_block_size[0],
-                layer.hidden_size // self.quant_config.weight_block_size[1],
+                ceil_div(layer.moe_intermediate_size * 2, self.quant_config.weight_block_size[0]),
+                ceil_div(layer.hidden_size, self.quant_config.weight_block_size[1]),
             ],
             dtype="float32",
             default_initializer=paddle.nn.initializer.Constant(0),
@@ -655,8 +655,8 @@ class BlockWiseFP8MoEMethod(QuantMethodBase):
         layer.create_parameter(
             shape=[
                 layer.num_local_experts,
-                layer.hidden_size // self.quant_config.weight_block_size[0],
-                layer.moe_intermediate_size // self.quant_config.weight_block_size[1],
+                ceil_div(layer.hidden_size, self.quant_config.weight_block_size[0]),
+                ceil_div(layer.moe_intermediate_size, self.quant_config.weight_block_size[1]),
            ],
             dtype="float32",
             default_initializer=paddle.nn.initializer.Constant(0),
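
With both methods fixed, the scale parameters now match what a block-wise quantizer (or a pre-quantized checkpoint) produces for edge tiles, so the V1 loader can copy scales in without a shape mismatch. A quick before/after check, reusing the ceil_div sketch above with hypothetical sizes and a 128x128 weight_block_size:

    moe_intermediate_size, hidden_size, block = 1400, 5120, 128
    old = (moe_intermediate_size * 2 // block, hidden_size // block)                   # (21, 40)
    new = (ceil_div(moe_intermediate_size * 2, block), ceil_div(hidden_size, block))   # (22, 40)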