load hadamard_block_size from config (#3797)

This commit is contained in:
Yuan Xiaolan
2025-09-05 17:07:58 +08:00
committed by GitHub
parent 41aee08982
commit 2cf55168ca
10 changed files with 60 additions and 30 deletions

View File

@@ -255,7 +255,8 @@ paddle::Tensor MoeExpertFFNFunc(
const paddle::optional<paddle::Tensor>& down_proj_in_scale,
const paddle::optional<paddle::Tensor>& expert_idx_per_token,
const std::string& quant_method, const bool used_in_ep_low_latency,
-const int estimate_total_token_nums);
+const int estimate_total_token_nums,
+const int hadamard_block_size);
paddle::Tensor MoeExpertFFNWint2Func(
const paddle::Tensor& permute_input,