load hadamard_block_size from config (#3797)

2025-10-05 16:48:03 +08:00 · 2025-09-05 17:07:58 +08:00
parent 41aee08982
commit 2cf55168ca
10 changed files with 60 additions and 30 deletions
--- a/custom_ops/gpu_ops/cpp_extensions.cc
+++ b/custom_ops/gpu_ops/cpp_extensions.cc
@@ -255,7 +255,8 @@ paddle::Tensor MoeExpertFFNFunc(
    const paddle::optional<paddle::Tensor>& down_proj_in_scale,
    const paddle::optional<paddle::Tensor>& expert_idx_per_token,
    const std::string& quant_method, const bool used_in_ep_low_latency,
-    const int estimate_total_token_nums);
+    const int estimate_total_token_nums,
+    const int hadamard_block_size);

 paddle::Tensor MoeExpertFFNWint2Func(
    const paddle::Tensor& permute_input,