Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 16:48:03 +08:00
Move create_parameters to __init__ in FuseMOE for CultassBackend and TritonBackend (#3148)
* w4a8 bug
* fix w4a8 bug
* remove code
* modify the triton backend
* fix ep
* fix the bug with tensor_wise_fp8 in triton backend
* fix the RL
* fix bug by merge
* fix the bug in w4a8
* fix the tensor_wise_fp8 bug
* fix RL
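The title names the refactor but the diff below only shows its supporting pieces, so here is a minimal sketch of the before/after pattern, assuming hypothetical names (`FusedMoELayer`, the weight names, shapes, and dtypes are all illustrative, not FastDeploy's actual FuseMOE code): parameter tensors are created once in `__init__` rather than lazily in a `create_parameters()` call, so later weight loads (for example the RL reload path mentioned in the commit messages) copy into already-allocated parameters.

```python
# Illustrative sketch only; names, shapes, and dtypes are assumptions.
import paddle
from paddle import nn


class FusedMoELayer(nn.Layer):
    def __init__(self, num_experts: int, hidden_size: int, moe_intermediate_size: int):
        super().__init__()
        # Before the change these tensors were built later by create_parameters();
        # creating them here means the layer owns fully-allocated parameters as
        # soon as it is constructed.
        self.up_gate_proj_weight = self.create_parameter(
            shape=[num_experts, hidden_size, moe_intermediate_size * 2],
            dtype="bfloat16",
            default_initializer=nn.initializer.Constant(0),
        )
        self.down_proj_weight = self.create_parameter(
            shape=[num_experts, moe_intermediate_size, hidden_size],
            dtype="bfloat16",
            default_initializer=nn.initializer.Constant(0),
        )

    def load_state_dict(self, state_dict: dict):
        # Loading now copies into the pre-created parameters instead of
        # constructing them on first use.
        self.up_gate_proj_weight.set_value(state_dict["up_gate_proj_weight"])
        self.down_proj_weight.set_value(state_dict["down_proj_weight"])
```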
@@ -15,7 +15,7 @@
 """
 
 import functools
-from typing import Tuple, Union
+from typing import Any, Optional, Tuple, Union
 
 import numpy as np
 import paddle
@@ -45,6 +45,14 @@ if cache_params != "none":
     c8_state_dict = paddle.load(cache_params, return_numpy=True)
 
 
+# TODO(lulinjun): delete it, import from fastdeploy.model_executor.models.utils after supporting all backends
+def set_weight_attrs(param, param_attr_map: Optional[dict[str, Any]]):
+    if param_attr_map is None:
+        return
+    for key, value in param_attr_map.items():
+        setattr(param, key, value)
+
+
 def per_block_cast_to_fp8(x: Tensor, block_size: list = [128, 128]) -> Tuple[Tensor, Tensor]:
     """
     Only used in deep_gemm block wise quant weight.
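The new `set_weight_attrs` helper simply stamps arbitrary attributes onto a parameter object; a typical use is attaching a per-parameter weight-loading callback that loading code can look up later. A small usage sketch, where the attribute name `weight_loader` and the loader function are assumptions for illustration:

```python
from typing import Any, Optional  # matches the updated import in the diff

import paddle


def set_weight_attrs(param, param_attr_map: Optional[dict[str, Any]]):
    # Same helper as in the diff: no-op on None, otherwise setattr each entry.
    if param_attr_map is None:
        return
    for key, value in param_attr_map.items():
        setattr(param, key, value)


def my_weight_loader(param, loaded_weight):
    # Hypothetical per-parameter loader: copy a checkpoint tensor in place.
    param.set_value(loaded_weight)


w = paddle.create_parameter(shape=[8, 8], dtype="float32")
set_weight_attrs(w, {"weight_loader": my_weight_loader})  # attach the callback
set_weight_attrs(w, None)                                 # no-op, early-return path

# Later, loading code can retrieve the callback from the parameter itself:
w.weight_loader(w, paddle.ones([8, 8], dtype="float32"))
```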
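The diff is truncated at the docstring of `per_block_cast_to_fp8`, so its body is not shown. The usual DeepGEMM-style block-wise cast pads the matrix to 128x128 tiles, scales each tile by `448 / amax` (448 being the largest finite float8_e4m3 value), and returns the FP8 tensor together with per-block scales. The sketch below is an assumption along those lines, not the repository's actual implementation, and it requires a Paddle build with `float8_e4m3fn` support:

```python
from typing import Tuple

import paddle


def per_block_cast_to_fp8(x: paddle.Tensor, block_size=(128, 128)) -> Tuple[paddle.Tensor, paddle.Tensor]:
    # Sketch of a block-wise FP8 quantizer (assumed behavior, not the real body).
    bm, bn = block_size
    m, n = x.shape
    pm, pn = ((m + bm - 1) // bm) * bm, ((n + bn - 1) // bn) * bn  # pad to full tiles
    padded = paddle.zeros([pm, pn], dtype="float32")
    padded[:m, :n] = x.astype("float32")
    # Group into (row_blocks, bm, col_blocks, bn) tiles and take per-tile amax.
    tiles = padded.reshape([pm // bm, bm, pn // bn, bn])
    amax = tiles.abs().max(axis=[1, 3], keepdim=True).clip(min=1e-4)
    # 448.0 is the max finite value representable in float8_e4m3.
    scaled = (tiles * (448.0 / amax)).reshape([pm, pn])[:m, :n]
    q = scaled.astype("float8_e4m3fn")
    scales = (amax / 448.0).reshape([pm // bm, pn // bn])  # one scale per 128x128 block
    return q, scales
```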