Move create_parameters to __init__ in FuseMOE for CutlassBackend and TritonBackend (#3148)

* w4a8 bug

* fix w4a8 bug

* remove code

* modify the Triton backend

* fix ep

* fix the tensor_wise_fp8 bug in the Triton backend

* fix the RL

* fix bug introduced by merge

* fix the bug in w4a8

* fix the tensor_wise_fp8 bug

* fix RL
Author: Zero Rains
Date: 2025-08-08 15:55:47 +08:00
Committed by: GitHub
Parent: d0e9a70380
Commit: ce1f353c70

10 changed files with 444 additions and 83 deletions
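
The gist of the refactor in the title: for the Cutlass and Triton MoE backends, expert weight parameters are now allocated in `__init__` instead of a later `create_parameters` step, and weight loading only fills the pre-created parameters. Below is a minimal sketch of that pattern; the class, attribute, and state-dict names are illustrative, not FastDeploy's real API.

```python
import paddle
from paddle import nn


class TritonMoEMethodSketch:
    """Illustrative stand-in for a MoE backend method class (not FastDeploy's)."""

    def __init__(self, num_experts: int, hidden_size: int, moe_intermediate_size: int):
        # Before the refactor, these parameters were allocated later by a
        # separate create_parameters()/create_weights() call from the loader.
        # float32 is used here only so the sketch runs anywhere; real MoE
        # weights would typically be bf16 or quantized.
        self.up_gate_proj_weight = paddle.create_parameter(
            shape=[num_experts, hidden_size, moe_intermediate_size * 2],
            dtype="float32",
            default_initializer=nn.initializer.Constant(0.0),
        )
        self.down_proj_weight = paddle.create_parameter(
            shape=[num_experts, moe_intermediate_size, hidden_size],
            dtype="float32",
            default_initializer=nn.initializer.Constant(0.0),
        )

    def process_loaded_weights(self, state_dict: dict):
        # Loading now only copies data into parameters that already exist.
        self.up_gate_proj_weight.set_value(state_dict["up_gate_proj"])
        self.down_proj_weight.set_value(state_dict["down_proj"])
```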


@@ -15,7 +15,7 @@
 """
 import functools
-from typing import Tuple, Union
+from typing import Any, Optional, Tuple, Union
 import numpy as np
 import paddle
@@ -45,6 +45,14 @@ if cache_params != "none":
     c8_state_dict = paddle.load(cache_params, return_numpy=True)
+
+
+# TODO(lulinjun): delete it, import from fastdeploy.model_executor.models.utils after supporting all backends
+def set_weight_attrs(param, param_attr_map: Optional[dict[str, Any]]):
+    if param_attr_map is None:
+        return
+    for key, value in param_attr_map.items():
+        setattr(param, key, value)
+
+
 def per_block_cast_to_fp8(x: Tensor, block_size: list = [128, 128]) -> Tuple[Tensor, Tensor]:
     """
     Only used in deep_gemm block wise quant weight.
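
For context, the `set_weight_attrs` helper added in this hunk simply attaches metadata to a parameter so that loading code can retrieve it later with `getattr`. A hedged usage sketch follows; the attribute names (`weight_loader`, `output_dim`) and the loader function are illustrative examples, not necessarily what FastDeploy's loaders look up.

```python
from typing import Any, Optional

import paddle
from paddle import nn


# Same helper as added in the hunk above.
def set_weight_attrs(param, param_attr_map: Optional[dict[str, Any]]):
    if param_attr_map is None:
        return
    for key, value in param_attr_map.items():
        setattr(param, key, value)


# Hypothetical per-parameter loader; a real one might shard or transpose first.
def example_weight_loader(param, loaded_weight):
    param.set_value(loaded_weight)


weight = paddle.create_parameter(
    shape=[1024, 4096],
    dtype="float32",
    default_initializer=nn.initializer.Constant(0.0),
)
set_weight_attrs(weight, {"weight_loader": example_weight_loader, "output_dim": 0})

# Later, loading code reads the metadata straight off the parameter object.
loader = getattr(weight, "weight_loader", None)
if loader is not None:
    loader(weight, paddle.zeros([1024, 4096], dtype="float32"))
```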
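
The body of `per_block_cast_to_fp8` is not shown in this excerpt. As a rough illustration of the 128x128 block-wise scaling used for DeepGEMM-style FP8 weights, here is a generic sketch, not the repository's implementation: it derives one scale per block from the block's absolute maximum and rescales the block; a real implementation would additionally cast the rescaled values to float8_e4m3.

```python
import paddle

FP8_E4M3_MAX = 448.0  # largest finite value representable in float8_e4m3


def per_block_scales_sketch(x: paddle.Tensor, block_size=(128, 128)):
    """Generic sketch of block-wise scaling; not FastDeploy's per_block_cast_to_fp8.

    Assumes a 2-D tensor whose shape is already a multiple of the block size.
    Returns the rescaled tensor plus one scale per 128x128 block.
    """
    m, n = x.shape
    bm, bn = block_size
    assert m % bm == 0 and n % bn == 0, "sketch assumes block-aligned shapes"
    # View the matrix as a grid of (bm x bn) blocks.
    blocks = x.reshape([m // bm, bm, n // bn, bn])
    # Per-block absolute maximum, kept broadcastable against the blocks.
    amax = blocks.abs().max(axis=[1, 3], keepdim=True)
    scale = paddle.clip(amax, min=1e-12) / FP8_E4M3_MAX
    # Rescale each block into the FP8 range; a real implementation would
    # cast x_scaled to float8_e4m3 here.
    x_scaled = (blocks / scale).reshape([m, n])
    return x_scaled, scale.reshape([m // bm, n // bn])
```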