Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-05 16:48:03 +08:00
Move create_parameters to __init__ in FuseMOE for CultassBackend and TritonBackend (#3148)
* w4a8 bug
* fix w4a8 bug
* remove code
* modify the triton backend
* fix ep
* fix the bug with tensor_wise_fp8 in triton backend
* fix the RL
* fix bug by merge
* fix the bug in w4a8
* fix the tensor_wise_fp8 bug
* fix RL
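The title names the refactor but the diff below only shows its supporting pieces, so here is a minimal sketch of the before/after pattern, assuming hypothetical names (`FusedMoELayer`, the weight names, shapes, and dtypes are all illustrative, not FastDeploy's actual FuseMOE code): parameter tensors are created once in `__init__` rather than lazily in a `create_parameters()` call, so later weight loads (for example the RL reload path mentioned in the commit messages) copy into already-allocated parameters.

```python
# Illustrative sketch only; names, shapes, and dtypes are assumptions.
import paddle
from paddle import nn


class FusedMoELayer(nn.Layer):
    def __init__(self, num_experts: int, hidden_size: int, moe_intermediate_size: int):
        super().__init__()
        # Before the change these tensors were built later by create_parameters();
        # creating them here means the layer owns fully-allocated parameters as
        # soon as it is constructed.
        self.up_gate_proj_weight = self.create_parameter(
            shape=[num_experts, hidden_size, moe_intermediate_size * 2],
            dtype="bfloat16",
            default_initializer=nn.initializer.Constant(0),
        )
        self.down_proj_weight = self.create_parameter(
            shape=[num_experts, moe_intermediate_size, hidden_size],
            dtype="bfloat16",
            default_initializer=nn.initializer.Constant(0),
        )

    def load_state_dict(self, state_dict: dict):
        # Loading now copies into the pre-created parameters instead of
        # constructing them on first use.
        self.up_gate_proj_weight.set_value(state_dict["up_gate_proj_weight"])
        self.down_proj_weight.set_value(state_dict["down_proj_weight"])
```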
@@ -15,7 +15,7 @@
 """
 
 import functools
-from typing import Tuple, Union
+from typing import Any, Optional, Tuple, Union
 
 import numpy as np
 import paddle
@@ -45,6 +45,14 @@ if cache_params != "none":
     c8_state_dict = paddle.load(cache_params, return_numpy=True)
 
 
+# TODO(lulinjun): delete it, import from fastdeploy.model_executor.models.utils after supporting all backends
+def set_weight_attrs(param, param_attr_map: Optional[dict[str, Any]]):
+    if param_attr_map is None:
+        return
+    for key, value in param_attr_map.items():
+        setattr(param, key, value)
+
+
 def per_block_cast_to_fp8(x: Tensor, block_size: list = [128, 128]) -> Tuple[Tensor, Tensor]:
     """
     Only used in deep_gemm block wise quant weight.
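The new `set_weight_attrs` helper simply stamps arbitrary attributes onto a parameter object; a typical use is attaching a per-parameter weight-loading callback that loading code can look up later. A small usage sketch, where the attribute name `weight_loader` and the loader function are assumptions for illustration:

```python
from typing import Any, Optional  # matches the updated import in the diff

import paddle


def set_weight_attrs(param, param_attr_map: Optional[dict[str, Any]]):
    # Same helper as in the diff: no-op on None, otherwise setattr each entry.
    if param_attr_map is None:
        return
    for key, value in param_attr_map.items():
        setattr(param, key, value)


def my_weight_loader(param, loaded_weight):
    # Hypothetical per-parameter loader: copy a checkpoint tensor in place.
    param.set_value(loaded_weight)


w = paddle.create_parameter(shape=[8, 8], dtype="float32")
set_weight_attrs(w, {"weight_loader": my_weight_loader})  # attach the callback
set_weight_attrs(w, None)                                 # no-op, early-return path

# Later, loading code can retrieve the callback from the parameter itself:
w.weight_loader(w, paddle.ones([8, 8], dtype="float32"))
```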
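The diff is truncated at the docstring of `per_block_cast_to_fp8`, so its body is not shown. The usual DeepGEMM-style block-wise cast pads the matrix to 128x128 tiles, scales each tile by `448 / amax` (448 being the largest finite float8_e4m3 value), and returns the FP8 tensor together with per-block scales. The sketch below is an assumption along those lines, not the repository's actual implementation, and it requires a Paddle build with `float8_e4m3fn` support:

```python
from typing import Tuple

import paddle


def per_block_cast_to_fp8(x: paddle.Tensor, block_size=(128, 128)) -> Tuple[paddle.Tensor, paddle.Tensor]:
    # Sketch of a block-wise FP8 quantizer (assumed behavior, not the real body).
    bm, bn = block_size
    m, n = x.shape
    pm, pn = ((m + bm - 1) // bm) * bm, ((n + bn - 1) // bn) * bn  # pad to full tiles
    padded = paddle.zeros([pm, pn], dtype="float32")
    padded[:m, :n] = x.astype("float32")
    # Group into (row_blocks, bm, col_blocks, bn) tiles and take per-tile amax.
    tiles = padded.reshape([pm // bm, bm, pn // bn, bn])
    amax = tiles.abs().max(axis=[1, 3], keepdim=True).clip(min=1e-4)
    # 448.0 is the max finite value representable in float8_e4m3.
    scaled = (tiles * (448.0 / amax)).reshape([pm, pn])[:m, :n]
    q = scaled.astype("float8_e4m3fn")
    scales = (amax / 448.0).reshape([pm // bm, pn // bn])  # one scale per 128x128 block
    return q, scales
```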