Move create_parameters to __init__ in FuseMOE for CutlassBackend and TritonBackend (#3148)

* w4a8 bug * fix w4a8 bug * remove code * modify the triton backend * fix ep * fix the bug with tensor_wise_fp8 in triton backend * fix the RL * fix bug by merge * fix the bug in w4a8 * fix the tensor_wise_fp8 bug * fix RL
2025-10-07 09:31:35 +08:00 · 2025-08-08 15:55:47 +08:00
parent d0e9a70380
commit ce1f353c70
10 changed files with 444 additions and 83 deletions
--- a/fastdeploy/model_executor/layers/quantization/tensor_wise_fp8.py
+++ b/fastdeploy/model_executor/layers/quantization/tensor_wise_fp8.py
@@ -82,7 +82,7 @@ class TensorWiseFP8LinearMethod(QuantMethodBase):
        self.weight_dtype = "float8_e4m3fn"

    def create_weights(self, layer, **extra_weight_attrs):
-
+        layer.weight_dtype = "float8_e4m3fn"
        layer.weight = layer.create_parameter(
            shape=layer.weight_shape,
            dtype=layer.weight_dtype,