refactor rl get_name_mappings_to_training (#2847)

* refactor rl get_name_mappings_to_training
* fix tp>1
* change variable name (ffn1 -> up_gate_proj / ffn2 -> down_proj)
* change variable name (linear_weight -> weight / linear_bias -> bias)
* add rl names mapping for vl
* fix ernie 0.3B error
* fix develop code
* fix
2025-10-16 13:41:30 +08:00 · 2025-07-15 22:31:42 +08:00
parent e7bcbbab52
commit 61b3997b85
47 changed files with 1591 additions and 1629 deletions
--- a/fastdeploy/model_executor/layers/backends/dcu/weight_only.py
+++ b/fastdeploy/model_executor/layers/backends/dcu/weight_only.py
@@ -16,8 +16,8 @@
 import paddle
 from paddle.nn.quant import weight_dequantize

-from fastdeploy.model_executor.layers.quantization.weight_only import WeightOnlyConfig, GPUWeightOnlyLinearMethod
-
+from fastdeploy.model_executor.layers.quantization.weight_only import (
+    GPUWeightOnlyLinearMethod, WeightOnlyConfig)


 class DCUWeightOnlyLinearMethod(GPUWeightOnlyLinearMethod):
@@ -35,12 +35,12 @@ class DCUWeightOnlyLinearMethod(GPUWeightOnlyLinearMethod):

    def apply(self, layer, x):
        dequant_out = weight_dequantize(
-            x=layer.linear_weight,
-            scale=layer.linear_weight_scale,
+            x=layer.weight,
+            scale=layer.weight_scale,
            algo=self.quant_config.algo,
            out_dtype=paddle.get_default_dtype()
        )
        linear_out = paddle.matmul(x, dequant_out)
-        if layer.linear_bias is not None:
-            linear_out = paddle.add(linear_out, layer.linear_bias)
+        if layer.bias is not None:
+            linear_out = paddle.add(linear_out, layer.bias)
        return linear_out