refactor rl get_name_mappings_to_training (#2847)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled

* refactor rl get_name_mappings_to_training

* fix tp>1

* change variable names (ffn1 -> up_gate_proj, ffn2 -> down_proj)

* change variable names (linear_weight -> weight, linear_bias -> bias)

* add rl names mapping for vl

* fix ernie 0.3B error

* fix develop code

* fix
This commit is contained in:
Yuanle Liu
2025-07-15 22:31:42 +08:00
committed by GitHub
parent e7bcbbab52
commit 61b3997b85
47 changed files with 1591 additions and 1629 deletions

View File

@@ -16,8 +16,8 @@
import paddle
from paddle.nn.quant import weight_dequantize
from fastdeploy.model_executor.layers.quantization.weight_only import WeightOnlyConfig, GPUWeightOnlyLinearMethod
from fastdeploy.model_executor.layers.quantization.weight_only import (
GPUWeightOnlyLinearMethod, WeightOnlyConfig)
class DCUWeightOnlyLinearMethod(GPUWeightOnlyLinearMethod):
@@ -35,12 +35,12 @@ class DCUWeightOnlyLinearMethod(GPUWeightOnlyLinearMethod):
def apply(self, layer, x):
    """Run a weight-only-quantized linear layer on DCU.

    Dequantizes the layer's packed weight back to the default dtype on the
    fly, then performs a plain matmul (optionally adding the bias).

    Args:
        layer: a linear layer holding `weight`, `weight_scale`, and an
            optional `bias` attribute (post-rename names; the old
            `linear_weight`/`linear_bias` names are gone).
        x: input activations, shape (..., in_features).

    Returns:
        The linear output `x @ dequant(weight) [+ bias]`.
    """
    # NOTE: the raw diff interleaved old (`layer.linear_weight`) and new
    # (`layer.weight`) lines, producing duplicate keyword arguments; this
    # is the reconstructed post-refactor version using the new names only.
    dequant_out = weight_dequantize(
        x=layer.weight,
        scale=layer.weight_scale,
        algo=self.quant_config.algo,
        out_dtype=paddle.get_default_dtype(),
    )
    linear_out = paddle.matmul(x, dequant_out)
    if layer.bias is not None:
        linear_out = paddle.add(linear_out, layer.bias)
    return linear_out