mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-11-03 02:53:26 +08:00
【Feature】support qwen2 some func (#2740)
* add rl qwen model support * fix * fix
This commit is contained in:
@@ -21,7 +21,11 @@ from dataclasses import dataclass, field
|
||||
from typing import List, Optional
|
||||
|
||||
import paddle
|
||||
from paddle.nn.functional.flash_attention import flash_attention_v3_varlen
|
||||
|
||||
try:
|
||||
from paddle.nn.functional.flash_attention import flash_attention_v3_varlen
|
||||
except:
|
||||
flash_attention_v3_varlen = None
|
||||
|
||||
from fastdeploy.config import FDConfig
|
||||
from fastdeploy.model_executor.layers.attention.attention import Attention
|
||||
|
||||
@@ -294,7 +294,7 @@ class ColumnParallelLinear(LinearBase):
|
||||
)
|
||||
if self.nranks > 0:
|
||||
# col parallel
|
||||
_set_var_distributed(self.linear_weight, split_axis=-1)
|
||||
_set_var_distributed(self.linear_weight, split_axis=1)
|
||||
|
||||
self.linear_bias = None
|
||||
if self.with_bias:
|
||||
@@ -305,7 +305,7 @@ class ColumnParallelLinear(LinearBase):
|
||||
)
|
||||
if self.nranks > 0:
|
||||
# col parallel
|
||||
_set_var_distributed(self.linear_bias, split_axis=-1)
|
||||
_set_var_distributed(self.linear_bias, split_axis=1)
|
||||
|
||||
# smooth quant
|
||||
self.linear_shift = None
|
||||
|
||||
@@ -89,6 +89,7 @@ class FusedMoE(nn.Layer):
|
||||
self.routed_scaling_factor = routed_scaling_factor
|
||||
|
||||
moe_quant_config = fd_config.quant_config
|
||||
self.moe_quant_type = None
|
||||
if moe_quant_config:
|
||||
self.quant_method = moe_quant_config.get_quant_method(self)
|
||||
self.moe_quant_type = moe_quant_config.name()
|
||||
@@ -142,7 +143,7 @@ class FusedMoE(nn.Layer):
|
||||
if self.moe_quant_type == "fp8":
|
||||
#(TODO:gaoziyuan)
|
||||
pass
|
||||
else:
|
||||
elif self.moe_quant_type == "wint8":
|
||||
self.weight_dtype = "int8"
|
||||
self.init_weight_only_scale()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user