Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-04 16:22:57 +08:00
[Sync] Update to latest code (#2679)

* [Sync] Update to latest code
* Add new code files
* Add new code files
* update code
* Try to fix build.sh
* Try to fix build.sh
* Update code
* Update requirements.txt
* Update code

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
@@ -61,7 +61,7 @@ class Qwen2MLP(nn.Layer):
         self.down_proj = RowParallelLinear(
             fd_config=fd_config,
             prefix=f"{prefix}.down_proj",
-            input_size=(fd_config.model_config.ffn_hidden_size // self.nranks),
+            input_size=fd_config.model_config.ffn_hidden_size,
             output_size=fd_config.model_config.hidden_size,
             with_bias=False,
         )
@@ -97,8 +97,6 @@ class Qwen2Attention(nn.Layer):
                  prefix: str = "") -> None:
         super().__init__()
 
-        nranks = fd_config.parallel_config.tensor_parallel_degree
-
         self.qkv_proj = QKVParallelLinear(fd_config=fd_config,
                                           prefix=f"{prefix}.qkv_proj",
                                           with_bias=True)
@@ -106,7 +104,7 @@ class Qwen2Attention(nn.Layer):
         self.o_proj = RowParallelLinear(
             fd_config=fd_config,
             prefix=f"{prefix}.o_proj",
-            input_size=(fd_config.model_config.hidden_size // nranks),
+            input_size=fd_config.model_config.hidden_size,
             output_size=fd_config.model_config.hidden_size,
         )
 
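Across all three hunks the change is the same: call sites no longer pre-divide the input dimension by the tensor-parallel degree (nranks) before constructing RowParallelLinear, passing the full ffn_hidden_size or hidden_size instead. This implies the layer now derives its per-rank input shard internally. Below is a minimal sketch of that pattern; RowParallelLinearSketch and tp_degree are hypothetical names for illustration, not FastDeploy's actual implementation.

import paddle
from paddle import nn


class RowParallelLinearSketch(nn.Layer):
    """Hypothetical row-parallel linear layer: the weight is sharded
    along the input dimension, one shard per tensor-parallel rank."""

    def __init__(self, input_size: int, output_size: int,
                 tp_degree: int, with_bias: bool = False):
        super().__init__()
        assert input_size % tp_degree == 0, "input_size must divide evenly"
        # The layer divides by the TP degree itself, so callers pass the
        # full (unsharded) input_size, matching the new call sites above.
        self.input_size_per_rank = input_size // tp_degree
        self.weight = self.create_parameter(
            shape=[self.input_size_per_rank, output_size])
        self.bias = (self.create_parameter(shape=[output_size], is_bias=True)
                     if with_bias else None)

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        # x holds this rank's input shard; a real implementation would
        # all-reduce the partial products across ranks after the matmul.
        out = paddle.matmul(x, self.weight)
        if self.bias is not None:
            out = out + self.bias
        return out

With tp_degree > 1, each rank holds one input shard and computes a partial product that must be summed across ranks; with tp_degree == 1 the layer degenerates to a plain linear layer, which is why the call sites can pass the full input size unconditionally.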