[Sync] Update to latest code (#2679)

* [Sync] Update to latest code

* Add new code files

* Add new code files

* Update code

* Try to fix build.sh

* Try to fix build.sh

* Update code

* Update requirements.txt

* Update code

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
This commit is contained in:
Jiang-Jia-Jun
2025-07-03 15:43:53 +08:00
committed by GitHub
parent d222248d00
commit 05c670e593
95 changed files with 9916 additions and 1312 deletions

View File

@@ -61,7 +61,7 @@ class Qwen2MLP(nn.Layer):
self.down_proj = RowParallelLinear(
fd_config=fd_config,
prefix=f"{prefix}.down_proj",
-input_size=(fd_config.model_config.ffn_hidden_size // self.nranks),
+input_size=fd_config.model_config.ffn_hidden_size,
output_size=fd_config.model_config.hidden_size,
with_bias=False,
)
@@ -97,8 +97,6 @@ class Qwen2Attention(nn.Layer):
prefix: str = "") -> None:
super().__init__()
-nranks = fd_config.parallel_config.tensor_parallel_degree
self.qkv_proj = QKVParallelLinear(fd_config=fd_config,
prefix=f"{prefix}.qkv_proj",
with_bias=True)
@@ -106,7 +104,7 @@ class Qwen2Attention(nn.Layer):
self.o_proj = RowParallelLinear(
fd_config=fd_config,
prefix=f"{prefix}.o_proj",
-input_size=(fd_config.model_config.hidden_size // nranks),
+input_size=fd_config.model_config.hidden_size,
output_size=fd_config.model_config.hidden_size,
)