Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-04 16:22:57 +08:00
[Sync] Update to latest code (#2679)

* [Sync] Update to latest code
* Add new code files
* Add new code files
* update code
* Try to fix build.sh
* Try to fix build.sh
* Update code
* Update requirements.txt
* Update code

---------

Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
@@ -61,7 +61,7 @@ class Qwen2MLP(nn.Layer):
         self.down_proj = RowParallelLinear(
             fd_config=fd_config,
             prefix=f"{prefix}.down_proj",
-            input_size=(fd_config.model_config.ffn_hidden_size // self.nranks),
+            input_size=fd_config.model_config.ffn_hidden_size,
             output_size=fd_config.model_config.hidden_size,
             with_bias=False,
         )
@@ -97,8 +97,6 @@ class Qwen2Attention(nn.Layer):
                  prefix: str = "") -> None:
         super().__init__()
 
-        nranks = fd_config.parallel_config.tensor_parallel_degree
-
         self.qkv_proj = QKVParallelLinear(fd_config=fd_config,
                                           prefix=f"{prefix}.qkv_proj",
                                           with_bias=True)
@@ -106,7 +104,7 @@ class Qwen2Attention(nn.Layer):
         self.o_proj = RowParallelLinear(
             fd_config=fd_config,
             prefix=f"{prefix}.o_proj",
-            input_size=(fd_config.model_config.hidden_size // nranks),
+            input_size=fd_config.model_config.hidden_size,
             output_size=fd_config.model_config.hidden_size,
         )
 
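Across all three hunks the change is the same: call sites no longer pre-divide the input dimension by the tensor-parallel degree (nranks) before constructing RowParallelLinear, passing the full ffn_hidden_size or hidden_size instead. This implies the layer now derives its per-rank input shard internally. Below is a minimal sketch of that pattern; RowParallelLinearSketch and tp_degree are hypothetical names for illustration, not FastDeploy's actual implementation.

import paddle
from paddle import nn


class RowParallelLinearSketch(nn.Layer):
    """Hypothetical row-parallel linear layer: the weight is sharded
    along the input dimension, one shard per tensor-parallel rank."""

    def __init__(self, input_size: int, output_size: int,
                 tp_degree: int, with_bias: bool = False):
        super().__init__()
        assert input_size % tp_degree == 0, "input_size must divide evenly"
        # The layer divides by the TP degree itself, so callers pass the
        # full (unsharded) input_size, matching the new call sites above.
        self.input_size_per_rank = input_size // tp_degree
        self.weight = self.create_parameter(
            shape=[self.input_size_per_rank, output_size])
        self.bias = (self.create_parameter(shape=[output_size], is_bias=True)
                     if with_bias else None)

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        # x holds this rank's input shard; a real implementation would
        # all-reduce the partial products across ranks after the matmul.
        out = paddle.matmul(x, self.weight)
        if self.bias is not None:
            out = out + self.bias
        return out

With tp_degree > 1, each rank holds one input shard and computes a partial product that must be summed across ranks; with tp_degree == 1 the layer degenerates to a plain linear layer, which is why the call sites can pass the full input size unconditionally.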