mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 16:48:03 +08:00
[feat] support fa3 backend for pd disaggregated (#2695)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* support fa3 backend run in pd disaggregated * support fa3 backend run in pd disaggregated * support fa3 backend run in pd disaggregated * support fa3 backend run in pd disaggregated * delete use_fast_ffn
This commit is contained in:
@@ -28,7 +28,7 @@ from fastdeploy.config import FDConfig
|
||||
from fastdeploy.distributed.communication_op import \
|
||||
tensor_model_parallel_all_reduce
|
||||
from fastdeploy.model_executor.layers.activation import SiluAndMul
|
||||
from fastdeploy.model_executor.layers.attention import Attention
|
||||
from fastdeploy.model_executor.layers.attention.attention import Attention
|
||||
from fastdeploy.model_executor.layers.embeddings import VocabParallelEmbedding
|
||||
from fastdeploy.model_executor.layers.linear import (
|
||||
ColumnParallelLinear, KVBatchLinear, MergedColumnParallelLinear,
|
||||
@@ -68,7 +68,6 @@ class DeepSeekV3MLP(nn.Layer):
|
||||
output_size=intermediate_size * 2,
|
||||
with_bias=False,
|
||||
activation=fd_config.model_config.hidden_act,
|
||||
use_fast_ffn=True,
|
||||
)
|
||||
|
||||
self.down_proj = RowParallelLinear(
|
||||
|
Reference in New Issue
Block a user