Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-05 08:37:06 +08:00)
Supports DP+TP+EP hybrid parallel deployment strategy (#3489)
* Support DP+TP+EP hybrid parallel deployment strategy
* fix conflict
* add moe_tp_ep function split_allgather_out
* del tp_group in moe_cutlass_backend
* for ci
* fix parallel_config for ci
* del log
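The commit composes three parallelism axes: data parallelism (DP) replicates the whole model, tensor parallelism (TP) shards individual weight matrices inside each replica, and expert parallelism (EP) distributes MoE experts across ranks. Below is a minimal sketch of how global ranks can be mapped to per-axis coordinates in such a hybrid layout, assuming a TP-major ordering; the function and parameter names are illustrative, not FastDeploy's actual API.

```python
# Hypothetical sketch of a DP+TP+EP rank layout (not FastDeploy code).

def hybrid_parallel_coords(rank: int, tp_degree: int, dp_degree: int, ep_degree: int):
    """Map a global rank to (dp_rank, tp_rank, ep_rank) coordinates.

    Layout is TP-major: ranks [0, tp_degree) form DP replica 0, the next
    tp_degree ranks form replica 1, and so on. EP groups are carved out of
    the same world, so ep_degree must divide the world size.
    """
    world_size = dp_degree * tp_degree
    assert 0 <= rank < world_size
    assert world_size % ep_degree == 0
    dp_rank, tp_rank = divmod(rank, tp_degree)
    ep_rank = rank % ep_degree  # each EP group holds a disjoint slice of experts
    return dp_rank, tp_rank, ep_rank


if __name__ == "__main__":
    # 8 GPUs laid out as DP=2 x TP=4 with 4-way expert parallelism.
    for rank in range(8):
        print(rank, hybrid_parallel_coords(rank, tp_degree=4, dp_degree=2, ep_degree=4))
```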
@@ -72,6 +72,7 @@ class TensorSplitMode(Enum):
     """TensorSplitMode"""
 
     GQA = "is_gqa"
+    TP_ROW_BIAS = "is_tp_row_bias"
     TRANSPOSE = "transpose"
     QKV = "is_old_qkv"
     PairFused = "is_naive_2fuse"
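Each member's value is the keyword name that the split/merge helpers later pop from `kwargs` (the new `TP_ROW_BIAS` maps to the `is_tp_row_bias` flag handled in the last hunk below). A minimal self-contained sketch of that linkage, assuming this is how the enum is consumed:

```python
from enum import Enum


class TensorSplitMode(Enum):
    """How a checkpoint tensor should be split for tensor parallelism."""

    GQA = "is_gqa"
    TP_ROW_BIAS = "is_tp_row_bias"
    TRANSPOSE = "transpose"
    QKV = "is_old_qkv"
    PairFused = "is_naive_2fuse"


# A weight-loading table can tag a tensor with a mode and forward the
# member's value as a boolean kwarg to the split function:
kwargs = {TensorSplitMode.TP_ROW_BIAS.value: True}
assert kwargs.pop("is_tp_row_bias", False) is True
```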
@@ -212,7 +213,7 @@ def gqa_qkv_split_func(
     """
 
     def fn(x, is_column=True):
-        """fucn"""
+        """func"""
 
         def get_shape(tensor):
             """get_shape"""
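`gqa_qkv_split_func` splits a fused QKV projection whose K/V head count is smaller than the Q head count, as in grouped-query attention; the hunk above only fixes a docstring typo. For context, here is a minimal numpy sketch of the splitting idea, with hypothetical parameter names (the real function also handles merging and more tensor layouts):

```python
import numpy as np


def gqa_qkv_split(qkv, num_q_heads, num_kv_heads, head_dim, tp_degree):
    """Split a fused [hidden, (q + k + v) * head_dim] GQA weight column-wise.

    Q carries num_q_heads heads, K and V carry num_kv_heads heads each;
    every TP rank receives a contiguous slice of heads from Q, K and V.
    """
    q_size = num_q_heads * head_dim
    kv_size = num_kv_heads * head_dim
    q, k, v = np.split(qkv, [q_size, q_size + kv_size], axis=-1)
    # Split each projection along its head (column) dimension.
    qs = np.split(q, tp_degree, axis=-1)
    ks = np.split(k, tp_degree, axis=-1)
    vs = np.split(v, tp_degree, axis=-1)
    # Re-fuse per-rank shards so each rank loads one contiguous tensor.
    return [np.concatenate([qs[i], ks[i], vs[i]], axis=-1) for i in range(tp_degree)]


# Example: 8 Q heads, 2 KV heads, head_dim 4, TP=2.
shards = gqa_qkv_split(np.zeros((16, (8 + 2 + 2) * 4)), 8, 2, 4, 2)
assert shards[0].shape == (16, (4 + 1 + 1) * 4)
```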
@@ -430,7 +431,15 @@ def split_or_merge_func_v1(
     def fn(x, **kwargs):
         """func"""
         is_gqa = kwargs.pop("is_gqa", False)
-        if is_gqa:
+        is_tp_row_bias = kwargs.pop("is_tp_row_bias", False)
+        if is_tp_row_bias:
+            tensor = x[:, ...]
+            if isinstance(tensor, paddle.Tensor):
+                res = tensor / tensor_parallel_degree
+            else:
+                res = paddle.to_tensor(tensor, paddle.get_default_dtype()) / tensor_parallel_degree
+            return res
+        elif is_gqa:
             func = split_or_merge_qkv_func(
                 is_split=is_split,
                 tensor_parallel_degree=tensor_parallel_degree,
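The new `is_tp_row_bias` branch loads the bias of a row-parallel linear layer as `bias / tensor_parallel_degree`. In a row-parallel layer each TP rank computes a partial matmul that is summed by an all-reduce, so a full bias added on every rank would be counted tensor_parallel_degree times; pre-dividing it leaves exactly one copy in the reduced output. A small numpy check of that identity (a sketch of the math, not FastDeploy code):

```python
import numpy as np

rng = np.random.default_rng(0)
tp = 4
x = rng.standard_normal((2, 8))  # activations, already column-split upstream
w = rng.standard_normal((8, 3))  # full row-parallel weight
b = rng.standard_normal(3)       # full bias

# Row-parallel: each rank holds a slice of the input dim and the matching
# slice of W's rows; partial products are summed by an all-reduce.
x_shards = np.split(x, tp, axis=1)
w_shards = np.split(w, tp, axis=0)

# If every rank adds b / tp (the TP_ROW_BIAS rule in this commit), the
# all-reduced sum carries exactly one copy of b.
partials = [x_shards[r] @ w_shards[r] + b / tp for r in range(tp)]
out = sum(partials)  # stands in for the all-reduce

np.testing.assert_allclose(out, x @ w + b, rtol=1e-10)
```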