mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-08 10:00:29 +08:00
[Intel HPU] Support intel hpu platform (#4161)
* [Intel HPU] Support intel hpu platform * fix some issues * apply precommit and move AttentionBackend_HPU * fix format issue * correct ops import * fix ci issue * update code in layers * fix code style issue * remove dense tp moe ep mode * fix enc_dec_block_num * fix rebase issue * rename hpu to gaudi in readme * rename ForwardMeta_HPU to HPUForwardMeta
This commit is contained in:
@@ -66,3 +66,26 @@ try:
|
||||
|
||||
except:
|
||||
tensor_model_parallel_all_reduce = None
|
||||
|
||||
from paddle.distributed.communication import stream
|
||||
from paddle.distributed.communication.reduce import ReduceOp
|
||||
|
||||
|
||||
def all_reduce(
|
||||
tensor,
|
||||
op,
|
||||
group,
|
||||
sync_op: bool = True,
|
||||
):
|
||||
return stream.all_reduce(tensor, op=op, group=group, sync_op=sync_op, use_calc_stream=True)
|
||||
|
||||
|
||||
@paddle.jit.marker.unified
|
||||
def tensor_model_parallel_all_reduce_custom(input_: paddle.Tensor) -> paddle.Tensor:
|
||||
"""All-reduce the input tensor across model parallel group on calc stream."""
|
||||
if paddle.in_dynamic_mode():
|
||||
hcg = dist.fleet.get_hybrid_communicate_group()
|
||||
mp_group = hcg.get_model_parallel_group()
|
||||
all_reduce(input_, op=ReduceOp.SUM, group=mp_group)
|
||||
else:
|
||||
dist.all_reduce(input_)
|
||||
|
Reference in New Issue
Block a user