mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-31 03:46:40 +08:00
Remove redundant inplace outputs for append_attention (#4341)
This commit is contained in:
@@ -1065,9 +1065,7 @@ PD_BUILD_STATIC_OP(append_attention)
 paddle::Optional("kv_signal_data"),
 paddle::Optional("q_norm_weight"),
 paddle::Optional("k_norm_weight")})
-.Outputs({"fmha_out", "key_cache_out", "value_cache_out"})
-.SetInplaceMap({{"key_cache", "key_cache_out"},
-{"value_cache", "value_cache_out"}})
+.Outputs({"fmha_out"})
 .Attrs({"rms_norm_eps: float",
 "compute_type: std::string",
 "cache_quant_type: std::string",
Reference in New Issue
Block a user