[SOT][Cudagraph] Remove BreakGraph from #3302 and update CustomOp (#3694)

* remove inplace info and to(gpu)

* update append_attention

* unpin paddle version

* add full_cuda_graph=False

* add blank line

---------

Co-authored-by: SigureMo <sigure.qaq@gmail.com>
This commit is contained in:
Ryan
2025-10-17 10:57:55 +08:00
committed by GitHub
parent a37c9416ac
commit 49cea8fb1c
5 changed files with 12 additions and 11 deletions

View File

@@ -593,7 +593,7 @@ std::vector<paddle::Tensor> AppendAttention(
return {paddle::Tensor{}};
}
void AppendAttentionWithOutput(
std::vector<paddle::Tensor> AppendAttentionWithOutput(
const paddle::Tensor& qkv,
const paddle::Tensor& key_cache,
const paddle::Tensor& value_cache,
@@ -756,6 +756,8 @@ void AppendAttentionWithOutput(
break;
}
}
return {fmha_out};
}
@@ -1112,10 +1114,8 @@ PD_BUILD_STATIC_OP(append_attention_with_output)
paddle::Optional("kv_signal_data"),
paddle::Optional("q_norm_weight"),
paddle::Optional("k_norm_weight")})
.Outputs({"fmha_out_out", "qkv_out", "key_cache_out", "value_cache_out"})
.SetInplaceMap({{"fmha_out", "fmha_out_out"},
{"key_cache", "key_cache_out"},
{"value_cache", "value_cache_out"}})
.Outputs({"fmha_out_out"})
.SetInplaceMap({{"fmha_out", "fmha_out_out"}})
.Attrs({"rms_norm_eps: float",
"compute_type: std::string",
"cache_quant_type: std::string",

View File

@@ -91,7 +91,7 @@ std::vector<paddle::Tensor> AppendAttention(
const int speculate_max_draft_token_num, const bool causal,
const bool speculate_decoder);
void AppendAttentionWithOutput(
std::vector<paddle::Tensor> AppendAttentionWithOutput(
const paddle::Tensor &qkv, const paddle::Tensor &key_cache,
const paddle::Tensor &value_cache, const paddle::Tensor &seq_lens_encoder,
const paddle::Tensor &seq_lens_decoder,