[Feature] DeepseekV3 uses PD_BUILD_STATIC_OP (#2948)

Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com>
This commit is contained in:
K11OntheBoat
2025-07-22 15:03:41 +08:00
committed by GitHub
parent 2a8a2c06de
commit e991777757
6 changed files with 13 additions and 8 deletions

View File

@@ -13,6 +13,7 @@
// limitations under the License.
#pragma once
#include "helper.h"
#include "mla_cache_kernel.cuh"
template <paddle::DataType T>
@@ -259,7 +260,7 @@ std::vector<paddle::Tensor> DecodeMLAWriteCacheKernel(
}
-PD_BUILD_OP(prefill_mla_write_cache)
+PD_BUILD_STATIC_OP(prefill_mla_write_cache)
.Inputs({"kv_nope",
"kv_pe",
"kv_cache",
@@ -274,7 +275,7 @@ PD_BUILD_OP(prefill_mla_write_cache)
"max_seq_len: int"})
.SetKernelFn(PD_KERNEL(PrefillMLAWriteCacheKernel));
-PD_BUILD_OP(decode_mla_write_cache)
+PD_BUILD_STATIC_OP(decode_mla_write_cache)
.Inputs({"kv_nope",
"kv_pe",
"kv_cache",