[Feature] DeepseekV3 use pd_build_static_op (#2948)

Co-authored-by: K11OntheBoat <ruianmaidanglao@163.com>
2025-10-05 08:37:06 +08:00 · 2025-07-22 15:03:41 +08:00
parent 2a8a2c06de
commit e991777757
6 changed files with 13 additions and 8 deletions
--- a/custom_ops/gpu_ops/append_attn/mla_cache_kernel.cu
+++ b/custom_ops/gpu_ops/append_attn/mla_cache_kernel.cu
@@ -13,6 +13,7 @@
 // limitations under the License.
 #pragma once

+#include "helper.h"
 #include "mla_cache_kernel.cuh"

 template <paddle::DataType T>
@@ -259,7 +260,7 @@ std::vector<paddle::Tensor> DecodeMLAWriteCacheKernel(
 }


-PD_BUILD_OP(prefill_mla_write_cache)
+PD_BUILD_STATIC_OP(prefill_mla_write_cache)
    .Inputs({"kv_nope",
             "kv_pe",
             "kv_cache",
@@ -274,7 +275,7 @@ PD_BUILD_OP(prefill_mla_write_cache)
            "max_seq_len: int"})
    .SetKernelFn(PD_KERNEL(PrefillMLAWriteCacheKernel));

-PD_BUILD_OP(decode_mla_write_cache)
+PD_BUILD_STATIC_OP(decode_mla_write_cache)
    .Inputs({"kv_nope",
             "kv_pe",
             "kv_cache",