[Sync] Update to latest code (#2679)

* [Sync] Update to latest code * Add new code files * Add new code files * update code * Try to fix build.sh * Try to fix build.sh * Update code * Update requirements.txt * Update code --------- Co-authored-by: Jiang-Jia-Jun <jiangjiajun@baidu.com>
2025-10-05 16:48:03 +08:00 · 2025-07-03 15:43:53 +08:00
parent d222248d00
commit 05c670e593
95 changed files with 9916 additions and 1312 deletions
--- a/custom_ops/setup_ops.py
+++ b/custom_ops/setup_ops.py
@@ -267,6 +267,9 @@ elif paddle.is_compiled_with_cuda():
        "gpu_ops/text_image_index_out.cu",
        "gpu_ops/text_image_gather_scatter.cu",
        "gpu_ops/sample_kernels/rejection_top_p_sampling.cu",
+        "gpu_ops/get_position_ids_and_mask_encoder_batch.cu",
+        "gpu_ops/fused_rotary_position_encoding.cu",
+        "gpu_ops/noaux_tc.cu",
    ]

    # pd_disaggregation
@@ -376,6 +379,8 @@ elif paddle.is_compiled_with_cuda():
        # append_attention
        sources += ["gpu_ops/append_attention.cu"]
        sources += find_end_files("gpu_ops/append_attn", ".cu")
+        # mla
+        sources += ["gpu_ops/multi_head_latent_attention.cu"]
        # gemm_dequant
        sources += ["gpu_ops/int8_gemm_with_cutlass/gemm_dequant.cu"]
        # speculate_decoding
@@ -441,6 +446,10 @@ elif paddle.is_compiled_with_cuda():

        sources += find_end_files(fp8_auto_gen_directory, ".cu")

+    if cc >= 90 and nvcc_version >= 12.0:
+        # Hopper optimized mla
+        sources += find_end_files("gpu_ops/mla_attn", ".cu")
+
    setup(
        name="fastdeploy_ops",
        ext_modules=CUDAExtension(