Mirror of https://github.com/PaddlePaddle/FastDeploy.git — synced 2025-10-05 08:37:06 +08:00
【Inference Optimize】Support automatic generation of marlin kernel (#3149)
* Support automatic generation of marlin kernel
This commit is contained in:
@@ -409,6 +409,7 @@ elif paddle.is_compiled_with_cuda():
         sources += find_end_files("gpu_ops/speculate_decoding", ".cc")
         nvcc_compile_args += ["-DENABLE_BF16"]
         # moe
+        os.system("python gpu_ops/moe/moe_wna16_marlin_utils/generate_kernels.py")
         sources += find_end_files("gpu_ops/cutlass_kernels/moe_gemm/", ".cu")
         sources += find_end_files("gpu_ops/cutlass_kernels/w4a8_moe/", ".cu")
         sources += find_end_files("gpu_ops/moe/", ".cu")

(NOTE: reconstructed from a garbled diff table; the `generate_kernels.py` call is shown as the single added line, matching the `+1` in the hunk header and the commit title "Support automatic generation of marlin kernel" — verify against the upstream commit.)
Reference in a new issue · Block a user