mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-05 08:37:06 +08:00
[Optimize] Optimize tensorwise fp8 performance (#2729)
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
Some checks failed
Deploy GitHub Pages / deploy (push) Has been cancelled
* [Optimize] Optimize tensorwise fp8 performance
This commit is contained in:
@@ -442,6 +442,7 @@ elif paddle.is_compiled_with_cuda():
"gpu_ops/scaled_gemm_f8_i4_f16_weight_quantize.cu",
"gpu_ops/cutlass_kernels/cutlass_heuristic.cu",
"gpu_ops/cutlass_kernels/cutlass_preprocessors.cu",
"gpu_ops/fused_hadamard_quant_fp8.cu"
]

sources += find_end_files(fp8_auto_gen_directory, ".cu")

Reference in New Issue
Block a user