mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-04 08:16:42 +08:00
[Optimize]support machete weight only gemm (#3561)
* support machete weight only gemm * add generate * update * fix * change file location * add sm_version limit * fix * fix * fix ci * fix coverage * fix xpu
This commit is contained in:
@@ -129,6 +129,24 @@ paddle::Tensor FusedExpertMoeFunc(
|
||||
const std::string &quant_method, const int moe_topk,
|
||||
const bool norm_topk_prob, const bool group_moe);
|
||||
|
||||
std::vector<paddle::Tensor> MacheteMMKernel(
|
||||
paddle::Tensor const& A, paddle::Tensor const& B,
|
||||
paddle::optional<paddle::Tensor> const& maybe_group_scales,
|
||||
paddle::optional<paddle::Tensor> const& maybe_group_zeros,
|
||||
paddle::optional<paddle::Tensor> const& maybe_channel_scales,
|
||||
paddle::optional<paddle::Tensor> const& maybe_token_scales,
|
||||
std::string const& b_type_str,
|
||||
std::string const& maybe_out_type_str,
|
||||
int64_t const& maybe_group_size,
|
||||
std::string const& maybe_schedule);
|
||||
|
||||
std::vector<paddle::Tensor> MachetePrepackBKernel(
|
||||
paddle::Tensor const& B, std::string const& a_type_str, std::string const& b_type_str,
|
||||
std::string const& maybe_group_scales_type_str);
|
||||
|
||||
std::vector<std::string> MacheteSupportedSchedules(
|
||||
std::string const& a_type_str, std::string const& b_type_str);
|
||||
|
||||
std::vector<paddle::Tensor> MoeExpertDispatch(
|
||||
const paddle::Tensor &input, const paddle::Tensor &gating_output,
|
||||
const paddle::optional<paddle::Tensor> &gating_correction_bias,
|
||||
@@ -924,6 +942,25 @@ PYBIND11_MODULE(fastdeploy_ops, m) {
|
||||
py::arg("recv_expert_count"), py::arg("block_size"),
|
||||
"per token per block quant");
|
||||
|
||||
/*machete/machete_mm.cu
|
||||
* machete_mm
|
||||
*/
|
||||
m.def("machete_mm", &MacheteMMKernel, py::arg("A"), py::arg("B"), py::arg("maybe_group_scale"),
|
||||
py::arg("maybe_group_zeros"), py::arg("maybe_channel_scales"), py::arg("maybe_token_scales"),
|
||||
py::arg("b_type_str"), py::arg("maybe_out_type_str"), py::arg("maybe_group_size"),
|
||||
py::arg("maybe_schedule"),
|
||||
"machete mm function");
|
||||
|
||||
/*machete/machete_prepack_B.cu
|
||||
* machete_prepack_B
|
||||
*/
|
||||
m.def("machete_prepack_B", &MachetePrepackBKernel, "machete prepacked B function");
|
||||
|
||||
/*machete/machete_supported_schedules.cu
|
||||
* machete_supported_schedules
|
||||
*/
|
||||
m.def("machete_supported_schedules", &MacheteSupportedSchedules, "machete supported schedules function");
|
||||
|
||||
/**
|
||||
* moe/fused_moe/moe_topk_select.cu
|
||||
* moe_topk_select
|
||||
|
Reference in New Issue
Block a user