mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
* w4afp8 支持per group * code style * fix transpose * revert fast hardmard --------- Co-authored-by: yuanxiaolan <yuanxiaolan01@baidu.com> Co-authored-by: plusNew001 <95567040+plusNew001@users.noreply.github.com>
28 lines
1.3 KiB
JSON
{
  "moe_fast_hardamard_impl": {
    "name": "moe_fast_hardamard_impl",
    "function_name": "MoeFastHardamardImplWrapper",
    "impl_file": "moe_fast_hardamard_impl.cuh",
    "template_params": [
      "T",
      "OutT",
      "kLogN",
      "VecSize",
      "kNChunks",
      "kThreads",
      "UseDiagonalBlockMatrix"
    ],
    "dispatch_params": {},
    "data_types": [
      ["phi::dtype::float16", "phi::dtype::float16", "float16_float16"],
      ["phi::dtype::float16", "int8_t", "float16_int8"],
      ["phi::dtype::bfloat16", "phi::dtype::bfloat16", "bfloat16_bfloat16"],
      ["phi::dtype::bfloat16", "int8_t", "bfloat16_int8"],
      ["phi::dtype::bfloat16", "phi::dtype::float8_e4m3fn", "bfloat16_fp8"]
    ],
    "max_instances_per_file": 16,
    "file_prefix": "moe_fast_hardamard_impl_",
    "function_signature": "template void {function_name}{template_args}(\n    const T *x,\n    const int64_t *expert_idx_per_token,\n    const int64_t *recv_expert_count,\n    const T *shift,\n    const T *smooth,\n    const float* quant_scales,\n    const int quant_round_type,\n    const float quant_max_bound,\n    const float quant_min_bound,\n    const int64_t token_num,\n    const int64_t dim,\n    const int num_max_tokens_per_expert,\n    bool used_in_ep_low_latency,\n    OutT* out,\n    cudaStream_t stream);\n\n"
  }
}