mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
【New Feature】W4afp8 supports per group quantization (#4987)
* w4afp8 支持 per group
* code style
* fix transpose
* revert fast Hadamard

---------

Co-authored-by: yuanxiaolan <yuanxiaolan01@baidu.com>
Co-authored-by: plusNew001 <95567040+plusNew001@users.noreply.github.com>
This commit is contained in:
@@ -17,7 +17,8 @@
|
||||
["phi::dtype::float16", "phi::dtype::float16", "float16_float16"],
|
||||
["phi::dtype::float16", "int8_t", "float16_int8"],
|
||||
["phi::dtype::bfloat16", "phi::dtype::bfloat16", "bfloat16_bfloat16"],
|
||||
["phi::dtype::bfloat16", "int8_t", "bfloat16_int8"]
|
||||
["phi::dtype::bfloat16", "int8_t", "bfloat16_int8"],
|
||||
["phi::dtype::bfloat16", "phi::dtype::float8_e4m3fn", "bfloat16_fp8"]
|
||||
],
|
||||
"max_instances_per_file": 16,
|
||||
"file_prefix": "moe_fast_hardamard_impl_",
|
||||
|
||||
Reference in New Issue
Block a user