mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-12-24 13:28:13 +08:00
fix scaled_gemm_f8_i4_f16_weight_quantize input (#3685)
This commit is contained in:
@@ -217,7 +217,7 @@ std::vector<paddle::DataType> Fp8Int4WeightQuantizeInferDtype(

 PD_BUILD_STATIC_OP(scaled_gemm_f8_i4_f16_weight_quantize)
-    .Inputs({"intput"})
+    .Inputs({"input"})
     .Attrs({"groupsize: int",
             "scale_dtype: std::string"})
     .Outputs({"output", "scale"})
Reference in New Issue
Block a user