Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Other] Deprecate some option api and parameters (#1243)
* Optimize Poros backend
* Fix error
* Add more pybind
* Fix conflicts
* Add some deprecation notices
* [Other] Deprecate some APIs in RuntimeOption (#1240)
* Deprecate more options
* Modify serving
* Update option.h
* Fix TensorRT error
* Update option_pybind.cc
* Update option_pybind.cc
* Fix error in serving
* Fix word spelling error
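On the Python side, the direction of this series looks something like the sketch below. This is a minimal illustration, assuming the grouped TensorRT options are exposed as `option.trt_option` as this PR series introduces; exact attribute names should be checked against the FastDeploy version in use:

```python
import fastdeploy as fd  # assumes a FastDeploy build from around this release

option = fd.RuntimeOption()
option.use_gpu(0)          # run inference on GPU 0
option.use_trt_backend()   # select the TensorRT backend

# Old style, deprecated by this series (illustrative):
# option.enable_trt_fp16()

# New style: TensorRT-specific settings are grouped under trt_option
option.trt_option.enable_fp16 = True
```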
@@ -162,7 +162,8 @@ optimization {
   gpu_execution_accelerator : [
     {
       name : "tensorrt"
-      # Use FP16 inference in TensorRT. You can also choose: trt_fp32, trt_int8
+      # Use FP16 inference in TensorRT. You can also choose: trt_fp32
+      # If the loaded model is a quantized model, this precision will be int8 automatically
       parameters { key: "precision" value: "trt_fp16" }
     }
   ]
@@ -203,4 +204,4 @@ optimization {
     }
   ]
 }}
 ```
 ```
The corresponding change in the Chinese version of the document (comments translated):

@@ -162,7 +162,8 @@ optimization {
   gpu_execution_accelerator : [
     {
       name : "tensorrt"
-      # Use TensorRT FP16 inference. Other options: trt_fp32, trt_int8
+      # Use TensorRT FP16 inference. Other option: trt_fp32
+      # If the loaded model is quantized, this precision setting has no effect; int8 inference is used by default
       parameters { key: "precision" value: "trt_fp16" }
     }
   ]
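Read together, the TensorRT accelerator entry in the model's config.pbtxt ends up as below. This is assembled from the hunks above, with fields outside the hunks omitted, so treat it as a sketch rather than a complete model configuration:

```
optimization {
  gpu_execution_accelerator : [
    {
      name : "tensorrt"
      # Use FP16 inference in TensorRT. You can also choose: trt_fp32
      # If the loaded model is a quantized model, this precision will be int8 automatically
      parameters { key: "precision" value: "trt_fp16" }
    }
  ]
}
```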