fix input name bug (#352)

This commit is contained in:
heliqi
2022-10-12 22:49:27 -05:00
committed by GitHub
parent 2c4a555929
commit 8a3ad3eb43
9 changed files with 48 additions and 26 deletions

View File

@@ -20,7 +20,7 @@ COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python
COPY TensorRT-8.4.1.5 /opt/
COPY serving/TensorRT-8.4.1.5 /opt/
ENV TZ=Asia/Shanghai \
DEBIAN_FRONTEND=noninteractive \

View File

@@ -19,9 +19,9 @@ ENV TZ=Asia/Shanghai \
RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
&& python3 -m pip install -U pip \
&& python3 -m pip install paddlepaddle faster_tokenizer
&& python3 -m pip install paddlepaddle paddlenlp faster_tokenizer
COPY python/dist/*.whl /opt/fastdeploy/
COPY python/dist/*.whl *.whl /opt/fastdeploy/
RUN python3 -m pip install /opt/fastdeploy/*.whl \
&& rm -rf /opt/fastdeploy/*.whl

View File

@@ -6,14 +6,31 @@
FastDeploy provides end-to-end serving deployment built on [Triton Inference Server](https://github.com/triton-inference-server/server). Under the hood it uses FastDeploy's high-performance Runtime module and chains it with FastDeploy's pre- and post-processing modules, delivering serving that is fast to deploy, easy to use, and high-performing.
## End-to-end deployment examples
## Prepare the environment
- [YOLOV5 detection task](../examples/vision/detection/yolov5/README.md)
- [OCR]()
- [ERNIE 3.0 text classification task]()
- [UIE]()
- [Speech]()
### Requirements
- Linux
- When using the GPU image, NVIDIA Driver >= 470 is required (older Tesla-architecture GPUs such as the T4 may instead use NVIDIA Driver 418.40+, 440.33+, 450.51+, or 460.27+); a quick version check is sketched below.
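A minimal way to verify the driver requirement on the host (assumes `nvidia-smi` is installed along with the driver; compare the output against the versions listed above):
``` shell
# Print the installed NVIDIA driver version so it can be compared against
# the minimum required by the GPU image (470, or the Tesla-era exceptions above).
nvidia-smi --query-gpu=driver_version --format=csv,noheader
```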
## Advanced documentation
- [Model repository](docs/zh_CN/model_repository.md)
- [Model configuration](docs/zh_CN/model_configuration.md)
### Get the image
#### CPU image
The CPU image only supports serving Paddle/ONNX models on CPU. Supported inference backends include OpenVINO, Paddle Inference, and ONNX Runtime.
``` shell
docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only
```
#### GPU image
The GPU image supports serving Paddle/ONNX models on GPU/CPU. Supported inference backends include OpenVINO, TensorRT, Paddle Inference, and ONNX Runtime.
``` shell
docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
```
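A usage sketch for starting the GPU image (the mounted path and model repository layout are illustrative, and the server launcher is assumed to be the `fastdeployserver` binary bundled in the image):
``` shell
# Start the GPU image with a local model repository mounted at /models,
# then launch the bundled Triton-based server against it.
docker run -it --rm --gpus all \
  -v `pwd`/model_repository:/models \
  paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 \
  fastdeployserver --model-repository=/models
```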
Users can also build the image themselves to suit their own needs, following this document:
- [How to build the FastDeploy serving image](docs/zh_CN/compile.md)
## Other documents
- [Serving model repository](docs/zh_CN/model_repository.md) (how to prepare the model repository)
- [Serving deployment configuration](docs/zh_CN/model_configuration.md) (the runtime configuration options)
- [Serving deployment demo](docs/zh_CN/demo.md)
- [YOLOV5 detection task](../examples/vision/detection/yolov5/serving/README.md)

View File

@@ -0,0 +1 @@
# Building the serving deployment image

View File

@@ -0,0 +1 @@
# Serving deployment demo

View File

@@ -24,17 +24,17 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
fi
docker run -it --rm --name build_fd_backend \
-v`pwd`:/workspace/fastdeploy \
-v`pwd`/..:/workspace/fastdeploy \
nvcr.io/nvidia/tritonserver:21.10-py3 \
bash -c \
'cd /workspace/fastdeploy/serving;
rm -rf build; mkdir build; cd build;
apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
else
docker run -it --rm --name build_fd_backend \
-v`pwd`:/workspace/fastdeploy \
-v`pwd`/..:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy/serving;

View File

@@ -29,7 +29,7 @@ if [ ! -d "./TensorRT-8.4.1.5/" ]; then
fi
docker run -it --rm --name build_fd_runtime \
-v`pwd`:/workspace/fastdeploy \
-v`pwd`/..:/workspace/fastdeploy \
nvcr.io/nvidia/tritonserver:21.10-py3-min \
bash -c \
'cd /workspace/fastdeploy;
@@ -37,15 +37,15 @@ docker run -it --rm --name build_fd_runtime \
apt-get update;
apt-get install -y --no-install-recommends python3-dev python3-pip;
ln -s /usr/bin/python3 /usr/bin/python;
export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install'
else
docker run -it --rm --name build_fd_runtime \
-v`pwd`:/workspace/fastdeploy \
-v`pwd`/..:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy;

View File

@@ -23,7 +23,7 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
fi
docker run -it --rm --name build_fd_vison \
-v`pwd`:/workspace/fastdeploy \
-v`pwd`/..:/workspace/fastdeploy \
nvcr.io/nvidia/tritonserver:21.10-py3-min \
bash -c \
'cd /workspace/fastdeploy/python;
@@ -31,7 +31,7 @@ docker run -it --rm --name build_fd_vison \
apt-get update;
apt-get install -y --no-install-recommends patchelf python3-dev python3-pip;
ln -s /usr/bin/python3 /usr/bin/python;
export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
export WITH_GPU=ON;
export ENABLE_ORT_BACKEND=OFF;
export ENABLE_VISION=ON;

View File

@@ -315,6 +315,8 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
// &runtime_options_->trt_max_workspace_size));
} else if (param_key == "cache_file") {
runtime_options_->SetTrtCacheFile(value_string);
} else if (param_key == "use_paddle") {
runtime_options_->EnablePaddleToTrt();
}
}
}
@@ -1025,12 +1027,13 @@ TRITONSERVER_Error* ModelInstanceState::SetInputTensors(
input, &input_name, &input_datatype, &input_shape, &input_dims_count,
nullptr, nullptr));
if (input_tensors_[input_idx].name != std::string(input_name)) {
int index = GetInfoIndex(std::string(input_name), input_tensor_infos_);
if (index < 0) {
auto err = TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
(std::string("Input name [") + input_name +
std::string("] is not one of the FD predictor input: ") +
input_tensors_[input_idx].name)
input_tensors_[index].name)
.c_str());
// SendErrorForResponses(responses, request_count, err);
return err;
@@ -1075,12 +1078,12 @@ TRITONSERVER_Error* ModelInstanceState::SetInputTensors(
memory_type = TRITONSERVER_MEMORY_CPU;
device = fastdeploy::Device::CPU;
}
input_tensors_[input_idx].Resize(
input_tensors_[index].Resize(
batchn_shape, ConvertDataTypeToFD(input_datatype), input_name, device);
collector->ProcessTensor(
input_name,
reinterpret_cast<char*>(input_tensors_[input_idx].MutableData()),
input_tensors_[input_idx].Nbytes(), memory_type, device_id);
reinterpret_cast<char*>(input_tensors_[index].MutableData()),
input_tensors_[index].Nbytes(), memory_type, device_id);
}
// Finalize...