fix input name bug (#352)

This commit is contained in:
heliqi
2022-10-12 22:49:27 -05:00
committed by GitHub
parent 2c4a555929
commit 8a3ad3eb43
9 changed files with 48 additions and 26 deletions

View File

@@ -20,7 +20,7 @@ COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python
COPY TensorRT-8.4.1.5 /opt/
COPY serving/TensorRT-8.4.1.5 /opt/
ENV TZ=Asia/Shanghai \
DEBIAN_FRONTEND=noninteractive \

View File

@@ -19,9 +19,9 @@ ENV TZ=Asia/Shanghai \
RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
&& python3 -m pip install -U pip \
&& python3 -m pip install paddlepaddle faster_tokenizer
&& python3 -m pip install paddlepaddle paddlenlp faster_tokenizer
COPY python/dist/*.whl /opt/fastdeploy/
COPY python/dist/*.whl *.whl /opt/fastdeploy/
RUN python3 -m pip install /opt/fastdeploy/*.whl \
&& rm -rf /opt/fastdeploy/*.whl

View File

@@ -6,14 +6,31 @@
FastDeploy provides end-to-end serving deployment built on [Triton Inference Server](https://github.com/triton-inference-server/server). Under the hood it uses FastDeploy's high-performance Runtime module and chains it with FastDeploy's pre- and post-processing modules, delivering serving that is fast to deploy, easy to use, and high-performing.
## End-to-end deployment examples
## Prepare the environment
- [YOLOV5 detection task](../examples/vision/detection/yolov5/README.md)
- [OCR]()
- [ERNIE 3.0 text classification task]()
- [UIE]()
- [Speech]()
### Requirements
- Linux
- When using the GPU image, NVIDIA Driver >= 470 is required (older Tesla-architecture GPUs such as the T4 may instead use NVIDIA Driver 418.40+, 440.33+, 450.51+, or 460.27+); a quick version check is sketched below.
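A minimal way to verify the driver requirement on the host (assumes `nvidia-smi` is installed along with the driver; compare the output against the versions listed above):
``` shell
# Print the installed NVIDIA driver version so it can be compared against
# the minimum required by the GPU image (470, or the Tesla-era exceptions above).
nvidia-smi --query-gpu=driver_version --format=csv,noheader
```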
## Advanced documentation
- [Model repository](docs/zh_CN/model_repository.md)
- [Model configuration](docs/zh_CN/model_configuration.md)
### Get the image
#### CPU image
The CPU image only supports serving Paddle/ONNX models on CPU. Supported inference backends include OpenVINO, Paddle Inference, and ONNX Runtime.
``` shell
docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only
```
#### GPU image
The GPU image supports serving Paddle/ONNX models on GPU/CPU. Supported inference backends include OpenVINO, TensorRT, Paddle Inference, and ONNX Runtime.
``` shell
docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
```
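A usage sketch for starting the GPU image (the mounted path and model repository layout are illustrative, and the server launcher is assumed to be the `fastdeployserver` binary bundled in the image):
``` shell
# Start the GPU image with a local model repository mounted at /models,
# then launch the bundled Triton-based server against it.
docker run -it --rm --gpus all \
  -v `pwd`/model_repository:/models \
  paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 \
  fastdeployserver --model-repository=/models
```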
Users can also build the image themselves to suit their own needs, following this document:
- [How to build the FastDeploy serving image](docs/zh_CN/compile.md)
## Other documents
- [Serving model repository](docs/zh_CN/model_repository.md) (how to prepare the model repository)
- [Serving deployment configuration](docs/zh_CN/model_configuration.md) (the runtime configuration options)
- [Serving deployment demo](docs/zh_CN/demo.md)
- [YOLOV5 detection task](../examples/vision/detection/yolov5/serving/README.md)

View File

@@ -0,0 +1 @@
# Building the serving deployment image

View File

@@ -0,0 +1 @@
# Serving deployment demo

View File

@@ -24,17 +24,17 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
fi
docker run -it --rm --name build_fd_backend \
-v`pwd`:/workspace/fastdeploy \
-v`pwd`/..:/workspace/fastdeploy \
nvcr.io/nvidia/tritonserver:21.10-py3 \
bash -c \
'cd /workspace/fastdeploy/serving;
rm -rf build; mkdir build; cd build;
apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
else
docker run -it --rm --name build_fd_backend \
-v`pwd`:/workspace/fastdeploy \
-v`pwd`/..:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy/serving;

View File

@@ -29,7 +29,7 @@ if [ ! -d "./TensorRT-8.4.1.5/" ]; then
fi
docker run -it --rm --name build_fd_runtime \
-v`pwd`:/workspace/fastdeploy \
-v`pwd`/..:/workspace/fastdeploy \
nvcr.io/nvidia/tritonserver:21.10-py3-min \
bash -c \
'cd /workspace/fastdeploy;
@@ -37,15 +37,15 @@ docker run -it --rm --name build_fd_runtime \
apt-get update;
apt-get install -y --no-install-recommends python3-dev python3-pip;
ln -s /usr/bin/python3 /usr/bin/python;
export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
make -j`nproc`;
make install'
else
docker run -it --rm --name build_fd_runtime \
-v`pwd`:/workspace/fastdeploy \
-v`pwd`/..:/workspace/fastdeploy \
paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
bash -c \
'cd /workspace/fastdeploy;

View File

@@ -23,7 +23,7 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
fi
docker run -it --rm --name build_fd_vison \
-v`pwd`:/workspace/fastdeploy \
-v`pwd`/..:/workspace/fastdeploy \
nvcr.io/nvidia/tritonserver:21.10-py3-min \
bash -c \
'cd /workspace/fastdeploy/python;
@@ -31,7 +31,7 @@ docker run -it --rm --name build_fd_vison \
apt-get update;
apt-get install -y --no-install-recommends patchelf python3-dev python3-pip;
ln -s /usr/bin/python3 /usr/bin/python;
export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
export WITH_GPU=ON;
export ENABLE_ORT_BACKEND=OFF;
export ENABLE_VISION=ON;

View File

@@ -315,6 +315,8 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
// &runtime_options_->trt_max_workspace_size));
} else if (param_key == "cache_file") {
runtime_options_->SetTrtCacheFile(value_string);
} else if (param_key == "use_paddle") {
runtime_options_->EnablePaddleToTrt();
}
}
}
@@ -1025,12 +1027,13 @@ TRITONSERVER_Error* ModelInstanceState::SetInputTensors(
input, &input_name, &input_datatype, &input_shape, &input_dims_count,
nullptr, nullptr));
if (input_tensors_[input_idx].name != std::string(input_name)) {
int index = GetInfoIndex(std::string(input_name), input_tensor_infos_);
if (index < 0) {
auto err = TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
(std::string("Input name [") + input_name +
std::string("] is not one of the FD predictor input: ") +
input_tensors_[input_idx].name)
input_tensors_[index].name)
.c_str());
// SendErrorForResponses(responses, request_count, err);
return err;
@@ -1075,12 +1078,12 @@ TRITONSERVER_Error* ModelInstanceState::SetInputTensors(
memory_type = TRITONSERVER_MEMORY_CPU;
device = fastdeploy::Device::CPU;
}
input_tensors_[input_idx].Resize(
input_tensors_[index].Resize(
batchn_shape, ConvertDataTypeToFD(input_datatype), input_name, device);
collector->ProcessTensor(
input_name,
reinterpret_cast<char*>(input_tensors_[input_idx].MutableData()),
input_tensors_[input_idx].Nbytes(), memory_type, device_id);
reinterpret_cast<char*>(input_tensors_[index].MutableData()),
input_tensors_[index].Nbytes(), memory_type, device_id);
}
// Finalize...