Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-06 17:17:14 +08:00)

Commit: fix input name bug (#352)
@@ -20,7 +20,7 @@ COPY --from=full /opt/tritonserver/lib /opt/tritonserver/lib
 COPY --from=full /opt/tritonserver/include /opt/tritonserver/include
 COPY --from=full /opt/tritonserver/backends/python /opt/tritonserver/backends/python

-COPY TensorRT-8.4.1.5 /opt/
+COPY serving/TensorRT-8.4.1.5 /opt/

 ENV TZ=Asia/Shanghai \
     DEBIAN_FRONTEND=noninteractive \
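Note: the `serving/` prefix on the `COPY` source implies the image is now built with the repository root as the Docker build context (the build scripts further below mount `` `pwd`/.. `` for the same reason). A minimal sketch of such an invocation, assuming a hypothetical Dockerfile path and image tag that are not part of this commit:

```shell
# Hypothetical invocation: run from the FastDeploy repository root so that
# serving/TensorRT-8.4.1.5 resolves inside the build context.
# The -f path and -t tag below are assumptions for illustration only.
docker build -t fastdeploy-serving:dev -f serving/Dockerfile .
```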
@@ -19,9 +19,9 @@ ENV TZ=Asia/Shanghai \

 RUN apt-get update && apt-get install -y --no-install-recommends apt-utils libgomp1 \
     && python3 -m pip install -U pip \
-    && python3 -m pip install paddlepaddle faster_tokenizer
+    && python3 -m pip install paddlepaddle paddlenlp faster_tokenizer

-COPY python/dist/*.whl /opt/fastdeploy/
+COPY python/dist/*.whl *.whl /opt/fastdeploy/
 RUN python3 -m pip install /opt/fastdeploy/*.whl \
     && rm -rf /opt/fastdeploy/*.whl

@@ -6,14 +6,31 @@
 FastDeploy builds end-to-end serving deployment on top of [Triton Inference Server](https://github.com/triton-inference-server/server). The underlying backend uses FastDeploy's high-performance Runtime module, chained with FastDeploy's pre- and post-processing modules, so deployments are quick to set up, simple to use, and deliver excellent performance.

-## End-to-end deployment examples
-
-- [YOLOv5 detection](../examples/vision/detection/yolov5/README.md)
-- [OCR]()
-- [ERNIE 3.0 text classification]()
-- [UIE]()
-- [Speech]()
-
-## Advanced documentation
-- [Model repository](docs/zh_CN/model_repository.md)
-- [Model configuration](docs/zh_CN/model_configuration.md)
+## Preparing the environment
+
+### Requirements
+- Linux
+- For the GPU image, NVIDIA Driver >= 470 is required (older Tesla-architecture GPUs such as the T4 can use NVIDIA Driver 418.40+, 440.33+, 450.51+, or 460.27+)
+
+### Getting an image
+
+#### CPU image
+The CPU image only supports serving Paddle/ONNX models on CPU; the supported inference backends are OpenVINO, Paddle Inference, and ONNX Runtime
+``` shell
+docker pull paddlepaddle/fastdeploy:0.3.0-cpu-only
+```
+
+#### GPU image
+The GPU image supports serving Paddle/ONNX models on GPU/CPU; the supported inference backends are OpenVINO, TensorRT, Paddle Inference, and ONNX Runtime
+```
+docker pull paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10
+```
+
+Users can also build the image themselves as needed, following the document below
+- [How to build the FastDeploy serving image](docs/zh_CN/compile.md)
+
+## Other documents
+- [Serving model repository](docs/zh_CN/model_repository.md) (how to prepare the model directory)
+- [Serving configuration](docs/zh_CN/model_configuration.md) (runtime configuration options)
+- [Serving demos](docs/zh_CN/demo.md)
+- [YOLOv5 detection](../examples/vision/detection/yolov5/serving/README.md)

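Note: after pulling one of the images above, serving is started from a Triton-style model repository. A minimal sketch, assuming the image provides a `fastdeployserver` entrypoint and that a local `models/` directory holds the model repository; neither is shown in this diff:

```shell
# Assumed layout: ./models/<model_name>/config.pbtxt and ./models/<model_name>/1/...
docker run -it --rm --gpus all --net=host \
  -v "$PWD/models":/models \
  paddlepaddle/fastdeploy:0.3.0-gpu-cuda11.4-trt8.4-21.10 \
  fastdeployserver --model-repository=/models
```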
serving/docs/zh_CN/compile.md (new file, 1 line)
@@ -0,0 +1 @@
+# Building the serving deployment image

serving/docs/zh_CN/demo.md (new file, 1 line)
@@ -0,0 +1 @@
+# Serving deployment demos
@@ -24,17 +24,17 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
 fi

 docker run -it --rm --name build_fd_backend \
-           -v`pwd`:/workspace/fastdeploy \
+           -v`pwd`/..:/workspace/fastdeploy \
            nvcr.io/nvidia/tritonserver:21.10-py3 \
            bash -c \
            'cd /workspace/fastdeploy/serving;
            rm -rf build; mkdir build; cd build;
            apt-get update; apt-get install -y --no-install-recommends rapidjson-dev;
-           export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
+           export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
            cmake .. -DFASTDEPLOY_DIR=/workspace/fastdeploy/build/fastdeploy-0.0.3 -DTRITON_COMMON_REPO_TAG=r21.10 -DTRITON_CORE_REPO_TAG=r21.10 -DTRITON_BACKEND_REPO_TAG=r21.10; make -j`nproc`'
 else
 docker run -it --rm --name build_fd_backend \
-           -v`pwd`:/workspace/fastdeploy \
+           -v`pwd`/..:/workspace/fastdeploy \
            paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
            bash -c \
            'cd /workspace/fastdeploy/serving;
@@ -29,7 +29,7 @@ if [ ! -d "./TensorRT-8.4.1.5/" ]; then
 fi

 docker run -it --rm --name build_fd_runtime \
-           -v`pwd`:/workspace/fastdeploy \
+           -v`pwd`/..:/workspace/fastdeploy \
            nvcr.io/nvidia/tritonserver:21.10-py3-min \
            bash -c \
            'cd /workspace/fastdeploy;
@@ -37,15 +37,15 @@ docker run -it --rm --name build_fd_runtime \
            apt-get update;
            apt-get install -y --no-install-recommends python3-dev python3-pip;
            ln -s /usr/bin/python3 /usr/bin/python;
-           export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
-           cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=${PWD}/../TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
+           export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
+           cmake .. -DENABLE_TRT_BACKEND=ON -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-0.0.3 -DWITH_GPU=ON -DTRT_DIRECTORY=/workspace/fastdeploy/serving/TensorRT-8.4.1.5/ -DENABLE_PADDLE_BACKEND=ON -DENABLE_ORT_BACKEND=ON -DENABLE_OPENVINO_BACKEND=ON -DENABLE_VISION=OFF -DBUILD_FASTDEPLOY_PYTHON=OFF -DENABLE_PADDLE_FRONTEND=ON -DENABLE_TEXT=OFF -DLIBRARY_NAME=fastdeploy_runtime;
            make -j`nproc`;
            make install'

 else

 docker run -it --rm --name build_fd_runtime \
-           -v`pwd`:/workspace/fastdeploy \
+           -v`pwd`/..:/workspace/fastdeploy \
            paddlepaddle/fastdeploy:22.09-cpu-only-buildbase \
            bash -c \
            'cd /workspace/fastdeploy;
@@ -23,7 +23,7 @@ if [ ! -d "./cmake-3.18.6-Linux-x86_64/" ]; then
 fi

 docker run -it --rm --name build_fd_vison \
-           -v`pwd`:/workspace/fastdeploy \
+           -v`pwd`/..:/workspace/fastdeploy \
            nvcr.io/nvidia/tritonserver:21.10-py3-min \
            bash -c \
            'cd /workspace/fastdeploy/python;
@@ -31,7 +31,7 @@ docker run -it --rm --name build_fd_vison \
            apt-get update;
            apt-get install -y --no-install-recommends patchelf python3-dev python3-pip;
            ln -s /usr/bin/python3 /usr/bin/python;
-           export PATH=/workspace/fastdeploy/cmake-3.18.6-Linux-x86_64/bin:$PATH;
+           export PATH=/workspace/fastdeploy/serving/cmake-3.18.6-Linux-x86_64/bin:$PATH;
            export WITH_GPU=ON;
            export ENABLE_ORT_BACKEND=OFF;
            export ENABLE_VISION=ON;
@@ -315,6 +315,8 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
           // &runtime_options_->trt_max_workspace_size));
         } else if (param_key == "cache_file") {
           runtime_options_->SetTrtCacheFile(value_string);
+        } else if (param_key == "use_paddle") {
+          runtime_options_->EnablePaddleToTrt();
         }
       }
     }
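Note: the `cache_file` and `use_paddle` keys handled above are per-model runtime options. A sketch of how they might be supplied through a model's `config.pbtxt`; the exact section the backend reads them from (here Triton's TensorRT `gpu_execution_accelerator` parameters) and the model path are assumptions, not confirmed by this diff:

```shell
# Hypothetical config.pbtxt fragment appended from the shell; the key names
# match the parameters parsed above, but the surrounding layout is assumed.
cat >> models/my_model/config.pbtxt <<'EOF'
optimization {
  execution_accelerators {
    gpu_execution_accelerator : [
      {
        name : "tensorrt"
        parameters { key: "cache_file" value: "./trt_cache" }
        parameters { key: "use_paddle" value: "true" }
      }
    ]
  }
}
EOF
```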
@@ -1025,12 +1027,13 @@ TRITONSERVER_Error* ModelInstanceState::SetInputTensors(
         input, &input_name, &input_datatype, &input_shape, &input_dims_count,
         nullptr, nullptr));

-    if (input_tensors_[input_idx].name != std::string(input_name)) {
+    int index = GetInfoIndex(std::string(input_name), input_tensor_infos_);
+    if (index < 0) {
       auto err = TRITONSERVER_ErrorNew(
           TRITONSERVER_ERROR_INTERNAL,
           (std::string("Input name [") + input_name +
            std::string("] is not one of the FD predictor input: ") +
-           input_tensors_[input_idx].name)
+           input_tensors_[index].name)
               .c_str());
       // SendErrorForResponses(responses, request_count, err);
       return err;
@@ -1075,12 +1078,12 @@ TRITONSERVER_Error* ModelInstanceState::SetInputTensors(
       memory_type = TRITONSERVER_MEMORY_CPU;
       device = fastdeploy::Device::CPU;
     }
-    input_tensors_[input_idx].Resize(
+    input_tensors_[index].Resize(
         batchn_shape, ConvertDataTypeToFD(input_datatype), input_name, device);
     collector->ProcessTensor(
         input_name,
-        reinterpret_cast<char*>(input_tensors_[input_idx].MutableData()),
-        input_tensors_[input_idx].Nbytes(), memory_type, device_id);
+        reinterpret_cast<char*>(input_tensors_[index].MutableData()),
+        input_tensors_[index].Nbytes(), memory_type, device_id);
   }

   // Finalize...