Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-07 09:31:35 +08:00
[Backend & Serving] Serving and Runtime support Clone (#464)

* Add Clone support to Serving and Runtime
* Support the TRT, OpenVINO and Paddle backends

Co-authored-by: Jason <jiangjiajun@baidu.com>
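In practice this lets a serving process load and build a model once, then hand each worker a cheap copy instead of re-initializing from disk. A minimal sketch of the intended call pattern follows; the Runtime-level names are assumptions inferred from the PR title, since only the backend-level Clone appears in the diff below:

    // Sketch, assuming fastdeploy::Runtime mirrors the backend's
    // Clone(void* stream = nullptr, int device_id = -1) signature.
    fastdeploy::RuntimeOption option;
    option.SetModelPath("model.pdmodel", "model.pdiparams");  // hypothetical paths
    option.UseGpu(0);
    option.UseTrtBackend();

    fastdeploy::Runtime runtime;
    runtime.Init(option);

    // Same GPU: the clone shares the already-built TensorRT engine.
    auto worker_runtime = runtime.Clone();
    // Different GPU: device memory cannot be shared, so the engine is rebuilt.
    auto remote_runtime = runtime.Clone(/*stream=*/nullptr, /*device_id=*/1);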
@@ -285,6 +285,7 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
     BuildTrtEngine();
   }
 
+  cudaSetDevice(option_.gpu_id);
   SetInputs(inputs);
   AllocateOutputsBuffer(outputs);
 
@@ -356,13 +357,17 @@ void TrtBackend::GetInputOutputInfo() {
       outputs_device_buffer_[name] = FDDeviceBuffer(dtype);
       casted_output_tensors_[name] = FDTensor();
     }
+    io_name_index_[name] = i;
   }
   bindings_.resize(num_binds);
 }
 
 void TrtBackend::SetInputs(const std::vector<FDTensor>& inputs) {
   for (const auto& item : inputs) {
-    auto idx = engine_->getBindingIndex(item.name.c_str());
+    // auto idx = engine_->getBindingIndex(item.name.c_str());
+    auto iter = io_name_index_.find(item.name);
+    FDASSERT(iter != io_name_index_.end(), "TRTBackend SetInputs not find name:%s", item.name.c_str());
+    auto idx = iter->second;
     std::vector<int> shape(item.shape.begin(), item.shape.end());
     auto dims = ToDims(shape);
     context_->setBindingDimensions(idx, dims);
@@ -410,7 +415,10 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs) {
     outputs->resize(outputs_desc_.size());
   }
   for (size_t i = 0; i < outputs_desc_.size(); ++i) {
-    auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
+    // auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
+    auto idx_iter = io_name_index_.find(outputs_desc_[i].name);
+    FDASSERT(idx_iter != io_name_index_.end(), "TRTBackend Outputs not find name:%s", outputs_desc_[i].name.c_str());
+    auto idx = idx_iter->second;
     auto output_dims = context_->getBindingDimensions(idx);
 
     // find the original index of output
@@ -673,4 +681,47 @@ std::vector<TensorInfo> TrtBackend::GetOutputInfos() {
   return infos;
 }
 
+std::unique_ptr<BaseBackend> TrtBackend::Clone(void *stream, int device_id) {
+  std::unique_ptr<BaseBackend> new_backend = utils::make_unique<TrtBackend>();
+  auto casted_backend = dynamic_cast<TrtBackend*>(new_backend.get());
+  if (device_id > 0 && device_id != option_.gpu_id) {
+    auto clone_option = option_;
+    clone_option.gpu_id = device_id;
+    clone_option.external_stream_ = stream;
+    if (option_.model_format == ModelFormat::ONNX) {
+      FDASSERT(casted_backend->InitFromOnnx(option_.model_file, clone_option),
+               "Clone model from ONNX failed while initialize TrtBackend.");
+    } else {
+      FDASSERT(casted_backend->InitFromPaddle(option_.model_file,
+                                              option_.params_file, clone_option),
+               "Clone model from Paddle failed while initialize TrtBackend.");
+    }
+    FDWARNING << "The target device id:"
+              << device_id
+              << " is different from current device id:"
+              << option_.gpu_id
+              << ", cannot share memory with current engine."
+              << std::endl;
+    return new_backend;
+  }
+  cudaSetDevice(option_.gpu_id);
+  casted_backend->option_.gpu_id = option_.gpu_id;
+  if (stream) {
+    casted_backend->stream_ = reinterpret_cast<cudaStream_t>(stream);
+  } else {
+    FDASSERT(cudaStreamCreate(&casted_backend->stream_) == 0,
+             "[ERROR] Error occurs while clone calling cudaStreamCreate().");
+  }
+  casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
+  casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end());
+  casted_backend->outputs_order_.insert(outputs_order_.begin(), outputs_order_.end());
+  casted_backend->shape_range_info_.insert(shape_range_info_.begin(), shape_range_info_.end());
+  casted_backend->engine_ = engine_;
+  casted_backend->context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
+      casted_backend->engine_->createExecutionContext());
+  casted_backend->GetInputOutputInfo();
+  FDINFO << "TRTBackend clone finish." << std::endl;
+  return new_backend;
+}
+
 }  // namespace fastdeploy
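Two details of the same-device path above are worth calling out: a TensorRT engine is immutable and may back multiple execution contexts, so the clone shares engine_ while creating its own IExecutionContext; and each clone also gets its own cudaStream_t, which is what makes concurrent inference from separate threads safe. A sketch of driving two clones in parallel (assumed caller code, not part of the commit):

    // Assumed caller code: one clone per thread, each with its own
    // execution context and CUDA stream; the engine itself is shared.
    std::unique_ptr<BaseBackend> worker_a = backend->Clone();
    std::unique_ptr<BaseBackend> worker_b = backend->Clone();
    std::thread t1([&] { worker_a->Infer(inputs_a, &outputs_a); });
    std::thread t2([&] { worker_b->Infer(inputs_b, &outputs_b); });
    t1.join();
    t2.join();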
@@ -25,6 +25,7 @@
 #include "NvOnnxParser.h"
 #include "fastdeploy/backends/backend.h"
 #include "fastdeploy/backends/tensorrt/utils.h"
+#include "fastdeploy/utils/unique_ptr.h"
 
 class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
  public:
@@ -45,7 +46,7 @@ class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
 
   void writeCalibrationCache(const void* cache,
                              size_t length) noexcept override {
-    std::cout << "NOT IMPLEMENT." << std::endl;
+    fastdeploy::FDERROR << "NOT IMPLEMENT." << std::endl;
   }
 
  private:
@@ -62,6 +63,11 @@ struct TrtValueInfo {
 };
 
 struct TrtBackendOption {
+  std::string model_file = "";   // Path of model file
+  std::string params_file = "";  // Path of parameters file, can be empty
+  // format of input model
+  ModelFormat model_format = ModelFormat::AUTOREC;
+
   int gpu_id = 0;
   bool enable_fp16 = false;
   bool enable_int8 = false;
@@ -99,6 +105,8 @@ class TrtBackend : public BaseBackend {
   TensorInfo GetOutputInfo(int index);
   std::vector<TensorInfo> GetInputInfos() override;
   std::vector<TensorInfo> GetOutputInfos() override;
+  std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
+                                     int device_id = -1) override;
 
   ~TrtBackend() {
     if (parser_) {
@@ -119,6 +127,7 @@ class TrtBackend : public BaseBackend {
   std::vector<TrtValueInfo> outputs_desc_;
   std::map<std::string, FDDeviceBuffer> inputs_device_buffer_;
   std::map<std::string, FDDeviceBuffer> outputs_device_buffer_;
+  std::map<std::string, int> io_name_index_;
 
   std::string calibration_str_;
 
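The new TrtBackendOption fields close the loop with Clone above: when the target device differs, the clone cannot share memory, so it re-initializes from option_.model_file / option_.params_file on the new GPU. A sketch of the values that branch relies on (paths are hypothetical):

    TrtBackendOption option;
    option.model_file = "model.pdmodel";     // hypothetical path
    option.params_file = "model.pdiparams";  // hypothetical path
    option.model_format = ModelFormat::PADDLE;
    option.gpu_id = 0;
    // After InitFromPaddle(option.model_file, option.params_file, option),
    // calling Clone(nullptr, /*device_id=*/1) takes the InitFromPaddle branch
    // and rebuilds the engine on GPU 1 instead of sharing engine_.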