Mirror of https://github.com/PaddlePaddle/FastDeploy.git
[Backend & Serving] Serving and Runtime support Clone (#464)
* Add Clone to Serving and Runtime
* Support Clone for the TensorRT, OpenVINO and Paddle Inference backends

Co-authored-by: Jason <jiangjiajun@baidu.com>
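Usage note (not part of the commit): a minimal sketch of how the new Clone API is intended to be used. The model paths and thread counts are placeholders, and only calls that appear in this diff are assumed.

    #include <vector>
    #include "fastdeploy/runtime.h"

    int main() {
      fastdeploy::RuntimeOption option;
      option.model_file = "model.pdmodel";     // placeholder model path
      option.params_file = "model.pdiparams";  // placeholder params path
      option.SetCpuThreadNum(4);
      option.SetOpenVINOStreams(1);            // new in this change

      fastdeploy::Runtime runtime;
      if (!runtime.Init(option)) return -1;

      // Clone() returns a new Runtime that shares the loaded model/engine with
      // the original instead of loading it again, which is the memory saving
      // this change targets.
      fastdeploy::Runtime* cloned = runtime.Clone(/*stream=*/nullptr, /*device_id=*/-1);

      std::vector<fastdeploy::FDTensor> inputs, outputs;
      // ... fill inputs, then run inference on either instance:
      // cloned->Infer(inputs, &outputs);
      delete cloned;
      return 0;
    }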
@@ -21,6 +21,7 @@
 
 #include "fastdeploy/backends/common/multiclass_nms.h"
 #include "fastdeploy/core/fd_tensor.h"
+#include "fastdeploy/core/fd_type.h"
 
 namespace fastdeploy {
 

@@ -63,6 +64,11 @@ class BaseBackend {
   virtual std::vector<TensorInfo> GetOutputInfos() = 0;
   virtual bool Infer(std::vector<FDTensor>& inputs,
                      std::vector<FDTensor>* outputs) = 0;
+  virtual std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
+                                             int device_id = -1) {
+    FDERROR << "Clone no support" << std::endl;
+    return nullptr;
+  }
 };
 
 }  // namespace fastdeploy
@@ -74,6 +74,8 @@ ov::element::Type FDDataTypeToOV(const FDDataType& type) {
   return ov::element::f32;
 }
 
+ov::Core OpenVINOBackend::core_;
+
 void OpenVINOBackend::InitTensorInfo(
     const std::vector<ov::Output<ov::Node>>& ov_outputs,
     std::map<std::string, TensorInfo>* tensor_infos) {

@@ -96,10 +98,6 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
     return false;
   }
   option_ = option;
-  ov::AnyMap properties;
-  if (option_.cpu_thread_num > 0) {
-    properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
-  }
 
   std::shared_ptr<ov::Model> model = core_.read_model(model_file, params_file);
 

@@ -149,7 +147,19 @@ bool OpenVINOBackend::InitFromPaddle(const std::string& model_file,
     output_infos_.push_back(iter->second);
   }
 
+  ov::AnyMap properties;
+  if (option_.cpu_thread_num > 0) {
+    properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
+  }
+  if (option_.ov_num_streams == -1) {
+    properties["NUM_STREAMS"] = ov::streams::AUTO;
+  } else if (option_.ov_num_streams == -2) {
+    properties["NUM_STREAMS"] = ov::streams::NUMA;
+  } else if (option_.ov_num_streams > 0) {
+    properties["NUM_STREAMS"] = option_.ov_num_streams;
+  }
   compiled_model_ = core_.compile_model(model, "CPU", properties);
 
   request_ = compiled_model_.create_infer_request();
   initialized_ = true;
   return true;

@@ -185,10 +195,6 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
     return false;
   }
   option_ = option;
-  ov::AnyMap properties;
-  if (option_.cpu_thread_num > 0) {
-    properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
-  }
 
   std::shared_ptr<ov::Model> model = core_.read_model(model_file);
 

@@ -238,8 +244,21 @@ bool OpenVINOBackend::InitFromOnnx(const std::string& model_file,
     output_infos_.push_back(iter->second);
   }
 
+  ov::AnyMap properties;
+  if (option_.cpu_thread_num > 0) {
+    properties["INFERENCE_NUM_THREADS"] = option_.cpu_thread_num;
+  }
+  if (option_.ov_num_streams == -1) {
+    properties["NUM_STREAMS"] = ov::streams::AUTO;
+  } else if (option_.ov_num_streams == -2) {
+    properties["NUM_STREAMS"] = ov::streams::NUMA;
+  } else if (option_.ov_num_streams > 0) {
+    properties["NUM_STREAMS"] = option_.ov_num_streams;
+  }
   compiled_model_ = core_.compile_model(model, "CPU", properties);
 
   request_ = compiled_model_.create_infer_request();
 
   initialized_ = true;
   return true;
 }

@@ -281,4 +300,14 @@ bool OpenVINOBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
+std::unique_ptr<BaseBackend> OpenVINOBackend::Clone(void *stream, int device_id) {
+  std::unique_ptr<BaseBackend> new_backend = utils::make_unique<OpenVINOBackend>();
+  auto casted_backend = dynamic_cast<OpenVINOBackend*>(new_backend.get());
+  casted_backend->option_ = option_;
+  casted_backend->request_ = compiled_model_.create_infer_request();
+  casted_backend->input_infos_.assign(input_infos_.begin(), input_infos_.end());
+  casted_backend->output_infos_.assign(output_infos_.begin(), output_infos_.end());
+  return new_backend;
+}
+
 }  // namespace fastdeploy
@@ -20,17 +20,20 @@
 #include <vector>
 
 #include "fastdeploy/backends/backend.h"
+#include "fastdeploy/utils/unique_ptr.h"
 #include "openvino/openvino.hpp"
 
 namespace fastdeploy {
 
 struct OpenVINOBackendOption {
-  int cpu_thread_num = 8;
+  int cpu_thread_num = -1;
+  int ov_num_streams = 1;
   std::map<std::string, std::vector<int64_t>> shape_infos;
 };
 
 class OpenVINOBackend : public BaseBackend {
  public:
+  static ov::Core core_;
   OpenVINOBackend() {}
   virtual ~OpenVINOBackend() = default;
 

@@ -54,10 +57,13 @@ class OpenVINOBackend : public BaseBackend {
   std::vector<TensorInfo> GetInputInfos() override;
   std::vector<TensorInfo> GetOutputInfos() override;
 
+  std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
+                                     int device_id = -1) override;
+
  private:
   void InitTensorInfo(const std::vector<ov::Output<ov::Node>>& ov_outputs,
                       std::map<std::string, TensorInfo>* tensor_infos);
-  ov::Core core_;
   ov::CompiledModel compiled_model_;
   ov::InferRequest request_;
   OpenVINOBackendOption option_;
@@ -216,6 +216,30 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
   return true;
 }
 
+std::unique_ptr<BaseBackend> PaddleBackend::Clone(void *stream, int device_id) {
+  std::unique_ptr<BaseBackend> new_backend = utils::make_unique<PaddleBackend>();
+  auto casted_backend = dynamic_cast<PaddleBackend*>(new_backend.get());
+  if(device_id > 0 && option_.use_gpu == true && device_id != option_.gpu_id) {
+    auto clone_option = option_;
+    clone_option.gpu_id = device_id;
+    clone_option.external_stream_ = stream;
+    casted_backend->InitFromPaddle(clone_option.model_file,
+                                   clone_option.params_file,
+                                   clone_option);
+    FDWARNING << "The target device id:"
+              << device_id
+              << " is different from current device id:"
+              << option_.gpu_id
+              << ", cannot share memory with current engine."
+              << std::endl;
+    return new_backend;
+  }
+  casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
+  casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end());
+  casted_backend->predictor_ = std::move(predictor_->Clone(stream));
+  return new_backend;
+}
+
 #ifdef ENABLE_TRT_BACKEND
 void PaddleBackend::SetTRTDynamicShapeToConfig(const PaddleBackendOption& option) {
   std::map<std::string, std::vector<int>> max_shape;
@@ -24,6 +24,7 @@
 #include "paddle2onnx/converter.h"
 #endif
 #include "paddle_inference_api.h"  // NOLINT
+#include "fastdeploy/utils/unique_ptr.h"
 
 #ifdef ENABLE_TRT_BACKEND
 #include "fastdeploy/backends/tensorrt/trt_backend.h"

@@ -43,6 +44,9 @@ struct IpuOption {
 };
 
 struct PaddleBackendOption {
+  std::string model_file = "";   // Path of model file
+  std::string params_file = "";  // Path of parameters file, can be empty
+
 #ifdef WITH_GPU
   bool use_gpu = true;
 #else

@@ -110,6 +114,9 @@ class PaddleBackend : public BaseBackend {
 
   int NumOutputs() const override { return outputs_desc_.size(); }
 
+  std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
+                                     int device_id = -1) override;
+
   TensorInfo GetInputInfo(int index) override;
   TensorInfo GetOutputInfo(int index) override;
   std::vector<TensorInfo> GetInputInfos() override;
@@ -285,6 +285,7 @@ bool TrtBackend::Infer(std::vector<FDTensor>& inputs,
     BuildTrtEngine();
   }
 
+  cudaSetDevice(option_.gpu_id);
   SetInputs(inputs);
   AllocateOutputsBuffer(outputs);
 

@@ -356,13 +357,17 @@ void TrtBackend::GetInputOutputInfo() {
       outputs_device_buffer_[name] = FDDeviceBuffer(dtype);
       casted_output_tensors_[name] = FDTensor();
     }
+    io_name_index_[name] = i;
   }
   bindings_.resize(num_binds);
 }
 
 void TrtBackend::SetInputs(const std::vector<FDTensor>& inputs) {
   for (const auto& item : inputs) {
-    auto idx = engine_->getBindingIndex(item.name.c_str());
+    // auto idx = engine_->getBindingIndex(item.name.c_str());
+    auto iter = io_name_index_.find(item.name);
+    FDASSERT(iter != io_name_index_.end(), "TRTBackend SetInputs not find name:%s", item.name.c_str());
+    auto idx = iter->second;
     std::vector<int> shape(item.shape.begin(), item.shape.end());
     auto dims = ToDims(shape);
     context_->setBindingDimensions(idx, dims);

@@ -410,7 +415,10 @@ void TrtBackend::AllocateOutputsBuffer(std::vector<FDTensor>* outputs) {
     outputs->resize(outputs_desc_.size());
   }
   for (size_t i = 0; i < outputs_desc_.size(); ++i) {
-    auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
+    // auto idx = engine_->getBindingIndex(outputs_desc_[i].name.c_str());
+    auto idx_iter = io_name_index_.find(outputs_desc_[i].name);
+    FDASSERT(idx_iter != io_name_index_.end(), "TRTBackend Outputs not find name:%s", outputs_desc_[i].name.c_str());
+    auto idx = idx_iter->second;
     auto output_dims = context_->getBindingDimensions(idx);
 
     // find the original index of output

@@ -673,4 +681,47 @@ std::vector<TensorInfo> TrtBackend::GetOutputInfos() {
   return infos;
 }
 
+std::unique_ptr<BaseBackend> TrtBackend::Clone(void *stream, int device_id) {
+  std::unique_ptr<BaseBackend> new_backend = utils::make_unique<TrtBackend>();
+  auto casted_backend = dynamic_cast<TrtBackend*>(new_backend.get());
+  if(device_id > 0 && device_id != option_.gpu_id) {
+    auto clone_option = option_;
+    clone_option.gpu_id = device_id;
+    clone_option.external_stream_ = stream;
+    if (option_.model_format == ModelFormat::ONNX) {
+      FDASSERT(casted_backend->InitFromOnnx(option_.model_file, clone_option),
+               "Clone model from ONNX failed while initialize TrtBackend.");
+    } else {
+      FDASSERT(casted_backend->InitFromPaddle(option_.model_file,
+                                              option_.params_file, clone_option),
+               "Clone model from Paddle failed while initialize TrtBackend.");
+    }
+    FDWARNING << "The target device id:"
+              << device_id
+              << " is different from current device id:"
+              << option_.gpu_id
+              << ", cannot share memory with current engine."
+              << std::endl;
+    return new_backend;
+  }
+  cudaSetDevice(option_.gpu_id);
+  casted_backend->option_.gpu_id = option_.gpu_id;
+  if (stream) {
+    casted_backend->stream_ = reinterpret_cast<cudaStream_t>(stream);
+  } else {
+    FDASSERT(cudaStreamCreate(&casted_backend->stream_) == 0,
+             "[ERROR] Error occurs while clone calling cudaStreamCreate().");
+  }
+  casted_backend->inputs_desc_.assign(inputs_desc_.begin(), inputs_desc_.end());
+  casted_backend->outputs_desc_.assign(outputs_desc_.begin(), outputs_desc_.end());
+  casted_backend->outputs_order_.insert(outputs_order_.begin(), outputs_order_.end());
+  casted_backend->shape_range_info_.insert(shape_range_info_.begin(), shape_range_info_.end());
+  casted_backend->engine_ = engine_;
+  casted_backend->context_ = std::shared_ptr<nvinfer1::IExecutionContext>(
+      casted_backend->engine_->createExecutionContext());
+  casted_backend->GetInputOutputInfo();
+  FDINFO << "TRTBackend clone finish." << std::endl;
+  return new_backend;
+}
+
 }  // namespace fastdeploy
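Note (illustrative, not from this commit): TrtBackend::Clone above shares engine_ with the source backend and gives the copy its own execution context, using the passed-in stream or creating one via cudaStreamCreate when none is given. A hedged sketch of handing a clone a dedicated stream at the Runtime level; the helper name is hypothetical.

    #include <cuda_runtime_api.h>
    #include "fastdeploy/runtime.h"

    // Hypothetical helper: clone an existing TensorRT-backed Runtime onto its own
    // CUDA stream so the copy can be driven independently of the source instance.
    fastdeploy::Runtime* CloneOnOwnStream(fastdeploy::Runtime& src, int device_id) {
      cudaStream_t stream = nullptr;
      cudaStreamCreate(&stream);  // error handling omitted in this sketch
      return src.Clone(reinterpret_cast<void*>(stream), device_id);
    }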
@@ -25,6 +25,7 @@
 #include "NvOnnxParser.h"
 #include "fastdeploy/backends/backend.h"
 #include "fastdeploy/backends/tensorrt/utils.h"
+#include "fastdeploy/utils/unique_ptr.h"
 
 class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
  public:

@@ -45,7 +46,7 @@ class Int8EntropyCalibrator2 : public nvinfer1::IInt8EntropyCalibrator2 {
 
   void writeCalibrationCache(const void* cache,
                              size_t length) noexcept override {
-    std::cout << "NOT IMPLEMENT." << std::endl;
+    fastdeploy::FDERROR << "NOT IMPLEMENT." << std::endl;
   }
 
  private:

@@ -62,6 +63,11 @@ struct TrtValueInfo {
 };
 
 struct TrtBackendOption {
+  std::string model_file = "";   // Path of model file
+  std::string params_file = "";  // Path of parameters file, can be empty
+  // format of input model
+  ModelFormat model_format = ModelFormat::AUTOREC;
+
   int gpu_id = 0;
   bool enable_fp16 = false;
   bool enable_int8 = false;

@@ -99,6 +105,8 @@ class TrtBackend : public BaseBackend {
   TensorInfo GetOutputInfo(int index);
   std::vector<TensorInfo> GetInputInfos() override;
   std::vector<TensorInfo> GetOutputInfos() override;
+  std::unique_ptr<BaseBackend> Clone(void *stream = nullptr,
+                                     int device_id = -1) override;
 
   ~TrtBackend() {
     if (parser_) {

@@ -119,6 +127,7 @@ class TrtBackend : public BaseBackend {
   std::vector<TrtValueInfo> outputs_desc_;
   std::map<std::string, FDDeviceBuffer> inputs_device_buffer_;
   std::map<std::string, FDDeviceBuffer> outputs_device_buffer_;
+  std::map<std::string, int> io_name_index_;
 
   std::string calibration_str_;
 
@@ -182,4 +182,31 @@ const FDDataType TypeToDataType<uint8_t>::dtype = UINT8;
 template <>
 const FDDataType TypeToDataType<int8_t>::dtype = INT8;
 
+std::string Str(const ModelFormat& f) {
+  if (f == ModelFormat::PADDLE) {
+    return "ModelFormat::PADDLE";
+  } else if (f == ModelFormat::ONNX) {
+    return "ModelFormat::ONNX";
+  } else if (f == ModelFormat::RKNN) {
+    return "ModelFormat::RKNN";
+  } else if (f == ModelFormat::TORCHSCRIPT) {
+    return "ModelFormat::TORCHSCRIPT";
+  }
+  return "UNKNOWN-ModelFormat";
+}
+
+std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
+  if (format == ModelFormat::PADDLE) {
+    out << "ModelFormat::PADDLE";
+  } else if (format == ModelFormat::ONNX) {
+    out << "ModelFormat::ONNX";
+  } else if (format == ModelFormat::RKNN) {
+    out << "ModelFormat::RKNN";
+  } else if (format == ModelFormat::TORCHSCRIPT) {
+    out << "ModelFormat::TORCHSCRIPT";
+  }
+  out << "UNKNOWN-ModelFormat";
+  return out;
+}
+
 }  // namespace fastdeploy

@@ -65,4 +65,16 @@ struct FASTDEPLOY_DECL TypeToDataType {
   static const FDDataType dtype;
 };
 
+/*! Deep learning model format */
+enum ModelFormat {
+  AUTOREC,      ///< Auto recognize the model format by model file name
+  PADDLE,       ///< Model with paddlepaddle format
+  ONNX,         ///< Model with ONNX format
+  RKNN,         ///< Model with RKNN format
+  TORCHSCRIPT,  ///< Model with TorchScript format
+};
+
+FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
+                                         const ModelFormat& format);
+
 }  // namespace fastdeploy
@@ -102,19 +102,6 @@ std::string Str(const Backend& b) {
   return "UNKNOWN-Backend";
 }
 
-std::string Str(const ModelFormat& f) {
-  if (f == ModelFormat::PADDLE) {
-    return "ModelFormat::PADDLE";
-  } else if (f == ModelFormat::ONNX) {
-    return "ModelFormat::ONNX";
-  } else if (f == ModelFormat::RKNN) {
-    return "ModelFormat::RKNN";
-  } else if (f == ModelFormat::TORCHSCRIPT) {
-    return "ModelFormat::TORCHSCRIPT";
-  }
-  return "UNKNOWN-ModelFormat";
-}
-
 std::ostream& operator<<(std::ostream& out, const Backend& backend) {
   if (backend == Backend::ORT) {
     out << "Backend::ORT";

@@ -135,20 +122,6 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) {
   return out;
 }
 
-std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
-  if (format == ModelFormat::PADDLE) {
-    out << "ModelFormat::PADDLE";
-  } else if (format == ModelFormat::ONNX) {
-    out << "ModelFormat::ONNX";
-  } else if (format == ModelFormat::RKNN) {
-    out << "ModelFormat::RKNN";
-  } else if (format == ModelFormat::TORCHSCRIPT) {
-    out << "ModelFormat::TORCHSCRIPT";
-  }
-  out << "UNKNOWN-ModelFormat";
-  return out;
-}
-
 bool CheckModelFormat(const std::string& model_file,
                       const ModelFormat& model_format) {
   if (model_format == ModelFormat::PADDLE) {

@@ -411,6 +384,10 @@ void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
   trt_serialize_file = cache_file_path;
 }
 
+void RuntimeOption::SetOpenVINOStreams(int num_streams) {
+  ov_num_streams = num_streams;
+}
+
 bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                       const RuntimeOption& _option) {
 #ifdef ENABLE_POROS_BACKEND

@@ -582,6 +559,8 @@ bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
 void Runtime::CreatePaddleBackend() {
 #ifdef ENABLE_PADDLE_BACKEND
   auto pd_option = PaddleBackendOption();
+  pd_option.model_file = option.model_file;
+  pd_option.params_file = option.params_file;
   pd_option.enable_mkldnn = option.pd_enable_mkldnn;
   pd_option.enable_log_info = option.pd_enable_log_info;
   pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size;

@@ -642,6 +621,7 @@ void Runtime::CreateOpenVINOBackend() {
 #ifdef ENABLE_OPENVINO_BACKEND
   auto ov_option = OpenVINOBackendOption();
   ov_option.cpu_thread_num = option.cpu_thread_num;
+  ov_option.ov_num_streams = option.ov_num_streams;
   FDASSERT(option.model_format == ModelFormat::PADDLE ||
                option.model_format == ModelFormat::ONNX,
            "OpenVINOBackend only support model format of ModelFormat::PADDLE / "

@@ -699,6 +679,9 @@ void Runtime::CreateOrtBackend() {
 void Runtime::CreateTrtBackend() {
 #ifdef ENABLE_TRT_BACKEND
   auto trt_option = TrtBackendOption();
+  trt_option.model_file = option.model_file;
+  trt_option.params_file = option.params_file;
+  trt_option.model_format = option.model_format;
   trt_option.gpu_id = option.device_id;
   trt_option.enable_fp16 = option.trt_enable_fp16;
   trt_option.enable_int8 = option.trt_enable_int8;

@@ -771,4 +754,26 @@ void Runtime::CreateRKNPU2Backend() {
 #endif
 }
 
+Runtime* Runtime::Clone(void* stream, int device_id) {
+  Runtime* runtime = new Runtime();
+  if (option.backend != Backend::OPENVINO
+      && option.backend != Backend::PDINFER
+      && option.backend != Backend::TRT
+     ) {
+    runtime->Init(option);
+    FDWARNING << "Only OpenVINO/Paddle Inference/TensorRT support \
+clone engine to reduce CPU/GPU memory usage now. For "
+              << option.backend
+              << ", FastDeploy will create a new engine which \
+will not share memory with the current runtime."
+              << std::endl;
+    return runtime;
+  }
+  FDINFO << "Runtime Clone with Backend:: " << Str(option.backend) << " in " << Str(option.device)
+         << "." << std::endl;
+  runtime->option = option;
+  runtime->backend_ = backend_->Clone(stream, device_id);
+  return runtime;
+}
+
 }  // namespace fastdeploy
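Note (illustrative, not from this commit): Runtime::Clone above only shares the underlying engine for the OPENVINO, PDINFER and TRT backends; any other backend falls back to a full Init and warns that memory is not shared. A hedged sketch of the intended multi-instance pattern, creating the clones serially (Runtime creation is not thread-safe, as the serving changes below note) and running each on its own worker thread; the function name is hypothetical.

    #include <thread>
    #include <vector>
    #include "fastdeploy/runtime.h"

    void ServeWithClones(fastdeploy::Runtime& main_runtime, int num_workers) {
      // Create all clones up front, in one thread; each clone shares the
      // engine/weights of main_runtime instead of loading the model again.
      std::vector<fastdeploy::Runtime*> clones;
      for (int i = 0; i < num_workers; ++i) {
        clones.push_back(main_runtime.Clone());
      }
      std::vector<std::thread> workers;
      for (fastdeploy::Runtime* local : clones) {
        workers.emplace_back([local]() {
          std::vector<fastdeploy::FDTensor> inputs, outputs;
          // ... fill inputs for this worker, then:
          // local->Infer(inputs, &outputs);
        });
      }
      for (auto& t : workers) t.join();
      for (fastdeploy::Runtime* local : clones) delete local;
    }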
@@ -35,38 +35,27 @@ namespace fastdeploy {
 
 /*! Inference backend supported in FastDeploy */
 enum Backend {
   UNKNOWN,   ///< Unknown inference backend
   ORT,       ///< ONNX Runtime, support Paddle/ONNX format model, CPU / Nvidia GPU
   TRT,       ///< TensorRT, support Paddle/ONNX format model, Nvidia GPU only
   PDINFER,   ///< Paddle Inference, support Paddle format model, CPU / Nvidia GPU
   POROS,     ///< Poros, support TorchScript format model, CPU / Nvidia GPU
   OPENVINO,  ///< Intel OpenVINO, support Paddle/ONNX format, CPU only
   LITE,      ///< Paddle Lite, support Paddle format model, ARM CPU only
   RKNPU2,    ///< RKNPU2, support RKNN format model, Rockchip NPU only
 };
 
-/*! Deep learning model format */
-enum ModelFormat {
-  AUTOREC,      ///< Auto recognize the model format by model file name
-  PADDLE,       ///< Model with paddlepaddle format
-  ONNX,         ///< Model with ONNX format
-  RKNN,         ///< Model with RKNN format
-  TORCHSCRIPT,  ///< Model with TorchScript format
-};
-
 FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
                                          const Backend& backend);
-FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& out,
-                                         const ModelFormat& format);
 
 /*! Paddle Lite power mode for mobile device. */
 enum LitePowerMode {
   LITE_POWER_HIGH = 0,       ///< Use Lite Backend with high power mode
   LITE_POWER_LOW = 1,        ///< Use Lite Backend with low power mode
   LITE_POWER_FULL = 2,       ///< Use Lite Backend with full power mode
   LITE_POWER_NO_BIND = 3,    ///< Use Lite Backend with no bind power mode
   LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high mode
   LITE_POWER_RAND_LOW = 5    ///< Use Lite Backend with rand low power mode
 };
 
 FASTDEPLOY_DECL std::string Str(const Backend& b);

@@ -105,8 +94,10 @@ struct FASTDEPLOY_DECL RuntimeOption {
   /// Use Nvidia GPU to inference
   void UseGpu(int gpu_id = 0);
 
-  void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name = fastdeploy::rknpu2::CpuName::RK3588,
-                 fastdeploy::rknpu2::CoreMask rknpu2_core = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
+  void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name
+                     = fastdeploy::rknpu2::CpuName::RK3588,
+                 fastdeploy::rknpu2::CoreMask rknpu2_core
+                     = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
 
   void SetExternalStream(void* external_stream);
 

@@ -242,6 +233,11 @@ struct FASTDEPLOY_DECL RuntimeOption {
   */
   void DisablePaddleTrtCollectShape();
 
+  /*
+   * @brief Set number of streams by the OpenVINO backends
+   */
+  void SetOpenVINOStreams(int num_streams);
+
   /** \Use Graphcore IPU to inference.
    *
    * \param[in] device_num the number of IPUs.

@@ -331,13 +327,19 @@ struct FASTDEPLOY_DECL RuntimeOption {
   int unconst_ops_thres = -1;
   std::string poros_file = "";
 
+  // ======Only for OpenVINO Backend=======
+  int ov_num_streams = 1;
+
   // ======Only for RKNPU2 Backend=======
-  fastdeploy::rknpu2::CpuName rknpu2_cpu_name_ = fastdeploy::rknpu2::CpuName::RK3588;
-  fastdeploy::rknpu2::CoreMask rknpu2_core_mask_ = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
+  fastdeploy::rknpu2::CpuName rknpu2_cpu_name_
+      = fastdeploy::rknpu2::CpuName::RK3588;
+  fastdeploy::rknpu2::CoreMask rknpu2_core_mask_
+      = fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO;
 
   std::string model_file = "";   // Path of model file
   std::string params_file = "";  // Path of parameters file, can be empty
-  ModelFormat model_format = ModelFormat::AUTOREC;  // format of input model
+  // format of input model
+  ModelFormat model_format = ModelFormat::AUTOREC;
 };
 
 /*! @brief Runtime object used to inference the loaded model on different devices

@@ -384,6 +386,14 @@ struct FASTDEPLOY_DECL Runtime {
   */
   std::vector<TensorInfo> GetOutputInfos();
 
+  /** \brief Clone new Runtime when multiple instances of the same model are created
+   *
+   * \param[in] stream CUDA Stream, defualt param is nullptr
+   * \return new Runtime* by this clone
+   */
+  Runtime* Clone(void* stream = nullptr,
+                 int device_id = -1);
+
   RuntimeOption option;
 
  private:

@@ -395,4 +405,4 @@ struct FASTDEPLOY_DECL Runtime {
   void CreateRKNPU2Backend();
   std::unique_ptr<BaseBackend> backend_;
 };
 }  // namespace fastdeploy
@@ -142,8 +142,10 @@ optimization {
   cpu_execution_accelerator : [
     {
       name : "openvino"
-      # Set the number of inference threads to 4
+      # Set the number of inference threads to 4 (total across all instances)
       parameters { key: "cpu_threads" value: "4" }
+      # Set OpenVINO num_streams (usually equal to the number of instances)
+      parameters { key: "num_streams" value: "1" }
     }
   ]
 }
@@ -91,6 +91,9 @@ class ModelState : public BackendModel {
 
   // Runtime options used when creating a FastDeploy Runtime.
   std::unique_ptr<fastdeploy::RuntimeOption> runtime_options_;
+  bool model_load_;
+  fastdeploy::Runtime* main_runtime_;
+  bool is_clone_ = true;
 
   // model_outputs is a map that contains unique outputs that the model must
   // provide. In the model configuration, the output in the state configuration

@@ -165,7 +168,7 @@ TRITONSERVER_Error* ModelState::Create(TRITONBACKEND_Model* triton_model,
 }
 
 ModelState::ModelState(TRITONBACKEND_Model* triton_model)
-    : BackendModel(triton_model) {
+    : BackendModel(triton_model), model_load_(false), main_runtime_(nullptr), is_clone_(true) {
   // Create runtime options that will be cloned and used for each
   // instance when creating that instance's runtime.
   runtime_options_.reset(new fastdeploy::RuntimeOption());

@@ -218,19 +221,6 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
         THROW_IF_BACKEND_MODEL_ERROR(
             ParseIntValue(value_string, &cpu_thread_num));
         runtime_options_->SetCpuThreadNum(cpu_thread_num);
-        // } else if (param_key == "graph_level") {
-        //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-        //       value_string, &runtime_options_->ort_graph_opt_level));
-        // } else if (param_key == "inter_op_num_threads") {
-        //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-        //       value_string,
-        //       &runtime_options_->ort_inter_op_num_threads));
-        // } else if (param_key == "execution_mode") {
-        //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-        //       value_string, &runtime_options_->ort_execution_mode));
-        // } else if (param_key == "capacity") {
-        //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-        //       value_string, &runtime_options_->pd_mkldnn_cache_size));
       } else if (param_key == "use_mkldnn") {
         bool pd_enable_mkldnn;
         THROW_IF_BACKEND_MODEL_ERROR(

@@ -238,8 +228,16 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
         runtime_options_->SetPaddleMKLDNN(pd_enable_mkldnn);
       } else if (param_key == "use_paddle_log") {
         runtime_options_->EnablePaddleLogInfo();
+      } else if (param_key == "num_streams") {
+        int num_streams;
+        THROW_IF_BACKEND_MODEL_ERROR(
+            ParseIntValue(value_string, &num_streams));
+        runtime_options_->SetOpenVINOStreams(num_streams);
+      } else if (param_key == "is_clone") {
+        THROW_IF_BACKEND_MODEL_ERROR(
+            ParseBoolValue(value_string, &is_clone_));
       } else if (param_key == "use_ipu") {
-        runtime_options_->UseIpu();
+        // runtime_options_->UseIpu();
       }
     }
   }

@@ -290,17 +288,6 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
       std::string value_string;
       THROW_IF_BACKEND_MODEL_ERROR(
           params.MemberAsString(param_key.c_str(), &value_string));
-      // if (param_key == "graph_level") {
-      //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-      //       value_string, &runtime_options_->ort_graph_opt_level));
-      // } else if (param_key == "inter_op_num_threads") {
-      //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-      //       value_string,
-      //       &runtime_options_->ort_inter_op_num_threads));
-      // } else if (param_key == "execution_mode") {
-      //   THROW_IF_BACKEND_MODEL_ERROR(ParseIntValue(
-      //       value_string, &runtime_options_->ort_execution_mode));
-      // }
       if (param_key == "precision") {
         std::transform(value_string.begin(), value_string.end(),
                        value_string.begin(), ::tolower);

@@ -325,7 +312,10 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model)
         runtime_options_->EnablePaddleToTrt();
       } else if (param_key == "use_paddle_log") {
         runtime_options_->EnablePaddleLogInfo();
-      }
+      } else if (param_key == "is_clone") {
+        THROW_IF_BACKEND_MODEL_ERROR(
+            ParseBoolValue(value_string, &is_clone_));
+      }
     }
   }
 }
@@ -340,64 +330,79 @@ TRITONSERVER_Error* ModelState::LoadModel(
     const int32_t instance_group_device_id, std::string* model_path,
     std::string* params_path, fastdeploy::Runtime** runtime,
    cudaStream_t stream) {
-  auto dir_path = JoinPath({RepositoryPath(), std::to_string(Version())});
-  {
-    // ONNX Format
-    bool exists;
-    *model_path = JoinPath({dir_path, "model.onnx"});
-    RETURN_IF_ERROR(FileExists(*model_path, &exists));
+  // FastDeploy Runtime creation is not thread-safe, so multiple creations
+  // are serialized with a global lock.
+  // The Clone interface can be invoked only when the main_runtime_ is created.
+  static std::mutex global_context_mu;
+  std::lock_guard<std::mutex> glock(global_context_mu);
 
-    // Paddle Formax
-    if (not exists) {
-      *model_path = JoinPath({dir_path, "model.pdmodel"});
-      RETURN_IF_ERROR(FileExists(*model_path, &exists));
-      if (not exists) {
-        return TRITONSERVER_ErrorNew(
-            TRITONSERVER_ERROR_NOT_FOUND,
-            std::string(
-                "Model should be named as 'model.onnx' or 'model.pdmodel'")
-                .c_str());
-      }
-      *params_path = JoinPath({dir_path, "model.pdiparams"});
-      RETURN_IF_ERROR(FileExists(*params_path, &exists));
-      if (not exists) {
-        return TRITONSERVER_ErrorNew(
-            TRITONSERVER_ERROR_NOT_FOUND,
-            std::string("Paddle params should be named as 'model.pdiparams' or "
-                        "not provided.'")
-                .c_str());
-      }
-      runtime_options_->model_format = fastdeploy::ModelFormat::PADDLE;
-      runtime_options_->model_file = *model_path;
-      runtime_options_->params_file = *params_path;
-    } else {
-      runtime_options_->model_format = fastdeploy::ModelFormat::ONNX;
-      runtime_options_->model_file = *model_path;
+  if(model_load_ && is_clone_) {
+    if(main_runtime_ == nullptr) {
+      return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_NOT_FOUND,
+                                   std::string("main_runtime is nullptr").c_str());
     }
-  }
+    *runtime = main_runtime_->Clone((void*)stream, instance_group_device_id);
 
-  // GPU
-#ifdef TRITON_ENABLE_GPU
-  if ((instance_group_kind == TRITONSERVER_INSTANCEGROUPKIND_GPU) ||
-      (instance_group_kind == TRITONSERVER_INSTANCEGROUPKIND_AUTO)) {
-    runtime_options_->UseGpu(instance_group_device_id);
-    runtime_options_->SetExternalStream((void*)stream);
   } else {
-    runtime_options_->UseCpu();
-  }
-#else
-  if (runtime_options_->device != fastdeploy::Device::IPU) {
-    // If Device is set to IPU, just skip CPU setting.
-    runtime_options_->UseCpu();
-  }
-#endif  // TRITON_ENABLE_GPU
+    auto dir_path = JoinPath({RepositoryPath(), std::to_string(Version())});
+    {
+      // ONNX Format
+      bool exists;
+      *model_path = JoinPath({dir_path, "model.onnx"});
+      RETURN_IF_ERROR(FileExists(*model_path, &exists));
 
-  *runtime = new fastdeploy::Runtime();
-  if (!(*runtime)->Init(*runtime_options_)) {
-    return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_NOT_FOUND,
-                                 std::string("Runtime init error").c_str());
-  }
+      // Paddle Formax
+      if (not exists) {
+        *model_path = JoinPath({dir_path, "model.pdmodel"});
+        RETURN_IF_ERROR(FileExists(*model_path, &exists));
+        if (not exists) {
+          return TRITONSERVER_ErrorNew(
+              TRITONSERVER_ERROR_NOT_FOUND,
+              std::string(
+                  "Model should be named as 'model.onnx' or 'model.pdmodel'")
+                  .c_str());
+        }
+        *params_path = JoinPath({dir_path, "model.pdiparams"});
+        RETURN_IF_ERROR(FileExists(*params_path, &exists));
+        if (not exists) {
+          return TRITONSERVER_ErrorNew(
+              TRITONSERVER_ERROR_NOT_FOUND,
+              std::string("Paddle params should be named as 'model.pdiparams' or "
+                          "not provided.'")
+                  .c_str());
+        }
+        runtime_options_->model_format = fastdeploy::ModelFormat::PADDLE;
+        runtime_options_->model_file = *model_path;
+        runtime_options_->params_file = *params_path;
+      } else {
+        runtime_options_->model_format = fastdeploy::ModelFormat::ONNX;
+        runtime_options_->model_file = *model_path;
+      }
+    }
+
+    // GPU
+#ifdef TRITON_ENABLE_GPU
+    if ((instance_group_kind == TRITONSERVER_INSTANCEGROUPKIND_GPU) ||
+        (instance_group_kind == TRITONSERVER_INSTANCEGROUPKIND_AUTO)) {
+      runtime_options_->UseGpu(instance_group_device_id);
+      runtime_options_->SetExternalStream((void*)stream);
+    } else if (runtime_options_->device != fastdeploy::Device::IPU) {
+      runtime_options_->UseCpu();
+    }
+#else
+    if (runtime_options_->device != fastdeploy::Device::IPU) {
+      // If Device is set to IPU, just skip CPU setting.
+      runtime_options_->UseCpu();
+    }
+#endif  // TRITON_ENABLE_GPU
+
+    *runtime = main_runtime_ = new fastdeploy::Runtime();
+    if (!(*runtime)->Init(*runtime_options_)) {
+      return TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_NOT_FOUND,
+                                   std::string("Runtime init error").c_str());
+    }
+    model_load_ = true;
+  }
   return nullptr;  // success
 }