[Backend & Serving] Serving and Runtime support Clone (#464)

* Add Clone support to Serving and Runtime

* Support the TensorRT, OpenVINO, and Paddle backends

Co-authored-by: Jason <jiangjiajun@baidu.com>
Author: heliqi
Date: 2022-11-04 17:16:40 +08:00
Committed by: GitHub
Parent: 61634caf28
Commit: 277bec38c7
13 changed files with 343 additions and 150 deletions
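For context, the headline change is the new Runtime::Clone entry point. A minimal usage sketch (the model paths and the explicit default-argument values here are illustrative assumptions, not part of this commit):

#include "fastdeploy/runtime.h"

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // illustrative paths
  option.UsePaddleBackend();

  fastdeploy::Runtime runtime;
  runtime.Init(option);

  // For OpenVINO/Paddle Inference/TensorRT the clone shares the underlying
  // engine with the original runtime, so it adds little extra CPU/GPU memory.
  fastdeploy::Runtime* cloned = runtime.Clone(/*stream=*/nullptr, /*device_id=*/-1);
  delete cloned;
  return 0;
}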


@@ -102,19 +102,6 @@ std::string Str(const Backend& b) {
return "UNKNOWN-Backend";
}
std::string Str(const ModelFormat& f) {
if (f == ModelFormat::PADDLE) {
return "ModelFormat::PADDLE";
} else if (f == ModelFormat::ONNX) {
return "ModelFormat::ONNX";
} else if (f == ModelFormat::RKNN) {
return "ModelFormat::RKNN";
} else if (f == ModelFormat::TORCHSCRIPT) {
return "ModelFormat::TORCHSCRIPT";
}
return "UNKNOWN-ModelFormat";
}
std::ostream& operator<<(std::ostream& out, const Backend& backend) {
if (backend == Backend::ORT) {
out << "Backend::ORT";
@@ -135,20 +122,6 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) {
return out;
}
std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
if (format == ModelFormat::PADDLE) {
out << "ModelFormat::PADDLE";
} else if (format == ModelFormat::ONNX) {
out << "ModelFormat::ONNX";
} else if (format == ModelFormat::RKNN) {
out << "ModelFormat::RKNN";
} else if (format == ModelFormat::TORCHSCRIPT) {
out << "ModelFormat::TORCHSCRIPT";
} else {
out << "UNKNOWN-ModelFormat";
}
return out;
}
bool CheckModelFormat(const std::string& model_file,
const ModelFormat& model_format) {
if (model_format == ModelFormat::PADDLE) {
@@ -411,6 +384,10 @@ void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
trt_serialize_file = cache_file_path;
}
void RuntimeOption::SetOpenVINOStreams(int num_streams) {
ov_num_streams = num_streams;
}
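A sketch of how the new stream setter pairs with the existing option helpers (UseCpu/UseOpenVINOBackend/UseGpu/UseTrtBackend are the pre-existing RuntimeOption methods; the values are illustrative):

fastdeploy::RuntimeOption cpu_option;
cpu_option.UseCpu();
cpu_option.UseOpenVINOBackend();
cpu_option.SetOpenVINOStreams(4);          // stored in ov_num_streams above

fastdeploy::RuntimeOption gpu_option;
gpu_option.UseGpu(0);
gpu_option.UseTrtBackend();
gpu_option.SetTrtCacheFile("./model.trt"); // stored in trt_serialize_file above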
bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
@@ -582,6 +559,8 @@ bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
void Runtime::CreatePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
auto pd_option = PaddleBackendOption();
pd_option.model_file = option.model_file;
pd_option.params_file = option.params_file;
pd_option.enable_mkldnn = option.pd_enable_mkldnn;
pd_option.enable_log_info = option.pd_enable_log_info;
pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size;
@@ -642,6 +621,7 @@ void Runtime::CreateOpenVINOBackend() {
#ifdef ENABLE_OPENVINO_BACKEND
auto ov_option = OpenVINOBackendOption();
ov_option.cpu_thread_num = option.cpu_thread_num;
ov_option.ov_num_streams = option.ov_num_streams;
FDASSERT(option.model_format == ModelFormat::PADDLE ||
option.model_format == ModelFormat::ONNX,
"OpenVINOBackend only support model format of ModelFormat::PADDLE / "
@@ -699,6 +679,9 @@ void Runtime::CreateOrtBackend() {
void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
auto trt_option = TrtBackendOption();
trt_option.model_file = option.model_file;
trt_option.params_file = option.params_file;
trt_option.model_format = option.model_format;
trt_option.gpu_id = option.device_id;
trt_option.enable_fp16 = option.trt_enable_fp16;
trt_option.enable_int8 = option.trt_enable_int8;
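With model_file/params_file/model_format now forwarded into TrtBackendOption, the TRT path can be configured end-to-end from RuntimeOption alone; a hedged sketch (the file name is illustrative):

fastdeploy::RuntimeOption option;
option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);
option.UseGpu(0);            // becomes trt_option.gpu_id
option.UseTrtBackend();
option.EnableTrtFP16();      // becomes trt_option.enable_fp16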
@@ -771,4 +754,26 @@ void Runtime::CreateRKNPU2Backend() {
#endif
}
Runtime* Runtime::Clone(void* stream, int device_id) {
Runtime* runtime = new Runtime();
if (option.backend != Backend::OPENVINO &&
option.backend != Backend::PDINFER &&
option.backend != Backend::TRT) {
runtime->Init(option);
FDWARNING << "Only OpenVINO/Paddle Inference/TensorRT support "
"cloning the engine to reduce CPU/GPU memory usage now. For "
<< option.backend
<< ", FastDeploy will create a new engine which "
"will not share memory with the current runtime."
<< std::endl;
return runtime;
}
FDINFO << "Runtime Clone with Backend: " << Str(option.backend)
<< " in " << Str(option.device) << "." << std::endl;
runtime->option = option;
runtime->backend_ = backend_->Clone(stream, device_id);
return runtime;
}
} // namespace fastdeploy
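To illustrate the serving pattern this enables, a sketch of a thread-per-worker setup (the worker structure is an assumption for illustration; only Runtime::Clone itself comes from this commit):

#include <thread>
#include <vector>
#include "fastdeploy/runtime.h"

void Serve(fastdeploy::Runtime* base, int num_workers) {
  std::vector<std::thread> workers;
  for (int i = 0; i < num_workers; ++i) {
    workers.emplace_back([base]() {
      // On OpenVINO/Paddle Inference/TensorRT this shares the engine with
      // `base`; other backends fall back to building a fresh engine.
      fastdeploy::Runtime* local = base->Clone(/*stream=*/nullptr, /*device_id=*/-1);
      std::vector<fastdeploy::FDTensor> inputs, outputs;
      // ... fill inputs, then: local->Infer(inputs, &outputs);
      delete local;
    });
  }
  for (auto& t : workers) t.join();
}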