Mirror of https://github.com/PaddlePaddle/FastDeploy.git, synced 2025-10-19 23:16:15 +08:00
[Backend & Serving] Serving and Runtime support Clone (#464)
* Add Clone support to Serving and Runtime
* Support the TRT, OpenVINO and Paddle backends

Co-authored-by: Jason <jiangjiajun@baidu.com>
@@ -102,19 +102,6 @@ std::string Str(const Backend& b) {
```cpp
  return "UNKNOWN-Backend";
}

std::string Str(const ModelFormat& f) {
  if (f == ModelFormat::PADDLE) {
    return "ModelFormat::PADDLE";
  } else if (f == ModelFormat::ONNX) {
    return "ModelFormat::ONNX";
  } else if (f == ModelFormat::RKNN) {
    return "ModelFormat::RKNN";
  } else if (f == ModelFormat::TORCHSCRIPT) {
    return "ModelFormat::TORCHSCRIPT";
  }
  return "UNKNOWN-ModelFormat";
}

std::ostream& operator<<(std::ostream& out, const Backend& backend) {
  if (backend == Backend::ORT) {
    out << "Backend::ORT";
```
@@ -135,20 +122,6 @@ std::ostream& operator<<(std::ostream& out, const Backend& backend) {
```cpp
  return out;
}

std::ostream& operator<<(std::ostream& out, const ModelFormat& format) {
  if (format == ModelFormat::PADDLE) {
    out << "ModelFormat::PADDLE";
  } else if (format == ModelFormat::ONNX) {
    out << "ModelFormat::ONNX";
  } else if (format == ModelFormat::RKNN) {
    out << "ModelFormat::RKNN";
  } else if (format == ModelFormat::TORCHSCRIPT) {
    out << "ModelFormat::TORCHSCRIPT";
  } else {
    out << "UNKNOWN-ModelFormat";
  }
  return out;
}

bool CheckModelFormat(const std::string& model_file,
                      const ModelFormat& model_format) {
  if (model_format == ModelFormat::PADDLE) {
```
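As context for the helpers above, here is a minimal sketch of how the Str() and operator<< overloads are typically consumed; the fastdeploy/runtime.h include path is an assumption:

```cpp
#include <iostream>
#include "fastdeploy/runtime.h"  // assumed header exposing Backend/ModelFormat

int main() {
  // operator<< (defined above) streams the enum's symbolic name.
  std::cout << fastdeploy::ModelFormat::ONNX << std::endl;  // ModelFormat::ONNX
  // Str() returns the same name as a std::string.
  std::cout << fastdeploy::Str(fastdeploy::Backend::ORT) << std::endl;
  return 0;
}
```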
@@ -411,6 +384,10 @@ void RuntimeOption::SetTrtCacheFile(const std::string& cache_file_path) {
```cpp
  trt_serialize_file = cache_file_path;
}

void RuntimeOption::SetOpenVINOStreams(int num_streams) {
  ov_num_streams = num_streams;
}

bool Runtime::Compile(std::vector<std::vector<FDTensor>>& prewarm_tensors,
                      const RuntimeOption& _option) {
#ifdef ENABLE_POROS_BACKEND
```
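SetOpenVINOStreams() is one of the new knobs this commit threads through to the backend. A minimal usage sketch; SetModelPath(), UseOpenVINOBackend() and Init() are assumed from the existing RuntimeOption/Runtime API:

```cpp
#include "fastdeploy/runtime.h"  // assumed public header

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // assumed setter
  option.UseOpenVINOBackend();   // assumed backend selector
  option.SetOpenVINOStreams(4);  // new in this commit: forwarded to ov_num_streams

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return -1;  // Init() assumed from the existing API
  return 0;
}
```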
@@ -582,6 +559,8 @@ bool Runtime::Infer(std::vector<FDTensor>& input_tensors,
```cpp
void Runtime::CreatePaddleBackend() {
#ifdef ENABLE_PADDLE_BACKEND
  auto pd_option = PaddleBackendOption();
  pd_option.model_file = option.model_file;
  pd_option.params_file = option.params_file;
  pd_option.enable_mkldnn = option.pd_enable_mkldnn;
  pd_option.enable_log_info = option.pd_enable_log_info;
  pd_option.mkldnn_cache_size = option.pd_mkldnn_cache_size;
```
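The pd_* fields copied here are set on RuntimeOption by the caller; a sketch under the assumption that they are public members, as the code above suggests:

```cpp
#include "fastdeploy/runtime.h"  // assumed public header

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // assumed setter
  option.UsePaddleBackend();         // assumed backend selector
  option.pd_enable_mkldnn = true;    // fields shown above, copied into
  option.pd_mkldnn_cache_size = 10;  // PaddleBackendOption by CreatePaddleBackend()
  return 0;
}
```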
@@ -642,6 +621,7 @@ void Runtime::CreateOpenVINOBackend() {
```cpp
#ifdef ENABLE_OPENVINO_BACKEND
  auto ov_option = OpenVINOBackendOption();
  ov_option.cpu_thread_num = option.cpu_thread_num;
  ov_option.ov_num_streams = option.ov_num_streams;
  FDASSERT(option.model_format == ModelFormat::PADDLE ||
               option.model_format == ModelFormat::ONNX,
           "OpenVINOBackend only support model format of ModelFormat::PADDLE / "
```
@@ -699,6 +679,9 @@ void Runtime::CreateOrtBackend() {
```cpp
void Runtime::CreateTrtBackend() {
#ifdef ENABLE_TRT_BACKEND
  auto trt_option = TrtBackendOption();
  trt_option.model_file = option.model_file;
  trt_option.params_file = option.params_file;
  trt_option.model_format = option.model_format;
  trt_option.gpu_id = option.device_id;
  trt_option.enable_fp16 = option.trt_enable_fp16;
  trt_option.enable_int8 = option.trt_enable_int8;
```
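The TensorRT backend reads its settings from the same RuntimeOption. A sketch combining the fields above with SetTrtCacheFile() from this commit; UseGpu() and UseTrtBackend() are assumed from the existing API:

```cpp
#include "fastdeploy/runtime.h"  // assumed public header

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.onnx", "", fastdeploy::ModelFormat::ONNX);  // assumed
  option.UseGpu(0);                     // assumed: ends up in option.device_id
  option.UseTrtBackend();               // assumed backend selector
  option.trt_enable_fp16 = true;        // field shown above
  option.SetTrtCacheFile("model.trt");  // caches the serialized engine
  return 0;
}
```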
@@ -771,4 +754,26 @@ void Runtime::CreateRKNPU2Backend() {
```cpp
#endif
}

Runtime* Runtime::Clone(void* stream, int device_id) {
  Runtime* runtime = new Runtime();
  if (option.backend != Backend::OPENVINO &&
      option.backend != Backend::PDINFER &&
      option.backend != Backend::TRT) {
    runtime->Init(option);
    FDWARNING << "Only OpenVINO/Paddle Inference/TensorRT support clone engine "
                 "to reduce CPU/GPU memory usage now. For "
              << option.backend
              << ", FastDeploy will create a new engine which will not share "
                 "memory with the current runtime."
              << std::endl;
    return runtime;
  }
  FDINFO << "Runtime Clone with Backend: " << Str(option.backend) << " in "
         << Str(option.device) << "." << std::endl;
  runtime->option = option;
  runtime->backend_ = backend_->Clone(stream, device_id);
  return runtime;
}

}  // namespace fastdeploy
```
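Runtime::Clone() is the user-facing piece of this commit: for the OpenVINO, Paddle Inference, and TensorRT backends the clone shares the loaded engine instead of rebuilding it. A minimal sketch of cloning one runtime per worker; Init() and the "reuse current" argument values are assumptions about the existing API:

```cpp
#include <memory>
#include "fastdeploy/runtime.h"  // assumed public header

int main() {
  fastdeploy::RuntimeOption option;
  option.SetModelPath("model.pdmodel", "model.pdiparams");  // assumed setter
  option.UsePaddleBackend();  // one of the three backends that can share engines

  fastdeploy::Runtime runtime;
  if (!runtime.Init(option)) return -1;

  // The clone shares the parent's engine, so the extra CPU/GPU memory is
  // roughly the cost of new I/O buffers, not a second copy of the weights.
  // stream = nullptr and device_id = -1 are assumed "reuse current" values.
  std::unique_ptr<fastdeploy::Runtime> worker(
      runtime.Clone(/*stream=*/nullptr, /*device_id=*/-1));
  return 0;
}
```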