mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-07 01:22:59 +08:00
[Backend] Add fp16 support for ONNXRuntime-GPU (#1239)
* add fp16 support for ort-gpu
* add enable_ort_fp16 option
* fix
* fix bugs
* use fp16 for test
* update code
* update code
* fix windows bug
This commit is contained in:
@@ -43,7 +43,7 @@ else()
|
|||||||
endif(WIN32)
|
endif(WIN32)
|
||||||
|
|
||||||
set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
|
set(PADDLE2ONNX_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
|
||||||
set(PADDLE2ONNX_VERSION "1.0.5")
|
set(PADDLE2ONNX_VERSION "1.0.6")
|
||||||
if(WIN32)
|
if(WIN32)
|
||||||
set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
|
set(PADDLE2ONNX_FILE "paddle2onnx-win-x64-${PADDLE2ONNX_VERSION}.zip")
|
||||||
if(NOT CMAKE_CL_64)
|
if(NOT CMAKE_CL_64)
|
||||||
|
0
fastdeploy/pybind/runtime.cc
Normal file → Executable file
0
fastdeploy/pybind/runtime.cc
Normal file → Executable file
20
fastdeploy/runtime/backends/ort/option.h
Normal file → Executable file
20
fastdeploy/runtime/backends/ort/option.h
Normal file → Executable file
@@ -15,6 +15,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "fastdeploy/core/fd_type.h"
|
#include "fastdeploy/core/fd_type.h"
|
||||||
|
#include "fastdeploy/runtime/enum_variables.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
@@ -25,18 +26,27 @@ namespace fastdeploy {
|
|||||||
/*! @brief Option object to configure ONNX Runtime backend
|
/*! @brief Option object to configure ONNX Runtime backend
|
||||||
*/
|
*/
|
||||||
struct OrtBackendOption {
|
struct OrtBackendOption {
|
||||||
/// Level of graph optimization, -1: mean default(Enable all the optimization strategy)/0: disable all the optimization strategy/1: enable basic strategy/2:enable extend strategy/99: enable all
|
/// Level of graph optimization,
|
||||||
|
/// /-1: mean default(Enable all the optimization strategy)
|
||||||
|
/// /0: disable all the optimization strategy/1: enable basic strategy
|
||||||
|
/// /2:enable extend strategy/99: enable all
|
||||||
int graph_optimization_level = -1;
|
int graph_optimization_level = -1;
|
||||||
/// Number of threads to execute the operator, -1: default
|
/// Number of threads to execute the operator, -1: default
|
||||||
int intra_op_num_threads = -1;
|
int intra_op_num_threads = -1;
|
||||||
/// Number of threads to execute the graph, -1: default. This parameter only will bring effects while the `OrtBackendOption::execution_mode` set to 1.
|
/// Number of threads to execute the graph,
|
||||||
|
/// -1: default. This parameter only will bring effects
|
||||||
|
/// while the `OrtBackendOption::execution_mode` set to 1.
|
||||||
int inter_op_num_threads = -1;
|
int inter_op_num_threads = -1;
|
||||||
/// Execution mode for the graph, -1: default(Sequential mode)/0: Sequential mode, execute the operators in graph one by one. /1: Parallel mode, execute the operators in graph parallelly.
|
/// Execution mode for the graph, -1: default(Sequential mode)
|
||||||
|
/// /0: Sequential mode, execute the operators in graph one by one.
|
||||||
|
/// /1: Parallel mode, execute the operators in graph parallelly.
|
||||||
int execution_mode = -1;
|
int execution_mode = -1;
|
||||||
// Inference device, OrtBackend supports CPU/GPU
|
/// Inference device, OrtBackend supports CPU/GPU
|
||||||
Device device = Device::CPU;
|
Device device = Device::CPU;
|
||||||
// Inference device id
|
/// Inference device id
|
||||||
int device_id = 0;
|
int device_id = 0;
|
||||||
void* external_stream_ = nullptr;
|
void* external_stream_ = nullptr;
|
||||||
|
/// Use fp16 to infer
|
||||||
|
bool enable_fp16 = false;
|
||||||
};
|
};
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
||||||
|
@@ -12,8 +12,8 @@
|
|||||||
// See the License for the specific language governing permissions and
|
// See the License for the specific language governing permissions and
|
||||||
// limitations under the License.
|
// limitations under the License.
|
||||||
|
|
||||||
#include "fastdeploy/pybind/main.h"
|
|
||||||
#include "fastdeploy/runtime/backends/ort/option.h"
|
#include "fastdeploy/runtime/backends/ort/option.h"
|
||||||
|
#include "fastdeploy/pybind/main.h"
|
||||||
|
|
||||||
namespace fastdeploy {
|
namespace fastdeploy {
|
||||||
|
|
||||||
@@ -28,7 +28,8 @@ void BindOrtOption(pybind11::module& m) {
|
|||||||
&OrtBackendOption::inter_op_num_threads)
|
&OrtBackendOption::inter_op_num_threads)
|
||||||
.def_readwrite("execution_mode", &OrtBackendOption::execution_mode)
|
.def_readwrite("execution_mode", &OrtBackendOption::execution_mode)
|
||||||
.def_readwrite("device", &OrtBackendOption::device)
|
.def_readwrite("device", &OrtBackendOption::device)
|
||||||
.def_readwrite("device_id", &OrtBackendOption::device_id);
|
.def_readwrite("device_id", &OrtBackendOption::device_id)
|
||||||
|
.def_readwrite("enable_fp16", &OrtBackendOption::enable_fp16);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
||||||
|
@@ -181,22 +181,21 @@ bool OrtBackend::InitFromPaddle(const std::string& model_buffer,
|
|||||||
strcpy(ops[1].op_name, "pool2d");
|
strcpy(ops[1].op_name, "pool2d");
|
||||||
strcpy(ops[1].export_op_name, "AdaptivePool2d");
|
strcpy(ops[1].export_op_name, "AdaptivePool2d");
|
||||||
|
|
||||||
if (!paddle2onnx::Export(model_buffer.c_str(), model_buffer.size(),
|
if (!paddle2onnx::Export(
|
||||||
params_buffer.c_str(), params_buffer.size(),
|
model_buffer.c_str(), model_buffer.size(), params_buffer.c_str(),
|
||||||
&model_content_ptr, &model_content_size, 11, true,
|
params_buffer.size(), &model_content_ptr, &model_content_size, 11,
|
||||||
verbose, true, true, true, ops.data(), 2,
|
true, verbose, true, true, true, ops.data(), 2, "onnxruntime",
|
||||||
"onnxruntime", nullptr, 0, "", &save_external)) {
|
nullptr, 0, "", &save_external, false)) {
|
||||||
FDERROR << "Error occured while export PaddlePaddle to ONNX format."
|
FDERROR << "Error occured while export PaddlePaddle to ONNX format."
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string onnx_model_proto(model_content_ptr,
|
std::string onnx_model_proto(model_content_ptr,
|
||||||
model_content_ptr + model_content_size);
|
model_content_ptr + model_content_size);
|
||||||
delete[] model_content_ptr;
|
delete[] model_content_ptr;
|
||||||
model_content_ptr = nullptr;
|
model_content_ptr = nullptr;
|
||||||
if (save_external) {
|
if (save_external) {
|
||||||
std::string model_file_name = "model.onnx";
|
model_file_name = "model.onnx";
|
||||||
std::fstream f(model_file_name, std::ios::out);
|
std::fstream f(model_file_name, std::ios::out);
|
||||||
FDASSERT(f.is_open(), "Can not open file: %s to save model.",
|
FDASSERT(f.is_open(), "Can not open file: %s to save model.",
|
||||||
model_file_name.c_str());
|
model_file_name.c_str());
|
||||||
@@ -219,6 +218,22 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
std::string onnx_model_buffer;
|
||||||
|
if (option.enable_fp16) {
|
||||||
|
if (option.device == Device::CPU) {
|
||||||
|
FDWARNING << "Turning on FP16 on CPU may result in slower inference."
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
char* model_content_ptr;
|
||||||
|
int model_content_size = 0;
|
||||||
|
paddle2onnx::ConvertFP32ToFP16(model_file.c_str(), model_file.size(),
|
||||||
|
&model_content_ptr, &model_content_size);
|
||||||
|
std::string onnx_model_proto(model_content_ptr,
|
||||||
|
model_content_ptr + model_content_size);
|
||||||
|
onnx_model_buffer = onnx_model_proto;
|
||||||
|
} else {
|
||||||
|
onnx_model_buffer = model_file;
|
||||||
|
}
|
||||||
|
|
||||||
if (!BuildOption(option)) {
|
if (!BuildOption(option)) {
|
||||||
FDERROR << "Create Ort option fail." << std::endl;
|
FDERROR << "Create Ort option fail." << std::endl;
|
||||||
@@ -226,7 +241,19 @@ bool OrtBackend::InitFromOnnx(const std::string& model_file,
|
|||||||
}
|
}
|
||||||
|
|
||||||
InitCustomOperators();
|
InitCustomOperators();
|
||||||
session_ = {env_, model_file.data(), model_file.size(), session_options_};
|
if (model_file_name.size()) {
|
||||||
|
#ifdef WIN32
|
||||||
|
std::wstring widestr =
|
||||||
|
std::wstring(model_file_name.begin(), model_file_name.end());
|
||||||
|
session_ = {env_, widestr.c_str(), session_options_};
|
||||||
|
#else
|
||||||
|
session_ = {env_, model_file_name.c_str(), session_options_};
|
||||||
|
#endif
|
||||||
|
} else {
|
||||||
|
session_ = {env_, onnx_model_buffer.data(), onnx_model_buffer.size(),
|
||||||
|
session_options_};
|
||||||
|
}
|
||||||
|
|
||||||
binding_ = std::make_shared<Ort::IoBinding>(session_);
|
binding_ = std::make_shared<Ort::IoBinding>(session_);
|
||||||
|
|
||||||
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
Ort::MemoryInfo memory_info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
|
||||||
|
4
fastdeploy/runtime/backends/ort/ort_backend.h
Normal file → Executable file
4
fastdeploy/runtime/backends/ort/ort_backend.h
Normal file → Executable file
@@ -74,6 +74,10 @@ class OrtBackend : public BaseBackend {
|
|||||||
std::shared_ptr<Ort::IoBinding> binding_;
|
std::shared_ptr<Ort::IoBinding> binding_;
|
||||||
std::vector<OrtValueInfo> inputs_desc_;
|
std::vector<OrtValueInfo> inputs_desc_;
|
||||||
std::vector<OrtValueInfo> outputs_desc_;
|
std::vector<OrtValueInfo> outputs_desc_;
|
||||||
|
|
||||||
|
// the ONNX model file name,
|
||||||
|
// when ONNX is bigger than 2G, we will set this name
|
||||||
|
std::string model_file_name;
|
||||||
#ifndef NON_64_PLATFORM
|
#ifndef NON_64_PLATFORM
|
||||||
Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle");
|
Ort::CustomOpDomain custom_op_domain_ = Ort::CustomOpDomain("Paddle");
|
||||||
#endif
|
#endif
|
||||||
|
14
fastdeploy/runtime/runtime_option.h
Normal file → Executable file
14
fastdeploy/runtime/runtime_option.h
Normal file → Executable file
@@ -14,7 +14,6 @@
|
|||||||
|
|
||||||
/*! \file runtime_option.h
|
/*! \file runtime_option.h
|
||||||
\brief A brief file description.
|
\brief A brief file description.
|
||||||
|
|
||||||
More details
|
More details
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@@ -159,11 +158,12 @@ struct FASTDEPLOY_DECL RuntimeOption {
|
|||||||
RKNPU2BackendOption rknpu2_option;
|
RKNPU2BackendOption rknpu2_option;
|
||||||
|
|
||||||
// \brief Set the profile mode as 'true'.
|
// \brief Set the profile mode as 'true'.
|
||||||
//
|
//
|
||||||
// \param[in] inclue_h2d_d2h Whether to include time of H2D_D2H for time of runtime.
|
// \param[in] inclue_h2d_d2h Whether to
|
||||||
|
// include time of H2D_D2H for time of runtime.
|
||||||
// \param[in] repeat Repeat times for runtime inference.
|
// \param[in] repeat Repeat times for runtime inference.
|
||||||
// \param[in] warmup Warmup times for runtime inference.
|
// \param[in] warmup Warmup times for runtime inference.
|
||||||
//
|
//
|
||||||
void EnableProfiling(bool inclue_h2d_d2h = false,
|
void EnableProfiling(bool inclue_h2d_d2h = false,
|
||||||
int repeat = 100, int warmup = 50) {
|
int repeat = 100, int warmup = 50) {
|
||||||
benchmark_option.enable_profile = true;
|
benchmark_option.enable_profile = true;
|
||||||
@@ -178,12 +178,14 @@ struct FASTDEPLOY_DECL RuntimeOption {
|
|||||||
benchmark_option.enable_profile = false;
|
benchmark_option.enable_profile = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// \brief Enable to check if current backend set by user can be found at valid_xxx_backend.
|
// \brief Enable to check if current backend set by
|
||||||
|
// user can be found at valid_xxx_backend.
|
||||||
//
|
//
|
||||||
void EnableValidBackendCheck() {
|
void EnableValidBackendCheck() {
|
||||||
enable_valid_backend_check = true;
|
enable_valid_backend_check = true;
|
||||||
}
|
}
|
||||||
// \brief Disable to check if current backend set by user can be found at valid_xxx_backend.
|
// \brief Disable to check if current backend set by
|
||||||
|
// user can be found at valid_xxx_backend.
|
||||||
//
|
//
|
||||||
void DisableValidBackendCheck() {
|
void DisableValidBackendCheck() {
|
||||||
enable_valid_backend_check = false;
|
enable_valid_backend_check = false;
|
||||||
|
@@ -64,4 +64,4 @@ class FASTDEPLOY_DECL CenterFacePostprocessor{
|
|||||||
|
|
||||||
} // namespace facedet
|
} // namespace facedet
|
||||||
} // namespace vision
|
} // namespace vision
|
||||||
} // namespace fastdeploy
|
} // namespace fastdeploy
|
||||||
|
@@ -51,7 +51,6 @@ class FASTDEPLOY_DECL CenterFacePreprocessor{
|
|||||||
|
|
||||||
// target size, tuple of (width, height), default size = {640, 640}
|
// target size, tuple of (width, height), default size = {640, 640}
|
||||||
std::vector<int> size_;
|
std::vector<int> size_;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace facedet
|
} // namespace facedet
|
||||||
|
Reference in New Issue
Block a user