Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-09-30 22:32:30 +08:00)
[Example] Update runtime examples (#1542)
* Add notes for tensors
* Optimize some APIs
* Move some warnings
@@ -9,6 +9,6 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
 # 添加FastDeploy依赖头文件 (include the FastDeploy headers)
 include_directories(${FASTDEPLOY_INCS})
 
-add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_onnx_openvino.cc)
+add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_paddle_paddle_inference.cc)
 
 # 添加FastDeploy库依赖 (link against the FastDeploy libraries)
 target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS})
@@ -13,52 +13,60 @@
 // limitations under the License.
 
 #include "fastdeploy/runtime.h"
+#include <cassert>
 
 namespace fd = fastdeploy;
 
 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2.onnx";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx
+  std::string model_file = "pplcnet.onnx";
 
-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
   runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
   runtime_option.UseOrtBackend();
+
+  // Use CPU to inference
+  runtime_option.UseCpu();
   runtime_option.SetCpuThreadNum(12);
 
-  // **** GPU ****
-  // To use GPU, use the following commented code
+  // Use Gpu to inference
   // runtime_option.UseGpu(0);
+  // If need to configure ONNX Runtime backend for more option, we can configure runtime_option.ort_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OrtBackendOption.html
 
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
-
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
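For reference, the updated ONNX Runtime example stays on CPU; a minimal sketch of the GPU variant, combining the commented-out UseGpu(0) call with the OrtBackendOption fields shown later in this commit (assumes a CUDA-enabled FastDeploy build):

// Sketch: GPU + ONNX Runtime tuning for the example above.
// Assumes a CUDA-enabled build; field names come from OrtBackendOption below.
fd::RuntimeOption runtime_option;
runtime_option.SetModelPath("pplcnet.onnx", "", fd::ModelFormat::ONNX);
runtime_option.UseOrtBackend();
runtime_option.UseGpu(0);                                  // device 0 instead of CPU
runtime_option.ort_option.graph_optimization_level = 99;   // enable all graph optimizations
runtime_option.ort_option.intra_op_num_threads = 4;        // threads per operator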
@@ -13,47 +13,57 @@
 // limitations under the License.
 
 #include "fastdeploy/runtime.h"
+#include <cassert>
 
 namespace fd = fastdeploy;
 
 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2.onnx";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx
+  std::string model_file = "pplcnet.onnx";
 
-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
   runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
   runtime_option.UseOpenVINOBackend();
-  runtime_option.SetCpuThreadNum(12);
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
 
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  // Use CPU to inference
+  // If need to configure OpenVINO backend for more option, we can configure runtime_option.openvino_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OpenVINOBackendOption.html
+  runtime_option.UseCpu();
+  runtime_option.SetCpuThreadNum(12);
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
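The new examples check Init and Infer with assert, which compiles away under NDEBUG; for anything beyond a demo, the explicit check used by the old examples remains the safer pattern. A sketch of the same flow with explicit error handling (reuses runtime_option, model_file and inputs from the example above):

// Sketch: explicit error handling instead of assert().
fd::Runtime runtime;
if (!runtime.Init(runtime_option)) {
  std::cerr << "Failed to init FastDeploy Runtime, model: " << model_file << std::endl;
  return -1;
}
std::vector<fd::FDTensor> outputs;
if (!runtime.Infer(inputs, &outputs)) {
  std::cerr << "Inference failed." << std::endl;
  return -1;
}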
@@ -13,48 +13,60 @@
 // limitations under the License.
 
 #include "fastdeploy/runtime.h"
+#include <cassert>
 
 namespace fd = fastdeploy;
 
 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2.onnx";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx
+  std::string model_file = "pplcnet.onnx";
 
-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
   runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
-  runtime_option.UseGpu(0);
   runtime_option.UseTrtBackend();
-  runtime_option.SetTrtInputShape("inputs", {1, 3, 224, 224});
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
 
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  // Use NVIDIA GPU to inference
+  // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html
+  runtime_option.UseGpu(0);
+  // Use float16 inference to improve performance
+  runtime_option.trt_option.enable_fp16 = true;
+  // Cache trt engine to reduce time cost in model initialize
+  runtime_option.trt_option.serialize_file = "./model.trt";
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
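The examples fill only inputs[0], which is enough for these single-input classifiers. For a model with several inputs, every element of the inputs vector needs data and a name; a sketch, assuming for illustration that every input is FP32 with shape {1, 3, 224, 224}:

// Sketch: fill every input of a multi-input model.
// Assumes FP32 inputs of shape {1, 3, 224, 224}; a real model needs the
// per-input shape/dtype reported by inputs_info[i].
std::vector<fd::FDTensor> inputs(inputs_info.size());
for (size_t i = 0; i < inputs_info.size(); ++i) {
  inputs[i].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
  inputs[i].name = inputs_info[i].name;
}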
@@ -13,53 +13,61 @@
 // limitations under the License.
 
 #include "fastdeploy/runtime.h"
+#include <cassert>
 
 namespace fd = fastdeploy;
 
 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2/inference.pdmodel";
-  std::string params_file = "mobilenetv2/inference.pdiparams";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
+  std::string model_file = "pplcnet/inference.pdmodel";
+  std::string params_file = "pplcnet/inference.pdiparams";
 
-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
-  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
+  runtime_option.SetModelPath(model_file, params_file);
   runtime_option.UseOrtBackend();
+
+  // Use CPU to inference
+  runtime_option.UseCpu();
   runtime_option.SetCpuThreadNum(12);
 
-  // **** GPU ****
-  // To use GPU, use the following commented code
+  // Use Gpu to inference
   // runtime_option.UseGpu(0);
+  // If need to configure ONNX Runtime backend for more option, we can configure runtime_option.ort_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OrtBackendOption.html
 
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
-
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
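The examples print the first ten output values; for a classifier such as PP-LCNet, the arg-max over the flat output buffer is usually more informative. A sketch using the same GetData()/Numel() calls (requires <algorithm> at the top of the file):

// Sketch: top-1 class from the flat output buffer of the example above.
const float* scores = reinterpret_cast<const float*>(outputs[0].GetData());
const float* best = std::max_element(scores, scores + outputs[0].Numel());
std::cout << "top-1 class: " << (best - scores) << ", score: " << *best << std::endl;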
@@ -13,48 +13,58 @@
 // limitations under the License.
 
 #include "fastdeploy/runtime.h"
+#include <cassert>
 
 namespace fd = fastdeploy;
 
 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2/inference.pdmodel";
-  std::string params_file = "mobilenetv2/inference.pdiparams";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
+  std::string model_file = "pplcnet/inference.pdmodel";
+  std::string params_file = "pplcnet/inference.pdiparams";
 
-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
-  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
+  runtime_option.SetModelPath(model_file, params_file);
   runtime_option.UseOpenVINOBackend();
-  runtime_option.SetCpuThreadNum(12);
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
 
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  // Use CPU to inference
+  // If need to configure OpenVINO backend for more option, we can configure runtime_option.openvino_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OpenVINOBackendOption.html
+  runtime_option.UseCpu();
+  runtime_option.SetCpuThreadNum(12);
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
@@ -13,53 +13,57 @@
 // limitations under the License.
 
 #include "fastdeploy/runtime.h"
+#include <cassert>
 
 namespace fd = fastdeploy;
 
 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2/inference.pdmodel";
-  std::string params_file = "mobilenetv2/inference.pdiparams";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
+  std::string model_file = "pplcnet/inference.pdmodel";
+  std::string params_file = "pplcnet/inference.pdiparams";
 
-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
-  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
-  // CPU
+  runtime_option.SetModelPath(model_file, params_file);
   runtime_option.UsePaddleInferBackend();
-  runtime_option.SetCpuThreadNum(12);
-  // GPU
-  // runtime_option.UseGpu(0);
-  // IPU
-  // runtime_option.UseIpu();
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
-
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  runtime_option.UseCpu();
+  // If need to configure Paddle Inference backend for more option, we can configure runtime_option.paddle_infer_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1PaddleBackendOption.html
+  runtime_option.paddle_infer_option.enable_mkldnn = true;
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
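When comparing the CPU backends touched by this commit (ONNX Runtime, OpenVINO, Paddle Inference), timing repeated Infer calls after a warm-up gives roughly comparable numbers. A sketch around the Infer call from the example above (requires <chrono> at the top of the file):

// Sketch: rough latency measurement around runtime.Infer.
for (int i = 0; i < 10; ++i) runtime.Infer(inputs, &outputs);        // warm-up
const int kRepeats = 100;
auto start = std::chrono::steady_clock::now();
for (int i = 0; i < kRepeats; ++i) runtime.Infer(inputs, &outputs);
auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
              std::chrono::steady_clock::now() - start).count();
std::cout << "average latency: " << ms / static_cast<double>(kRepeats) << " ms" << std::endl;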
examples/runtime/cpp/infer_paddle_paddle_inference_trt.cc (new file, 77 lines)
@@ -0,0 +1,77 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "fastdeploy/runtime.h"
+#include <cassert>
+
+namespace fd = fastdeploy;
+
+int main(int argc, char* argv[]) {
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
+  std::string model_file = "pplcnet/inference.pdmodel";
+  std::string params_file = "pplcnet/inference.pdiparams";
+
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
+  fd::RuntimeOption runtime_option;
+  runtime_option.SetModelPath(model_file, params_file);
+  runtime_option.UsePaddleInferBackend();
+  runtime_option.UseGpu(0);
+
+  // Enable Paddle Inference + TensorRT
+  // If need to configure Paddle Inference backend for more option, we can configure runtime_option.paddle_infer_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1PaddleBackendOption.html
+  runtime_option.paddle_infer_option.enable_trt = true;
+
+  // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html
+  // Use float16 inference to improve performance
+  runtime_option.trt_option.enable_fp16 = true;
+  // Cache trt engine to reduce time cost in model initialize
+  runtime_option.trt_option.serialize_file = "./pplcnet_model.trt";
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
+  return 0;
+}
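With trt_option.serialize_file set, the first run of this new example builds the TensorRT engine and writes it to ./pplcnet_model.trt; later runs load the cached engine and initialize much faster. A sketch that reports, before Init, whether a cached engine will be reused (requires <fstream> at the top of the file):

// Sketch: report whether a cached TensorRT engine will be reused.
const std::string cache_file = "./pplcnet_model.trt";
runtime_option.trt_option.serialize_file = cache_file;
if (std::ifstream(cache_file).good()) {
  std::cout << "Reusing cached TensorRT engine: " << cache_file << std::endl;
} else {
  std::cout << "No engine cache found, the first run will build one." << std::endl;
}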
@@ -13,49 +13,61 @@
 // limitations under the License.
 
 #include "fastdeploy/runtime.h"
+#include <cassert>
 
 namespace fd = fastdeploy;
 
 int main(int argc, char* argv[]) {
-  std::string model_file = "mobilenetv2/inference.pdmodel";
-  std::string params_file = "mobilenetv2/inference.pdiparams";
+  // Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
+  std::string model_file = "pplcnet/inference.pdmodel";
+  std::string params_file = "pplcnet/inference.pdiparams";
 
-  // setup option
+  // configure runtime
+  // How to configure by RuntimeOption, refer its api doc for more information
+  // https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
   fd::RuntimeOption runtime_option;
-  runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
-  runtime_option.UseGpu(0);
+  runtime_option.SetModelPath(model_file, params_file);
   runtime_option.UseTrtBackend();
-  runtime_option.EnablePaddleToTrt();
-  // init runtime
-  std::unique_ptr<fd::Runtime> runtime =
-      std::unique_ptr<fd::Runtime>(new fd::Runtime());
-  if (!runtime->Init(runtime_option)) {
-    std::cerr << "--- Init FastDeploy Runitme Failed! "
-              << "\n--- Model: " << model_file << std::endl;
-    return -1;
-  } else {
-    std::cout << "--- Init FastDeploy Runitme Done! "
-              << "\n--- Model: " << model_file << std::endl;
-  }
-  // init input tensor shape
-  fd::TensorInfo info = runtime->GetInputInfo(0);
-  info.shape = {1, 3, 224, 224};
 
-  std::vector<fd::FDTensor> input_tensors(1);
-  std::vector<fd::FDTensor> output_tensors(1);
-
-  std::vector<float> inputs_data;
-  inputs_data.resize(1 * 3 * 224 * 224);
-  for (size_t i = 0; i < inputs_data.size(); ++i) {
-    inputs_data[i] = std::rand() % 1000 / 1000.0f;
-  }
-  input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
-
-  //get input name
-  input_tensors[0].name = info.name;
-
-  runtime->Infer(input_tensors, &output_tensors);
-
-  output_tensors[0].PrintInfo();
+  // Use NVIDIA GPU to inference
+  // If need to configure TensorRT backend for more option, we can configure runtime_option.trt_option
+  // refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html
+  runtime_option.UseGpu(0);
+  // Use float16 inference to improve performance
+  runtime_option.trt_option.enable_fp16 = true;
+  // Cache trt engine to reduce time cost in model initialize
+  runtime_option.trt_option.serialize_file = "./pplcnet_model.trt";
+
+  fd::Runtime runtime;
+  assert(runtime.Init(runtime_option));
+
+  // Get model's inputs information
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
+  std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
+
+  // Create dummy data fill with 0.5
+  std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
+
+  // Create inputs/outputs tensors
+  std::vector<fd::FDTensor> inputs(inputs_info.size());
+  std::vector<fd::FDTensor> outputs;
+
+  // Initialize input tensors
+  // API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
+  inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
+  inputs[0].name = inputs_info[0].name;
+
+  // Inference
+  assert(runtime.Infer(inputs, &outputs));
+
+  // Print debug information of outputs
+  outputs[0].PrintInfo();
+
+  // Get data pointer and print it's elements
+  const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
+  for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
+    std::cout << data_ptr[i] << " ";
+  }
+  std::cout << std::endl;
   return 0;
 }
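enable_fp16 trades a small amount of accuracy for speed; a common sanity check is to run the same dummy input with fp16 off and on and compare the outputs. A sketch of the comparison step only, where fp32_out, fp16_out and n are hypothetical copies of outputs[0] from the two runs and their common length (requires <algorithm> and <cmath>):

// Sketch: element-wise comparison of two runs (fp16 off vs. on).
// fp32_out, fp16_out and n are hypothetical, not part of the commit.
float max_abs_diff = 0.f;
for (size_t i = 0; i < n; ++i) {
  max_abs_diff = std::max(max_abs_diff, std::abs(fp32_out[i] - fp16_out[i]));
}
std::cout << "max |fp32 - fp16| = " << max_abs_diff << std::endl;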
@@ -85,10 +85,10 @@ class BaseBackend {
                                  bool copy_to_fd = true) = 0;
   // Optional: For those backends which can share memory
   // while creating multiple inference engines with same model file
-  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
-                                             void *stream = nullptr,
+  virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption& runtime_option,
+                                             void* stream = nullptr,
                                              int device_id = -1) {
-    FDERROR << "Clone no support" << std::endl;
+    FDERROR << "Clone no support " << runtime_option.backend << " " << stream << " " << device_id << std::endl;
     return nullptr;
   }
 
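The Clone change above only extends the error message so the unsupported backend, stream, and device id show up in the log; the default implementation still returns nullptr, so callers must treat a null result as "clone not supported" and build a fresh backend from the same option instead. A sketch of that caller-side pattern, with a hypothetical helper name:

// Sketch (hypothetical helper): treat a null Clone result as "not supported".
std::unique_ptr<fastdeploy::BaseBackend> TryClone(fastdeploy::BaseBackend& backend,
                                                  fastdeploy::RuntimeOption& option,
                                                  void* stream, int device_id) {
  auto cloned = backend.Clone(option, stream, device_id);
  if (cloned == nullptr) {
    // Clone not supported by this backend: the caller should create a new
    // backend/Runtime from the same option instead of sharing weights.
  }
  return cloned;
}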
@@ -53,32 +53,46 @@ enum LitePowerMode {
 struct LiteBackendOption {
   /// Paddle Lite power mode for mobile device.
   int power_mode = 3;
-  /// Number of threads while use CPU
+  // Number of threads while use CPU
   int cpu_threads = 1;
   /// Enable use half precision
   bool enable_fp16 = false;
-  /// Inference device, Paddle Lite support CPU/KUNLUNXIN/TIMVX/ASCEND
+  // Inference device, Paddle Lite support CPU/KUNLUNXIN/TIMVX/ASCEND
   Device device = Device::CPU;
-  /// Index of inference device
+  // Index of inference device
   int device_id = 0;
 
+  /// kunlunxin_l3_workspace_size
   int kunlunxin_l3_workspace_size = 0xfffc00;
+  /// kunlunxin_locked
   bool kunlunxin_locked = false;
+  /// kunlunxin_autotune
   bool kunlunxin_autotune = true;
+  /// kunlunxin_autotune_file
   std::string kunlunxin_autotune_file = "";
+  /// kunlunxin_precision
   std::string kunlunxin_precision = "int16";
+  /// kunlunxin_adaptive_seqlen
   bool kunlunxin_adaptive_seqlen = false;
+  /// kunlunxin_enable_multi_stream
   bool kunlunxin_enable_multi_stream = false;
 
   /// Optimized model dir for CxxConfig
   std::string optimized_model_dir = "";
+  /// nnadapter_subgraph_partition_config_path
   std::string nnadapter_subgraph_partition_config_path = "";
+  /// nnadapter_subgraph_partition_config_buffer
   std::string nnadapter_subgraph_partition_config_buffer = "";
+  /// nnadapter_context_properties
   std::string nnadapter_context_properties = "";
+  /// nnadapter_model_cache_dir
   std::string nnadapter_model_cache_dir = "";
+  /// nnadapter_mixed_precision_quantization_config_path
   std::string nnadapter_mixed_precision_quantization_config_path = "";
+  /// nnadapter_dynamic_shape_info
   std::map<std::string, std::vector<std::vector<int64_t>>>
       nnadapter_dynamic_shape_info = {{"", {{0}}}};
+  /// nnadapter_device_names
   std::vector<std::string> nnadapter_device_names = {};
 };
 }  // namespace fastdeploy
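The struct above lists what is configurable for the Paddle Lite backend; a minimal sketch of filling it in (how the struct is attached to a RuntimeOption is not shown in this commit, so that step is omitted):

// Sketch: populate the LiteBackendOption fields documented above.
fastdeploy::LiteBackendOption lite_option;
lite_option.cpu_threads = 4;                 // CPU threads for Paddle Lite
lite_option.enable_fp16 = true;              // half precision where the device supports it
lite_option.power_mode = 3;                  // Paddle Lite power mode for mobile devices
lite_option.kunlunxin_precision = "int16";   // only relevant on KUNLUNXIN hardware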
@@ -25,27 +25,18 @@ namespace fastdeploy {
 /*! @brief Option object to configure ONNX Runtime backend
  */
 struct OrtBackendOption {
-  /*
-   * @brief Level of graph optimization, -1: mean default(Enable all the optimization strategy)/0: disable all the optimization strategy/1: enable basic strategy/2:enable extend strategy/99: enable all
-   */
+  /// Level of graph optimization, -1: mean default(Enable all the optimization strategy)/0: disable all the optimization strategy/1: enable basic strategy/2:enable extend strategy/99: enable all
   int graph_optimization_level = -1;
-  /*
-   * @brief Number of threads to execute the operator, -1: default
-   */
+  /// Number of threads to execute the operator, -1: default
   int intra_op_num_threads = -1;
-  /*
-   * @brief Number of threads to execute the graph, -1: default. This parameter only will bring effects while the `OrtBackendOption::execution_mode` set to 1.
-   */
+  /// Number of threads to execute the graph, -1: default. This parameter only will bring effects while the `OrtBackendOption::execution_mode` set to 1.
   int inter_op_num_threads = -1;
-  /*
-   * @brief Execution mode for the graph, -1: default(Sequential mode)/0: Sequential mode, execute the operators in graph one by one. /1: Parallel mode, execute the operators in graph parallelly.
-   */
+  /// Execution mode for the graph, -1: default(Sequential mode)/0: Sequential mode, execute the operators in graph one by one. /1: Parallel mode, execute the operators in graph parallelly.
   int execution_mode = -1;
-  /// Inference device, OrtBackend supports CPU/GPU
+  // Inference device, OrtBackend supports CPU/GPU
   Device device = Device::CPU;
-  /// Inference device id
+  // Inference device id
   int device_id = 0;
 
   void* external_stream_ = nullptr;
 };
 }  // namespace fastdeploy
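These fields are what the runtime examples mean by "configure runtime_option.ort_option". A minimal sketch of a parallel-execution configuration on an existing RuntimeOption:

// Sketch: tune ONNX Runtime through RuntimeOption::ort_option.
runtime_option.UseOrtBackend();
runtime_option.ort_option.graph_optimization_level = 99;  // enable all optimizations
runtime_option.ort_option.execution_mode = 1;             // parallel mode
runtime_option.ort_option.inter_op_num_threads = 2;       // only effective in parallel mode
runtime_option.ort_option.intra_op_num_threads = 4;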
@@ -54,6 +54,8 @@ struct PaddleBackendOption {
   bool enable_mkldnn = true;
   /// Use Paddle Inference + TensorRT to inference model on GPU
   bool enable_trt = false;
+  /// Whether enable memory optimize, default true
+  bool enable_memory_optimize = true;
 
   /*
    * @brief IPU option, this will configure the IPU hardware, if inference model in IPU
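The new enable_memory_optimize flag defaults to true, so the previous behaviour (EnableMemoryOptim() during init, see the PaddleBackend change below) is unchanged; it can now be switched off through paddle_infer_option. A minimal sketch:

// Sketch: opt out of Paddle Inference memory optimization.
runtime_option.UsePaddleInferBackend();
runtime_option.paddle_infer_option.enable_memory_optimize = false;
// enable_trt remains available on the same option object:
// runtime_option.paddle_infer_option.enable_trt = true;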
@@ -41,6 +41,7 @@ void BindPaddleOption(pybind11::module& m) {
       .def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info)
       .def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn)
       .def_readwrite("enable_trt", &PaddleBackendOption::enable_trt)
+      .def_readwrite("enable_memory_optimize", &PaddleBackendOption::enable_memory_optimize)
       .def_readwrite("ipu_option", &PaddleBackendOption::ipu_option)
       .def_readwrite("collect_trt_shape",
                      &PaddleBackendOption::collect_trt_shape)
|
|||||||
}
|
}
|
||||||
config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
|
config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
|
||||||
params_buffer.c_str(), params_buffer.size());
|
params_buffer.c_str(), params_buffer.size());
|
||||||
config_.EnableMemoryOptim();
|
if (option.enable_memory_optimize) {
|
||||||
|
config_.EnableMemoryOptim();
|
||||||
|
}
|
||||||
BuildOption(option);
|
BuildOption(option);
|
||||||
|
|
||||||
// The input/output information get from predictor is not right, use
|
// The input/output information get from predictor is not right, use
|
||||||
|
@@ -33,9 +33,8 @@ struct TrtBackendOption {
   /// Enable log while converting onnx model to tensorrt
   bool enable_log_info = false;
 
-  /*
-   * @brief Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
-   */
+  /// Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
   bool enable_fp16 = false;
 
   /** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend
@@ -64,9 +63,7 @@
       max_shape[tensor_name].assign(max.begin(), max.end());
     }
   }
-  /**
-   * @brief Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again
-   */
+  /// Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again
   std::string serialize_file = "";
 
   // The below parameters may be removed in next version, please do not
@@ -22,4 +22,4 @@ for root, dirs, files in os.walk(third_libs):
     for f in files:
         if f.strip().count(".so") > 0 or f.strip() == "plugins.xml":
             full_path = os.path.join(root, f)
-            shutil.copy(full_path, os.path.join(bc_dirname, "lib"))
+            shutil.copy(full_path, os.path.join(bc_dirname, "lib"), follow_symlinks=False)