[Example] Update runtime examples (#1542)

* Add notes for tensors

* Optimize some APIs

* Move some warnings
Jason
2023-03-08 16:56:04 +08:00
committed by GitHub
parent 3d31834193
commit 6be2c0367b
17 changed files with 425 additions and 277 deletions

View File

@@ -9,6 +9,6 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# Add the FastDeploy dependency header files
include_directories(${FASTDEPLOY_INCS})
add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_onnx_openvino.cc)
add_executable(runtime_demo ${PROJECT_SOURCE_DIR}/infer_paddle_paddle_inference.cc)
# Add the FastDeploy library dependency
target_link_libraries(runtime_demo ${FASTDEPLOY_LIBS})

View File

@@ -13,52 +13,60 @@
// limitations under the License.
#include "fastdeploy/runtime.h"
#include <cassert>
namespace fd = fastdeploy;
int main(int argc, char* argv[]) {
std::string model_file = "mobilenetv2.onnx";
// Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx
std::string model_file = "pplcnet.onnx";
// setup option
// configure runtime
// For details on how to configure RuntimeOption, refer to its API doc:
// https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
fd::RuntimeOption runtime_option;
runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
runtime_option.UseOrtBackend();
// Use CPU for inference
runtime_option.UseCpu();
runtime_option.SetCpuThreadNum(12);
// **** GPU ****
// To use GPU, use the following commented code
// Use GPU for inference
// runtime_option.UseGpu(0);
// To further configure the ONNX Runtime backend, set runtime_option.ort_option
// refer to https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OrtBackendOption.html
// init runtime
std::unique_ptr<fd::Runtime> runtime =
std::unique_ptr<fd::Runtime>(new fd::Runtime());
if (!runtime->Init(runtime_option)) {
std::cerr << "--- Init FastDeploy Runitme Failed! "
<< "\n--- Model: " << model_file << std::endl;
return -1;
} else {
std::cout << "--- Init FastDeploy Runitme Done! "
<< "\n--- Model: " << model_file << std::endl;
fd::Runtime runtime;
assert(runtime.Init(runtime_option));
// Get model's inputs information
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
// Create dummy data filled with 0.5
std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
// Create inputs/outputs tensors
std::vector<fd::FDTensor> inputs(inputs_info.size());
std::vector<fd::FDTensor> outputs;
// Initialize input tensors
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
inputs[0].name = inputs_info[0].name;
// Inference
assert(runtime.Infer(inputs, &outputs));
// Print debug information of outputs
outputs[0].PrintInfo();
// Get the data pointer and print its elements
const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
std::cout << data_ptr[i] << " ";
}
// init input tensor shape
fd::TensorInfo info = runtime->GetInputInfo(0);
info.shape = {1, 3, 224, 224};
std::vector<fd::FDTensor> input_tensors(1);
std::vector<fd::FDTensor> output_tensors(1);
std::vector<float> inputs_data;
inputs_data.resize(1 * 3 * 224 * 224);
for (size_t i = 0; i < inputs_data.size(); ++i) {
inputs_data[i] = std::rand() % 1000 / 1000.0f;
}
input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
//get input name
input_tensors[0].name = info.name;
runtime->Infer(input_tensors, &output_tensors);
output_tensors[0].PrintInfo();
std::cout << std::endl;
return 0;
}
}
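The comment above points at runtime_option.ort_option for further ONNX Runtime tuning. Below is a minimal sketch of that, limited to the OrtBackendOption fields that appear later in this commit (graph_optimization_level, intra_op_num_threads, inter_op_num_threads, execution_mode); the chosen values are illustrative, not recommendations.

// Sketch: extra ONNX Runtime tuning via runtime_option.ort_option.
#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

void ConfigureOrtOption(fd::RuntimeOption* runtime_option) {
  runtime_option->UseOrtBackend();
  // -1 keeps the default; 99 enables every graph optimization strategy.
  runtime_option->ort_option.graph_optimization_level = 99;
  // Threads used inside a single operator.
  runtime_option->ort_option.intra_op_num_threads = 8;
  // execution_mode 1 runs independent operators in parallel and makes
  // inter_op_num_threads take effect.
  runtime_option->ort_option.execution_mode = 1;
  runtime_option->ort_option.inter_op_num_threads = 2;
}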

View File

@@ -13,47 +13,57 @@
// limitations under the License.
#include "fastdeploy/runtime.h"
#include <cassert>
namespace fd = fastdeploy;
int main(int argc, char* argv[]) {
std::string model_file = "mobilenetv2.onnx";
// Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx
std::string model_file = "pplcnet.onnx";
// setup option
// configure runtime
// For details on how to configure RuntimeOption, refer to its API doc:
// https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
fd::RuntimeOption runtime_option;
runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
runtime_option.UseOpenVINOBackend();
runtime_option.SetCpuThreadNum(12);
// init runtime
std::unique_ptr<fd::Runtime> runtime =
std::unique_ptr<fd::Runtime>(new fd::Runtime());
if (!runtime->Init(runtime_option)) {
std::cerr << "--- Init FastDeploy Runitme Failed! "
<< "\n--- Model: " << model_file << std::endl;
return -1;
} else {
std::cout << "--- Init FastDeploy Runitme Done! "
<< "\n--- Model: " << model_file << std::endl;
}
// init input tensor shape
fd::TensorInfo info = runtime->GetInputInfo(0);
info.shape = {1, 3, 224, 224};
std::vector<fd::FDTensor> input_tensors(1);
std::vector<fd::FDTensor> output_tensors(1);
std::vector<float> inputs_data;
inputs_data.resize(1 * 3 * 224 * 224);
for (size_t i = 0; i < inputs_data.size(); ++i) {
inputs_data[i] = std::rand() % 1000 / 1000.0f;
}
input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
//get input name
input_tensors[0].name = info.name;
// Use CPU for inference
// To further configure the OpenVINO backend, set runtime_option.openvino_option
// refer to https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OpenVINOBackendOption.html
runtime_option.UseCpu();
runtime_option.SetCpuThreadNum(12);
runtime->Infer(input_tensors, &output_tensors);
fd::Runtime runtime;
assert(runtime.Init(runtime_option));
output_tensors[0].PrintInfo();
// Get model's inputs information
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
// Create dummy data filled with 0.5
std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
// Create inputs/outputs tensors
std::vector<fd::FDTensor> inputs(inputs_info.size());
std::vector<fd::FDTensor> outputs;
// Initialize input tensors
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
inputs[0].name = inputs_info[0].name;
// Inference
assert(runtime.Infer(inputs, &outputs));
// Print debug information of outputs
outputs[0].PrintInfo();
// Get the data pointer and print its elements
const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
std::cout << data_ptr[i] << " ";
}
std::cout << std::endl;
return 0;
}
}
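The examples only print the first few output values. As a minimal post-processing sketch, assuming the first output is a single FP32 score tensor (as with the PPLCNet model used above), the values can be copied out and reduced to a top-1 prediction:

// Sketch: copy the raw FP32 output into a std::vector and report the top-1 class.
#include <algorithm>
#include <iostream>
#include <vector>
#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

void PrintTop1(fd::FDTensor& output) {
  const float* data = reinterpret_cast<const float*>(output.GetData());
  std::vector<float> scores(data, data + output.Numel());
  auto max_it = std::max_element(scores.begin(), scores.end());
  std::cout << "top-1 class: " << (max_it - scores.begin())
            << ", score: " << *max_it << std::endl;
}

Called as PrintTop1(outputs[0]) right after runtime.Infer(inputs, &outputs).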

View File

@@ -13,48 +13,60 @@
// limitations under the License.
#include "fastdeploy/runtime.h"
#include <cassert>
namespace fd = fastdeploy;
int main(int argc, char* argv[]) {
std::string model_file = "mobilenetv2.onnx";
// Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.onnx
std::string model_file = "pplcnet.onnx";
// setup option
// configure runtime
// For details on how to configure RuntimeOption, refer to its API doc:
// https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
fd::RuntimeOption runtime_option;
runtime_option.SetModelPath(model_file, "", fd::ModelFormat::ONNX);
runtime_option.UseGpu(0);
runtime_option.UseTrtBackend();
runtime_option.SetTrtInputShape("inputs", {1, 3, 224, 224});
// init runtime
std::unique_ptr<fd::Runtime> runtime =
std::unique_ptr<fd::Runtime>(new fd::Runtime());
if (!runtime->Init(runtime_option)) {
std::cerr << "--- Init FastDeploy Runitme Failed! "
<< "\n--- Model: " << model_file << std::endl;
return -1;
} else {
std::cout << "--- Init FastDeploy Runitme Done! "
<< "\n--- Model: " << model_file << std::endl;
}
// init input tensor shape
fd::TensorInfo info = runtime->GetInputInfo(0);
info.shape = {1, 3, 224, 224};
std::vector<fd::FDTensor> input_tensors(1);
std::vector<fd::FDTensor> output_tensors(1);
std::vector<float> inputs_data;
inputs_data.resize(1 * 3 * 224 * 224);
for (size_t i = 0; i < inputs_data.size(); ++i) {
inputs_data[i] = std::rand() % 1000 / 1000.0f;
}
input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
//get input name
input_tensors[0].name = info.name;
// Use NVIDIA GPU for inference
// To further configure the TensorRT backend, set runtime_option.trt_option
// refer to https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html
runtime_option.UseGpu(0);
// Use float16 inference to improve performance
runtime_option.trt_option.enable_fp16 = true;
// Cache the TensorRT engine to reduce model initialization time
runtime_option.trt_option.serialize_file = "./model.trt";
runtime->Infer(input_tensors, &output_tensors);
fd::Runtime runtime;
assert(runtime.Init(runtime_option));
output_tensors[0].PrintInfo();
// Get model's inputs information
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
// Create dummy data filled with 0.5
std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
// Create inputs/outputs tensors
std::vector<fd::FDTensor> inputs(inputs_info.size());
std::vector<fd::FDTensor> outputs;
// Initialize input tensors
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
inputs[0].name = inputs_info[0].name;
// Inference
assert(runtime.Infer(inputs, &outputs));
// Print debug information of outputs
outputs[0].PrintInfo();
// Get the data pointer and print its elements
const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
std::cout << data_ptr[i] << " ";
}
std::cout << std::endl;
return 0;
}
}
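The example above fixes the input shape to {1, 3, 224, 224}. For a model exported with a dynamic batch dimension, SetTrtInputShape can also be given a min/opt/max range; the sketch below assumes that three-shape form exists as described in the TrtBackendOption doc linked above, and "x" is a placeholder for the model's real input name (see GetInputInfos()).

// Sketch: let TensorRT build an engine that covers batch sizes 1 to 8.
#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

void ConfigureDynamicBatch(fd::RuntimeOption* runtime_option) {
  runtime_option->UseGpu(0);
  runtime_option->UseTrtBackend();
  // min / opt / max shapes for the (placeholder) input tensor "x".
  runtime_option->SetTrtInputShape("x", {1, 3, 224, 224},
                                   {4, 3, 224, 224},
                                   {8, 3, 224, 224});
  // Reuse the serialized engine on later runs, as in the example above.
  runtime_option->trt_option.serialize_file = "./model_dynamic.trt";
}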

View File

@@ -13,53 +13,61 @@
// limitations under the License.
#include "fastdeploy/runtime.h"
#include <cassert>
namespace fd = fastdeploy;
int main(int argc, char* argv[]) {
std::string model_file = "mobilenetv2/inference.pdmodel";
std::string params_file = "mobilenetv2/inference.pdiparams";
// Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
std::string model_file = "pplcnet/inference.pdmodel";
std::string params_file = "pplcnet/inference.pdiparams";
// setup option
// configure runtime
// For details on how to configure RuntimeOption, refer to its API doc:
// https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
fd::RuntimeOption runtime_option;
runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
runtime_option.SetModelPath(model_file, params_file);
runtime_option.UseOrtBackend();
// Use CPU for inference
runtime_option.UseCpu();
runtime_option.SetCpuThreadNum(12);
// **** GPU ****
// To use GPU, use the following commented code
// Use GPU for inference
// runtime_option.UseGpu(0);
// To further configure the ONNX Runtime backend, set runtime_option.ort_option
// refer to https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OrtBackendOption.html
// init runtime
std::unique_ptr<fd::Runtime> runtime =
std::unique_ptr<fd::Runtime>(new fd::Runtime());
if (!runtime->Init(runtime_option)) {
std::cerr << "--- Init FastDeploy Runitme Failed! "
<< "\n--- Model: " << model_file << std::endl;
return -1;
} else {
std::cout << "--- Init FastDeploy Runitme Done! "
<< "\n--- Model: " << model_file << std::endl;
fd::Runtime runtime;
assert(runtime.Init(runtime_option));
// Get model's inputs information
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
// Create dummy data filled with 0.5
std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
// Create inputs/outputs tensors
std::vector<fd::FDTensor> inputs(inputs_info.size());
std::vector<fd::FDTensor> outputs;
// Initialize input tensors
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
inputs[0].name = inputs_info[0].name;
// Inference
assert(runtime.Infer(inputs, &outputs));
// Print debug information of outputs
outputs[0].PrintInfo();
// Get the data pointer and print its elements
const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
std::cout << data_ptr[i] << " ";
}
// init input tensor shape
fd::TensorInfo info = runtime->GetInputInfo(0);
info.shape = {1, 3, 224, 224};
std::vector<fd::FDTensor> input_tensors(1);
std::vector<fd::FDTensor> output_tensors(1);
std::vector<float> inputs_data;
inputs_data.resize(1 * 3 * 224 * 224);
for (size_t i = 0; i < inputs_data.size(); ++i) {
inputs_data[i] = std::rand() % 1000 / 1000.0f;
}
input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
//get input name
input_tensors[0].name = info.name;
runtime->Infer(input_tensors, &output_tensors);
output_tensors[0].PrintInfo();
std::cout << std::endl;
return 0;
}
}
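The dummy input above hard-codes {1, 3, 224, 224}. A slightly more general sketch builds the shape from the TensorInfo the runtime reports, pinning any dynamic dimension to 1; FP32 input data is assumed, and the SetData call follows the usage in the examples above.

// Sketch: derive the dummy input from the model's reported input shape.
#include <functional>
#include <numeric>
#include <vector>
#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

fd::FDTensor BuildDummyInput(const fd::TensorInfo& info,
                             std::vector<float>* buffer) {
  std::vector<int64_t> shape;
  for (int d : info.shape) {
    shape.push_back(d > 0 ? d : 1);  // pin dynamic dims (-1) to 1
  }
  int64_t numel = std::accumulate(shape.begin(), shape.end(), int64_t{1},
                                  std::multiplies<int64_t>());
  buffer->assign(numel, 0.5f);  // caller keeps the buffer alive during Infer()
  fd::FDTensor tensor;
  tensor.SetData(shape, fd::FDDataType::FP32, buffer->data());
  tensor.name = info.name;
  return tensor;
}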

View File

@@ -13,48 +13,58 @@
// limitations under the License.
#include "fastdeploy/runtime.h"
#include <cassert>
namespace fd = fastdeploy;
int main(int argc, char* argv[]) {
std::string model_file = "mobilenetv2/inference.pdmodel";
std::string params_file = "mobilenetv2/inference.pdiparams";
// Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
std::string model_file = "pplcnet/inference.pdmodel";
std::string params_file = "pplcnet/inference.pdiparams";
// setup option
// configure runtime
// For details on how to configure RuntimeOption, refer to its API doc:
// https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
fd::RuntimeOption runtime_option;
runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
runtime_option.SetModelPath(model_file, params_file);
runtime_option.UseOpenVINOBackend();
runtime_option.SetCpuThreadNum(12);
// init runtime
std::unique_ptr<fd::Runtime> runtime =
std::unique_ptr<fd::Runtime>(new fd::Runtime());
if (!runtime->Init(runtime_option)) {
std::cerr << "--- Init FastDeploy Runitme Failed! "
<< "\n--- Model: " << model_file << std::endl;
return -1;
} else {
std::cout << "--- Init FastDeploy Runitme Done! "
<< "\n--- Model: " << model_file << std::endl;
}
// init input tensor shape
fd::TensorInfo info = runtime->GetInputInfo(0);
info.shape = {1, 3, 224, 224};
std::vector<fd::FDTensor> input_tensors(1);
std::vector<fd::FDTensor> output_tensors(1);
std::vector<float> inputs_data;
inputs_data.resize(1 * 3 * 224 * 224);
for (size_t i = 0; i < inputs_data.size(); ++i) {
inputs_data[i] = std::rand() % 1000 / 1000.0f;
}
input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
//get input name
input_tensors[0].name = info.name;
// Use CPU for inference
// To further configure the OpenVINO backend, set runtime_option.openvino_option
// refer to https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1OpenVINOBackendOption.html
runtime_option.UseCpu();
runtime_option.SetCpuThreadNum(12);
runtime->Infer(input_tensors, &output_tensors);
fd::Runtime runtime;
assert(runtime.Init(runtime_option));
output_tensors[0].PrintInfo();
// Get model's inputs information
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
// Create dummy data filled with 0.5
std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
// Create inputs/outputs tensors
std::vector<fd::FDTensor> inputs(inputs_info.size());
std::vector<fd::FDTensor> outputs;
// Initialize input tensors
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
inputs[0].name = inputs_info[0].name;
// Inference
assert(runtime.Infer(inputs, &outputs));
// Print debug information of outputs
outputs[0].PrintInfo();
// Get the data pointer and print its elements
const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
std::cout << data_ptr[i] << " ";
}
std::cout << std::endl;
return 0;
}
}
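The examples fill inputs[0] only, which is enough for the single-input PPLCNet model. For a model with several inputs the same pattern repeats over GetInputInfos(); the fixed shape and FP32 dtype below are placeholders for illustration.

// Sketch: fill every input of a multi-input model with dummy data, then run Infer.
#include <vector>
#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

bool RunWithDummyInputs(fd::Runtime* runtime, std::vector<fd::FDTensor>* outputs) {
  std::vector<fd::TensorInfo> inputs_info = runtime->GetInputInfos();
  std::vector<fd::FDTensor> inputs(inputs_info.size());
  // One backing buffer per input so the data stays valid while Infer runs.
  std::vector<std::vector<float>> buffers(inputs_info.size());
  for (size_t i = 0; i < inputs_info.size(); ++i) {
    buffers[i].assign(1 * 3 * 224 * 224, 0.5f);  // placeholder shape/size
    inputs[i].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, buffers[i].data());
    inputs[i].name = inputs_info[i].name;
  }
  return runtime->Infer(inputs, outputs);
}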

View File

@@ -13,53 +13,57 @@
// limitations under the License.
#include "fastdeploy/runtime.h"
#include <cassert>
namespace fd = fastdeploy;
int main(int argc, char* argv[]) {
std::string model_file = "mobilenetv2/inference.pdmodel";
std::string params_file = "mobilenetv2/inference.pdiparams";
// Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
std::string model_file = "pplcnet/inference.pdmodel";
std::string params_file = "pplcnet/inference.pdiparams";
// setup option
// configure runtime
// For details on how to configure RuntimeOption, refer to its API doc:
// https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
fd::RuntimeOption runtime_option;
runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
// CPU
runtime_option.SetModelPath(model_file, params_file);
runtime_option.UsePaddleInferBackend();
runtime_option.SetCpuThreadNum(12);
// GPU
// runtime_option.UseGpu(0);
// IPU
// runtime_option.UseIpu();
// init runtime
std::unique_ptr<fd::Runtime> runtime =
std::unique_ptr<fd::Runtime>(new fd::Runtime());
if (!runtime->Init(runtime_option)) {
std::cerr << "--- Init FastDeploy Runitme Failed! "
<< "\n--- Model: " << model_file << std::endl;
return -1;
} else {
std::cout << "--- Init FastDeploy Runitme Done! "
<< "\n--- Model: " << model_file << std::endl;
runtime_option.UseCpu();
// To further configure the Paddle Inference backend, set runtime_option.paddle_infer_option
// refer to https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1PaddleBackendOption.html
runtime_option.paddle_infer_option.enable_mkldnn = true;
fd::Runtime runtime;
assert(runtime.Init(runtime_option));
// Get model's inputs information
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
// Create dummy data filled with 0.5
std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
// Create inputs/outputs tensors
std::vector<fd::FDTensor> inputs(inputs_info.size());
std::vector<fd::FDTensor> outputs;
// Initialize input tensors
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
inputs[0].name = inputs_info[0].name;
// Inference
assert(runtime.Infer(inputs, &outputs));
// Print debug information of outputs
outputs[0].PrintInfo();
// Get the data pointer and print its elements
const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
std::cout << data_ptr[i] << " ";
}
// init input tensor shape
fd::TensorInfo info = runtime->GetInputInfo(0);
info.shape = {1, 3, 224, 224};
std::vector<fd::FDTensor> input_tensors(1);
std::vector<fd::FDTensor> output_tensors(1);
std::vector<float> inputs_data;
inputs_data.resize(1 * 3 * 224 * 224);
for (size_t i = 0; i < inputs_data.size(); ++i) {
inputs_data[i] = std::rand() % 1000 / 1000.0f;
}
input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
//get input name
input_tensors[0].name = info.name;
runtime->Infer(input_tensors, &output_tensors);
output_tensors[0].PrintInfo();
std::cout << std::endl;
return 0;
}
}
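paddle_infer_option exposes more switches than the single enable_mkldnn line above. A short sketch limited to the flags visible elsewhere in this commit (enable_log_info, enable_mkldnn, enable_trt, enable_memory_optimize):

// Sketch: common Paddle Inference switches on runtime_option.paddle_infer_option.
#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

void ConfigurePaddleInfer(fd::RuntimeOption* runtime_option) {
  runtime_option->UsePaddleInferBackend();
  runtime_option->UseCpu();
  // Print the Paddle Inference configuration while loading the model.
  runtime_option->paddle_infer_option.enable_log_info = true;
  // MKLDNN acceleration on CPU, as in the example above.
  runtime_option->paddle_infer_option.enable_mkldnn = true;
  // New switch introduced by this commit; defaults to true.
  runtime_option->paddle_infer_option.enable_memory_optimize = true;
}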

View File

@@ -0,0 +1,77 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/runtime.h"
#include <cassert>
namespace fd = fastdeploy;
int main(int argc, char* argv[]) {
// Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
std::string model_file = "pplcnet/inference.pdmodel";
std::string params_file = "pplcnet/inference.pdiparams";
// configure runtime
// For details on how to configure RuntimeOption, refer to its API doc:
// https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
fd::RuntimeOption runtime_option;
runtime_option.SetModelPath(model_file, params_file);
runtime_option.UsePaddleInferBackend();
runtime_option.UseGpu(0);
// Enable Paddle Inference + TensorRT
// To further configure the Paddle Inference backend, set runtime_option.paddle_infer_option
// refer to https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1PaddleBackendOption.html
runtime_option.paddle_infer_option.enable_trt = true;
// To further configure the TensorRT backend, set runtime_option.trt_option
// refer to https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html
// Use float16 inference to improve performance
runtime_option.trt_option.enable_fp16 = true;
// Cache the TensorRT engine to reduce model initialization time
runtime_option.trt_option.serialize_file = "./pplcnet_model.trt";
fd::Runtime runtime;
assert(runtime.Init(runtime_option));
// Get model's inputs information
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
// Create dummy data filled with 0.5
std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
// Create inputs/outputs tensors
std::vector<fd::FDTensor> inputs(inputs_info.size());
std::vector<fd::FDTensor> outputs;
// Initialize input tensors
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
inputs[0].name = inputs_info[0].name;
// Inference
assert(runtime.Infer(inputs, &outputs));
// Print debug information of outputs
outputs[0].PrintInfo();
// Get the data pointer and print its elements
const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
std::cout << data_ptr[i] << " ";
}
std::cout << std::endl;
return 0;
}
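trt_option.serialize_file trades a slow first run for faster later runs. A rough sketch for observing that difference, assuming Runtime::Init accepts the option object exactly as used above:

// Sketch: time Runtime::Init to see the effect of the cached TensorRT engine.
#include <chrono>
#include <iostream>
#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

void TimedInit(fd::RuntimeOption runtime_option) {
  auto start = std::chrono::steady_clock::now();
  fd::Runtime runtime;
  if (!runtime.Init(runtime_option)) {
    std::cerr << "Init failed" << std::endl;
    return;
  }
  auto end = std::chrono::steady_clock::now();
  std::cout << "Init took "
            << std::chrono::duration_cast<std::chrono::milliseconds>(end - start)
                   .count()
            << " ms" << std::endl;
}

On the first run Init builds and writes ./pplcnet_model.trt; subsequent runs load it and should report a much smaller number.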

View File

@@ -13,49 +13,61 @@
// limitations under the License.
#include "fastdeploy/runtime.h"
#include <cassert>
namespace fd = fastdeploy;
int main(int argc, char* argv[]) {
std::string model_file = "mobilenetv2/inference.pdmodel";
std::string params_file = "mobilenetv2/inference.pdiparams";
// Download from https://bj.bcebos.com/paddle2onnx/model_zoo/pplcnet.tar.gz
std::string model_file = "pplcnet/inference.pdmodel";
std::string params_file = "pplcnet/inference.pdiparams";
// setup option
// configure runtime
// For details on how to configure RuntimeOption, refer to its API doc:
// https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1RuntimeOption.html
fd::RuntimeOption runtime_option;
runtime_option.SetModelPath(model_file, params_file, fd::ModelFormat::PADDLE);
runtime_option.UseGpu(0);
runtime_option.SetModelPath(model_file, params_file);
runtime_option.UseTrtBackend();
runtime_option.EnablePaddleToTrt();
// init runtime
std::unique_ptr<fd::Runtime> runtime =
std::unique_ptr<fd::Runtime>(new fd::Runtime());
if (!runtime->Init(runtime_option)) {
std::cerr << "--- Init FastDeploy Runitme Failed! "
<< "\n--- Model: " << model_file << std::endl;
return -1;
} else {
std::cout << "--- Init FastDeploy Runitme Done! "
<< "\n--- Model: " << model_file << std::endl;
}
// init input tensor shape
fd::TensorInfo info = runtime->GetInputInfo(0);
info.shape = {1, 3, 224, 224};
std::vector<fd::FDTensor> input_tensors(1);
std::vector<fd::FDTensor> output_tensors(1);
std::vector<float> inputs_data;
inputs_data.resize(1 * 3 * 224 * 224);
for (size_t i = 0; i < inputs_data.size(); ++i) {
inputs_data[i] = std::rand() % 1000 / 1000.0f;
}
input_tensors[0].SetExternalData({1, 3, 224, 224}, fd::FDDataType::FP32, inputs_data.data());
//get input name
input_tensors[0].name = info.name;
// Use NVIDIA GPU for inference
// To further configure the TensorRT backend, set runtime_option.trt_option
// refer to https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1TrtBackendOption.html
runtime_option.UseGpu(0);
// Use float16 inference to improve performance
runtime_option.trt_option.enable_fp16 = true;
// Cache the TensorRT engine to reduce model initialization time
runtime_option.trt_option.serialize_file = "./pplcnet_model.trt";
runtime->Infer(input_tensors, &output_tensors);
fd::Runtime runtime;
assert(runtime.Init(runtime_option));
output_tensors[0].PrintInfo();
// Get model's inputs information
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1Runtime.html
std::vector<fd::TensorInfo> inputs_info = runtime.GetInputInfos();
// Create dummy data filled with 0.5
std::vector<float> dummy_data(1 * 3 * 224 * 224, 0.5);
// Create inputs/outputs tensors
std::vector<fd::FDTensor> inputs(inputs_info.size());
std::vector<fd::FDTensor> outputs;
// Initialize input tensors
// API doc refer https://baidu-paddle.github.io/fastdeploy-api/cpp/html/structfastdeploy_1_1FDTensor.html
inputs[0].SetData({1, 3, 224, 224}, fd::FDDataType::FP32, dummy_data.data());
inputs[0].name = inputs_info[0].name;
// Inference
assert(runtime.Infer(inputs, &outputs));
// Print debug information of outputs
outputs[0].PrintInfo();
// Get the data pointer and print its elements
const float* data_ptr = reinterpret_cast<const float*>(outputs[0].GetData());
for (size_t i = 0; i < 10 && i < outputs[0].Numel(); ++i) {
std::cout << data_ptr[i] << " ";
}
std::cout << std::endl;
return 0;
}
}
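The updated examples check Init and Infer with assert, which compiles away under -DNDEBUG. Below is a sketch of the explicit checks a deployed program might keep instead, using only the calls shown above:

// Sketch: explicit error handling instead of assert.
#include <iostream>
#include <vector>
#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

bool InitAndInfer(fd::RuntimeOption runtime_option,
                  std::vector<fd::FDTensor>& inputs,
                  std::vector<fd::FDTensor>* outputs) {
  fd::Runtime runtime;
  if (!runtime.Init(runtime_option)) {
    std::cerr << "--- Init FastDeploy Runtime failed" << std::endl;
    return false;
  }
  if (!runtime.Infer(inputs, outputs)) {
    std::cerr << "--- FastDeploy Runtime inference failed" << std::endl;
    return false;
  }
  return true;
}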

View File

@@ -85,10 +85,10 @@ class BaseBackend {
bool copy_to_fd = true) = 0;
// Optional: For those backends which can share memory
// while creating multiple inference engines with same model file
virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption &runtime_option,
void *stream = nullptr,
virtual std::unique_ptr<BaseBackend> Clone(RuntimeOption& runtime_option,
void* stream = nullptr,
int device_id = -1) {
FDERROR << "Clone no support" << std::endl;
FDERROR << "Clone no support " << runtime_option.backend << " " << stream << " " << device_id << std::endl;
return nullptr;
}

View File

@@ -53,32 +53,46 @@ enum LitePowerMode {
struct LiteBackendOption {
/// Paddle Lite power mode for mobile device.
int power_mode = 3;
/// Number of threads while use CPU
// Number of threads to use when running on CPU
int cpu_threads = 1;
/// Enable use half precision
bool enable_fp16 = false;
/// Inference device, Paddle Lite support CPU/KUNLUNXIN/TIMVX/ASCEND
// Inference device, Paddle Lite supports CPU/KUNLUNXIN/TIMVX/ASCEND
Device device = Device::CPU;
/// Index of inference device
// Index of inference device
int device_id = 0;
/// kunlunxin_l3_workspace_size
int kunlunxin_l3_workspace_size = 0xfffc00;
/// kunlunxin_locked
bool kunlunxin_locked = false;
/// kunlunxin_autotune
bool kunlunxin_autotune = true;
/// kunlunxin_autotune_file
std::string kunlunxin_autotune_file = "";
/// kunlunxin_precision
std::string kunlunxin_precision = "int16";
/// kunlunxin_adaptive_seqlen
bool kunlunxin_adaptive_seqlen = false;
/// kunlunxin_enable_multi_stream
bool kunlunxin_enable_multi_stream = false;
/// Optimized model dir for CxxConfig
std::string optimized_model_dir = "";
/// nnadapter_subgraph_partition_config_path
std::string nnadapter_subgraph_partition_config_path = "";
/// nnadapter_subgraph_partition_config_buffer
std::string nnadapter_subgraph_partition_config_buffer = "";
/// nnadapter_context_properties
std::string nnadapter_context_properties = "";
/// nnadapter_model_cache_dir
std::string nnadapter_model_cache_dir = "";
/// nnadapter_mixed_precision_quantization_config_path
std::string nnadapter_mixed_precision_quantization_config_path = "";
/// nnadapter_dynamic_shape_info
std::map<std::string, std::vector<std::vector<int64_t>>>
nnadapter_dynamic_shape_info = {{"", {{0}}}};
/// nnadapter_device_names
std::vector<std::string> nnadapter_device_names = {};
};
} // namespace fastdeploy
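The struct above is mostly plumbing for Paddle Lite / NNAdapter targets. A sketch of filling a few of its fields directly, assuming the option header is reachable through fastdeploy/runtime.h; the tensor name "x" and the shape lists are placeholders:

// Sketch: populate LiteBackendOption fields declared above.
#include <cstdint>
#include <map>
#include <string>
#include <vector>
#include "fastdeploy/runtime.h"

namespace fd = fastdeploy;

fd::LiteBackendOption MakeLiteOption() {
  fd::LiteBackendOption option;
  option.cpu_threads = 4;
  option.enable_fp16 = true;        // half precision, as declared above
  option.device = fd::Device::CPU;  // or KUNLUNXIN / TIMVX / ASCEND
  // Candidate dynamic shapes per input tensor for NNAdapter devices.
  option.nnadapter_dynamic_shape_info = {
      {"x", {{1, 3, 224, 224}, {4, 3, 224, 224}}}};
  return option;
}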

View File

@@ -25,27 +25,18 @@ namespace fastdeploy {
/*! @brief Option object to configure ONNX Runtime backend
*/
struct OrtBackendOption {
/*
* @brief Level of graph optimization, -1: mean default(Enable all the optimization strategy)/0: disable all the optimization strategy/1: enable basic strategy/2:enable extend strategy/99: enable all
*/
/// Level of graph optimization: -1 means default (enable all optimization strategies), 0 disables all optimizations, 1 enables basic optimizations, 2 enables extended optimizations, 99 enables all
int graph_optimization_level = -1;
/*
* @brief Number of threads to execute the operator, -1: default
*/
/// Number of threads to execute the operator, -1: default
int intra_op_num_threads = -1;
/*
* @brief Number of threads to execute the graph, -1: default. This parameter only will bring effects while the `OrtBackendOption::execution_mode` set to 1.
*/
/// Number of threads to execute the graph, -1: default. This parameter only takes effect when `OrtBackendOption::execution_mode` is set to 1.
int inter_op_num_threads = -1;
/*
* @brief Execution mode for the graph, -1: default(Sequential mode)/0: Sequential mode, execute the operators in graph one by one. /1: Parallel mode, execute the operators in graph parallelly.
*/
/// Execution mode for the graph: -1 means default (sequential mode), 0 means sequential mode (execute the operators in the graph one by one), 1 means parallel mode (execute the operators in the graph in parallel).
int execution_mode = -1;
/// Inference device, OrtBackend supports CPU/GPU
// Inference device, OrtBackend supports CPU/GPU
Device device = Device::CPU;
/// Inference device id
// Inference device id
int device_id = 0;
void* external_stream_ = nullptr;
};
} // namespace fastdeploy

View File

@@ -54,6 +54,8 @@ struct PaddleBackendOption {
bool enable_mkldnn = true;
/// Use Paddle Inference + TensorRT to inference model on GPU
bool enable_trt = false;
/// Whether to enable memory optimization, default true
bool enable_memory_optimize = true;
/*
* @brief IPU option, this will configure the IPU hardware, if inference model in IPU

View File

@@ -41,6 +41,7 @@ void BindPaddleOption(pybind11::module& m) {
.def_readwrite("enable_log_info", &PaddleBackendOption::enable_log_info)
.def_readwrite("enable_mkldnn", &PaddleBackendOption::enable_mkldnn)
.def_readwrite("enable_trt", &PaddleBackendOption::enable_trt)
.def_readwrite("enable_memory_optimize", &PaddleBackendOption::enable_memory_optimize)
.def_readwrite("ipu_option", &PaddleBackendOption::ipu_option)
.def_readwrite("collect_trt_shape",
&PaddleBackendOption::collect_trt_shape)

View File

@@ -147,7 +147,9 @@ bool PaddleBackend::InitFromPaddle(const std::string& model_buffer,
}
config_.SetModelBuffer(model_buffer.c_str(), model_buffer.size(),
params_buffer.c_str(), params_buffer.size());
config_.EnableMemoryOptim();
if (option.enable_memory_optimize) {
config_.EnableMemoryOptim();
}
BuildOption(option);
// The input/output information get from predictor is not right, use

View File

@@ -33,9 +33,8 @@ struct TrtBackendOption {
/// Enable log while converting onnx model to tensorrt
bool enable_log_info = false;
/*
* @brief Enable half precison inference, on some device not support half precision, it will fallback to float32 mode
*/
/// Enable half precision inference; on devices that do not support half precision, it will fall back to float32 mode
bool enable_fp16 = false;
/** \brief Set shape range of input tensor for the model that contain dynamic input shape while using TensorRT backend
@@ -64,9 +63,7 @@ struct TrtBackendOption {
max_shape[tensor_name].assign(max.begin(), max.end());
}
}
/**
* @brief Set cache file path while use TensorRT backend. Loadding a Paddle/ONNX model and initialize TensorRT will take a long time, by this interface it will save the tensorrt engine to `cache_file_path`, and load it directly while execute the code again
*/
/// Set the cache file path when using the TensorRT backend. Loading a Paddle/ONNX model and initializing TensorRT can take a long time; with this option the TensorRT engine is saved to the given file and loaded directly the next time the code runs
std::string serialize_file = "";
// The below parameters may be removed in next version, please do not

View File

@@ -22,4 +22,4 @@ for root, dirs, files in os.walk(third_libs):
for f in files:
if f.strip().count(".so") > 0 or f.strip() == "plugins.xml":
full_path = os.path.join(root, f)
shutil.copy(full_path, os.path.join(bc_dirname, "lib"))
shutil.copy(full_path, os.path.join(bc_dirname, "lib"), follow_symlinks=False)