Mirror of https://github.com/PaddlePaddle/FastDeploy.git (synced 2025-10-09 02:20:17 +08:00)

Commit: Merge branch 'develop' into doc
@@ -715,6 +715,16 @@ if(WITH_ASCEND)
   )
 endif()
 
+if(WITH_CAPI)
+  install(
+    DIRECTORY ${PROJECT_SOURCE_DIR}/c_api/fastdeploy_capi
+    DESTINATION ${CMAKE_INSTALL_PREFIX}/include
+    FILES_MATCHING
+    PATTERN "*.h"
+    PATTERN "*/types_internal.h" EXCLUDE
+  )
+endif()
+
 include(${PROJECT_SOURCE_DIR}/cmake/config_cpack.cmake)
 
 ############################### Building: FastDeploy Python Wheel #############################
@@ -33,6 +33,7 @@ set(ORT_DIRECTORY "@ORT_DIRECTORY@")
 set(OPENVINO_DIRECTORY "@OPENVINO_DIRECTORY@")
 set(RKNN2_TARGET_SOC "@RKNN2_TARGET_SOC@")
 set(WITH_KUNLUNXIN @WITH_KUNLUNXIN@)
+set(WITH_CAPI @WITH_CAPI@)
 # Whether to use FastDeploy static lib. The default
 # value for this option is determined by the SDK
 # build-time options.
@@ -357,6 +358,7 @@ message(STATUS "  CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}")
 message(STATUS "  CMAKE_MODULE_PATH : ${CMAKE_MODULE_PATH}")
 message(STATUS "")
 message(STATUS "  WITH_GPU : ${WITH_GPU}")
+message(STATUS "  WITH_CAPI : ${WITH_CAPI}")
 message(STATUS "  ENABLE_ORT_BACKEND : ${ENABLE_ORT_BACKEND}")
 message(STATUS "  ENABLE_RKNPU2_BACKEND : ${ENABLE_RKNPU2_BACKEND}")
 message(STATUS "  ENABLE_SOPHGO_BACKEND : ${ENABLE_SOPHGO_BACKEND}")
@@ -365,6 +367,7 @@ message(STATUS " ENABLE_POROS_BACKEND : ${ENABLE_POROS_BACKEND}")
 message(STATUS "  ENABLE_OPENVINO_BACKEND : ${ENABLE_OPENVINO_BACKEND}")
 message(STATUS "  ENABLE_TRT_BACKEND : ${ENABLE_TRT_BACKEND}")
 message(STATUS "  ENABLE_LITE_BACKEND : ${ENABLE_LITE_BACKEND}")
 
 if(ENABLE_PADDLE_BACKEND)
   message(STATUS "  Paddle Inference version : ${PADDLEINFERENCE_VERSION}")
 endif()
benchmark/cpp/benchmark_ppyolov8.cc (115 changed lines; mode: Normal file → Executable file)
@@ -12,114 +12,29 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/benchmark/utils.h"
-#include "fastdeploy/vision.h"
+#include "macros.h"
 #include "flags.h"
+#include "option.h"
 
-#ifdef WIN32
-const char sep = '\\';
-#else
-const char sep = '/';
-#endif
-
-bool RunModel(std::string model_dir, std::string image_file, size_t warmup,
-              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
-              std::string gpu_mem_file_name) {
+int main(int argc, char* argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  auto im = cv::imread(FLAGS_image);
   // Initialization
   auto option = fastdeploy::RuntimeOption();
   if (!CreateRuntimeOption(&option)) {
     PrintUsage();
     return false;
   }
-  auto model_file = model_dir + sep + "model.pdmodel";
-  auto params_file = model_dir + sep + "model.pdiparams";
-  auto config_file = model_dir + sep + "infer_cfg.yml";
-  if (FLAGS_profile_mode == "runtime") {
-    option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
-  }
-  auto model = fastdeploy::vision::detection::PaddleYOLOv8(
+  PrintBenchmarkInfo();
+  auto model_file = FLAGS_model + sep + "model.pdmodel";
+  auto params_file = FLAGS_model + sep + "model.pdiparams";
+  auto config_file = FLAGS_model + sep + "infer_cfg.yml";
+  auto model_ppyolov8 = fastdeploy::vision::detection::PaddleYOLOv8(
       model_file, params_file, config_file, option);
-  if (!model.Initialized()) {
-    std::cerr << "Failed to initialize." << std::endl;
-    return false;
-  }
-  auto im = cv::imread(image_file);
-  // For Runtime
-  if (FLAGS_profile_mode == "runtime") {
-    fastdeploy::vision::DetectionResult res;
-    if (!model.Predict(im, &res)) {
-      std::cerr << "Failed to predict." << std::endl;
-      return false;
-    }
-    double profile_time = model.GetProfileTime() * 1000;
-    std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
-    auto vis_im = fastdeploy::vision::VisDetection(im, res);
-    cv::imwrite("vis_result.jpg", vis_im);
-    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
-  } else {
-    // For End2End
-    // Step1: warm up for warmup times
-    std::cout << "Warmup " << warmup << " times..." << std::endl;
-    for (int i = 0; i < warmup; i++) {
-      fastdeploy::vision::DetectionResult res;
-      if (!model.Predict(im, &res)) {
-        std::cerr << "Failed to predict." << std::endl;
-        return false;
-      }
-    }
-    std::vector<float> end2end_statis;
-    // Step2: repeat for repeats times
-    std::cout << "Counting time..." << std::endl;
-    fastdeploy::TimeCounter tc;
-    fastdeploy::vision::DetectionResult res;
-    for (int i = 0; i < repeats; i++) {
-      if (FLAGS_collect_memory_info && i % dump_period == 0) {
-        fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
-#if defined(WITH_GPU)
-        fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
-                                                         FLAGS_device_id);
-#endif
-      }
-      tc.Start();
-      if (!model.Predict(im, &res)) {
-        std::cerr << "Failed to predict." << std::endl;
-        return false;
-      }
-      tc.End();
-      end2end_statis.push_back(tc.Duration() * 1000);
-    }
-    float end2end = std::accumulate(end2end_statis.end() - repeats,
-                                    end2end_statis.end(), 0.f) /
-                    repeats;
-    std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
-    auto vis_im = fastdeploy::vision::VisDetection(im, res);
-    cv::imwrite("vis_result.jpg", vis_im);
-    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
-  }
-
-  return true;
-}
-
-int main(int argc, char* argv[]) {
-  google::ParseCommandLineFlags(&argc, &argv, true);
-  int repeats = FLAGS_repeat;
-  int warmup = FLAGS_warmup;
-  int dump_period = FLAGS_dump_period;
-  std::string cpu_mem_file_name = "result_cpu.txt";
-  std::string gpu_mem_file_name = "result_gpu.txt";
-  // Run model
-  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
-               cpu_mem_file_name, gpu_mem_file_name) != true) {
-    exit(1);
-  }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-#if defined(WITH_GPU)
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
-#endif
-  }
+  fastdeploy::vision::DetectionResult res;
+  BENCHMARK_MODEL(model_ppyolov8, model_ppyolov8.Predict(im, &res))
+  auto vis_im = fastdeploy::vision::VisDetection(im, res);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
   return 0;
 }
@@ -12,96 +12,26 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "fastdeploy/benchmark/utils.h"
-#include "fastdeploy/vision.h"
+#include "macros.h"
 #include "flags.h"
+#include "option.h"
 
-bool RunModel(std::string model_file, std::string image_file, size_t warmup,
-              size_t repeats, size_t sampling_interval) {
+int main(int argc, char* argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  auto im = cv::imread(FLAGS_image);
   // Initialization
   auto option = fastdeploy::RuntimeOption();
   if (!CreateRuntimeOption(&option)) {
     PrintUsage();
     return false;
   }
-  if (FLAGS_profile_mode == "runtime") {
-    option.EnableProfiling(FLAGS_include_h2d_d2h, repeats, warmup);
-  }
-  auto model = fastdeploy::vision::detection::YOLOv5(model_file, "", option);
-  if (!model.Initialized()) {
-    std::cerr << "Failed to initialize." << std::endl;
-    return false;
-  }
-  auto im = cv::imread(image_file);
-  // For collect memory info
-  fastdeploy::benchmark::ResourceUsageMonitor resource_moniter(
-      sampling_interval, FLAGS_device_id);
-  if (FLAGS_collect_memory_info) {
-    resource_moniter.Start();
-  }
-  // For Runtime
-  if (FLAGS_profile_mode == "runtime") {
-    fastdeploy::vision::DetectionResult res;
-    if (!model.Predict(im, &res)) {
-      std::cerr << "Failed to predict." << std::endl;
-      return false;
-    }
-    double profile_time = model.GetProfileTime() * 1000;
-    std::cout << "Runtime(ms): " << profile_time << "ms." << std::endl;
-    auto vis_im = fastdeploy::vision::VisDetection(im, res);
-    cv::imwrite("vis_result.jpg", vis_im);
-    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
-  } else {
-    // For End2End
-    // Step1: warm up for warmup times
-    std::cout << "Warmup " << warmup << " times..." << std::endl;
-    for (int i = 0; i < warmup; i++) {
-      fastdeploy::vision::DetectionResult res;
-      if (!model.Predict(im, &res)) {
-        std::cerr << "Failed to predict." << std::endl;
-        return false;
-      }
-    }
-    // Step2: repeat for repeats times
-    std::cout << "Counting time..." << std::endl;
-    std::cout << "Repeat " << repeats << " times..." << std::endl;
-    fastdeploy::vision::DetectionResult res;
-    fastdeploy::TimeCounter tc;
-    tc.Start();
-    for (int i = 0; i < repeats; i++) {
-      if (!model.Predict(im, &res)) {
-        std::cerr << "Failed to predict." << std::endl;
-        return false;
-      }
-    }
-    tc.End();
-    double end2end = tc.Duration() / repeats * 1000;
-    std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
-    auto vis_im = fastdeploy::vision::VisDetection(im, res);
-    cv::imwrite("vis_result.jpg", vis_im);
-    std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
-  }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = resource_moniter.GetMaxCpuMem();
-    float gpu_mem = resource_moniter.GetMaxGpuMem();
-    float gpu_util = resource_moniter.GetMaxGpuUtil();
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
-    std::cout << "gpu_util: " << gpu_util << std::endl;
-    resource_moniter.Stop();
-  }
-
-  return true;
-}
-
-int main(int argc, char* argv[]) {
-  google::ParseCommandLineFlags(&argc, &argv, true);
-  int repeats = FLAGS_repeat;
-  int warmup = FLAGS_warmup;
-  int sampling_interval = FLAGS_sampling_interval;
-  // Run model
-  if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) {
-    exit(1);
-  }
+  PrintBenchmarkInfo();
+  auto model_yolov5 =
+      fastdeploy::vision::detection::YOLOv5(FLAGS_model, "", option);
+  fastdeploy::vision::DetectionResult res;
+  BENCHMARK_MODEL(model_yolov5, model_yolov5.Predict(im, &res))
+  auto vis_im = fastdeploy::vision::VisDetection(im, res);
+  cv::imwrite("vis_result.jpg", vis_im);
+  std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
   return 0;
 }
@@ -15,7 +15,12 @@
 #pragma once
 
 #include "gflags/gflags.h"
-#include "fastdeploy/utils/perf.h"
+
+#ifdef WIN32
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
 
 DEFINE_string(model, "", "Directory of the inference model.");
 DEFINE_string(image, "", "Path of the image file.");
@@ -50,74 +55,34 @@ void PrintUsage() {
   std::cout << "Default value of use_fp16: false" << std::endl;
 }
 
-bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
-  if (FLAGS_device == "gpu") {
-    option->UseGpu(FLAGS_device_id);
-    if (FLAGS_backend == "ort") {
-      option->UseOrtBackend();
-    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleInferBackend();
-    } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
-      option->UseTrtBackend();
-      if (FLAGS_backend == "paddle_trt") {
-        option->EnablePaddleToTrt();
-      }
-      if (FLAGS_use_fp16) {
-        option->EnableTrtFP16();
-      }
-    } else if (FLAGS_backend == "default") {
-      return true;
-    } else {
-      std::cout << "While inference with GPU, only support "
-                   "default/ort/paddle/trt/paddle_trt now, "
-                << FLAGS_backend << " is not supported." << std::endl;
-      return false;
-    }
-  } else if (FLAGS_device == "cpu") {
-    option->SetCpuThreadNum(FLAGS_cpu_thread_nums);
-    if (FLAGS_backend == "ort") {
-      option->UseOrtBackend();
-    } else if (FLAGS_backend == "ov") {
-      option->UseOpenVINOBackend();
-    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleInferBackend();
-    } else if (FLAGS_backend == "lite") {
-      option->UsePaddleLiteBackend();
-      if (FLAGS_use_fp16) {
-        option->EnableLiteFP16();
-      }
-    } else if (FLAGS_backend == "default") {
-      return true;
-    } else {
-      std::cout << "While inference with CPU, only support "
-                   "default/ort/ov/paddle/lite now, "
-                << FLAGS_backend << " is not supported." << std::endl;
-      return false;
-    }
-  } else if (FLAGS_device == "xpu") {
-    option->UseKunlunXin(FLAGS_device_id);
-    if (FLAGS_backend == "ort") {
-      option->UseOrtBackend();
-    } else if (FLAGS_backend == "paddle") {
-      option->UsePaddleInferBackend();
-    } else if (FLAGS_backend == "lite") {
-      option->UsePaddleLiteBackend();
-      if (FLAGS_use_fp16) {
-        option->EnableLiteFP16();
-      }
-    } else if (FLAGS_backend == "default") {
-      return true;
-    } else {
-      std::cout << "While inference with XPU, only support "
-                   "default/ort/paddle/lite now, "
-                << FLAGS_backend << " is not supported." << std::endl;
-      return false;
-    }
-  } else {
-    std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device
-              << " is not supported." << std::endl;
-    return false;
-  }
-  return true;
-}
+void PrintBenchmarkInfo() {
+  // Get model name
+  std::vector<std::string> model_names;
+  fastdeploy::benchmark::Split(FLAGS_model, model_names, sep);
+  // Save benchmark info
+  std::stringstream ss;
+  ss.precision(3);
+  ss << "\n======= Model Info =======\n";
+  ss << "model_name: " << model_names[model_names.size() - 1] << std::endl;
+  ss << "profile_mode: " << FLAGS_profile_mode << std::endl;
+  if (FLAGS_profile_mode == "runtime") {
+    ss << "include_h2d_d2h: " << FLAGS_include_h2d_d2h << std::endl;
+  }
+  ss << "\n======= Backend Info =======\n";
+  ss << "warmup: " << FLAGS_warmup << std::endl;
+  ss << "repeats: " << FLAGS_repeat << std::endl;
+  ss << "device: " << FLAGS_device << std::endl;
+  if (FLAGS_device == "gpu") {
+    ss << "device_id: " << FLAGS_device_id << std::endl;
+  }
+  ss << "backend: " << FLAGS_backend << std::endl;
+  ss << "cpu_thread_nums: " << FLAGS_cpu_thread_nums << std::endl;
+  ss << "use_fp16: " << FLAGS_use_fp16 << std::endl;
+  ss << "collect_memory_info: " << FLAGS_collect_memory_info << std::endl;
+  if (FLAGS_collect_memory_info) {
+    ss << "sampling_interval: " << std::to_string(FLAGS_sampling_interval)
+       << "ms" << std::endl;
+  }
+  std::cout << ss.str() << std::endl;
+  return;
+}
benchmark/cpp/macros.h (new executable file, 69 lines)
@@ -0,0 +1,69 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "fastdeploy/benchmark/utils.h"
#include "fastdeploy/utils/perf.h"

#define BENCHMARK_MODEL(MODEL_NAME, BENCHMARK_FUNC) \
{ \
  if (!MODEL_NAME.Initialized()) { \
    std::cerr << "Failed to initialize." << std::endl; \
    return 0; \
  } \
  auto __im__ = cv::imread(FLAGS_image); \
  fastdeploy::benchmark::ResourceUsageMonitor __resource_moniter__( \
      FLAGS_sampling_interval, FLAGS_device_id); \
  if (FLAGS_collect_memory_info) { \
    __resource_moniter__.Start(); \
  } \
  if (FLAGS_profile_mode == "runtime") { \
    if (!BENCHMARK_FUNC) { \
      std::cerr << "Failed to predict." << std::endl; \
      return 0; \
    } \
    double __profile_time__ = MODEL_NAME.GetProfileTime() * 1000; \
    std::cout << "Runtime(ms): " << __profile_time__ << "ms." << std::endl; \
  } else { \
    std::cout << "Warmup " << FLAGS_warmup << " times..." << std::endl; \
    for (int __i__ = 0; __i__ < FLAGS_warmup; __i__++) { \
      if (!BENCHMARK_FUNC) { \
        std::cerr << "Failed to predict." << std::endl; \
        return 0; \
      } \
    } \
    std::cout << "Counting time..." << std::endl; \
    std::cout << "Repeat " << FLAGS_repeat << " times..." << std::endl; \
    fastdeploy::TimeCounter __tc__; \
    __tc__.Start(); \
    for (int __i__ = 0; __i__ < FLAGS_repeat; __i__++) { \
      if (!BENCHMARK_FUNC) { \
        std::cerr << "Failed to predict." << std::endl; \
        return 0; \
      } \
    } \
    __tc__.End(); \
    double __end2end__ = __tc__.Duration() / FLAGS_repeat * 1000; \
    std::cout << "End2End(ms): " << __end2end__ << "ms." << std::endl; \
  } \
  if (FLAGS_collect_memory_info) { \
    float __cpu_mem__ = __resource_moniter__.GetMaxCpuMem(); \
    float __gpu_mem__ = __resource_moniter__.GetMaxGpuMem(); \
    float __gpu_util__ = __resource_moniter__.GetMaxGpuUtil(); \
    std::cout << "cpu_rss_mb: " << __cpu_mem__ << "MB." << std::endl; \
    std::cout << "gpu_rss_mb: " << __gpu_mem__ << "MB." << std::endl; \
    std::cout << "gpu_util: " << __gpu_util__ << std::endl; \
    __resource_moniter__.Stop(); \
  } \
}
benchmark/cpp/option.h (new executable file, 92 lines)
@@ -0,0 +1,92 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "fastdeploy/vision.h"

static bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
  if (FLAGS_profile_mode == "runtime") {
    option->EnableProfiling(FLAGS_include_h2d_d2h, FLAGS_repeat, FLAGS_warmup);
  }
  if (FLAGS_device == "gpu") {
    option->UseGpu(FLAGS_device_id);
    if (FLAGS_backend == "ort") {
      option->UseOrtBackend();
    } else if (FLAGS_backend == "paddle") {
      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "trt" || FLAGS_backend == "paddle_trt") {
      option->UseTrtBackend();
      if (FLAGS_backend == "paddle_trt") {
        option->EnablePaddleToTrt();
      }
      if (FLAGS_use_fp16) {
        option->EnableTrtFP16();
      }
    } else if (FLAGS_backend == "default") {
      return true;
    } else {
      std::cout << "While inference with GPU, only support "
                   "default/ort/paddle/trt/paddle_trt now, "
                << FLAGS_backend << " is not supported." << std::endl;
      return false;
    }
  } else if (FLAGS_device == "cpu") {
    option->SetCpuThreadNum(FLAGS_cpu_thread_nums);
    if (FLAGS_backend == "ort") {
      option->UseOrtBackend();
    } else if (FLAGS_backend == "ov") {
      option->UseOpenVINOBackend();
    } else if (FLAGS_backend == "paddle") {
      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "lite") {
      option->UsePaddleLiteBackend();
      if (FLAGS_use_fp16) {
        option->EnableLiteFP16();
      }
    } else if (FLAGS_backend == "default") {
      return true;
    } else {
      std::cout << "While inference with CPU, only support "
                   "default/ort/ov/paddle/lite now, "
                << FLAGS_backend << " is not supported." << std::endl;
      return false;
    }
  } else if (FLAGS_device == "xpu") {
    option->UseKunlunXin(FLAGS_device_id);
    if (FLAGS_backend == "ort") {
      option->UseOrtBackend();
    } else if (FLAGS_backend == "paddle") {
      option->UsePaddleInferBackend();
    } else if (FLAGS_backend == "lite") {
      option->UsePaddleLiteBackend();
      if (FLAGS_use_fp16) {
        option->EnableLiteFP16();
      }
    } else if (FLAGS_backend == "default") {
      return true;
    } else {
      std::cout << "While inference with XPU, only support "
                   "default/ort/paddle/lite now, "
                << FLAGS_backend << " is not supported." << std::endl;
      return false;
    }
  } else {
    std::cerr << "Only support device CPU/GPU/XPU now, " << FLAGS_device
              << " is not supported." << std::endl;
    return false;
  }

  return true;
}
@@ -19,6 +19,7 @@ if(NOT WITH_CAPI)
   return()
 endif()
 
+configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/config.h)
 file(GLOB_RECURSE DEPLOY_CAPI_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/*.cc)
 if(NOT ENABLE_VISION)
   file(GLOB_RECURSE DEPLOY_VISION_CAPI_SRCS ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/c_api/fastdeploy_capi/vision/*.cc)
c_api/fastdeploy_capi/config.h (new executable file, 22 lines)
@@ -0,0 +1,22 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#ifndef ENABLE_VISION
#define ENABLE_VISION
#endif

#ifndef ENABLE_TEXT
/* #undef ENABLE_TEXT */
#endif
c_api/fastdeploy_capi/config.h.in (new executable file, 22 lines)
@@ -0,0 +1,22 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#ifndef ENABLE_VISION
#cmakedefine ENABLE_VISION
#endif

#ifndef ENABLE_TEXT
#cmakedefine ENABLE_TEXT
#endif
c_api/fastdeploy_capi/enum_variables.h (new file, 71 lines)
@@ -0,0 +1,71 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#define FD_ENUM(type) \
  typedef int32_t type; \
  enum

FD_ENUM(FD_C_ModelFormat){
    AUTOREC,      ///< Auto recognize the model format by model file name
    PADDLE,       ///< Model with paddlepaddle format
    ONNX,         ///< Model with ONNX format
    RKNN,         ///< Model with RKNN format
    TORCHSCRIPT,  ///< Model with TorchScript format
    SOPHGO,       ///< Model with SOPHGO format
};

FD_ENUM(FD_C_rknpu2_CpuName){
    RK356X = 0, /* run on RK356X. */
    RK3588 = 1, /* default, run on RK3588. */
    UNDEFINED,
};

FD_ENUM(FD_C_rknpu2_CoreMask){
    RKNN_NPU_CORE_AUTO = 0,  //< default, run on NPU core randomly.
    RKNN_NPU_CORE_0 = 1,     //< run on NPU core 0.
    RKNN_NPU_CORE_1 = 2,     //< run on NPU core 1.
    RKNN_NPU_CORE_2 = 4,     //< run on NPU core 2.
    RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 |
                        RKNN_NPU_CORE_1,  //< run on NPU core 0 and core 1.
    RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 |
                          RKNN_NPU_CORE_2,  //< run on NPU core 0, 1 and 2.
    RKNN_NPU_CORE_UNDEFINED,
};

FD_ENUM(FD_C_LitePowerMode){
    LITE_POWER_HIGH = 0,       ///< Use Lite Backend with high power mode
    LITE_POWER_LOW = 1,        ///< Use Lite Backend with low power mode
    LITE_POWER_FULL = 2,       ///< Use Lite Backend with full power mode
    LITE_POWER_NO_BIND = 3,    ///< Use Lite Backend with no bind power mode
    LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high mode
    LITE_POWER_RAND_LOW = 5    ///< Use Lite Backend with rand low power mode
};

FD_ENUM(FD_C_ResultType){
    UNKNOWN_RESULT,
    CLASSIFY,
    DETECTION,
    SEGMENTATION,
    OCR,
    MOT,
    FACE_DETECTION,
    FACE_ALIGNMENT,
    FACE_RECOGNITION,
    MATTING,
    MASK,
    KEYPOINT_DETECTION,
    HEADPOSE,
};
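As a reading aid: `FD_ENUM(type)` typedefs `type` to `int32_t` and then opens an anonymous `enum`, so the brace-enclosed list at each use site supplies plain integer constants. A minimal sketch of what the first use above expands to, with a hypothetical constant for illustration:

```c
#include <stdint.h>

/* Rough expansion of FD_ENUM(FD_C_ModelFormat){...}; from the header above. */
typedef int32_t FD_C_ModelFormat;
enum { AUTOREC, PADDLE, ONNX, RKNN, TORCHSCRIPT, SOPHGO };

/* Hypothetical usage: the enumerators are ordinary integer constants. */
static const FD_C_ModelFormat kDefaultFormat = PADDLE;
```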
@@ -58,43 +58,3 @@
 typedef int8_t FD_C_Bool;
 #define TRUE 1
 #define FALSE 0
-
-#define FD_ENUM(type) \
-  typedef int32_t type; \
-  enum
-
-FD_ENUM(FD_C_ModelFormat){
-    AUTOREC,      ///< Auto recognize the model format by model file name
-    PADDLE,       ///< Model with paddlepaddle format
-    ONNX,         ///< Model with ONNX format
-    RKNN,         ///< Model with RKNN format
-    TORCHSCRIPT,  ///< Model with TorchScript format
-    SOPHGO,       ///< Model with SOPHGO format
-};
-
-FD_ENUM(FD_C_rknpu2_CpuName){
-    RK356X = 0, /* run on RK356X. */
-    RK3588 = 1, /* default,run on RK3588. */
-    UNDEFINED,
-};
-
-FD_ENUM(FD_C_rknpu2_CoreMask){
-    RKNN_NPU_CORE_AUTO = 0,  //< default, run on NPU core randomly.
-    RKNN_NPU_CORE_0 = 1,     //< run on NPU core 0.
-    RKNN_NPU_CORE_1 = 2,     //< run on NPU core 1.
-    RKNN_NPU_CORE_2 = 4,     //< run on NPU core 2.
-    RKNN_NPU_CORE_0_1 = RKNN_NPU_CORE_0 |
-                        RKNN_NPU_CORE_1,  //< run on NPU core 1 and core 2.
-    RKNN_NPU_CORE_0_1_2 = RKNN_NPU_CORE_0_1 |
-                          RKNN_NPU_CORE_2,  //< run on NPU core 1 and core 2.
-    RKNN_NPU_CORE_UNDEFINED,
-};
-
-FD_ENUM(FD_C_LitePowerMode){
-    LITE_POWER_HIGH = 0,       ///< Use Lite Backend with high power mode
-    LITE_POWER_LOW = 1,        ///< Use Lite Backend with low power mode
-    LITE_POWER_FULL = 2,       ///< Use Lite Backend with full power mode
-    LITE_POWER_NO_BIND = 3,    ///< Use Lite Backend with no bind power mode
-    LITE_POWER_RAND_HIGH = 4,  ///< Use Lite Backend with rand high mode
-    LITE_POWER_RAND_LOW = 5    ///< Use Lite Backend with rand low power mode
-};
c_api/fastdeploy_capi/fd_type.cc (new file, 40 lines)
@@ -0,0 +1,40 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "fastdeploy_capi/fd_type.h"

#include <opencv2/imgcodecs.hpp>

#include "fastdeploy_capi/fd_common.h"

#ifdef __cplusplus
extern "C" {
#endif

FD_C_Mat FD_C_Imread(const char* imgpath) {
  cv::Mat image = cv::imread(imgpath);
  return new cv::Mat(image);
}

FD_C_Bool FD_C_Imwrite(const char* savepath, FD_C_Mat img) {
  cv::Mat cv_img = *(reinterpret_cast<cv::Mat*>(img));
  bool result = cv::imwrite(savepath, cv_img);
  return result;
}

void FD_C_DestroyMat(FD_C_Mat mat) { delete reinterpret_cast<cv::Mat*>(mat); }

#ifdef __cplusplus
}
#endif
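Taken together, `FD_C_Imread`, `FD_C_Imwrite`, and `FD_C_DestroyMat` give C callers a complete load/save/free cycle for images. A minimal sketch of the round trip (the file paths are placeholders):

```c
#include "fastdeploy_capi/fd_type.h"

int main(void) {
  /* FD_C_Imread heap-allocates a cv::Mat and returns an opaque handle;
     per the __fd_give annotation in fd_type.h, the caller owns it. */
  FD_C_Mat img = FD_C_Imread("input.jpg");      /* placeholder path */
  FD_C_Bool ok = FD_C_Imwrite("copy.jpg", img); /* placeholder path */
  FD_C_DestroyMat(img); /* release the handle we own */
  return ok ? 0 : 1;
}
```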
@@ -17,7 +17,8 @@
 #include <stdint.h>
 #include <stdio.h>
 
-#include "fastdeploy_capi/fd_common.h"  // NOLINT
+#include "fastdeploy_capi/enum_variables.h"
+#include "fastdeploy_capi/fd_common.h"
 
 typedef struct FD_C_OneDimArrayUint8 {
   size_t size;
@@ -65,3 +66,19 @@ typedef struct FD_C_TwoDimArrayFloat {
 } FD_C_TwoDimArrayFloat;  // std::vector<std::vector<float>>
 
 typedef void* FD_C_Mat;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+FASTDEPLOY_CAPI_EXPORT extern __fd_give FD_C_Mat
+FD_C_Imread(const char* imgpath);
+
+FASTDEPLOY_CAPI_EXPORT extern FD_C_Bool FD_C_Imwrite(const char* savepath,
+                                                     __fd_keep FD_C_Mat);
+
+FASTDEPLOY_CAPI_EXPORT extern void FD_C_DestroyMat(__fd_take FD_C_Mat mat);
+
+#ifdef __cplusplus
+}
+#endif
@@ -17,7 +17,9 @@
 #include "fastdeploy/utils/utils.h"
 #include "fastdeploy_capi/types_internal.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
 FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper() {
   FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper =
@@ -28,7 +30,7 @@ FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper() {
   return fd_c_runtime_option_wrapper;
 }
 
-void FD_C_DestroyRuntimeOption(
+void FD_C_DestroyRuntimeOptionWrapper(
     __fd_take FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper) {
   delete fd_c_runtime_option_wrapper;
 }
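The rename makes destruction symmetric with `FD_C_CreateRuntimeOptionWrapper`. A minimal create/configure/destroy sketch against this API, assuming the `FD_C_RuntimeOptionWrapperUse*` setters are declared alongside it in runtime_option.h (device id 0 is an arbitrary placeholder):

```c
#include "fastdeploy_capi/runtime_option.h"

int main(void) {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  FD_C_RuntimeOptionWrapperUseGpu(option, 0); /* placeholder device id */
  /* ... hand `option` to a model wrapper here ... */
  FD_C_DestroyRuntimeOptionWrapper(option);   /* new name after this change */
  return 0;
}
```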
@@ -404,15 +406,6 @@ void FD_C_RuntimeOptionWrapperUseIpu(
                                 batches_per_step);
 }
 
-void FD_C_RuntimeOptionWrapperSetIpuConfig(
-    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
-    FD_C_Bool enable_fp16, int replica_num, float available_memory_proportion,
-    FD_C_Bool enable_half_partial) {
-  auto& runtime_option = CHECK_AND_CONVERT_FD_TYPE(RuntimeOptionWrapper,
-                                                   fd_c_runtime_option_wrapper);
-  runtime_option->SetIpuConfig(enable_fp16, replica_num,
-                               available_memory_proportion,
-                               enable_half_partial);
-}
-
-}  // extern "C"
+#ifdef __cplusplus
+}
+#endif
@@ -14,9 +14,7 @@
 
 #pragma once
 
-#include <memory>
-
-#include "fastdeploy_capi/fd_common.h"
+#include "fastdeploy_capi/fd_type.h"
 
 typedef struct FD_C_RuntimeOptionWrapper FD_C_RuntimeOptionWrapper;
 
@@ -499,19 +497,6 @@ FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperUseIpu(
     int device_num, int micro_batch_size, FD_C_Bool enable_pipelining,
     int batches_per_step);
 
-/** \brief Set IPU config.
- *
- * \param[in] fd_c_runtime_option_wrapper pointer to FD_C_RuntimeOptionWrapper object
- * \param[in] enable_fp16 enable fp16.
- * \param[in] replica_num the number of graph replication.
- * \param[in] available_memory_proportion the available memory proportion for matmul/conv.
- * \param[in] enable_half_partial enable fp16 partial for matmul, only work with fp16.
- */
-FASTDEPLOY_CAPI_EXPORT extern void FD_C_RuntimeOptionWrapperSetIpuConfig(
-    __fd_keep FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
-    FD_C_Bool enable_fp16, int replica_num, float available_memory_proportion,
-    FD_C_Bool enable_half_partial);
-
 #ifdef __cplusplus
 }  // extern "C"
 #endif
c_api/fastdeploy_capi/vision.h (new file, 26 lines)
@@ -0,0 +1,26 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once

#include "fastdeploy_capi/config.h"

#ifdef ENABLE_VISION
#include "fastdeploy_capi/vision/classification/ppcls/model.h"
#include "fastdeploy_capi/vision/detection/ppdet/model.h"
#include "fastdeploy_capi/vision/result.h"
#include "fastdeploy_capi/vision/visualize.h"
#endif

#include "fastdeploy_capi/fd_type.h"
#include "fastdeploy_capi/runtime_option.h"
@@ -16,7 +16,9 @@
 
 #include "fastdeploy_capi/types_internal.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
 FD_C_PaddleClasModelWrapper* FD_C_CreatePaddleClasModelWrapper(
     const char* model_file, const char* params_file, const char* config_file,
@@ -50,4 +52,7 @@ FD_C_Bool FD_C_PaddleClasModelWrapperPredict(
       ClassifyResultWrapper, fd_c_classify_result_wrapper);
   return paddleclas_model->Predict(im, classify_result.get());
 }
 
+#ifdef __cplusplus
 }
+#endif
@@ -17,7 +17,9 @@
 #include "fastdeploy_capi/types_internal.h"
 #include "fastdeploy_capi/vision/visualize.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
 FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper(
     const char* model_file, const char* params_file, const char* config_file,
@@ -50,4 +52,7 @@ FD_C_Bool FD_C_PPYOLOEWrapperPredict(
       DetectionResultWrapper, fd_c_detection_result_wrapper);
   return ppyoloe_model->Predict(im, detection_result.get());
 }
 
+#ifdef __cplusplus
 }
+#endif
@@ -17,7 +17,9 @@
 #include "fastdeploy/utils/utils.h"
 #include "fastdeploy_capi/types_internal.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
 // Classification Results
 
@@ -235,4 +237,6 @@ FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapperFromData(
 
   return fd_c_detection_result_wrapper;
 }
+#ifdef __cplusplus
 }
+#endif
@@ -24,22 +24,6 @@ typedef struct FD_C_DetectionResultWrapper FD_C_DetectionResultWrapper;
 extern "C" {
 #endif
 
-FD_ENUM(FD_C_ResultType){
-    UNKNOWN_RESULT,
-    CLASSIFY,
-    DETECTION,
-    SEGMENTATION,
-    OCR,
-    MOT,
-    FACE_DETECTION,
-    FACE_ALIGNMENT,
-    FACE_RECOGNITION,
-    MATTING,
-    MASK,
-    KEYPOINT_DETECTION,
-    HEADPOSE,
-};
-
 typedef struct FD_C_ClassifyResult {
   FD_C_OneDimArrayInt32 label_ids;
   FD_C_OneDimArrayFloat scores;
@@ -17,7 +17,9 @@
 #include "fastdeploy/vision/visualize/visualize.h"
 #include "fastdeploy_capi/types_internal.h"
 
+#ifdef __cplusplus
 extern "C" {
+#endif
 
 FD_C_Mat FD_C_VisDetection(FD_C_Mat im,
                            FD_C_DetectionResult* fd_c_detection_result,
@@ -32,4 +34,6 @@ FD_C_Mat FD_C_VisDetection(FD_C_Mat im,
                            line_size, font_size);
   return new cv::Mat(result);
 }
+#ifdef __cplusplus
 }
+#endif
examples/vision/detection/paddledetection/c/CMakeLists.txt (new file, 13 lines)
@@ -0,0 +1,13 @@
PROJECT(infer_demo C)
CMAKE_MINIMUM_REQUIRED (VERSION 3.10)

# Path to the downloaded and extracted FastDeploy SDK
option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")

include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)

# Add the FastDeploy dependency headers
include_directories(${FASTDEPLOY_INCS})

add_executable(infer_ppyoloe_demo ${PROJECT_SOURCE_DIR}/infer_ppyoloe.c)
target_link_libraries(infer_ppyoloe_demo ${FASTDEPLOY_LIBS})
examples/vision/detection/paddledetection/c/README.md (new file, 200 lines)
@@ -0,0 +1,200 @@
English | [Simplified Chinese](README_CN.md)
# PaddleDetection C Deployment Example

This directory provides examples (`infer_xxx.c`) that quickly complete the deployment of PaddleDetection models, including PPYOLOE, on CPU/GPU.

Two steps require confirmation before deployment:

- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and sample code for your development environment. Refer to [FastDeploy Precompiled Library](../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)

Taking inference on Linux as an example, the compilation test can be completed by executing the following commands in this directory. FastDeploy version 1.0.4 or above (x.x.x>=1.0.4) is required to support this model.

```bash
# ppyoloe is taken as the example for inference deployment

mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose an appropriate version from the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j

# Download the PPYOLOE model file and test images
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
tar xvf ppyoloe_crn_l_300e_coco.tgz

# CPU inference
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 0
# GPU inference
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1
```

The above commands work on Linux and macOS. For how to use the SDK on Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../../docs/en/faq/use_sdk_on_windows.md)

## PaddleDetection C Interface

### RuntimeOption

```c
FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper()
```

> Create a RuntimeOption object and return a pointer to manipulate it.
>
> **Return**
> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): Pointer to manipulate the RuntimeOption object.

```c
void FD_C_RuntimeOptionWrapperUseCpu(
    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper)
```

> Enable CPU inference.
>
> **Params**
>
> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): Pointer to manipulate the RuntimeOption object.

```c
void FD_C_RuntimeOptionWrapperUseGpu(
    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
    int gpu_id)
```
> Enable GPU inference.
>
> **Params**
>
> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): Pointer to manipulate the RuntimeOption object.
> * **gpu_id**(int): GPU id
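Mirroring the demo's third command-line argument (0 for CPU, 1 for GPU), the two calls above can be combined into a small device-selection step. A sketch, assuming `fastdeploy_capi/vision.h` pulls in the RuntimeOption declarations and using gpu_id 0 as a placeholder:

```c
#include <stdlib.h>
#include "fastdeploy_capi/vision.h"

int main(int argc, char** argv) {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  /* argv[1]: 0 selects CPU, 1 selects GPU, like the infer demo above. */
  if (argc > 1 && atoi(argv[1]) == 1) {
    FD_C_RuntimeOptionWrapperUseGpu(option, 0); /* placeholder gpu_id */
  } else {
    FD_C_RuntimeOptionWrapperUseCpu(option);
  }
  /* ... pass `option` to a model constructor such as FD_C_CreatesPPYOLOEWrapper ... */
  return 0;
}
```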
### Model

```c
FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper(
    const char* model_file, const char* params_file, const char* config_file,
    FD_C_RuntimeOptionWrapper* runtime_option,
    const FD_C_ModelFormat model_format)
```

> Create a PPYOLOE model object and return a pointer to manipulate it.
>
> **Params**
>
> * **model_file**(const char*): Model file path
> * **params_file**(const char*): Parameter file path
> * **config_file**(const char*): Configuration file path, which is the deployment yaml file exported by PaddleDetection
> * **runtime_option**(FD_C_RuntimeOptionWrapper*): Backend inference configuration. None by default, which is the default configuration
> * **model_format**(FD_C_ModelFormat): Model format. Paddle format by default
>
> **Return**
> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): Pointer to manipulate the PPYOLOE object.
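A sketch of constructing the model from the tarball downloaded earlier; the file names inside the directory (model.pdmodel, model.pdiparams, infer_cfg.yml) follow the layout used by the FastDeploy benchmarks, and `PADDLE` comes from fastdeploy_capi/enum_variables.h:

```c
#include "fastdeploy_capi/vision.h"

int main(void) {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  FD_C_RuntimeOptionWrapperUseCpu(option);
  /* Paths assume the ppyoloe_crn_l_300e_coco tarball downloaded above. */
  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
      "ppyoloe_crn_l_300e_coco/model.pdmodel",
      "ppyoloe_crn_l_300e_coco/model.pdiparams",
      "ppyoloe_crn_l_300e_coco/infer_cfg.yml",
      option, PADDLE);
  (void)model; /* prediction and visualization are shown further below */
  return 0;
}
```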
#### Read and write image

```c
FD_C_Mat FD_C_Imread(const char* imgpath)
```

> Read an image and return a pointer to cv::Mat.
>
> **Params**
>
> * **imgpath**(const char*): image path
>
> **Return**
>
> * **imgmat**(FD_C_Mat): pointer to the cv::Mat object which holds the image.

```c
FD_C_Bool FD_C_Imwrite(const char* savepath, FD_C_Mat img);
```

> Write an image to a file.
>
> **Params**
>
> * **savepath**(const char*): save path
> * **img**(FD_C_Mat): pointer to a cv::Mat object
>
> **Return**
>
> * **result**(FD_C_Bool): bool to indicate success or failure

#### Prediction

```c
FD_C_Bool FD_C_PPYOLOEWrapperPredict(
    __fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img,
    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper)
```
>
> Predict an image and generate the detection result.
>
> **Params**
> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): pointer to manipulate the PPYOLOE object
> * **img**(FD_C_Mat): pointer to a cv::Mat object, which can be obtained by the FD_C_Imread interface
> * **result**(FD_C_DetectionResultWrapper*): Detection result, including the detection box and the confidence of each box. Refer to [Vision Model Prediction Result](../../../../../docs/api/vision_results/) for DetectionResult

#### Result

```c
FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapper();
```
>
> Create a DetectionResult object to hold the detection result, and return a pointer to manipulate it.
>
> **Return**
> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): pointer to manipulate the DetectionResult object

```c
FD_C_DetectionResult* FD_C_DetectionResultWrapperGetData(
    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper)
```
>
> Get the C DetectionResult structure from FD_C_DetectionResultWrapper, whose fields can be accessed directly.
>
> **Params**
> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): pointer to manipulate the DetectionResult object
>
> **Return**
> * **fd_c_detection_result**(FD_C_DetectionResult*): pointer to the C DetectionResult structure

```c
FD_C_Mat FD_C_VisDetection(FD_C_Mat im, FD_C_DetectionResult* fd_detection_result,
                           float score_threshold, int line_size, float font_size);
```
>
> Visualize detection results and return the visualization image.
>
> **Params**
> * **im**(FD_C_Mat): pointer to the input image
> * **fd_detection_result**(FD_C_DetectionResult*): pointer to the C DetectionResult structure
> * **score_threshold**(float): score threshold
> * **line_size**(int): line size
> * **font_size**(float): font size
>
> **Return**
> * **vis_im**(FD_C_Mat): pointer to the visualization image.
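Putting the interfaces above together, a sketch of a complete CPU inference program in the spirit of `infer_ppyoloe.c`; the demo source itself is not shown in this diff, so the control flow and the visualization parameters (0.5 threshold, line size 2, font size 0.5) are illustrative rather than the demo's exact values:

```c
#include <stdio.h>
#include "fastdeploy_capi/vision.h"

int main(void) {
  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  FD_C_RuntimeOptionWrapperUseCpu(option);

  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
      "ppyoloe_crn_l_300e_coco/model.pdmodel",
      "ppyoloe_crn_l_300e_coco/model.pdiparams",
      "ppyoloe_crn_l_300e_coco/infer_cfg.yml",
      option, PADDLE);

  FD_C_Mat im = FD_C_Imread("000000014439.jpg");
  FD_C_DetectionResultWrapper* result_wrapper =
      FD_C_CreateDetectionResultWrapper();
  if (!FD_C_PPYOLOEWrapperPredict(model, im, result_wrapper)) {
    printf("Failed to predict.\n");
    return 1;
  }

  /* Access the raw C structure behind the wrapper, then visualize. */
  FD_C_DetectionResult* result =
      FD_C_DetectionResultWrapperGetData(result_wrapper);
  FD_C_Mat vis_im = FD_C_VisDetection(im, result, 0.5f, 2, 0.5f);
  FD_C_Imwrite("vis_result.jpg", vis_im);
  printf("Visualized result saved in ./vis_result.jpg\n");

  FD_C_DestroyMat(vis_im);
  FD_C_DestroyMat(im);
  return 0;
}
```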
- [Model Description](../../)
- [Python Deployment](../python)
- [Vision Model prediction results](../../../../../docs/api/vision_results/)
- [How to switch the model inference backend engine](../../../../../docs/en/faq/how_to_change_backend.md)
examples/vision/detection/paddledetection/c/README_CN.md (new file, 204 lines)
@@ -0,0 +1,204 @@
|
|||||||
|
[English](README.md) | Simplified Chinese

# PaddleDetection C Deployment Example

This directory provides `infer_xxx.c` examples that call the C API to quickly deploy the PaddleDetection model PPYOLOE on CPU/GPU.

Two steps need to be confirmed before deployment:

- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the prebuilt deployment library and samples code according to your development environment. Refer to [FastDeploy Prebuilt Libraries](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)

Taking inference on Linux as an example, run the following commands in this directory to compile and test. FastDeploy version 1.0.4 or above (x.x.x>=1.0.4) is required to support this model.

```bash
# Take PPYOLOE as the example for inference deployment

mkdir build
cd build
# Download the FastDeploy prebuilt library; choose an appropriate version from the `FastDeploy Prebuilt Libraries` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j

# Download the PPYOLOE model files and a test image
wget https://bj.bcebos.com/paddlehub/fastdeploy/ppyoloe_crn_l_300e_coco.tgz
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
tar xvf ppyoloe_crn_l_300e_coco.tgz

# CPU inference
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 0
# GPU inference
./infer_ppyoloe_demo ./ppyoloe_crn_l_300e_coco 000000014439.jpg 1
```

The above commands only apply to Linux or macOS. For SDK usage on Windows, refer to:
- [How to use the FastDeploy C++ SDK on Windows](../../../../../docs/cn/faq/use_sdk_on_windows.md)

If you deploy with Huawei Ascend NPU, initialize the deployment environment first by referring to:
- [How to deploy with Huawei Ascend NPU](../../../../../docs/cn/faq/use_sdk_on_ascend.md)

## PaddleDetection C API

### Configuration

```c
FD_C_RuntimeOptionWrapper* FD_C_CreateRuntimeOptionWrapper()
```

> Create a RuntimeOption configuration object, and return a pointer to manipulate it.
>
> **Return**
>
> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): pointer to the RuntimeOption object

```c
void FD_C_RuntimeOptionWrapperUseCpu(
    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper)
```

> Enable inference on CPU.
>
> **Params**
>
> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): pointer to the RuntimeOption object

```c
void FD_C_RuntimeOptionWrapperUseGpu(
    FD_C_RuntimeOptionWrapper* fd_c_runtime_option_wrapper,
    int gpu_id)
```

> Enable inference on GPU.
>
> **Params**
>
> * **fd_c_runtime_option_wrapper**(FD_C_RuntimeOptionWrapper*): pointer to the RuntimeOption object
> * **gpu_id**(int): GPU card number

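As a minimal sketch, selecting the inference device with these two interfaces looks like the following (the GPU id 0 is only an example):

```c
FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
// run on CPU ...
FD_C_RuntimeOptionWrapperUseCpu(option);
// ... or on GPU card 0
FD_C_RuntimeOptionWrapperUseGpu(option, 0);
```
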
### Model

```c
FD_C_PPYOLOEWrapper* FD_C_CreatesPPYOLOEWrapper(
    const char* model_file, const char* params_file, const char* config_file,
    FD_C_RuntimeOptionWrapper* runtime_option,
    const FD_C_ModelFormat model_format)
```

> Create a PPYOLOE model, and return a pointer to manipulate it.
>
> **Params**
>
> * **model_file**(const char*): path of the model file
> * **params_file**(const char*): path of the parameter file
> * **config_file**(const char*): path of the configuration file, i.e. the deployment yaml file exported by PaddleDetection
> * **runtime_option**(FD_C_RuntimeOptionWrapper*): pointer to the RuntimeOption describing the backend inference configuration
> * **model_format**(FD_C_ModelFormat): model format
>
> **Return**
>
> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): pointer to the PPYOLOE model object

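For example, a sketch of creating the model with the files downloaded in the build step above; `option` is the RuntimeOption created in the Configuration section, and `PADDLE` is the model-format value used by the accompanying infer_ppyoloe.c:

```c
FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
    "ppyoloe_crn_l_300e_coco/model.pdmodel",
    "ppyoloe_crn_l_300e_coco/model.pdiparams",
    "ppyoloe_crn_l_300e_coco/infer_cfg.yml", option, PADDLE);
```
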
#### Reading and Writing Images

```c
FD_C_Mat FD_C_Imread(const char* imgpath)
```

> Read an image, and return a pointer to the cv::Mat data.
>
> **Params**
>
> * **imgpath**(const char*): path of the image file
>
> **Return**
>
> * **imgmat**(FD_C_Mat): pointer to the image data cv::Mat

```c
FD_C_Bool FD_C_Imwrite(const char* savepath, FD_C_Mat img);
```

> Write an image to a file.
>
> **Params**
>
> * **savepath**(const char*): path to save the image
> * **img**(FD_C_Mat): pointer to the image data
>
> **Return**
>
> * **result**(FD_C_Bool): whether the operation succeeded

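A small sketch of a read-and-write round trip with these two interfaces; the file names are placeholders, and FD_C_DestroyMat (used in the accompanying infer_ppyoloe.c) releases the image:

```c
FD_C_Mat img = FD_C_Imread("test.jpg");
if (!FD_C_Imwrite("copy.jpg", img)) {
  printf("Failed to write image.\n");
}
FD_C_DestroyMat(img);
```
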
#### Predict Function

```c
FD_C_Bool FD_C_PPYOLOEWrapperPredict(
    __fd_take FD_C_PPYOLOEWrapper* fd_c_ppyoloe_wrapper, FD_C_Mat img,
    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper)
```

> Model prediction interface; takes an input image and generates the detection result directly.
>
> **Params**
>
> * **fd_c_ppyoloe_wrapper**(FD_C_PPYOLOEWrapper*): pointer to the PPYOLOE model
> * **img**(FD_C_Mat): pointer to the input image as a cv::Mat object, which can be obtained by calling FD_C_Imread
> * **result**(FD_C_DetectionResultWrapper*): pointer to the detection result, including the detection boxes and the confidence of each box. Refer to [Vision Model Prediction Results](../../../../../docs/api/vision_results/) for details on DetectionResult

#### Predict Result

```c
FD_C_DetectionResultWrapper* FD_C_CreateDetectionResultWrapper();
```

> Create a DetectionResult object to keep the inference result, and return a pointer to manipulate it.
>
> **Return**
>
> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): pointer to the DetectionResult object

```c
FD_C_DetectionResult* FD_C_DetectionResultWrapperGetData(
    FD_C_DetectionResultWrapper* fd_c_detection_result_wrapper)
```

> Extract the plain C DetectionResult structure from the DetectionResult object and return a pointer to it, through which the fields of the structure can be accessed directly.
>
> **Params**
>
> * **fd_c_detection_result_wrapper**(FD_C_DetectionResultWrapper*): pointer to the DetectionResult object
>
> **Return**
>
> * **fd_c_detection_result**(FD_C_DetectionResult*): pointer to the plain C DetectionResult structure

```c
FD_C_Mat FD_C_VisDetection(FD_C_Mat im, FD_C_DetectionResult* fd_detection_result,
                           float score_threshold, int line_size, float font_size);
```

> Visualize the detection result and return the visualization image.
>
> **Params**
>
> * **im**(FD_C_Mat): pointer to the input image
> * **fd_detection_result**(FD_C_DetectionResult*): pointer to the plain C DetectionResult structure
> * **score_threshold**(float): detection score threshold
> * **line_size**(int): line size of the detection boxes
> * **font_size**(float): font size of the labels
>
> **Return**
>
> * **vis_im**(FD_C_Mat): pointer to the visualization image

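A minimal sketch of the result-handling lifecycle around Predict, assuming `model` and `img` were created as in the sections above:

```c
FD_C_DetectionResultWrapper* result_wrapper = FD_C_CreateDetectionResultWrapper();
if (FD_C_PPYOLOEWrapperPredict(model, img, result_wrapper)) {
  FD_C_DetectionResult* result = FD_C_DetectionResultWrapperGetData(result_wrapper);
  FD_C_Mat vis_img = FD_C_VisDetection(img, result, 0.5, 1, 0.5);
  FD_C_Imwrite("vis_result.jpg", vis_img);
  // release the visualization image and the extracted C structure when done
  FD_C_DestroyMat(vis_img);
  FD_C_DestroyDetectionResult(result);
}
FD_C_DestroyDetectionResultWrapper(result_wrapper);
```
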
- [Model Description](../../)
- [Python Deployment](../python)
- [Vision Model Prediction Results](../../../../../docs/api/vision_results/)
- [How to switch the model inference backend engine](../../../../../docs/cn/faq/how_to_change_backend.md)

124 examples/vision/detection/paddledetection/c/infer_ppyoloe.c Normal file
@@ -0,0 +1,124 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <stdio.h>
#include <stdlib.h>

#include "fastdeploy_capi/vision.h"

#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif

void CpuInfer(const char* model_dir, const char* image_file) {
  // Build the full paths of the model, parameter and config files.
  char model_file[100];
  char params_file[100];
  char config_file[100];
  int max_size = 99;
  snprintf(model_file, max_size, "%s%c%s", model_dir, sep, "model.pdmodel");
  snprintf(params_file, max_size, "%s%c%s", model_dir, sep, "model.pdiparams");
  snprintf(config_file, max_size, "%s%c%s", model_dir, sep, "infer_cfg.yml");

  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  FD_C_RuntimeOptionWrapperUseCpu(option);

  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
      model_file, params_file, config_file, option, PADDLE);

  FD_C_Mat im = FD_C_Imread(image_file);

  FD_C_DetectionResultWrapper* result_wrapper =
      FD_C_CreateDetectionResultWrapper();

  if (!FD_C_PPYOLOEWrapperPredict(model, im, result_wrapper)) {
    printf("Failed to predict.\n");
    return;
  }

  // Extract the plain C result structure and visualize it.
  FD_C_DetectionResult* result =
      FD_C_DetectionResultWrapperGetData(result_wrapper);
  FD_C_Mat vis_im = FD_C_VisDetection(im, result, 0.5, 1, 0.5);

  FD_C_Imwrite("vis_result.jpg", vis_im);
  printf("Visualized result saved in ./vis_result.jpg\n");

  // Release all wrappers and images.
  FD_C_DestroyRuntimeOptionWrapper(option);
  FD_C_DestroyPPYOLOEWrapper(model);
  FD_C_DestroyDetectionResultWrapper(result_wrapper);
  FD_C_DestroyDetectionResult(result);
  FD_C_DestroyMat(im);
  FD_C_DestroyMat(vis_im);
}

void GpuInfer(const char* model_dir, const char* image_file) {
  char model_file[100];
  char params_file[100];
  char config_file[100];
  int max_size = 99;
  snprintf(model_file, max_size, "%s%c%s", model_dir, sep, "model.pdmodel");
  snprintf(params_file, max_size, "%s%c%s", model_dir, sep, "model.pdiparams");
  snprintf(config_file, max_size, "%s%c%s", model_dir, sep, "infer_cfg.yml");

  FD_C_RuntimeOptionWrapper* option = FD_C_CreateRuntimeOptionWrapper();
  FD_C_RuntimeOptionWrapperUseGpu(option, 0);

  FD_C_PPYOLOEWrapper* model = FD_C_CreatesPPYOLOEWrapper(
      model_file, params_file, config_file, option, PADDLE);

  FD_C_Mat im = FD_C_Imread(image_file);

  FD_C_DetectionResultWrapper* result_wrapper =
      FD_C_CreateDetectionResultWrapper();

  if (!FD_C_PPYOLOEWrapperPredict(model, im, result_wrapper)) {
    printf("Failed to predict.\n");
    return;
  }

  FD_C_DetectionResult* result =
      FD_C_DetectionResultWrapperGetData(result_wrapper);
  FD_C_Mat vis_im = FD_C_VisDetection(im, result, 0.5, 1, 0.5);

  FD_C_Imwrite("vis_result.jpg", vis_im);
  printf("Visualized result saved in ./vis_result.jpg\n");

  FD_C_DestroyRuntimeOptionWrapper(option);
  FD_C_DestroyPPYOLOEWrapper(model);
  FD_C_DestroyDetectionResultWrapper(result_wrapper);
  FD_C_DestroyDetectionResult(result);
  FD_C_DestroyMat(im);
  FD_C_DestroyMat(vis_im);
}

int main(int argc, char* argv[]) {
  if (argc < 4) {
    printf(
        "Usage: infer_demo path/to/model_dir path/to/image run_option, "
        "e.g ./infer_model ./ppyoloe_model_dir ./test.jpeg 0"
        "\n");
    printf(
        "The data type of run_option is int, 0: run with cpu; 1: run with gpu"
        "\n");
    return -1;
  }

  if (atoi(argv[3]) == 0) {
    CpuInfer(argv[1], argv[2]);
  } else if (atoi(argv[3]) == 1) {
    GpuInfer(argv[1], argv[2]);
  }
  return 0;
}

@@ -4,12 +4,14 @@

 ## Supported Model List
 
-FastDeploy currently supports deploying the following PaddleDetection models with RKNPU2:
+The following PaddleDetection models have been tested on RKNPU2:
 
 - Picodet
-- PPYOLOE
+- PPYOLOE(int8)
 - YOLOV8
+
+For detailed speed information, see the [RKNPU2 model speed list](../../../../../docs/cn/faq/rknpu2/rknpu2.md)
 
 ## Preparing PaddleDetection Deployment Models and Converting Models
 
 Before deploying on RKNPU, the Paddle model must be converted to an RKNN model. The steps are as follows:
@@ -20,8 +22,79 @@ Before deploying on RKNPU, the Paddle model must be converted to an RKNN model
 
 ## Model Conversion Example
 
-- [Picodet RKNPU2 model conversion guide](./picodet.md)
-- [YOLOv8 RKNPU2 model conversion guide](./yolov8.md)
+### Notes
+
+Pay attention to the following points when deploying PaddleDetection models on RKNPU2:
+
+* The model must be exported with Decode included
+* Since RKNPU2 does not support NMS, the output nodes must be truncated before the NMS node
+* Due to the limitation of the RKNPU2 Div operator, the output nodes also need to be truncated before the Div operator
+
+### Converting the Paddle Model to ONNX
+
+Rockchip's rknn-toolkit2 does not yet support exporting Paddle models directly to RKNN, so the Paddle model is first exported to ONNX, and the ONNX model is then converted to RKNN.
+
+```bash
+# Take Picodet as an example
+# Download the static-graph Paddle model and extract it
+wget https://paddledet.bj.bcebos.com/deploy/Inference/picodet_s_416_coco_lcnet.tar
+tar xvf picodet_s_416_coco_lcnet.tar
+
+# Convert the static graph to an ONNX model; note that save_file here should match the archive name
+paddle2onnx --model_dir picodet_s_416_coco_lcnet \
+            --model_filename model.pdmodel \
+            --params_filename model.pdiparams \
+            --save_file picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+            --enable_dev_version True
+
+# Fix the input shape
+python -m paddle2onnx.optimize --input_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+                               --output_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
+                               --input_shape_dict "{'image':[1,3,416,416]}"
+```
+
+### Writing the yaml File
+
+**Modify the normalize parameter**
+
+If you need to run the normalize operation on the NPU, configure the normalize parameters according to your model, for example:
+
+```yaml
+mean:
+  -
+    - 123.675
+    - 116.28
+    - 103.53
+std:
+  -
+    - 58.395
+    - 57.12
+    - 57.375
+```
+
+**Modify the outputs parameter**
+
+The output node names of the converted model vary with the Paddle2ONNX version. Visualize the model with [Netron](https://netron.app) and locate the NonMaxSuppression node marked by the blue box; the node names in the red boxes are the target names.
+
+For example, after visualizing with Netron we get the following picture:
+
+*(image: Netron visualization highlighting the NonMaxSuppression node and its input nodes)*
+
+The two node names marked by the red boxes are p2o.Div.79 and p2o.Concat.9, so the outputs parameter should be modified as follows:
+
+```yaml
+outputs_nodes:
+  - 'p2o.Div.79'
+  - 'p2o.Concat.9'
+```
+
+### Converting the ONNX Model to RKNN
+
+For convenience, we provide a Python script; with the pre-configured config file you can quickly convert an ONNX model to an RKNN model:
+
+```bash
+python tools/rknpu2/export.py --config_path tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml \
+                              --target_platform rk3588
+```
 
 ## Other Links
@@ -1,68 +0,0 @@
# Picodet RKNPU2 Model Conversion Guide

All of the following steps are done on an Ubuntu machine; please follow the configuration guide to set up the model conversion environment first. Taking Picodet-s as an example, this guide shows how to convert a PaddleDetection model to an RKNN model.

### Exporting the ONNX Model

```bash
# Download the static-graph Paddle model and extract it
wget https://paddledet.bj.bcebos.com/deploy/Inference/picodet_s_416_coco_lcnet.tar
tar xvf picodet_s_416_coco_lcnet.tar

# Convert the static graph to an ONNX model; note that save_file here should match the archive name
paddle2onnx --model_dir picodet_s_416_coco_lcnet \
            --model_filename model.pdmodel \
            --params_filename model.pdiparams \
            --save_file picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
            --enable_dev_version True

# Fix the input shape
python -m paddle2onnx.optimize --input_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
                               --output_model picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx \
                               --input_shape_dict "{'image':[1,3,416,416]}"
```

### Writing the Model Export Config File

Taking the RK3568 RKNN model as an example, we need to edit tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml to convert the ONNX model to an RKNN model.

**Modify the normalize parameter**

If you need to run the normalize operation on the NPU, configure the normalize parameters according to your model, for example:

```yaml
mean:
  -
    - 127.5
    - 127.5
    - 127.5
std:
  -
    - 127.5
    - 127.5
    - 127.5
```

**Modify the outputs parameter**

The output node names of the converted model vary with the Paddle2ONNX version. Visualize the model with [Netron](https://netron.app) and locate the NonMaxSuppression node marked by the blue box; the node names in the red boxes are the target names.

For example, after visualizing with Netron we get the following picture:

*(image: Netron visualization highlighting the NonMaxSuppression node and its input nodes)*

The two node names marked by the red boxes are p2o.Div.79 and p2o.Concat.9, so the outputs parameter should be modified as follows:

```yaml
outputs_nodes: [ 'p2o.Div.79','p2o.Concat.9' ]
```

### Converting the Model

```bash
# Convert the ONNX model to an RKNN model
# The model will be generated in the picodet_s_320_coco_lcnet_non_postprocess directory
python tools/rknpu2/export.py --config_path tools/rknpu2/config/picodet_s_416_coco_lcnet_unquantized.yaml \
                              --target_platform rk3588
```
@@ -45,15 +45,16 @@ if __name__ == "__main__":
 
     # Configure the runtime and load the model
     runtime_option = fd.RuntimeOption()
-    runtime_option.use_cpu()
+    runtime_option.use_rknpu2()
 
     model = fd.vision.detection.PPYOLOE(
         model_file,
         params_file,
         config_file,
         runtime_option=runtime_option,
-        model_format=fd.ModelFormat.ONNX)
+        model_format=fd.ModelFormat.RKNN)
+    model.preprocessor.disable_normalize()
+    model.preprocessor.disable_permute()
     model.postprocessor.apply_decode_and_nms()
 
     # Predict the detection result of the image
@@ -1,50 +0,0 @@
# YOLOv8 RKNPU2 Model Conversion Guide

All of the following steps are done on an Ubuntu machine; please follow the configuration guide to set up the model conversion environment first. Taking YOLOv8 as an example, this guide shows how to convert a PaddleDetection model to an RKNN model.

### Exporting the ONNX Model

```bash
# Download the static-graph Paddle model and extract it

# Convert the static graph to an ONNX model; note that save_file here should match the archive name
paddle2onnx --model_dir yolov8_n_500e_coco \
            --model_filename model.pdmodel \
            --params_filename model.pdiparams \
            --save_file yolov8_n_500e_coco/yolov8_n_500e_coco.onnx \
            --enable_dev_version True

# Fix the input shape
python -m paddle2onnx.optimize --input_model yolov8_n_500e_coco/yolov8_n_500e_coco.onnx \
                               --output_model yolov8_n_500e_coco/yolov8_n_500e_coco.onnx \
                               --input_shape_dict "{'image':[1,3,640,640],'scale_factor':[1,2]}"
```

### Writing the Model Export Config File

**Modify the outputs parameter**

The output node names of the converted model vary with the Paddle2ONNX version. Visualize the model with [Netron](https://netron.app) and locate the NonMaxSuppression node marked by the blue box; the node names in the red boxes are the target names.

For example, after visualizing with Netron we get the following picture:

*(image: Netron visualization highlighting the NonMaxSuppression node and its input nodes)*

The two node names marked by the red boxes are p2o.Div.1 and p2o.Concat.49, so the outputs parameter should be modified as follows:

```yaml
outputs_nodes: [ 'p2o.Div.1','p2o.Concat.49' ]
```

### Converting the Model

```bash
# Convert the ONNX model to an RKNN model
# Convert the non-quantized model; it will be generated in the yolov8_n directory
python tools/rknpu2/export.py --config_path tools/rknpu2/config/yolov8_n_unquantized.yaml \
                              --target_platform rk3588

# Convert the fully quantized model; it will be generated in the yolov8_n directory
python tools/rknpu2/export.py --config_path tools/rknpu2/config/yolov8_n_quantized.yaml \
                              --target_platform rk3588
```
@@ -13,8 +13,8 @@
 // limitations under the License.
 
 #include <sys/types.h>
-#if defined(__linux__) || defined(__ANDROID__)
-#include <unistd.h>
+#ifdef __linux__
+#include <sys/resource.h>
 #endif
 #include <cmath>
@@ -23,8 +23,7 @@
 namespace fastdeploy {
 namespace benchmark {
 
-// Remove the ch characters at both ends of str
-static std::string strip(const std::string& str, char ch = ' ') {
+std::string Strip(const std::string& str, char ch) {
   int i = 0;
   while (str[i] == ch) {
     i++;
@@ -36,9 +35,8 @@ static std::string strip(const std::string& str, char ch = ' ') {
   return str.substr(i, j + 1 - i);
 }
 
-// Split string
-static void split(const std::string& s, std::vector<std::string>& tokens,
-                  char delim = ' ') {
+void Split(const std::string& s, std::vector<std::string>& tokens,
+           char delim) {
   tokens.clear();
   size_t lastPos = s.find_first_not_of(delim, 0);
   size_t pos = s.find(delim, lastPos);
@@ -54,7 +52,7 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
     : is_supported_(false),
       sampling_interval_(sampling_interval_ms),
      gpu_id_(gpu_id) {
-#if defined(__linux__) || defined(__ANDROID__)
+#ifdef __linux__
   is_supported_ = true;
 #else
   is_supported_ = false;
@@ -67,7 +65,9 @@ ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
 }
 
 void ResourceUsageMonitor::Start() {
-  if (!is_supported_) return;
+  if (!is_supported_) {
+    return;
+  }
   if (check_memory_thd_ != nullptr) {
     FDINFO << "Memory monitoring has already started!" << std::endl;
     return;
@@ -77,20 +77,24 @@ void ResourceUsageMonitor::Start() {
   check_memory_thd_.reset(new std::thread(([this]() {
     // Note we retrieve the memory usage at the very beginning of the thread.
     while (true) {
-      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
-      // get max_cpu_mem
-      std::vector<std::string> cpu_tokens;
-      split(cpu_mem_info, cpu_tokens, ' ');
-      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#ifdef __linux__
+      rusage res;
+      if (getrusage(RUSAGE_SELF, &res) == 0) {
+        max_cpu_mem_ =
+            std::max(max_cpu_mem_, static_cast<float>(res.ru_maxrss / 1024.0));
+      }
+#endif
 #if defined(WITH_GPU)
       std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
       // get max_gpu_mem and max_gpu_util
       std::vector<std::string> gpu_tokens;
-      split(gpu_mem_info, gpu_tokens, ',');
+      Split(gpu_mem_info, gpu_tokens, ',');
      max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
      max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
 #endif
-      if (stop_signal_) break;
+      if (stop_signal_) {
+        break;
+      }
      std::this_thread::sleep_for(
          std::chrono::milliseconds(sampling_interval_));
     }
@@ -121,26 +125,6 @@ void ResourceUsageMonitor::StopInternal() {
   check_memory_thd_.reset(nullptr);
 }
 
-std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
-  std::string result = "";
-#if defined(__linux__) || defined(__ANDROID__)
-  int iPid = static_cast<int>(getpid());
-  std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
-  FILE* pp = popen(command.data(), "r");
-  if (!pp) return "";
-  char tmp[1024];
-
-  while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    result += tmp;
-  }
-  pclose(pp);
-#else
-  FDASSERT(false,
-           "Currently collect cpu memory info only supports Linux and ANDROID.")
-#endif
-  return result;
-}
-
 std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
   std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
@@ -65,20 +65,26 @@ class FASTDEPLOY_DECL ResourceUsageMonitor {
 
  private:
   void StopInternal();
-  // Get current cpu memory info
-  std::string GetCurrentCpuMemoryInfo();
   // Get current gpu memory info
   std::string GetCurrentGpuMemoryInfo(int device_id);
 
   bool is_supported_ = false;
   bool stop_signal_ = false;
   const int sampling_interval_;
-  float max_cpu_mem_ = 0.0f;
-  float max_gpu_mem_ = 0.0f;
+  float max_cpu_mem_ = 0.0f;  // MB
+  float max_gpu_mem_ = 0.0f;  // MB
   float max_gpu_util_ = 0.0f;
   const int gpu_id_ = 0;
   std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
 };
 
+// Remove the ch characters at both ends of str
+FASTDEPLOY_DECL std::string Strip(const std::string& str, char ch = ' ');
+
+// Split string
+FASTDEPLOY_DECL void Split(const std::string& s,
+                           std::vector<std::string>& tokens,
+                           char delim = ' ');
+
 }  // namespace benchmark
 }  // namespace fastdeploy
@@ -15,9 +15,9 @@
 #include <dlpack/dlpack.h>
 
 #include "fastdeploy/core/fd_type.h"
-#include "fastdeploy/utils/utils.h"
 #include "fastdeploy/fastdeploy_model.h"
 #include "fastdeploy/pybind/main.h"
+#include "fastdeploy/utils/utils.h"
 
 namespace fastdeploy {
 
@@ -68,8 +68,8 @@ DLDataType FDToDlpackType(FDDataType fd_dtype) {
       break;
 
     default:
-      FDASSERT(false,
-          "Convert to DlPack, FDType \"%s\" is not supported.", Str(fd_dtype).c_str());
+      FDASSERT(false, "Convert to DlPack, FDType \"%s\" is not supported.",
+               Str(fd_dtype).c_str());
   }
 
   dl_dtype.code = dl_code;
@@ -77,10 +77,8 @@ DLDataType FDToDlpackType(FDDataType fd_dtype) {
   return dl_dtype;
 }
 
-FDDataType
-DlpackToFDType(const DLDataType& data_type) {
-  FDASSERT(data_type.lanes == 1,
-           "FDTensor does not support dlpack lanes != 1")
+FDDataType DlpackToFDType(const DLDataType& data_type) {
+  FDASSERT(data_type.lanes == 1, "FDTensor does not support dlpack lanes != 1")
 
   if (data_type.code == DLDataTypeCode::kDLFloat) {
     if (data_type.bits == 16) {
@@ -152,7 +150,7 @@ pybind11::capsule FDTensorToDLPack(FDTensor& fd_tensor) {
   dlpack_tensor->dl_tensor.dtype = FDToDlpackType(fd_tensor.dtype);
 
   dlpack_tensor->dl_tensor.device.device_id = fd_tensor.device_id;
-  if(fd_tensor.device == Device::GPU) {
+  if (fd_tensor.device == Device::GPU) {
     if (fd_tensor.is_pinned_memory) {
       dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCUDAHost;
     } else {
@@ -162,8 +160,8 @@ pybind11::capsule FDTensorToDLPack(FDTensor& fd_tensor) {
     dlpack_tensor->dl_tensor.device.device_type = DLDeviceType::kDLCPU;
   }
 
-  return pybind11::capsule(
-      static_cast<void*>(dlpack_tensor), "dltensor", &DeleteUnusedDltensor);
+  return pybind11::capsule(static_cast<void*>(dlpack_tensor), "dltensor",
+                           &DeleteUnusedDltensor);
 }
 
 FDTensor FDTensorFromDLPack(const std::string& name,
@@ -178,9 +176,8 @@ FDTensor FDTensorFromDLPack(const std::string& name,
   int64_t* strides = dl_managed_tensor->dl_tensor.strides;
 
   int ndim = dl_managed_tensor->dl_tensor.ndim;
-  std::vector<int64_t> dims(
-      dl_managed_tensor->dl_tensor.shape,
-      dl_managed_tensor->dl_tensor.shape + ndim);
+  std::vector<int64_t> dims(dl_managed_tensor->dl_tensor.shape,
+                            dl_managed_tensor->dl_tensor.shape + ndim);
 
   // Check if the input is contiguous and in C order
   if (strides != nullptr) {
@@ -196,8 +193,8 @@ FDTensor FDTensorFromDLPack(const std::string& name,
     }
 
     FDASSERT(is_contiguous_c_order,
-        "DLPack tensor is not contiguous. Only contiguous DLPack "
-        "tensors that are stored in C-Order are supported.");
+             "DLPack tensor is not contiguous. Only contiguous DLPack "
+             "tensors that are stored in C-Order are supported.");
   }
 
   Device device;
@@ -216,21 +213,20 @@ FDTensor FDTensorFromDLPack(const std::string& name,
       is_pinned_memory = true;
       break;
     default:
-      FDASSERT(false,
+      FDASSERT(
+          false,
           ("DLDevice type " +
            std::to_string(dl_managed_tensor->dl_tensor.device.device_type) +
-           " is not support by Python backend.").c_str());
+           " is not support by Python backend.")
+              .c_str());
       break;
   }
 
-  FDDataType dtype =
-      DlpackToFDType(dl_managed_tensor->dl_tensor.dtype);
+  FDDataType dtype = DlpackToFDType(dl_managed_tensor->dl_tensor.dtype);
 
   PyCapsule_SetName(dlpack_tensor.ptr(), "used_dlpack");
   FDTensor fd_tensor(name);
-  fd_tensor.SetExternalData(
-      dims, dtype, memory_ptr, device, device_id
-  );
+  fd_tensor.SetExternalData(dims, dtype, memory_ptr, device, device_id);
   fd_tensor.is_pinned_memory = is_pinned_memory;
   return fd_tensor;
 }
@@ -242,15 +238,52 @@ void BindFDTensor(pybind11::module& m) {
       .def_readonly("shape", &FDTensor::shape)
       .def_readonly("dtype", &FDTensor::dtype)
       .def_readonly("device", &FDTensor::device)
-      .def("numpy", [](FDTensor& self) {
-        return TensorToPyArray(self);
-      })
+      .def("numpy", [](FDTensor& self) { return TensorToPyArray(self); })
       .def("data", &FDTensor::MutableData)
-      .def("from_numpy", [](FDTensor& self, pybind11::array& pyarray, bool share_buffer = false) {
-        PyArrayToTensor(pyarray, &self, share_buffer);
-      })
+      .def("from_numpy",
+           [](FDTensor& self, pybind11::array& pyarray,
+              bool share_buffer = false) {
+             PyArrayToTensor(pyarray, &self, share_buffer);
+           })
+      .def("from_external_data",
+           [](const std::string& name, size_t data_addr,
+              const std::vector<int64_t>& shape, const std::string& data_type,
+              const std::string& data_place, int device_id) {
+             auto fd_data_type = FDDataType::UNKNOWN1;
+             if (data_type == "FP32") {
+               fd_data_type = FDDataType::FP32;
+             } else if (data_type == "FP16") {
+               fd_data_type = FDDataType::FP16;
+             } else if (data_type == "INT32") {
+               fd_data_type = FDDataType::INT32;
+             } else if (data_type == "INT64") {
+               fd_data_type = FDDataType::INT64;
+             } else {
+               FDASSERT(false,
                        "FDTensor.from_external_data, datatype \"%s\" is not "
+                        "supported.",
+                        data_type.c_str());
+             }
+
+             Device fd_data_place;
+             if (data_place.find("gpu") != data_place.npos) {
+               fd_data_place = Device::GPU;
+             } else {
+               FDASSERT(false,
+                        ("Device type " + data_place +
+                         " is not support by FDTensor.from_external_data.")
+                            .c_str());
+             }
+             void* data_ptr = nullptr;
+             data_ptr = reinterpret_cast<void*>(data_addr);
+             FDTensor fd_tensor(name);
+             fd_tensor.SetExternalData(shape, fd_data_type,
+                                       static_cast<void*>(data_ptr),
+                                       fd_data_place, device_id);
+             return fd_tensor;
+           })
       .def("to_dlpack", &FDTensorToDLPack)
-      .def("from_dlpack",&FDTensorFromDLPack)
+      .def("from_dlpack", &FDTensorFromDLPack)
      .def("print_info", &FDTensor::PrintInfo);
 }
@@ -110,6 +110,7 @@ void BindRuntime(pybind11::module& m) {
             return outputs;
           })
       .def("bind_input_tensor", &Runtime::BindInputTensor)
+      .def("bind_output_tensor", &Runtime::BindOutputTensor)
       .def("infer", [](Runtime& self) { self.Infer(); })
       .def("get_output_tensor",
            [](Runtime& self, const std::string& name) {

37 fastdeploy/runtime/backends/paddle/paddle_backend.cc Normal file → Executable file
@@ -25,9 +25,14 @@ void PaddleBackend::BuildOption(const PaddleBackendOption& option) {
   if (option.device == Device::GPU) {
     config_.EnableUseGpu(option.gpu_mem_init_size, option.device_id);
     if (option_.external_stream_) {
+      FDINFO << "Will use external stream for Paddle Backend." << std::endl;
       config_.SetExecStream(option_.external_stream_);
     }
     if (option.enable_trt) {
+      if (!option.trt_option.enable_fp16) {
+        FDINFO << "Will try to use tensorrt inference with Paddle Backend."
+               << std::endl;
+      }
       config_.Exp_DisableTensorRtOPs(option.trt_disabled_ops_);
       auto precision = paddle_infer::PrecisionType::kFloat32;
       if (option.trt_option.enable_fp16) {
@@ -226,23 +231,47 @@ bool PaddleBackend::Infer(std::vector<FDTensor>& inputs,
            << inputs_desc_.size() << ")." << std::endl;
     return false;
   }
+  // output share backend memory only support CPU or GPU
+  if (option_.device == Device::IPU) {
+    copy_to_fd = true;
+  }
+
   RUNTIME_PROFILE_LOOP_H2D_D2H_BEGIN
   for (size_t i = 0; i < inputs.size(); ++i) {
     auto handle = predictor_->GetInputHandle(inputs[i].name);
     ShareTensorFromFDTensor(handle.get(), inputs[i]);
   }
+  std::unordered_set<std::string> prebinded_output_name;
+  // prebinded output only support for GPU
+  if (!copy_to_fd) {
+    for (size_t i = 0; i < (*outputs).size(); ++i) {
+      auto output_name = (*outputs)[i].name;
+      // if an output is not prebinded,
+      // its name is expected to be empty.
+      // We skip here
+      if (output_name.empty()) {
+        continue;
+      }
+      // Record the prebinded output_name.
+      // Those outputs do not need PaddleTensorToFDTensor
+      // after predictor_.Run()
+      prebinded_output_name.insert(output_name);
+      auto handle = predictor_->GetOutputHandle(output_name);
+      ShareOutTensorFromFDTensor(handle.get(), (*outputs)[i]);
+    }
+  }
 
   RUNTIME_PROFILE_LOOP_BEGIN(1)
   predictor_->Run();
   RUNTIME_PROFILE_LOOP_END
 
-  // output share backend memory only support CPU or GPU
-  if (option_.device == Device::IPU) {
-    copy_to_fd = true;
-  }
   outputs->resize(outputs_desc_.size());
   for (size_t i = 0; i < outputs_desc_.size(); ++i) {
+    // skip prebinded output
+    if (copy_to_fd == false &&
+        prebinded_output_name.count(outputs_desc_[i].name)) {
+      continue;
+    }
     auto handle = predictor_->GetOutputHandle(outputs_desc_[i].name);
     if (copy_to_fd) {
       (*outputs)[i].is_pinned_memory = option_.enable_pinned_memory;
@@ -35,6 +35,9 @@ paddle_infer::PlaceType ConvertFDDeviceToPlace(Device device);
 // Share memory buffer with paddle_infer::Tensor from fastdeploy::FDTensor
 void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor, FDTensor& fd_tensor);
 
+void ShareOutTensorFromFDTensor(paddle_infer::Tensor* tensor,
+                                FDTensor& fd_tensor);
+
 // convert paddle_infer::Tensor to fastdeploy::FDTensor
 // if copy_to_fd is true, copy memory data to FDTensor
 /// else share memory to FDTensor
@@ -61,6 +61,43 @@ void ShareTensorFromFDTensor(paddle_infer::Tensor* tensor,
            Str(fd_tensor.dtype).c_str());
 }
 
+void ShareOutTensorFromFDTensor(paddle_infer::Tensor* tensor,
+                                FDTensor& fd_tensor) {
+  std::vector<int> shape(fd_tensor.shape.begin(), fd_tensor.shape.end());
+  auto place = ConvertFDDeviceToPlace(fd_tensor.device);
+  if (fd_tensor.dtype == FDDataType::FP32) {
+    if (place == paddle_infer::PlaceType::kGPU) {
+      tensor->ShareExternalData(static_cast<float*>(fd_tensor.MutableData()),
+                                shape, place);
+    } else {
+      tensor->CopyToCpu(static_cast<float*>(fd_tensor.MutableData()));
+    }
+    return;
+  } else if (fd_tensor.dtype == FDDataType::INT32) {
+    if (place == paddle_infer::PlaceType::kGPU) {
+      tensor->ShareExternalData(static_cast<int32_t*>(fd_tensor.MutableData()),
+                                shape, place);
+    } else {
+      tensor->CopyToCpu(static_cast<int32_t*>(fd_tensor.MutableData()));
+    }
+    return;
+  } else if (fd_tensor.dtype == FDDataType::INT64) {
+    if (place == paddle_infer::PlaceType::kGPU) {
+      tensor->ShareExternalData(static_cast<int64_t*>(fd_tensor.MutableData()),
+                                shape, place);
+    } else {
+      tensor->CopyToCpu(static_cast<int64_t*>(fd_tensor.MutableData()));
+    }
+    return;
+  } else if (fd_tensor.dtype == FDDataType::UINT8) {
+    tensor->ShareExternalData(static_cast<uint8_t*>(fd_tensor.MutableData()),
+                              shape, paddle_infer::PlaceType::kCPU);
+    return;
+  }
+  FDASSERT(false, "Unexpected data type(%s) while infer with PaddleBackend.",
+           Str(fd_tensor.dtype).c_str());
+}
+
 void PaddleTensorToFDTensor(std::unique_ptr<paddle_infer::Tensor>& tensor,
                             FDTensor* fd_tensor, bool copy_to_fd) {
   auto fd_dtype = PaddleDataTypeToFD(tensor->type());
@@ -67,6 +67,7 @@ bool RKNPU2Backend::GetSDKAndDeviceVersion() {
 ***************************************************************/
 void RKNPU2Backend::BuildOption(const RKNPU2BackendOption& option) {
   this->option_ = option;
+
   // save cpu_name
   this->option_.cpu_name = option.cpu_name;
 
@@ -123,7 +124,7 @@ bool RKNPU2Backend::InitFromRKNN(const std::string& model_file,
  * @return bool
  * @note Only support RK3588
 ***************************************************************/
-bool RKNPU2Backend::SetCoreMask(rknpu2::CoreMask& core_mask) const {
+bool RKNPU2Backend::SetCoreMask(const rknpu2::CoreMask& core_mask) {
   int ret = rknn_set_core_mask(ctx, static_cast<rknn_core_mask>(core_mask));
   if (ret != RKNN_SUCC) {
     FDERROR << "rknn_set_core_mask fail! ret=" << ret << std::endl;
@@ -25,7 +25,7 @@
 
 namespace fastdeploy {
 struct RKNPU2BackendOption {
-  rknpu2::CpuName cpu_name = rknpu2::CpuName::RK3588;
+  rknpu2::CpuName cpu_name = rknpu2::CpuName::RK356X;
 
   // The specification of NPU core setting. It has the following choices :
   // RKNN_NPU_CORE_AUTO : Referring to automatic mode, meaning that it will
@@ -49,7 +49,7 @@ class RKNPU2Backend : public BaseBackend {
 
   bool GetSDKAndDeviceVersion();
 
-  bool SetCoreMask(rknpu2::CoreMask& core_mask) const;
+  bool SetCoreMask(const rknpu2::CoreMask& core_mask);
 
   bool GetModelInputOutputInfos();
 
@@ -35,6 +35,7 @@ void BindOption(pybind11::module& m) {
       .def(pybind11::init())
       .def("set_model_path", &RuntimeOption::SetModelPath)
       .def("set_model_buffer", &RuntimeOption::SetModelBuffer)
+      .def("set_encryption_key", &RuntimeOption::SetEncryptionKey)
       .def("use_gpu", &RuntimeOption::UseGpu)
       .def("use_cpu", &RuntimeOption::UseCpu)
       .def("use_rknpu2", &RuntimeOption::UseRKNPU2)
@@ -48,6 +49,10 @@ void BindOption(pybind11::module& m) {
       .def_readwrite("poros_option", &RuntimeOption::poros_option)
       .def_readwrite("paddle_infer_option", &RuntimeOption::paddle_infer_option)
       .def("set_external_stream", &RuntimeOption::SetExternalStream)
+      .def("set_external_raw_stream",
+           [](RuntimeOption& self, size_t external_stream) {
+             self.SetExternalStream(reinterpret_cast<void*>(external_stream));
+           })
       .def("set_cpu_thread_num", &RuntimeOption::SetCpuThreadNum)
       .def("use_paddle_backend", &RuntimeOption::UsePaddleBackend)
       .def("use_poros_backend", &RuntimeOption::UsePorosBackend)
@@ -104,7 +104,33 @@ bool AutoSelectBackend(RuntimeOption& option) {
 
 bool Runtime::Init(const RuntimeOption& _option) {
   option = _option;
+  // decrypt encrypted model
+  if ("" != option.encryption_key_) {
+#ifdef ENABLE_ENCRYPTION
+    if (option.model_from_memory_) {
+      option.model_file = Decrypt(option.model_file, option.encryption_key_);
+      if (!(option.params_file.empty())) {
+        option.params_file =
+            Decrypt(option.params_file, option.encryption_key_);
+      }
+    } else {
+      std::string model_buffer = "";
+      FDASSERT(ReadBinaryFromFile(option.model_file, &model_buffer),
+               "Fail to read binary from model file");
+      option.model_file = Decrypt(model_buffer, option.encryption_key_);
+      if (!(option.params_file.empty())) {
+        std::string params_buffer = "";
+        FDASSERT(ReadBinaryFromFile(option.params_file, &params_buffer),
+                 "Fail to read binary from parameter file");
+        option.params_file = Decrypt(params_buffer, option.encryption_key_);
+      }
+      option.model_from_memory_ = true;
+    }
+#else
+    FDERROR << "The FastDeploy didn't compile with encryption function."
+            << std::endl;
+#endif
+  }
   // Choose default backend by model format and device if backend is not
   // specified
   if (option.backend == Backend::UNKNOWN) {
@@ -198,6 +224,25 @@ void Runtime::BindInputTensor(const std::string& name, FDTensor& input) {
     }
   }
 }
+
+void Runtime::BindOutputTensor(const std::string& name, FDTensor& output) {
+  bool is_exist = false;
+  for (auto& t : output_tensors_) {
+    if (t.name == name) {
+      FDINFO << "The output name [" << name << "] already exists." << std::endl;
+      is_exist = true;
+      t.SetExternalData(output.shape, output.dtype, output.MutableData(),
+                        output.device, output.device_id);
+      break;
+    }
+  }
+  if (!is_exist) {
+    FDINFO << "The output name [" << name
+           << "] is prebinded and added into the output tensor list."
+           << std::endl;
+    FDTensor new_tensor(name);
+    new_tensor.SetExternalData(output.shape, output.dtype, output.MutableData(),
+                               output.device, output.device_id);
+    output_tensors_.emplace_back(std::move(new_tensor));
+  }
+}
+
 FDTensor* Runtime::GetOutputTensor(const std::string& name) {
   for (auto& t : output_tensors_) {
     if (t.name == name) {
@@ -23,6 +23,9 @@
 #include "fastdeploy/core/fd_tensor.h"
 #include "fastdeploy/runtime/runtime_option.h"
 #include "fastdeploy/utils/perf.h"
+#ifdef ENABLE_ENCRYPTION
+#include "fastdeploy/encryption/include/decrypt.h"
+#endif
 
 /** \brief All C++ FastDeploy APIs are defined inside this namespace
  *
@@ -72,6 +75,12 @@ struct FASTDEPLOY_DECL Runtime {
   /** \brief Bind FDTensor by name, no copy and share input memory
    */
   void BindInputTensor(const std::string& name, FDTensor& input);
+
+  /** \brief Bind FDTensor by name, no copy and share output memory.
+   * Please make sure the tensor shape of the output is correct.
+   */
+  void BindOutputTensor(const std::string& name, FDTensor& output);
+
   /** \brief Get output FDTensor by name, no copy and share backend output memory
    */
   FDTensor* GetOutputTensor(const std::string& name);
@@ -36,6 +36,15 @@ void RuntimeOption::SetModelBuffer(const std::string& model_buffer,
   model_from_memory_ = true;
 }
 
+void RuntimeOption::SetEncryptionKey(const std::string& encryption_key) {
+#ifdef ENABLE_ENCRYPTION
+  encryption_key_ = encryption_key;
+#else
+  FDERROR << "The FastDeploy didn't compile with encryption function."
+          << std::endl;
+#endif
+}
+
 void RuntimeOption::UseGpu(int gpu_id) {
 #ifdef WITH_GPU
   device = Device::GPU;
@@ -59,15 +59,21 @@ struct FASTDEPLOY_DECL RuntimeOption {
                       const std::string& params_buffer = "",
                       const ModelFormat& format = ModelFormat::PADDLE);
 
+  /** \brief When loading an encrypted model, encryption_key is required to decrypt the model
+   *
+   * \param[in] encryption_key The key for decrypting the model
+   */
+  void SetEncryptionKey(const std::string& encryption_key);
+
   /// Use cpu to inference, the runtime will inference on CPU by default
   void UseCpu();
   /// Use Nvidia GPU to inference
   void UseGpu(int gpu_id = 0);
   /// Use RKNPU2 e.g RK3588/RK356X to inference
   void UseRKNPU2(fastdeploy::rknpu2::CpuName rknpu2_name =
-                     fastdeploy::rknpu2::CpuName::RK3588,
+                     fastdeploy::rknpu2::CpuName::RK356X,
                  fastdeploy::rknpu2::CoreMask rknpu2_core =
-                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_0);
+                     fastdeploy::rknpu2::CoreMask::RKNN_NPU_CORE_AUTO);
   /// Use TimVX e.g RV1126/A311D to inference
   void UseTimVX();
   /// Use Huawei Ascend to inference
@@ -104,6 +110,7 @@ struct FASTDEPLOY_DECL RuntimeOption {
                      bool enable_multi_stream = false);
 
   void SetExternalStream(void* external_stream);
+
   /*
    * @brief Set the number of cpu threads while inferring on CPU; by default it will be decided by the different backends
    */
@@ -178,6 +185,8 @@ struct FASTDEPLOY_DECL RuntimeOption {
   /// format of input model
   ModelFormat model_format = ModelFormat::PADDLE;
 
+  std::string encryption_key_ = "";
+
   // for cpu inference
   // default will let the backend choose their own default value
   int cpu_thread_num = -1;
@@ -24,7 +24,7 @@ namespace facedet {
 Yolov7FacePostprocessor::Yolov7FacePostprocessor() {
   conf_threshold_ = 0.5;
   nms_threshold_ = 0.45;
-  max_wh_ = 7680.0;
+  landmarks_per_face_ = 5;
 }

 bool Yolov7FacePostprocessor::Run(const std::vector<FDTensor>& infer_result,
@@ -36,6 +36,8 @@ bool Yolov7FacePostprocessor::Run(const std::vector<FDTensor>& infer_result,

   for (size_t bs = 0; bs < batch; ++bs) {
     (*results)[bs].Clear();
+    // must be setup landmarks_per_face before reserve
+    (*results)[bs].landmarks_per_face = landmarks_per_face_;
     (*results)[bs].Reserve(infer_result[0].shape[1]);
     if (infer_result[0].dtype != FDDataType::FP32) {
       FDERROR << "Only support post process with float32 data." << std::endl;
@@ -61,6 +63,15 @@ bool Yolov7FacePostprocessor::Run(const std::vector<FDTensor>& infer_result,
       (*results)[bs].boxes.emplace_back(std::array<float, 4>{
           (x - w / 2.f), (y - h / 2.f), (x + w / 2.f), (y + h / 2.f)});
       (*results)[bs].scores.push_back(confidence);
+
+      // decode landmarks (default 5 landmarks)
+      if (landmarks_per_face_ > 0) {
+        float* landmarks_ptr = const_cast<float*>(reg_cls_ptr + 6);
+        for (size_t j = 0; j < landmarks_per_face_ * 3; j += 3) {
+          (*results)[bs].landmarks.emplace_back(
+              std::array<float, 2>{landmarks_ptr[j], landmarks_ptr[j + 1]});
+        }
+      }
     }

     if ((*results)[bs].boxes.size() == 0) {
@@ -79,9 +90,9 @@ bool Yolov7FacePostprocessor::Run(const std::vector<FDTensor>& infer_result,
     float ipt_h = iter_ipt->second[0];
     float ipt_w = iter_ipt->second[1];
     float scale = std::min(out_h / ipt_h, out_w / ipt_w);
+    float pad_h = (out_h - ipt_h * scale) / 2;
+    float pad_w = (out_w - ipt_w * scale) / 2;
     for (size_t i = 0; i < (*results)[bs].boxes.size(); ++i) {
-      float pad_h = (out_h - ipt_h * scale) / 2;
-      float pad_w = (out_w - ipt_w * scale) / 2;
       // clip box
       (*results)[bs].boxes[i][0] = std::max(((*results)[bs].boxes[i][0] - pad_w) / scale, 0.0f);
       (*results)[bs].boxes[i][1] = std::max(((*results)[bs].boxes[i][1] - pad_h) / scale, 0.0f);
@@ -92,6 +103,16 @@ bool Yolov7FacePostprocessor::Run(const std::vector<FDTensor>& infer_result,
       (*results)[bs].boxes[i][2] = std::min((*results)[bs].boxes[i][2], ipt_w - 1.0f);
       (*results)[bs].boxes[i][3] = std::min((*results)[bs].boxes[i][3], ipt_h - 1.0f);
     }
+
+    // scale and clip landmarks
+    for (size_t i = 0; i < (*results)[bs].landmarks.size(); ++i) {
+      (*results)[bs].landmarks[i][0] =
+          std::max(((*results)[bs].landmarks[i][0] - pad_w) / scale, 0.0f);
+      (*results)[bs].landmarks[i][1] =
+          std::max(((*results)[bs].landmarks[i][1] - pad_h) / scale, 0.0f);
+      (*results)[bs].landmarks[i][0] = std::min((*results)[bs].landmarks[i][0], ipt_w - 1.0f);
+      (*results)[bs].landmarks[i][1] = std::min((*results)[bs].landmarks[i][1], ipt_h - 1.0f);
+    }
   }
   return true;
 }
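The box/landmark rescaling in the hunks above undoes letterbox preprocessing: subtract the padding, divide by the scale, then clip to the original image. A small NumPy sketch of the same math, illustrative only and not the FastDeploy API:

```python
import numpy as np

def unletterbox_points(points, out_hw, ipt_hw):
    """Map (x, y) points from the letterboxed network input back to the
    original image, mirroring the box/landmark rescaling above."""
    out_h, out_w = out_hw
    ipt_h, ipt_w = ipt_hw
    scale = min(out_h / ipt_h, out_w / ipt_w)
    pad_h = (out_h - ipt_h * scale) / 2
    pad_w = (out_w - ipt_w * scale) / 2
    pts = (np.asarray(points, dtype=np.float32) - [pad_w, pad_h]) / scale
    # clip into the valid image area, as the C++ loops do
    return np.clip(pts, 0.0, [ipt_w - 1.0, ipt_h - 1.0])
```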
|
@@ -56,11 +56,19 @@ class FASTDEPLOY_DECL Yolov7FacePostprocessor{
   /// Get nms_threshold, default 0.45
   float GetNMSThreshold() const { return nms_threshold_; }

+  /// Set landmarks_per_face, default 5
+  void SetLandmarksPerFace(const int& landmarks_per_face) {
+    landmarks_per_face_ = landmarks_per_face;
+  }
+
+  /// Get landmarks_per_face, default 5
+  int GetLandmarksPerFace() const { return landmarks_per_face_; }
+
  protected:
   float conf_threshold_;
   float nms_threshold_;
-  bool multi_label_;
-  float max_wh_;
+  int landmarks_per_face_;
 };

 } // namespace facedet
|
@@ -60,7 +60,8 @@ void BindYOLOv7Face(pybind11::module& m) {
         return results;
       })
       .def_property("conf_threshold", &vision::facedet::Yolov7FacePostprocessor::GetConfThreshold, &vision::facedet::Yolov7FacePostprocessor::SetConfThreshold)
-      .def_property("nms_threshold", &vision::facedet::Yolov7FacePostprocessor::GetNMSThreshold, &vision::facedet::Yolov7FacePostprocessor::SetNMSThreshold);
+      .def_property("nms_threshold", &vision::facedet::Yolov7FacePostprocessor::GetNMSThreshold, &vision::facedet::Yolov7FacePostprocessor::SetNMSThreshold)
+      .def_property("landmarks_per_face", &vision::facedet::Yolov7FacePostprocessor::GetLandmarksPerFace, &vision::facedet::Yolov7FacePostprocessor::SetLandmarksPerFace);

   pybind11::class_<vision::facedet::YOLOv7Face, FastDeployModel>(m, "YOLOv7Face")
       .def(pybind11::init<std::string, std::string, RuntimeOption,
|
37 python/fastdeploy/runtime.py  Normal file → Executable file
@@ -72,6 +72,14 @@ class Runtime:
         """
         self._runtime.bind_input_tensor(name, fdtensor)

+    def bind_output_tensor(self, name, fdtensor):
+        """Bind FDTensor by name, no copy and share output memory
+
+        :param name: (str)The name of output data.
+        :param fdtensor: (fastdeploy.FDTensor)The output FDTensor.
+        """
+        self._runtime.bind_output_tensor(name, fdtensor)
+
     def zero_copy_infer(self):
         """No params inference the model.
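A hedged sketch of the zero-copy path these bindings enable; the tensor names "x"/"y" and the pre-allocated FDTensor objects are assumptions for illustration:

```python
import fastdeploy as fd

# `runtime` is assumed to be an initialized fd.Runtime, and `in_t` / `out_t`
# pre-allocated FDTensor objects matching the model's input/output names.
runtime.bind_input_tensor("x", in_t)    # share input memory, no copy
runtime.bind_output_tensor("y", out_t)  # share output memory, no copy
runtime.zero_copy_infer()               # results are written into out_t
```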
@@ -187,6 +195,12 @@ class RuntimeOption:
         return self._option.set_model_buffer(model_buffer, params_buffer,
                                              model_format)

+    def set_encryption_key(self, encryption_key):
+        """When loading an encrypted model, `encryption_key` is required to decrypt the model
+
+        :param encryption_key: (str)The key for decrypting the model
+        """
+        return self._option.set_encryption_key(encryption_key)
+
     def use_gpu(self, device_id=0):
         """Inference with Nvidia GPU
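Taken together with the encryption tutorial added below, loading an encrypted model might look like this; the paths are illustrative and the files come from `tutorials/encrypt_model/encrypt.py`:

```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.set_model_path("ResNet50_vd_infer_encrypt/__model__.encrypted",
                      "ResNet50_vd_infer_encrypt/__params__.encrypted")
with open("ResNet50_vd_infer_encrypt/encryption_key.txt") as f:
    option.set_encryption_key(f.read())
```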
@@ -518,9 +532,10 @@ class RuntimeOption:
         logging.warning("  option = fd.RuntimeOption()")
         logging.warning("  option.use_gpu(0)")
         logging.warning("  option.use_paddle_infer_backend()")
-        logging.warning("  option.paddle_infer_option.enabel_trt = True")
+        logging.warning("  option.paddle_infer_option.enable_trt = True")
         logging.warning("  ==============================================")
-        return self._option.enable_paddle_to_trt()
+        self._option.use_paddle_backend()
+        self._option.paddle_infer_option.enable_trt = True

     def set_trt_max_workspace_size(self, trt_max_workspace_size):
         """Set max workspace size while using TensorRT backend.
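The deprecated call now simply forwards to the combination its warning recommends, i.e.:

```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_gpu(0)
option.use_paddle_infer_backend()
option.paddle_infer_option.enable_trt = True
```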
@@ -583,10 +598,12 @@ class RuntimeOption:
                        replica_num=1,
                        available_memory_proportion=1.0,
                        enable_half_partial=False):
-        logging.warning("`RuntimeOption.set_ipu_config` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.set_ipu_config()` instead.")
-        self._option.paddle_infer_option.set_ipu_config(enable_fp16, replica_num,
-                                                        available_memory_proportion,
-                                                        enable_half_partial)
+        logging.warning(
+            "`RuntimeOption.set_ipu_config` will be deprecated in v1.2.0, please use `RuntimeOption.paddle_infer_option.set_ipu_config()` instead."
+        )
+        self._option.paddle_infer_option.set_ipu_config(
+            enable_fp16, replica_num, available_memory_proportion,
+            enable_half_partial)

     @property
     def poros_option(self):
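Likewise, the non-deprecated spelling of the IPU configuration is the `paddle_infer_option` form; a sketch using the default argument values shown above:

```python
import fastdeploy as fd

option = fd.RuntimeOption()
# enable_fp16, replica_num, available_memory_proportion, enable_half_partial
option.paddle_infer_option.set_ipu_config(False, 1, 1.0, False)
```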
@@ -649,6 +666,11 @@ class RuntimeOption:
         """
         return self._option.disable_profiling()

+    def set_external_raw_stream(self, cuda_stream):
+        """Set the external raw stream used by fastdeploy runtime.
+        """
+        self._option.set_external_raw_stream(cuda_stream)
+
     def __repr__(self):
         attrs = dir(self._option)
         message = "RuntimeOption(\n"
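A hedged sketch of the new stream hook; `stream_ptr` is assumed to be an integer handle to an existing cudaStream_t obtained elsewhere, e.g. from another framework sharing the GPU:

```python
import fastdeploy as fd

option = fd.RuntimeOption()
option.use_gpu(0)
option.set_external_raw_stream(stream_ptr)  # run inference on the caller's stream
```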
@@ -657,7 +679,8 @@ class RuntimeOption:
                 continue
             if hasattr(getattr(self._option, attr), "__call__"):
                 continue
-            message += "  {} : {}\t\n".format(attr, getattr(self._option, attr))
+            message += "  {} : {}\t\n".format(attr,
+                                              getattr(self._option, attr))
         message.strip("\n")
         message += ")"
         return message
|
@@ -107,6 +107,13 @@ class Yolov7FacePostprocessor:
         """
         return self._postprocessor.nms_threshold

+    @property
+    def landmarks_per_face(self):
+        """
+        landmarks per face for postprocessing, default is 5
+        """
+        return self._postprocessor.landmarks_per_face
+
     @conf_threshold.setter
     def conf_threshold(self, conf_threshold):
         assert isinstance(conf_threshold, float),\
@@ -119,6 +126,11 @@ class Yolov7FacePostprocessor:
             "The value to set `nms_threshold` must be type of float."
         self._postprocessor.nms_threshold = nms_threshold

+    @landmarks_per_face.setter
+    def landmarks_per_face(self, landmarks_per_face):
+        assert isinstance(landmarks_per_face, int),\
+            "The value to set `landmarks_per_face` must be type of int."
+        self._postprocessor.landmarks_per_face = landmarks_per_face
+
 class YOLOv7Face(FastDeployModel):
     def __init__(self,
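With the setter above, the landmark count can be adjusted from Python before postprocessing. A sketch assuming the detector exposes its postprocessor as `model.postprocessor`, as other FastDeploy vision models do, with a placeholder model path:

```python
import fastdeploy as fd

model = fd.vision.facedet.YOLOv7Face("yolov7-face.onnx")  # placeholder path
# must match the number of landmarks the deployed model actually predicts
model.postprocessor.landmarks_per_face = 5
```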
|
@@ -10,7 +10,7 @@ std:
 - 57.375
 model_path: ./picodet_s_416_coco_lcnet/picodet_s_416_coco_lcnet.onnx
 outputs_nodes:
-- 'p2o.Div.79'
+- 'p2o.Mul.179'
 - 'p2o.Concat.9'
 do_quantization: False
 dataset:
|
@@ -0,0 +1,17 @@
+mean:
+-
+ - 0
+ - 0
+ - 0
+std:
+-
+ - 255
+ - 255
+ - 255
+model_path: ./ppyoloe_plus_crn_s_80e_coco/ppyoloe_plus_crn_s_80e_coco.onnx
+outputs_nodes:
+- 'p2o.Mul.224'
+- 'p2o.Concat.29'
+do_quantization: True
+dataset: "./ppyoloe_plus_crn_s_80e_coco/dataset.txt"
+output_folder: "./ppyoloe_plus_crn_s_80e_coco"
46 tutorials/encrypt_model/README.md  Normal file
@@ -0,0 +1,46 @@
+English | [中文](README_CN.md)
+
+# Generating an encrypted model with FastDeploy
+
+This directory provides `encrypt.py` to quickly encrypt the model and parameter files of ResNet50_vd.
+
+## Encryption
+```bash
+# Download the example code
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/tutorials/encrypt_model
+
+# Download the ResNet50_vd model files
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
+tar -xvf ResNet50_vd_infer.tgz
+
+python encrypt.py --model_file ResNet50_vd_infer/inference.pdmodel --params_file ResNet50_vd_infer/inference.pdiparams --encrypted_model_dir ResNet50_vd_infer_encrypt
+```
+>> **Note** After encryption completes, the ResNet50_vd_infer_encrypt folder is generated with three files: `__model__.encrypted`, `__params__.encrypted` and `encryption_key.txt`, where `encryption_key.txt` holds the encryption key. You also need to copy the `inference_cls.yaml` configuration file from the original folder into ResNet50_vd_infer_encrypt for later deployment.
+
+### Python encryption interface
+
+Encrypt a model through the following interface:
+```python
+import fastdeploy as fd
+import os
+# when no key is given, a key is generated automatically;
+# otherwise, the file is encrypted with the given key
+encrypted_model, key = fd.encryption.encrypt(model_file.read())
+encrypted_params, key = fd.encryption.encrypt(params_file.read(), key)
+```
+
+### Deploying an encrypted model with FastDeploy (decryption)
+
+With the following option set, FastDeploy can deploy the encrypted model:
+```python
+import fastdeploy as fd
+option = fd.RuntimeOption()
+option.set_encryption_key(key)
+```
+
+```C++
+fastdeploy::RuntimeOption option;
+option.SetEncryptionKey(key);
+```
+>> **Note** For more details about RuntimeOption, please refer to the [RuntimeOption Python documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/runtime_option.html) and [RuntimeOption C++ documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/structfastdeploy_1_1RuntimeOption.html)
48 tutorials/encrypt_model/README_CN.md  Normal file
@@ -0,0 +1,48 @@
+[English](README.md) | 中文
+
+# Generating an encrypted model with FastDeploy
+
+This directory provides `encrypt.py` to quickly encrypt the model and parameter files of ResNet50_vd.
+
+FastDeploy supports a symmetric encryption scheme: the model is encrypted with the symmetric AES algorithm from OpenSSL, which also produces the key.
+
+## Encryption
+```bash
+# Download the encryption example code
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy/tutorials/encrypt_model
+
+# Download the ResNet50_vd model files
+wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
+tar -xvf ResNet50_vd_infer.tgz
+
+python encrypt.py --model_file ResNet50_vd_infer/inference.pdmodel --params_file ResNet50_vd_infer/inference.pdiparams --encrypted_model_dir ResNet50_vd_infer_encrypt
+```
+>> **Note** After encryption completes, the ResNet50_vd_infer_encrypt folder is generated with three files: `__model__.encrypted`, `__params__.encrypted` and `encryption_key.txt`, where `encryption_key.txt` holds the encryption key. You also need to copy the `inference_cls.yaml` configuration file from the original folder into ResNet50_vd_infer_encrypt for later deployment.
+
+### Python encryption interface
+
+Use the encryption interface through the following settings:
+```python
+import fastdeploy as fd
+import os
+# when no key is given, a key is generated automatically;
+# otherwise, the file is encrypted with the given key
+encrypted_model, key = fd.encryption.encrypt(model_file.read())
+encrypted_params, key = fd.encryption.encrypt(params_file.read(), key)
+```
+
+### Deploying an encrypted model with FastDeploy
+
+Set the following option to run inference with an encrypted model:
+```python
+import fastdeploy as fd
+option = fd.RuntimeOption()
+option.set_encryption_key(key)
+```
+
+```C++
+fastdeploy::RuntimeOption option;
+option.SetEncryptionKey(key);
+```
+>> **Note** For more details about RuntimeOption, please refer to the [RuntimeOption Python documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/python/html/runtime_option.html) and [RuntimeOption C++ documentation](https://www.paddlepaddle.org.cn/fastdeploy-api-doc/cpp/html/structfastdeploy_1_1RuntimeOption.html)
47 tutorials/encrypt_model/encrypt.py  Normal file
@@ -0,0 +1,47 @@
+import fastdeploy as fd
+import os
+
+
+def parse_arguments():
+    import argparse
+    import ast
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--encrypted_model_dir",
+        required=False,
+        help="Path of model directory.")
+    parser.add_argument(
+        "--model_file", required=True, help="Path of model file directory.")
+    parser.add_argument(
+        "--params_file",
+        required=True,
+        help="Path of parameters file directory.")
+    return parser.parse_args()
+
+
+if __name__ == "__main__":
+    args = parse_arguments()
+    model_buffer = open(args.model_file, 'rb')
+    params_buffer = open(args.params_file, 'rb')
+    encrypted_model, key = fd.encryption.encrypt(model_buffer.read())
+    # use the same key to encrypt parameter file
+    encrypted_params, key = fd.encryption.encrypt(params_buffer.read(), key)
+    encrypted_model_dir = "encrypt_model_dir"
+    if args.encrypted_model_dir:
+        encrypted_model_dir = args.encrypted_model_dir
+    model_buffer.close()
+    params_buffer.close()
+    os.mkdir(encrypted_model_dir)
+    with open(os.path.join(encrypted_model_dir, "__model__.encrypted"),
+              "w") as f:
+        f.write(encrypted_model)
+
+    with open(os.path.join(encrypted_model_dir, "__params__.encrypted"),
+              "w") as f:
+        f.write(encrypted_params)
+
+    with open(os.path.join(encrypted_model_dir, "encryption_key.txt"),
+              "w") as f:
+        f.write(key)
+    print("encryption key: ", key)
+    print("encryption success")