Merge branch 'encrypt_model' of https://github.com/felixhjh/FastDeploy into encrypt_model
benchmark/cpp/benchmark_yolov5.cc (53 changes, Normal file → Executable file)
@@ -17,8 +17,7 @@
 #include "flags.h"
 
 bool RunModel(std::string model_file, std::string image_file, size_t warmup,
-              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
-              std::string gpu_mem_file_name) {
+              size_t repeats, size_t sampling_interval) {
   // Initialization
   auto option = fastdeploy::RuntimeOption();
   if (!CreateRuntimeOption(&option)) {
@@ -34,6 +33,12 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
     return false;
   }
   auto im = cv::imread(image_file);
+  // For collect memory info
+  fastdeploy::benchmark::ResourceUsageMonitor resource_moniter(
+      sampling_interval, FLAGS_device_id);
+  if (FLAGS_collect_memory_info) {
+    resource_moniter.Start();
+  }
   // For Runtime
   if (FLAGS_profile_mode == "runtime") {
     fastdeploy::vision::DetectionResult res;
@@ -57,35 +62,34 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
       return false;
     }
   }
-  std::vector<float> end2end_statis;
   // Step2: repeat for repeats times
   std::cout << "Counting time..." << std::endl;
+  fastdeploy::TimeCounter tc;
   std::cout << "Repeat " << repeats << " times..." << std::endl;
   fastdeploy::vision::DetectionResult res;
-  for (int i = 0; i < repeats; i++) {
-    if (FLAGS_collect_memory_info && i % dump_period == 0) {
-      fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
-#if defined(WITH_GPU)
-      fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
-                                                       FLAGS_device_id);
-#endif
-    }
-    fastdeploy::TimeCounter tc;
-    tc.Start();
+  tc.Start();
+  for (int i = 0; i < repeats; i++) {
     if (!model.Predict(im, &res)) {
       std::cerr << "Failed to predict." << std::endl;
       return false;
     }
-    tc.End();
-    end2end_statis.push_back(tc.Duration() * 1000);
   }
-  float end2end = std::accumulate(end2end_statis.end() - repeats,
-                                  end2end_statis.end(), 0.f) /
-                  repeats;
+  tc.End();
+  double end2end = tc.Duration() / repeats * 1000;
   std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
   auto vis_im = fastdeploy::vision::VisDetection(im, res);
   cv::imwrite("vis_result.jpg", vis_im);
   std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
   }
+  if (FLAGS_collect_memory_info) {
+    float cpu_mem = resource_moniter.GetMaxCpuMem();
+    float gpu_mem = resource_moniter.GetMaxGpuMem();
+    float gpu_util = resource_moniter.GetMaxGpuUtil();
+    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+    std::cout << "gpu_util: " << gpu_util << std::endl;
+    resource_moniter.Stop();
+  }
 
   return true;
 }
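The timing change above is worth spelling out: the old code timed each Predict() call separately, pushed every duration into end2end_statis, and averaged with std::accumulate; the new code starts one TimeCounter before the loop and divides the total by repeats. A minimal standalone sketch of the new arithmetic, using std::chrono in place of FastDeploy's TimeCounter and with the Predict() call stubbed out:

    #include <chrono>
    #include <iostream>

    int main() {
      const int repeats = 100;
      auto start = std::chrono::steady_clock::now();
      for (int i = 0; i < repeats; i++) {
        // model.Predict(im, &res) would run here.
      }
      auto end = std::chrono::steady_clock::now();
      // Same arithmetic as the diff, where Duration() returns seconds:
      // seconds / repeats * 1000 gives average latency per run in ms.
      double total_seconds = std::chrono::duration<double>(end - start).count();
      double end2end_ms = total_seconds / repeats * 1000;
      std::cout << "End2End(ms): " << end2end_ms << std::endl;
      return 0;
    }

One consequence of the single-timer form is that per-iteration jitter is no longer recorded, only the mean.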
@@ -94,21 +98,10 @@ int main(int argc, char* argv[]) {
   google::ParseCommandLineFlags(&argc, &argv, true);
   int repeats = FLAGS_repeat;
   int warmup = FLAGS_warmup;
-  int dump_period = FLAGS_dump_period;
-  std::string cpu_mem_file_name = "result_cpu.txt";
-  std::string gpu_mem_file_name = "result_gpu.txt";
+  int sampling_interval = FLAGS_sampling_interval;
   // Run model
-  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
-               cpu_mem_file_name, gpu_mem_file_name) != true) {
+  if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) {
     exit(1);
   }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-#if defined(WITH_GPU)
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
-#endif
-  }
   return 0;
 }
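With main() trimmed down, memory collection is configured entirely through flags. Assuming the built binary keeps the source file's name, an invocation would look something like:

    ./benchmark_yolov5 --model model_path --image img_path --collect_memory_info=true --sampling_interval=50

The --dump_period flag and the result_cpu.txt / result_gpu.txt intermediate files are gone; peak usage is printed directly at the end of the run.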
@@ -37,7 +37,7 @@ DEFINE_bool(
     "and 'lite' backend");
 DEFINE_bool(
     collect_memory_info, false, "Whether to collect memory info");
-DEFINE_int32(dump_period, 100, "How often to collect memory info.");
+DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");
 
 void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path --device "
@@ -61,7 +61,7 @@ endif(WIN32)
 message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")
 
 set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/")
-set(FASTTOKENIZER_VERSION "1.0.1")
+set(FASTTOKENIZER_VERSION "1.0.2")
 
 # Set download url
 if(WIN32)
@@ -80,7 +80,7 @@ if(PADDLEINFERENCE_DIRECTORY)
   endif()
 else()
   set(PADDLEINFERENCE_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
-  set(PADDLEINFERENCE_VERSION "2.4-dev4")
+  set(PADDLEINFERENCE_VERSION "2.4-dev5")
   if(WIN32)
     if (WITH_GPU)
       set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-gpu-trt-${PADDLEINFERENCE_VERSION}.zip")
@@ -36,86 +36,131 @@ static std::string strip(const std::string& str, char ch = ' ') {
   return str.substr(i, j + 1 - i);
 }
 
-void DumpCurrentCpuMemoryUsage(const std::string& name) {
+// Split string
+static void split(const std::string& s, std::vector<std::string>& tokens,
+                  char delim = ' ') {
+  tokens.clear();
+  size_t lastPos = s.find_first_not_of(delim, 0);
+  size_t pos = s.find(delim, lastPos);
+  while (lastPos != std::string::npos) {
+    tokens.emplace_back(s.substr(lastPos, pos - lastPos));
+    lastPos = s.find_first_not_of(delim, pos);
+    pos = s.find(delim, lastPos);
+  }
+  return;
+}
+
+ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
+    : is_supported_(false),
+      sampling_interval_(sampling_interval_ms),
+      gpu_id_(gpu_id) {
+#if defined(__linux__) || defined(__ANDROID__)
+  is_supported_ = true;
+#else
+  is_supported_ = false;
+#endif
+  if (!is_supported_) {
+    FDASSERT(false,
+             "Currently ResourceUsageMonitor only supports Linux and ANDROID.")
+    return;
+  }
+}
+
+void ResourceUsageMonitor::Start() {
+  if (!is_supported_) return;
+  if (check_memory_thd_ != nullptr) {
+    FDINFO << "Memory monitoring has already started!" << std::endl;
+    return;
+  }
+  FDINFO << "Start monitoring memory!" << std::endl;
+  stop_signal_ = false;
+  check_memory_thd_.reset(new std::thread(([this]() {
+    // Note we retrieve the memory usage at the very beginning of the thread.
+    while (true) {
+      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
+      // get max_cpu_mem
+      std::vector<std::string> cpu_tokens;
+      split(cpu_mem_info, cpu_tokens, ' ');
+      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#if defined(WITH_GPU)
+      std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
+      // get max_gpu_mem and max_gpu_util
+      std::vector<std::string> gpu_tokens;
+      split(gpu_mem_info, gpu_tokens, ',');
+      max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
+      max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
+#endif
+      if (stop_signal_) break;
+      std::this_thread::sleep_for(
+          std::chrono::milliseconds(sampling_interval_));
+    }
+  })));
+}
+
+void ResourceUsageMonitor::Stop() {
+  if (!is_supported_) {
+    return;
+  }
+  if (check_memory_thd_ == nullptr) {
+    FDINFO << "Memory monitoring hasn't started yet or has stopped!"
+           << std::endl;
+    return;
+  }
+  FDINFO << "Stop monitoring memory!" << std::endl;
+  StopInternal();
+}
+
+void ResourceUsageMonitor::StopInternal() {
+  stop_signal_ = true;
+  if (check_memory_thd_ == nullptr) {
+    return;
+  }
+  if (check_memory_thd_ != nullptr) {
+    check_memory_thd_->join();
+  }
+  check_memory_thd_.reset(nullptr);
+}
+
+std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
+  std::string result = "";
 #if defined(__linux__) || defined(__ANDROID__)
   int iPid = static_cast<int>(getpid());
   std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
 
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect cpu memory info only supports Linux and ANDROID.")
 #endif
-  return;
+  return result;
 }
 
-void DumpCurrentGpuMemoryUsage(const std::string& name, int device_id) {
+std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
+  std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
   std::string command = "nvidia-smi --id=" + std::to_string(device_id) +
                         " --query-gpu=index,uuid,name,timestamp,memory.total,"
                         "memory.free,memory.used,utilization.gpu,utilization."
                         "memory --format=csv,noheader,nounits";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
 
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect gpu memory info only supports Linux in GPU.")
 #endif
-  return;
-}
-
-float GetCpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_cpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ' ')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_cpu_mem = std::max(max_cpu_mem, stof(nums[3]));
-  }
-  return max_cpu_mem / 1024;
-}
-
-float GetGpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_gpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ',')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_gpu_mem = std::max(max_gpu_mem, stof(nums[6]));
-  }
-  return max_gpu_mem;
+  return result;
 }
 
 }  // namespace benchmark
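The monitor's samplers shell out to pmap and nvidia-smi and parse their text output rather than reading /proc directly. A self-contained sketch of the CPU path (Linux only, error handling trimmed; token index 3 is the RSS column of pmap -x's "total" line, matching the cpu_tokens[3] access above):

    #include <unistd.h>
    #include <cstdio>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
      // Ask pmap for this process's memory summary, as the monitor does.
      std::string cmd = "pmap -x " + std::to_string(getpid()) + " | grep total";
      FILE* pp = popen(cmd.c_str(), "r");
      if (!pp) return 1;
      char buf[1024];
      std::string line;
      while (fgets(buf, sizeof(buf), pp) != NULL) line += buf;
      pclose(pp);
      // Whitespace-split; token 3 is the RSS figure in KB.
      std::istringstream iss(line);
      std::vector<std::string> tokens;
      for (std::string tok; iss >> tok;) tokens.push_back(tok);
      if (tokens.size() > 3)
        std::cout << "cpu_mem_mb: " << std::stof(tokens[3]) / 1024 << std::endl;
      return 0;
    }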
@@ -13,23 +13,72 @@
 // limitations under the License.
 #pragma once
 
+#include <memory>
+#include <thread>  // NOLINT
 #include "fastdeploy/utils/utils.h"
 
 namespace fastdeploy {
 namespace benchmark {
+/*! @brief ResourceUsageMonitor object used when to collect memory info.
+ */
+class FASTDEPLOY_DECL ResourceUsageMonitor {
+ public:
+  /** \brief Set sampling_interval_ms and gpu_id for ResourceUsageMonitor.
+   *
+   * \param[in] sampling_interval_ms How often to collect memory info(ms).
+   * \param[in] gpu_id Device(gpu) id, default 0.
+   */
+  explicit ResourceUsageMonitor(int sampling_interval_ms, int gpu_id = 0);
 
-// Record current cpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentCpuMemoryUsage(const std::string& name);
+  ~ResourceUsageMonitor() { StopInternal(); }
 
-// Record current gpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentGpuMemoryUsage(const std::string& name,
-                                               int device_id);
+  /// Start memory info collect
+  void Start();
+  /// Stop memory info collect
+  void Stop();
+  /// Get maximum cpu memory usage
+  float GetMaxCpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_cpu_mem_;
+  }
+  /// Get maximum gpu memory usage
+  float GetMaxGpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_mem_;
+  }
+  /// Get maximum gpu util
+  float GetMaxGpuUtil() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_util_;
+  }
 
-// Get Max cpu memory usage
-FASTDEPLOY_DECL float GetCpuMemoryUsage(const std::string& name);
+  ResourceUsageMonitor(ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor(ResourceUsageMonitor&&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&&) = delete;
 
-// Get Max gpu memory usage
-FASTDEPLOY_DECL float GetGpuMemoryUsage(const std::string& name);
+ private:
+  void StopInternal();
+  // Get current cpu memory info
+  std::string GetCurrentCpuMemoryInfo();
+  // Get current gpu memory info
+  std::string GetCurrentGpuMemoryInfo(int device_id);
+
+  bool is_supported_ = false;
+  bool stop_signal_ = false;
+  const int sampling_interval_;
+  float max_cpu_mem_ = 0.0f;
+  float max_gpu_mem_ = 0.0f;
+  float max_gpu_util_ = 0.0f;
+  const int gpu_id_ = 0;
+  std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
+};
 
 }  // namespace benchmark
 }  // namespace fastdeploy
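Putting the new API together: construct with a sampling interval, Start() before the workload, read the peaks, then Stop(). A minimal caller sketch, assuming the header above is included as fastdeploy/benchmark/utils.h:

    #include <iostream>
    #include "fastdeploy/benchmark/utils.h"  // assumed path of the header above

    int main() {
      // 50 ms sampling on GPU 0, matching the new --sampling_interval default.
      fastdeploy::benchmark::ResourceUsageMonitor monitor(50, 0);
      monitor.Start();
      // ... run warmup and the timed inference loop here ...
      // Read the peaks before Stop(): per the declarations above, the getters
      // return -1 once the sampling thread has been torn down.
      std::cout << "cpu_pss_mb: " << monitor.GetMaxCpuMem() << std::endl;
      std::cout << "gpu_pss_mb: " << monitor.GetMaxGpuMem() << std::endl;
      std::cout << "gpu_util: " << monitor.GetMaxGpuUtil() << std::endl;
      monitor.Stop();
      return 0;
    }

This ordering is why the benchmark's RunModel also queries GetMaxCpuMem/GetMaxGpuMem/GetMaxGpuUtil before calling Stop().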
fastdeploy/runtime/runtime_option.h (1 change, Normal file → Executable file)
@@ -206,7 +206,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
 
   // *** The belowing api are deprecated, will be removed in v1.2.0
   // *** Do not use it anymore
 
   void SetPaddleMKLDNN(bool pd_mkldnn = true);
-  void EnablePaddleToTrt();
   void DeletePaddleBackendPass(const std::string& delete_pass_name);
@@ -80,6 +80,10 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
     obj_class_num_ = num;
     prob_box_size_ = obj_class_num_ + 5;
   }
+  /// Get the number of class
+  int GetClassNum() {
+    return obj_class_num_;
+  }
 
  private:
  std::vector<int> anchors_ = {10, 13, 16, 30, 33, 23, 30, 61, 62,
@@ -65,7 +65,9 @@ void BindRKYOLO(pybind11::module& m) {
       .def_property("conf_threshold", &vision::detection::RKYOLOPostprocessor::GetConfThreshold,
                     &vision::detection::RKYOLOPostprocessor::SetConfThreshold)
       .def_property("nms_threshold", &vision::detection::RKYOLOPostprocessor::GetNMSThreshold,
-                    &vision::detection::RKYOLOPostprocessor::SetNMSThreshold);
+                    &vision::detection::RKYOLOPostprocessor::SetNMSThreshold)
+      .def_property("class_num", &vision::detection::RKYOLOPostprocessor::GetClassNum,
+                    &vision::detection::RKYOLOPostprocessor::SetClassNum);
 
   pybind11::class_<vision::detection::RKYOLOV5, FastDeployModel>(m, "RKYOLOV5")
       .def(pybind11::init<std::string,
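For readers new to pybind11, def_property is what turns the C++ getter/setter pair into the class_num attribute used from Python below. A toy module with the same shape (the names here are illustrative, not FastDeploy's):

    #include <pybind11/pybind11.h>

    // A minimal class with the same getter/setter shape as RKYOLOPostprocessor.
    struct Post {
      int GetClassNum() { return class_num_; }
      void SetClassNum(int n) { class_num_ = n; }
      int class_num_ = 80;
    };

    PYBIND11_MODULE(demo, m) {
      pybind11::class_<Post>(m, "Post")
          .def(pybind11::init<>())
          // def_property wires the pair to one Python attribute: reading
          // p.class_num calls GetClassNum, assigning it calls SetClassNum.
          .def_property("class_num", &Post::GetClassNum, &Post::SetClassNum);
    }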
@@ -108,11 +108,11 @@ class RKYOLOPostprocessor:
         return self._postprocessor.nms_threshold
 
     @property
-    def multi_label(self):
+    def class_num(self):
         """
-        multi_label for postprocessing, set true for eval, default is True
+        class_num for postprocessing, default is 80
         """
-        return self._postprocessor.multi_label
+        return self._postprocessor.class_num
 
     @conf_threshold.setter
     def conf_threshold(self, conf_threshold):
@@ -126,13 +126,14 @@ class RKYOLOPostprocessor:
             "The value to set `nms_threshold` must be type of float."
         self._postprocessor.nms_threshold = nms_threshold
 
-    @multi_label.setter
-    def multi_label(self, value):
-        assert isinstance(
-            value,
-            bool), "The value to set `multi_label` must be type of bool."
-        self._postprocessor.multi_label = value
+    @class_num.setter
+    def class_num(self, class_num):
+        """
+        class_num for postprocessing, default is 80
+        """
+        assert isinstance(class_num, int), \
+            "The value to set `class_num` must be type of int."
+        self._postprocessor.class_num = class_num
 
 class RKYOLOV5(FastDeployModel):
     def __init__(self,