Merge branch 'encrypt_model' of https://github.com/felixhjh/FastDeploy into encrypt_model

felixhjh
2023-02-13 06:32:19 +00:00
10 changed files with 198 additions and 105 deletions

benchmark/cpp/benchmark_yolov5.cc Normal file → Executable file

@@ -17,8 +17,7 @@
 #include "flags.h"
 bool RunModel(std::string model_file, std::string image_file, size_t warmup,
-              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
-              std::string gpu_mem_file_name) {
+              size_t repeats, size_t sampling_interval) {
   // Initialization
   auto option = fastdeploy::RuntimeOption();
   if (!CreateRuntimeOption(&option)) {
@@ -34,6 +33,12 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
     return false;
   }
   auto im = cv::imread(image_file);
+  // For collect memory info
+  fastdeploy::benchmark::ResourceUsageMonitor resource_moniter(
+      sampling_interval, FLAGS_device_id);
+  if (FLAGS_collect_memory_info) {
+    resource_moniter.Start();
+  }
   // For Runtime
   if (FLAGS_profile_mode == "runtime") {
     fastdeploy::vision::DetectionResult res;
@@ -57,35 +62,34 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
       return false;
     }
   }
-  std::vector<float> end2end_statis;
   // Step2: repeat for repeats times
-  std::cout << "Counting time..." << std::endl;
+  fastdeploy::TimeCounter tc;
   std::cout << "Repeat " << repeats << " times..." << std::endl;
   fastdeploy::vision::DetectionResult res;
-  for (int i = 0; i < repeats; i++) {
-    if (FLAGS_collect_memory_info && i % dump_period == 0) {
-      fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
-#if defined(WITH_GPU)
-      fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
-                                                       FLAGS_device_id);
-#endif
-    }
-    fastdeploy::TimeCounter tc;
   tc.Start();
+  for (int i = 0; i < repeats; i++) {
     if (!model.Predict(im, &res)) {
       std::cerr << "Failed to predict." << std::endl;
       return false;
     }
-    tc.End();
-    end2end_statis.push_back(tc.Duration() * 1000);
   }
-  float end2end = std::accumulate(end2end_statis.end() - repeats,
-                                  end2end_statis.end(), 0.f) /
-                  repeats;
+  tc.End();
+  double end2end = tc.Duration() / repeats * 1000;
   std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
   auto vis_im = fastdeploy::vision::VisDetection(im, res);
   cv::imwrite("vis_result.jpg", vis_im);
   std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
   }
+  if (FLAGS_collect_memory_info) {
+    float cpu_mem = resource_moniter.GetMaxCpuMem();
+    float gpu_mem = resource_moniter.GetMaxGpuMem();
+    float gpu_util = resource_moniter.GetMaxGpuUtil();
+    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+    std::cout << "gpu_util: " << gpu_util << std::endl;
+    resource_moniter.Stop();
+  }
   return true;
 }
@@ -94,21 +98,10 @@ int main(int argc, char* argv[]) {
   google::ParseCommandLineFlags(&argc, &argv, true);
   int repeats = FLAGS_repeat;
   int warmup = FLAGS_warmup;
-  int dump_period = FLAGS_dump_period;
-  std::string cpu_mem_file_name = "result_cpu.txt";
-  std::string gpu_mem_file_name = "result_gpu.txt";
+  int sampling_interval = FLAGS_sampling_interval;
   // Run model
-  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
-               cpu_mem_file_name, gpu_mem_file_name) != true) {
+  if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) {
    exit(1);
  }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-#if defined(WITH_GPU)
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
-#endif
-  }
   return 0;
 }
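The timing change above replaces per-iteration statistics with a single counter around the whole loop. For readers outside the FastDeploy tree, the pattern reduces to the self-contained sketch below; std::chrono stands in for fastdeploy::TimeCounter and the Predict call is elided, so this is an illustration of the scheme, not the benchmark itself.

    // Sketch: average end-to-end latency with one timer around the loop.
    // std::chrono replaces fastdeploy::TimeCounter here (an assumption made
    // for portability); the real benchmark calls model.Predict in the loop.
    #include <chrono>
    #include <iostream>

    int main() {
      const int repeats = 1000;
      auto start = std::chrono::high_resolution_clock::now();
      for (int i = 0; i < repeats; i++) {
        // model.Predict(im, &res) would run here.
      }
      auto end = std::chrono::high_resolution_clock::now();
      double total_s = std::chrono::duration<double>(end - start).count();
      std::cout << "End2End(ms): " << total_s / repeats * 1000 << std::endl;
      return 0;
    }

One trade-off worth noting: a single counter is cheaper and simpler, but unlike the old end2end_statis vector it can no longer report percentiles or per-iteration variance, only the mean.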


@@ -37,7 +37,7 @@ DEFINE_bool(
     "and 'lite' backend");
 DEFINE_bool(
     collect_memory_info, false, "Whether to collect memory info");
-DEFINE_int32(dump_period, 100, "How often to collect memory info.");
+DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");
 void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path --device "


@@ -61,7 +61,7 @@ endif(WIN32)
 message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")
 set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/")
-set(FASTTOKENIZER_VERSION "1.0.1")
+set(FASTTOKENIZER_VERSION "1.0.2")
 # Set download url
 if(WIN32)


@@ -80,7 +80,7 @@ if(PADDLEINFERENCE_DIRECTORY)
   endif()
 else()
   set(PADDLEINFERENCE_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
-  set(PADDLEINFERENCE_VERSION "2.4-dev4")
+  set(PADDLEINFERENCE_VERSION "2.4-dev5")
   if(WIN32)
     if (WITH_GPU)
       set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-gpu-trt-${PADDLEINFERENCE_VERSION}.zip")


@@ -36,86 +36,131 @@ static std::string strip(const std::string& str, char ch = ' ') {
   return str.substr(i, j + 1 - i);
 }
-void DumpCurrentCpuMemoryUsage(const std::string& name) {
+// Split string
+static void split(const std::string& s, std::vector<std::string>& tokens,
+                  char delim = ' ') {
+  tokens.clear();
+  size_t lastPos = s.find_first_not_of(delim, 0);
+  size_t pos = s.find(delim, lastPos);
+  while (lastPos != std::string::npos) {
+    tokens.emplace_back(s.substr(lastPos, pos - lastPos));
+    lastPos = s.find_first_not_of(delim, pos);
+    pos = s.find(delim, lastPos);
+  }
+  return;
+}
+ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
+    : is_supported_(false),
+      sampling_interval_(sampling_interval_ms),
+      gpu_id_(gpu_id) {
+#if defined(__linux__) || defined(__ANDROID__)
+  is_supported_ = true;
+#else
+  is_supported_ = false;
+#endif
+  if (!is_supported_) {
+    FDASSERT(false,
+             "Currently ResourceUsageMonitor only supports Linux and ANDROID.")
+    return;
+  }
+}
+void ResourceUsageMonitor::Start() {
+  if (!is_supported_) return;
+  if (check_memory_thd_ != nullptr) {
+    FDINFO << "Memory monitoring has already started!" << std::endl;
+    return;
+  }
+  FDINFO << "Start monitoring memory!" << std::endl;
+  stop_signal_ = false;
+  check_memory_thd_.reset(new std::thread(([this]() {
+    // Note we retrieve the memory usage at the very beginning of the thread.
+    while (true) {
+      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
+      // get max_cpu_mem
+      std::vector<std::string> cpu_tokens;
+      split(cpu_mem_info, cpu_tokens, ' ');
+      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#if defined(WITH_GPU)
+      std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
+      // get max_gpu_mem and max_gpu_util
+      std::vector<std::string> gpu_tokens;
+      split(gpu_mem_info, gpu_tokens, ',');
+      max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
+      max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
+#endif
+      if (stop_signal_) break;
+      std::this_thread::sleep_for(
+          std::chrono::milliseconds(sampling_interval_));
+    }
+  })));
+}
+void ResourceUsageMonitor::Stop() {
+  if (!is_supported_) {
+    return;
+  }
+  if (check_memory_thd_ == nullptr) {
+    FDINFO << "Memory monitoring hasn't started yet or has stopped!"
+           << std::endl;
+    return;
+  }
+  FDINFO << "Stop monitoring memory!" << std::endl;
+  StopInternal();
+}
+void ResourceUsageMonitor::StopInternal() {
+  stop_signal_ = true;
+  if (check_memory_thd_ == nullptr) {
+    return;
+  }
+  if (check_memory_thd_ != nullptr) {
+    check_memory_thd_->join();
+  }
+  check_memory_thd_.reset(nullptr);
+}
+std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
+  std::string result = "";
 #if defined(__linux__) || defined(__ANDROID__)
   int iPid = static_cast<int>(getpid());
   std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect cpu memory info only supports Linux and ANDROID.")
 #endif
-  return;
+  return result;
 }
-void DumpCurrentGpuMemoryUsage(const std::string& name, int device_id) {
+std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
+  std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
   std::string command = "nvidia-smi --id=" + std::to_string(device_id) +
                         " --query-gpu=index,uuid,name,timestamp,memory.total,"
                         "memory.free,memory.used,utilization.gpu,utilization."
                         "memory --format=csv,noheader,nounits";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect gpu memory info only supports Linux in GPU.")
 #endif
-  return;
-}
-float GetCpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_cpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ' ')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_cpu_mem = std::max(max_cpu_mem, stof(nums[3]));
-  }
-  return max_cpu_mem / 1024;
-}
-float GetGpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_gpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ',')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_gpu_mem = std::max(max_gpu_mem, stof(nums[6]));
-  }
-  return max_gpu_mem;
+  return result;
 }
 }  // namespace benchmark
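The token indices used in Start() come from the shape of the sampled command output. The standalone demo below runs the same split() helper on an illustrative nvidia-smi CSV line (sample values are made up, not real output) to show why token 6 is memory.used and token 7 is utilization.gpu:

    // Demo of the split() helper on a fabricated nvidia-smi CSV line.
    #include <iostream>
    #include <string>
    #include <vector>

    static void split(const std::string& s, std::vector<std::string>& tokens,
                      char delim = ' ') {
      tokens.clear();
      size_t lastPos = s.find_first_not_of(delim, 0);
      size_t pos = s.find(delim, lastPos);
      while (lastPos != std::string::npos) {
        tokens.emplace_back(s.substr(lastPos, pos - lastPos));
        lastPos = s.find_first_not_of(delim, pos);
        pos = s.find(delim, lastPos);
      }
    }

    int main() {
      // Columns: index,uuid,name,timestamp,memory.total,memory.free,
      //          memory.used,utilization.gpu,utilization.memory
      std::string line =
          "0, GPU-xxxx, Tesla T4, 2023/02/13 06:32:19, 15360, 12000, 3360, 35, 12";
      std::vector<std::string> tokens;
      split(line, tokens, ',');
      // Tokens keep their leading space, which stof() tolerates.
      std::cout << "memory.used(MB): " << tokens[6] << std::endl;    // " 3360"
      std::cout << "utilization.gpu(%): " << tokens[7] << std::endl; // " 35"
      return 0;
    }

The CPU side is analogous: on typical `pmap -x <pid> | grep total` output the space-split tokens are total/kB/Kbytes/RSS/Dirty, so token 3 is resident memory in KB, which Start() divides by 1024 to report MB.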


@@ -13,23 +13,72 @@
 // limitations under the License.
 #pragma once
+#include <memory>
+#include <thread>  // NOLINT
 #include "fastdeploy/utils/utils.h"
 namespace fastdeploy {
 namespace benchmark {
-// Record current cpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentCpuMemoryUsage(const std::string& name);
-// Record current gpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentGpuMemoryUsage(const std::string& name,
-                                               int device_id);
-// Get Max cpu memory usage
-FASTDEPLOY_DECL float GetCpuMemoryUsage(const std::string& name);
-// Get Max gpu memory usage
-FASTDEPLOY_DECL float GetGpuMemoryUsage(const std::string& name);
+/*! @brief ResourceUsageMonitor object used when to collect memory info.
+ */
+class FASTDEPLOY_DECL ResourceUsageMonitor {
+ public:
+  /** \brief Set sampling_interval_ms and gpu_id for ResourceUsageMonitor.
+   *
+   * \param[in] sampling_interval_ms How often to collect memory info(ms).
+   * \param[in] gpu_id Device(gpu) id, default 0.
+   */
+  explicit ResourceUsageMonitor(int sampling_interval_ms, int gpu_id = 0);
+  ~ResourceUsageMonitor() { StopInternal(); }
+  /// Start memory info collect
+  void Start();
+  /// Stop memory info collect
+  void Stop();
+  /// Get maximum cpu memory usage
+  float GetMaxCpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_cpu_mem_;
+  }
+  /// Get maximum gpu memory usage
+  float GetMaxGpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_mem_;
+  }
+  /// Get maximum gpu util
+  float GetMaxGpuUtil() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_util_;
+  }
+  ResourceUsageMonitor(ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor(ResourceUsageMonitor&&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&&) = delete;
+ private:
+  void StopInternal();
+  // Get current cpu memory info
+  std::string GetCurrentCpuMemoryInfo();
+  // Get current gpu memory info
+  std::string GetCurrentGpuMemoryInfo(int device_id);
+  bool is_supported_ = false;
+  bool stop_signal_ = false;
+  const int sampling_interval_;
+  float max_cpu_mem_ = 0.0f;
+  float max_gpu_mem_ = 0.0f;
+  float max_gpu_util_ = 0.0f;
+  const int gpu_id_ = 0;
+  std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
+};
 }  // namespace benchmark
 }  // namespace fastdeploy
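Taken together with the benchmark changes above, the class is used in a start/query/stop pattern. A condensed sketch of what benchmark_yolov5.cc now does, with the inference loop elided:

    #include <iostream>
    #include "fastdeploy/benchmark/utils.h"

    int main() {
      // 50 ms sampling on GPU 0, matching the new flag defaults.
      fastdeploy::benchmark::ResourceUsageMonitor monitor(50, /*gpu_id=*/0);
      monitor.Start();
      // ... run the workload being profiled ...
      std::cout << "cpu_pss_mb: " << monitor.GetMaxCpuMem() << "MB." << std::endl;
      std::cout << "gpu_pss_mb: " << monitor.GetMaxGpuMem() << "MB." << std::endl;
      std::cout << "gpu_util: " << monitor.GetMaxGpuUtil() << std::endl;
      monitor.Stop();
      return 0;
    }

Note the getters return -1.0f when monitoring never started or the platform is unsupported, so callers can print results unconditionally, as the benchmark does.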

fastdeploy/runtime/runtime_option.h Normal file → Executable file

@@ -206,7 +206,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
   // *** The belowing api are deprecated, will be removed in v1.2.0
   // *** Do not use it anymore
   void SetPaddleMKLDNN(bool pd_mkldnn = true);
-  void EnablePaddleToTrt();
   void DeletePaddleBackendPass(const std::string& delete_pass_name);


@@ -56,7 +56,7 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
   float GetNMSThreshold() const { return nms_threshold_; }
   /// Set height and weight
-  void SetHeightAndWeight(int height,int width) {
+  void SetHeightAndWeight(int height, int width) {
     height_ = height;
     width_ = width;
   }
@@ -80,6 +80,10 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
     obj_class_num_ = num;
     prob_box_size_ = obj_class_num_ + 5;
   }
+  /// Get the number of class
+  int GetClassNum() {
+    return obj_class_num_;
+  }
  private:
   std::vector<int> anchors_ = {10, 13, 16, 30, 33, 23, 30, 61, 62,
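With the new getter, C++ callers can round-trip the class count. A minimal fragment (how the postprocessor is constructed is not shown in this diff, so treat the first line as illustrative):

    fastdeploy::vision::detection::RKYOLOPostprocessor postprocessor;
    postprocessor.SetClassNum(85);        // also updates prob_box_size_ to 85 + 5
    int n = postprocessor.GetClassNum();  // n == 85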


@@ -65,7 +65,9 @@ void BindRKYOLO(pybind11::module& m) {
       .def_property("conf_threshold", &vision::detection::RKYOLOPostprocessor::GetConfThreshold,
                     &vision::detection::RKYOLOPostprocessor::SetConfThreshold)
       .def_property("nms_threshold", &vision::detection::RKYOLOPostprocessor::GetNMSThreshold,
-                    &vision::detection::RKYOLOPostprocessor::SetNMSThreshold);
+                    &vision::detection::RKYOLOPostprocessor::SetNMSThreshold)
+      .def_property("class_num", &vision::detection::RKYOLOPostprocessor::GetClassNum,
+                    &vision::detection::RKYOLOPostprocessor::SetClassNum);
   pybind11::class_<vision::detection::RKYOLOV5, FastDeployModel>(m, "RKYOLOV5")
       .def(pybind11::init<std::string,


@@ -108,11 +108,11 @@ class RKYOLOPostprocessor:
         return self._postprocessor.nms_threshold

     @property
-    def multi_label(self):
+    def class_num(self):
         """
-        multi_label for postprocessing, set true for eval, default is True
+        class_num for postprocessing, default is 80
         """
-        return self._postprocessor.multi_label
+        return self._postprocessor.class_num

     @conf_threshold.setter
     def conf_threshold(self, conf_threshold):
@@ -126,13 +126,14 @@
             "The value to set `nms_threshold` must be type of float."
         self._postprocessor.nms_threshold = nms_threshold

-    @multi_label.setter
-    def multi_label(self, value):
-        assert isinstance(
-            value,
-            bool), "The value to set `multi_label` must be type of bool."
-        self._postprocessor.multi_label = value
+    @class_num.setter
+    def class_num(self, class_num):
+        """
+        class_num for postprocessing, default is 80
+        """
+        assert isinstance(class_num, int), \
+            "The value to set `class_num` must be type of int."
+        self._postprocessor.class_num = class_num

 class RKYOLOV5(FastDeployModel):
     def __init__(self,