Merge branch 'encrypt_model' of https://github.com/felixhjh/FastDeploy into encrypt_model
benchmark/cpp/benchmark_yolov5.cc (53 changes, Normal file → Executable file)
@@ -17,8 +17,7 @@
 #include "flags.h"
 
 bool RunModel(std::string model_file, std::string image_file, size_t warmup,
-              size_t repeats, size_t dump_period, std::string cpu_mem_file_name,
-              std::string gpu_mem_file_name) {
+              size_t repeats, size_t sampling_interval) {
   // Initialization
   auto option = fastdeploy::RuntimeOption();
   if (!CreateRuntimeOption(&option)) {
@@ -34,6 +33,12 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
     return false;
   }
   auto im = cv::imread(image_file);
+  // For collect memory info
+  fastdeploy::benchmark::ResourceUsageMonitor resource_moniter(
+      sampling_interval, FLAGS_device_id);
+  if (FLAGS_collect_memory_info) {
+    resource_moniter.Start();
+  }
   // For Runtime
   if (FLAGS_profile_mode == "runtime") {
     fastdeploy::vision::DetectionResult res;
@@ -57,35 +62,34 @@ bool RunModel(std::string model_file, std::string image_file, size_t warmup,
       return false;
     }
   }
-  std::vector<float> end2end_statis;
   // Step2: repeat for repeats times
   std::cout << "Counting time..." << std::endl;
+  fastdeploy::TimeCounter tc;
   std::cout << "Repeat " << repeats << " times..." << std::endl;
   fastdeploy::vision::DetectionResult res;
-  for (int i = 0; i < repeats; i++) {
-    if (FLAGS_collect_memory_info && i % dump_period == 0) {
-      fastdeploy::benchmark::DumpCurrentCpuMemoryUsage(cpu_mem_file_name);
-#if defined(WITH_GPU)
-      fastdeploy::benchmark::DumpCurrentGpuMemoryUsage(gpu_mem_file_name,
-                                                       FLAGS_device_id);
-#endif
-    }
-    fastdeploy::TimeCounter tc;
-    tc.Start();
+  tc.Start();
+  for (int i = 0; i < repeats; i++) {
     if (!model.Predict(im, &res)) {
       std::cerr << "Failed to predict." << std::endl;
       return false;
     }
-    tc.End();
-    end2end_statis.push_back(tc.Duration() * 1000);
   }
-  float end2end = std::accumulate(end2end_statis.end() - repeats,
-                                  end2end_statis.end(), 0.f) /
-                  repeats;
+  tc.End();
+  double end2end = tc.Duration() / repeats * 1000;
   std::cout << "End2End(ms): " << end2end << "ms." << std::endl;
   auto vis_im = fastdeploy::vision::VisDetection(im, res);
   cv::imwrite("vis_result.jpg", vis_im);
   std::cout << "Visualized result saved in ./vis_result.jpg" << std::endl;
   }
+  if (FLAGS_collect_memory_info) {
+    float cpu_mem = resource_moniter.GetMaxCpuMem();
+    float gpu_mem = resource_moniter.GetMaxGpuMem();
+    float gpu_util = resource_moniter.GetMaxGpuUtil();
+    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
+    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
+    std::cout << "gpu_util: " << gpu_util << std::endl;
+    resource_moniter.Stop();
+  }
 
   return true;
 }
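The timing change above is worth spelling out: the old code timed each Predict() call separately, pushed every duration into end2end_statis, and averaged with std::accumulate; the new code starts one TimeCounter before the loop and divides the total by repeats. A minimal standalone sketch of the new arithmetic, using std::chrono in place of FastDeploy's TimeCounter and with the Predict() call stubbed out:

    #include <chrono>
    #include <iostream>

    int main() {
      const int repeats = 100;
      auto start = std::chrono::steady_clock::now();
      for (int i = 0; i < repeats; i++) {
        // model.Predict(im, &res) would run here.
      }
      auto end = std::chrono::steady_clock::now();
      // Same arithmetic as the diff, where Duration() returns seconds:
      // seconds / repeats * 1000 gives average latency per run in ms.
      double total_seconds = std::chrono::duration<double>(end - start).count();
      double end2end_ms = total_seconds / repeats * 1000;
      std::cout << "End2End(ms): " << end2end_ms << std::endl;
      return 0;
    }

One consequence of the single-timer form is that per-iteration jitter is no longer recorded, only the mean.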
@@ -94,21 +98,10 @@ int main(int argc, char* argv[]) {
   google::ParseCommandLineFlags(&argc, &argv, true);
   int repeats = FLAGS_repeat;
   int warmup = FLAGS_warmup;
-  int dump_period = FLAGS_dump_period;
-  std::string cpu_mem_file_name = "result_cpu.txt";
-  std::string gpu_mem_file_name = "result_gpu.txt";
+  int sampling_interval = FLAGS_sampling_interval;
   // Run model
-  if (RunModel(FLAGS_model, FLAGS_image, warmup, repeats, dump_period,
-               cpu_mem_file_name, gpu_mem_file_name) != true) {
+  if (!RunModel(FLAGS_model, FLAGS_image, warmup, repeats, sampling_interval)) {
     exit(1);
   }
-  if (FLAGS_collect_memory_info) {
-    float cpu_mem = fastdeploy::benchmark::GetCpuMemoryUsage(cpu_mem_file_name);
-    std::cout << "cpu_pss_mb: " << cpu_mem << "MB." << std::endl;
-#if defined(WITH_GPU)
-    float gpu_mem = fastdeploy::benchmark::GetGpuMemoryUsage(gpu_mem_file_name);
-    std::cout << "gpu_pss_mb: " << gpu_mem << "MB." << std::endl;
-#endif
-  }
   return 0;
 }
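With main() trimmed down, memory collection is configured entirely through flags. Assuming the built binary keeps the source file's name, an invocation would look something like:

    ./benchmark_yolov5 --model model_path --image img_path --collect_memory_info=true --sampling_interval=50

The --dump_period flag and the result_cpu.txt / result_gpu.txt intermediate files are gone; peak usage is printed directly at the end of the run.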
@@ -37,7 +37,7 @@ DEFINE_bool(
     "and 'lite' backend");
 DEFINE_bool(
     collect_memory_info, false, "Whether to collect memory info");
-DEFINE_int32(dump_period, 100, "How often to collect memory info.");
+DEFINE_int32(sampling_interval, 50, "How often to collect memory info(ms).");
 
 void PrintUsage() {
   std::cout << "Usage: infer_demo --model model_path --image img_path --device "
@@ -61,7 +61,7 @@ endif(WIN32)
 message("FASTTOKENIZER_COMPILE_LIB = ${FASTTOKENIZER_COMPILE_LIB}")
 
 set(FASTTOKENIZER_URL_BASE "https://bj.bcebos.com/paddlenlp/fast_tokenizer/")
-set(FASTTOKENIZER_VERSION "1.0.1")
+set(FASTTOKENIZER_VERSION "1.0.2")
 
 # Set download url
 if(WIN32)
@@ -80,7 +80,7 @@ if(PADDLEINFERENCE_DIRECTORY)
   endif()
 else()
   set(PADDLEINFERENCE_URL_BASE "https://bj.bcebos.com/fastdeploy/third_libs/")
-  set(PADDLEINFERENCE_VERSION "2.4-dev4")
+  set(PADDLEINFERENCE_VERSION "2.4-dev5")
   if(WIN32)
     if (WITH_GPU)
       set(PADDLEINFERENCE_FILE "paddle_inference-win-x64-gpu-trt-${PADDLEINFERENCE_VERSION}.zip")
@@ -36,86 +36,131 @@ static std::string strip(const std::string& str, char ch = ' ') {
   return str.substr(i, j + 1 - i);
 }
 
-void DumpCurrentCpuMemoryUsage(const std::string& name) {
+// Split string
+static void split(const std::string& s, std::vector<std::string>& tokens,
+                  char delim = ' ') {
+  tokens.clear();
+  size_t lastPos = s.find_first_not_of(delim, 0);
+  size_t pos = s.find(delim, lastPos);
+  while (lastPos != std::string::npos) {
+    tokens.emplace_back(s.substr(lastPos, pos - lastPos));
+    lastPos = s.find_first_not_of(delim, pos);
+    pos = s.find(delim, lastPos);
+  }
+  return;
+}
+
+ResourceUsageMonitor::ResourceUsageMonitor(int sampling_interval_ms, int gpu_id)
+    : is_supported_(false),
+      sampling_interval_(sampling_interval_ms),
+      gpu_id_(gpu_id) {
+#if defined(__linux__) || defined(__ANDROID__)
+  is_supported_ = true;
+#else
+  is_supported_ = false;
+#endif
+  if (!is_supported_) {
+    FDASSERT(false,
+             "Currently ResourceUsageMonitor only supports Linux and ANDROID.")
+    return;
+  }
+}
+
+void ResourceUsageMonitor::Start() {
+  if (!is_supported_) return;
+  if (check_memory_thd_ != nullptr) {
+    FDINFO << "Memory monitoring has already started!" << std::endl;
+    return;
+  }
+  FDINFO << "Start monitoring memory!" << std::endl;
+  stop_signal_ = false;
+  check_memory_thd_.reset(new std::thread(([this]() {
+    // Note we retrieve the memory usage at the very beginning of the thread.
+    while (true) {
+      std::string cpu_mem_info = GetCurrentCpuMemoryInfo();
+      // get max_cpu_mem
+      std::vector<std::string> cpu_tokens;
+      split(cpu_mem_info, cpu_tokens, ' ');
+      max_cpu_mem_ = std::max(max_cpu_mem_, stof(cpu_tokens[3]) / 1024);
+#if defined(WITH_GPU)
+      std::string gpu_mem_info = GetCurrentGpuMemoryInfo(gpu_id_);
+      // get max_gpu_mem and max_gpu_util
+      std::vector<std::string> gpu_tokens;
+      split(gpu_mem_info, gpu_tokens, ',');
+      max_gpu_mem_ = std::max(max_gpu_mem_, stof(gpu_tokens[6]));
+      max_gpu_util_ = std::max(max_gpu_util_, stof(gpu_tokens[7]));
+#endif
+      if (stop_signal_) break;
+      std::this_thread::sleep_for(
+          std::chrono::milliseconds(sampling_interval_));
+    }
+  })));
+}
+
+void ResourceUsageMonitor::Stop() {
+  if (!is_supported_) {
+    return;
+  }
+  if (check_memory_thd_ == nullptr) {
+    FDINFO << "Memory monitoring hasn't started yet or has stopped!"
+           << std::endl;
+    return;
+  }
+  FDINFO << "Stop monitoring memory!" << std::endl;
+  StopInternal();
+}
+
+void ResourceUsageMonitor::StopInternal() {
+  stop_signal_ = true;
+  if (check_memory_thd_ == nullptr) {
+    return;
+  }
+  if (check_memory_thd_ != nullptr) {
+    check_memory_thd_->join();
+  }
+  check_memory_thd_.reset(nullptr);
+}
+
+std::string ResourceUsageMonitor::GetCurrentCpuMemoryInfo() {
+  std::string result = "";
 #if defined(__linux__) || defined(__ANDROID__)
   int iPid = static_cast<int>(getpid());
   std::string command = "pmap -x " + std::to_string(iPid) + " | grep total";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
 
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect cpu memory info only supports Linux and ANDROID.")
 #endif
-  return;
+  return result;
 }
 
-void DumpCurrentGpuMemoryUsage(const std::string& name, int device_id) {
+std::string ResourceUsageMonitor::GetCurrentGpuMemoryInfo(int device_id) {
+  std::string result = "";
 #if defined(__linux__) && defined(WITH_GPU)
   std::string command = "nvidia-smi --id=" + std::to_string(device_id) +
                         " --query-gpu=index,uuid,name,timestamp,memory.total,"
                         "memory.free,memory.used,utilization.gpu,utilization."
                         "memory --format=csv,noheader,nounits";
   FILE* pp = popen(command.data(), "r");
-  if (!pp) return;
+  if (!pp) return "";
   char tmp[1024];
 
   while (fgets(tmp, sizeof(tmp), pp) != NULL) {
-    std::ofstream write;
-    write.open(name, std::ios::app);
-    write << tmp;
-    write.close();
+    result += tmp;
   }
   pclose(pp);
 #else
   FDASSERT(false,
            "Currently collect gpu memory info only supports Linux in GPU.")
 #endif
-  return;
-}
-
-float GetCpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_cpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ' ')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_cpu_mem = std::max(max_cpu_mem, stof(nums[3]));
-  }
-  return max_cpu_mem / 1024;
-}
-
-float GetGpuMemoryUsage(const std::string& name) {
-  std::ifstream read(name);
-  std::string line;
-  float max_gpu_mem = -1;
-  while (getline(read, line)) {
-    std::stringstream ss(line);
-    std::string tmp;
-    std::vector<std::string> nums;
-    while (getline(ss, tmp, ',')) {
-      tmp = strip(tmp);
-      if (tmp.empty()) continue;
-      nums.push_back(tmp);
-    }
-    max_gpu_mem = std::max(max_gpu_mem, stof(nums[6]));
-  }
-  return max_gpu_mem;
+  return result;
 }
 
 }  // namespace benchmark
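The monitor's samplers shell out to pmap and nvidia-smi and parse their text output rather than reading /proc directly. A self-contained sketch of the CPU path (Linux only, error handling trimmed; token index 3 is the RSS column of pmap -x's "total" line, matching the cpu_tokens[3] access above):

    #include <unistd.h>
    #include <cstdio>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    int main() {
      // Ask pmap for this process's memory summary, as the monitor does.
      std::string cmd = "pmap -x " + std::to_string(getpid()) + " | grep total";
      FILE* pp = popen(cmd.c_str(), "r");
      if (!pp) return 1;
      char buf[1024];
      std::string line;
      while (fgets(buf, sizeof(buf), pp) != NULL) line += buf;
      pclose(pp);
      // Whitespace-split; token 3 is the RSS figure in KB.
      std::istringstream iss(line);
      std::vector<std::string> tokens;
      for (std::string tok; iss >> tok;) tokens.push_back(tok);
      if (tokens.size() > 3)
        std::cout << "cpu_mem_mb: " << std::stof(tokens[3]) / 1024 << std::endl;
      return 0;
    }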
@@ -13,23 +13,72 @@
 // limitations under the License.
 #pragma once
 
+#include <memory>
+#include <thread>  // NOLINT
 #include "fastdeploy/utils/utils.h"
 
 namespace fastdeploy {
 namespace benchmark {
+/*! @brief ResourceUsageMonitor object used when to collect memory info.
+ */
+class FASTDEPLOY_DECL ResourceUsageMonitor {
+ public:
+  /** \brief Set sampling_interval_ms and gpu_id for ResourceUsageMonitor.
+   *
+   * \param[in] sampling_interval_ms How often to collect memory info(ms).
+   * \param[in] gpu_id Device(gpu) id, default 0.
+   */
+  explicit ResourceUsageMonitor(int sampling_interval_ms, int gpu_id = 0);
 
-// Record current cpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentCpuMemoryUsage(const std::string& name);
+  ~ResourceUsageMonitor() { StopInternal(); }
 
-// Record current gpu memory usage into file
-FASTDEPLOY_DECL void DumpCurrentGpuMemoryUsage(const std::string& name,
-                                               int device_id);
+  /// Start memory info collect
+  void Start();
+  /// Stop memory info collect
+  void Stop();
+  /// Get maximum cpu memory usage
+  float GetMaxCpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_cpu_mem_;
+  }
+  /// Get maximum gpu memory usage
+  float GetMaxGpuMem() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_mem_;
+  }
+  /// Get maximum gpu util
+  float GetMaxGpuUtil() const {
+    if (!is_supported_ || check_memory_thd_ == nullptr) {
+      return -1.0f;
+    }
+    return max_gpu_util_;
+  }
 
-// Get Max cpu memory usage
-FASTDEPLOY_DECL float GetCpuMemoryUsage(const std::string& name);
+  ResourceUsageMonitor(ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&) = delete;
+  ResourceUsageMonitor(ResourceUsageMonitor&&) = delete;
+  ResourceUsageMonitor& operator=(const ResourceUsageMonitor&&) = delete;
 
-// Get Max gpu memory usage
-FASTDEPLOY_DECL float GetGpuMemoryUsage(const std::string& name);
+ private:
+  void StopInternal();
+  // Get current cpu memory info
+  std::string GetCurrentCpuMemoryInfo();
+  // Get current gpu memory info
+  std::string GetCurrentGpuMemoryInfo(int device_id);
+
+  bool is_supported_ = false;
+  bool stop_signal_ = false;
+  const int sampling_interval_;
+  float max_cpu_mem_ = 0.0f;
+  float max_gpu_mem_ = 0.0f;
+  float max_gpu_util_ = 0.0f;
+  const int gpu_id_ = 0;
+  std::unique_ptr<std::thread> check_memory_thd_ = nullptr;
+};
 
 }  // namespace benchmark
 }  // namespace fastdeploy
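Putting the new API together: construct with a sampling interval, Start() before the workload, read the peaks, then Stop(). A minimal caller sketch, assuming the header above is included as fastdeploy/benchmark/utils.h:

    #include <iostream>
    #include "fastdeploy/benchmark/utils.h"  // assumed path of the header above

    int main() {
      // 50 ms sampling on GPU 0, matching the new --sampling_interval default.
      fastdeploy::benchmark::ResourceUsageMonitor monitor(50, 0);
      monitor.Start();
      // ... run warmup and the timed inference loop here ...
      // Read the peaks before Stop(): per the declarations above, the getters
      // return -1 once the sampling thread has been torn down.
      std::cout << "cpu_pss_mb: " << monitor.GetMaxCpuMem() << std::endl;
      std::cout << "gpu_pss_mb: " << monitor.GetMaxGpuMem() << std::endl;
      std::cout << "gpu_util: " << monitor.GetMaxGpuUtil() << std::endl;
      monitor.Stop();
      return 0;
    }

This ordering is why the benchmark's RunModel also queries GetMaxCpuMem/GetMaxGpuMem/GetMaxGpuUtil before calling Stop().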
fastdeploy/runtime/runtime_option.h (1 change, Normal file → Executable file)
@@ -206,7 +206,6 @@ struct FASTDEPLOY_DECL RuntimeOption {
 
   // *** The belowing api are deprecated, will be removed in v1.2.0
   // *** Do not use it anymore
 
   void SetPaddleMKLDNN(bool pd_mkldnn = true);
-  void EnablePaddleToTrt();
   void DeletePaddleBackendPass(const std::string& delete_pass_name);
@@ -80,6 +80,10 @@ class FASTDEPLOY_DECL RKYOLOPostprocessor {
     obj_class_num_ = num;
     prob_box_size_ = obj_class_num_ + 5;
   }
+  /// Get the number of class
+  int GetClassNum() {
+    return obj_class_num_;
+  }
 
  private:
  std::vector<int> anchors_ = {10, 13, 16, 30, 33, 23, 30, 61, 62,
@@ -65,7 +65,9 @@ void BindRKYOLO(pybind11::module& m) {
       .def_property("conf_threshold", &vision::detection::RKYOLOPostprocessor::GetConfThreshold,
                     &vision::detection::RKYOLOPostprocessor::SetConfThreshold)
       .def_property("nms_threshold", &vision::detection::RKYOLOPostprocessor::GetNMSThreshold,
-                    &vision::detection::RKYOLOPostprocessor::SetNMSThreshold);
+                    &vision::detection::RKYOLOPostprocessor::SetNMSThreshold)
+      .def_property("class_num", &vision::detection::RKYOLOPostprocessor::GetClassNum,
+                    &vision::detection::RKYOLOPostprocessor::SetClassNum);
 
   pybind11::class_<vision::detection::RKYOLOV5, FastDeployModel>(m, "RKYOLOV5")
       .def(pybind11::init<std::string,
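For readers new to pybind11, def_property is what turns the C++ getter/setter pair into the class_num attribute used from Python below. A toy module with the same shape (the names here are illustrative, not FastDeploy's):

    #include <pybind11/pybind11.h>

    // A minimal class with the same getter/setter shape as RKYOLOPostprocessor.
    struct Post {
      int GetClassNum() { return class_num_; }
      void SetClassNum(int n) { class_num_ = n; }
      int class_num_ = 80;
    };

    PYBIND11_MODULE(demo, m) {
      pybind11::class_<Post>(m, "Post")
          .def(pybind11::init<>())
          // def_property wires the pair to one Python attribute: reading
          // p.class_num calls GetClassNum, assigning it calls SetClassNum.
          .def_property("class_num", &Post::GetClassNum, &Post::SetClassNum);
    }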
@@ -108,11 +108,11 @@ class RKYOLOPostprocessor:
         return self._postprocessor.nms_threshold
 
     @property
-    def multi_label(self):
+    def class_num(self):
         """
-        multi_label for postprocessing, set true for eval, default is True
+        class_num for postprocessing, default is 80
         """
-        return self._postprocessor.multi_label
+        return self._postprocessor.class_num
 
     @conf_threshold.setter
     def conf_threshold(self, conf_threshold):
@@ -126,13 +126,14 @@ class RKYOLOPostprocessor:
             "The value to set `nms_threshold` must be type of float."
         self._postprocessor.nms_threshold = nms_threshold
 
-    @multi_label.setter
-    def multi_label(self, value):
-        assert isinstance(
-            value,
-            bool), "The value to set `multi_label` must be type of bool."
-        self._postprocessor.multi_label = value
+    @class_num.setter
+    def class_num(self, class_num):
+        """
+        class_num for postprocessing, default is 80
+        """
+        assert isinstance(class_num, int), \
+            "The value to set `class_num` must be type of int."
+        self._postprocessor.class_num = class_num
 
 class RKYOLOV5(FastDeployModel):
     def __init__(self,