[Other]Update python && cpp multi_thread examples (#876)

* Refactor PaddleSeg with preprocessor && postprocessor * Fix bugs * Delete redundancy code * Modify by comments * Refactor according to comments * Add batch evaluation * Add single test script * Add ppliteseg single test script && fix eval(raise) error * fix bug * Fix evaluation segmentation.py batch predict * Fix segmentation evaluation bug * Fix evaluation segmentation bugs * Update segmentation result docs * Update old predict api and DisableNormalizeAndPermute * Update resize segmentation label map with cv::INTER_NEAREST * Add Model Clone function for PaddleClas && PaddleDet && PaddleSeg * Add multi thread demo * Add python model clone function * Add multi thread python && C++ example * Fix bug * Update python && cpp multi_thread examples * Add cpp && python directory * Add README.md for examples * Delete redundant code Co-authored-by: Jason <jiangjiajun@baidu.com>
2025-10-05 16:48:03 +08:00 · 2022-12-14 19:18:53 +08:00
parent ce4867d14e
commit ada54bfd47
6 changed files with 334 additions and 39 deletions
--- a/fastdeploy/vision/vision_pybind.cc
+++ b/fastdeploy/vision/vision_pybind.cc
@@ -37,6 +37,21 @@ void BindVision(pybind11::module& m) {
      .def(pybind11::init())
      .def_readwrite("data", &vision::Mask::data)
      .def_readwrite("shape", &vision::Mask::shape)
      .def(pybind11::pickle(
        [](const vision::Mask &m) { 
            return pybind11::make_tuple(m.data, m.shape);
        },
        [](pybind11::tuple t) { 
            if (t.size() != 2)
                throw std::runtime_error("vision::Mask pickle with invalid state!");
            vision::Mask m;
            m.data = t[0].cast<std::vector<int32_t>>();
            m.shape = t[1].cast<std::vector<int64_t>>();
            return m;
        }
      ))
      .def("__repr__", &vision::Mask::Str)
      .def("__str__", &vision::Mask::Str);
@@ -44,6 +59,21 @@ void BindVision(pybind11::module& m) {
      .def(pybind11::init())
      .def_readwrite("label_ids", &vision::ClassifyResult::label_ids)
      .def_readwrite("scores", &vision::ClassifyResult::scores)
      .def(pybind11::pickle(
        [](const vision::ClassifyResult &c) { 
            return pybind11::make_tuple(c.label_ids, c.scores);
        },
        [](pybind11::tuple t) { 
            if (t.size() != 2)
                throw std::runtime_error("vision::ClassifyResult pickle with invalid state!");
            vision::ClassifyResult c;
            c.label_ids = t[0].cast<std::vector<int32_t>>();
            c.scores = t[1].cast<std::vector<float>>();
            return c;
        }
      ))
      .def("__repr__", &vision::ClassifyResult::Str)
      .def("__str__", &vision::ClassifyResult::Str);
@@ -54,6 +84,24 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("label_ids", &vision::DetectionResult::label_ids)
      .def_readwrite("masks", &vision::DetectionResult::masks)
      .def_readwrite("contain_masks", &vision::DetectionResult::contain_masks)
      .def(pybind11::pickle(
        [](const vision::DetectionResult &d) { 
            return pybind11::make_tuple(d.boxes, d.scores, d.label_ids, d.masks, d.contain_masks);
        },
        [](pybind11::tuple t) { 
            if (t.size() != 5)
                throw std::runtime_error("vision::DetectionResult pickle with Invalid state!");
            vision::DetectionResult d;
            d.boxes = t[0].cast<std::vector<std::array<float, 4>>>();
            d.scores = t[1].cast<std::vector<float>>();
            d.label_ids = t[2].cast<std::vector<int32_t>>();
            d.masks = t[3].cast<std::vector<vision::Mask>>();
            d.contain_masks = t[4].cast<bool>();
            return d;
        }
      ))
      .def("__repr__", &vision::DetectionResult::Str)
      .def("__str__", &vision::DetectionResult::Str);
@@ -104,6 +152,23 @@ void BindVision(pybind11::module& m) {
      .def_readwrite("score_map", &vision::SegmentationResult::score_map)
      .def_readwrite("shape", &vision::SegmentationResult::shape)
      .def_readwrite("contain_score_map", &vision::SegmentationResult::contain_score_map)
      .def(pybind11::pickle(
        [](const vision::SegmentationResult &s) { 
            return pybind11::make_tuple(s.label_map, s.score_map, s.shape, s.contain_score_map);
        },
        [](pybind11::tuple t) { 
            if (t.size() != 4)
                throw std::runtime_error("vision::SegmentationResult pickle with Invalid state!");
            vision::SegmentationResult s;
            s.label_map = t[0].cast<std::vector<uint8_t>>();
            s.score_map = t[1].cast<std::vector<float>>();
            s.shape = t[2].cast<std::vector<int64_t>>();
            s.contain_score_map = t[3].cast<bool>();
            return s;
        }
      ))
      .def("__repr__", &vision::SegmentationResult::Str)
      .def("__str__", &vision::SegmentationResult::Str);
--- a/tutorials/multi_thread/cpp/CMakeLists.txt
+++ b/tutorials/multi_thread/cpp/CMakeLists.txt
@@ -0,0 +1,14 @@
 PROJECT(multi_thread_demo C CXX)
 CMAKE_MINIMUM_REQUIRED (VERSION 3.10)
 # 指定下载解压后的fastdeploy库路径
 option(FASTDEPLOY_INSTALL_DIR "Path of downloaded fastdeploy sdk.")
 include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
 # 添加FastDeploy依赖头文件
 include_directories(${FASTDEPLOY_INCS})
 add_executable(multi_thread_demo ${PROJECT_SOURCE_DIR}/multi_thread.cc)
 # 添加FastDeploy库依赖
 target_link_libraries(multi_thread_demo ${FASTDEPLOY_LIBS} pthread)
--- a/tutorials/multi_thread/cpp/README.md
+++ b/tutorials/multi_thread/cpp/README.md
@@ -0,0 +1,79 @@
 # PaddleClas C++部署示例
 本目录下提供`infer.cc`快速完成PaddleClas系列模型在CPU/GPU，以及GPU上通过TensorRT加速部署的示例。
 在部署前，需确认以下两个步骤
 - 1. 软硬件环境满足要求，参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)  
 - 2. 根据开发环境，下载预编译部署库和samples代码，参考[FastDeploy预编译库](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
 以Linux上ResNet50_vd推理为例，在本目录执行如下命令即可完成编译测试，支持此模型需保证FastDeploy版本0.7.0以上(x.x.x>=0.7.0)
 ```bash
 mkdir build
 cd build
 # 下载FastDeploy预编译库，用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
 wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
 tar xvf fastdeploy-linux-x64-x.x.x.tgz
 cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
 make -j
 # 下载ResNet50_vd模型文件和测试图片
 wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
 tar -xvf ResNet50_vd_infer.tgz
 wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg
 # CPU推理
 ./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 0
 # GPU推理
 ./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 1
 # GPU上TensorRT推理
 ./infer_demo ResNet50_vd_infer ILSVRC2012_val_00000010.jpeg 2
 ```
 以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考:  
 - [如何在Windows中使用FastDeploy C++ SDK](../../../../../docs/cn/faq/use_sdk_on_windows.md)
 ## PaddleClas C++接口
 ### PaddleClas类
 ```c++
 fastdeploy::vision::classification::PaddleClasModel(
        const string& model_file,
        const string& params_file,
        const string& config_file,
        const RuntimeOption& runtime_option = RuntimeOption(),
        const ModelFormat& model_format = ModelFormat::PADDLE)
 ```
 PaddleClas模型加载和初始化，其中model_file, params_file为训练模型导出的Paddle inference文件，具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.4/docs/zh_CN/inference_deployment/export_model.md#2-%E5%88%86%E7%B1%BB%E6%A8%A1%E5%9E%8B%E5%AF%BC%E5%87%BA)
 **参数**
 > * **model_file**(str): 模型文件路径
 > * **params_file**(str): 参数文件路径
 > * **config_file**(str): 推理部署配置文件
 > * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
 > * **model_format**(ModelFormat): 模型格式，默认为Paddle格式
 #### Predict函数
 > ```c++
 > PaddleClasModel::Predict(cv::Mat* im, ClassifyResult* result, int topk = 1)
 > ```
 >
 > 模型预测接口，输入图像直接输出检测结果。
 >
 > **参数**
 >
 > > * **im**: 输入图像，注意需为HWC，BGR格式
 > > * **result**: 分类结果，包括label_id，以及相应的置信度, ClassifyResult说明参考[视觉模型预测结果](../../../../../docs/api/vision_results/)
 > > * **topk**(int):返回预测概率最高的topk个分类结果，默认为1
 - [模型介绍](../../)
 - [Python部署](../python)
 - [视觉模型预测结果](../../../../../docs/api/vision_results/)
 - [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md)
--- a/tutorials/multi_thread/cpp/multi_thread.cc
+++ b/tutorials/multi_thread/cpp/multi_thread.cc
@@ -6,21 +6,44 @@ const char sep = '\\';
 const char sep = '/';
 #endif
-void predict(fastdeploy::vision::classification::PaddleClasModel *model, int thread_id, const std::string& image_file) {
+void Predict(fastdeploy::vision::classification::PaddleClasModel *model, int thread_id, const std::vector<std::string>& images) {
-  auto im = cv::imread(image_file);
+  for (auto const &image_file : images) {
      auto im = cv::imread(image_file);
-  fastdeploy::vision::ClassifyResult res;
+      fastdeploy::vision::ClassifyResult res;
-  if (!model->Predict(im, &res)) {
+      if (!model->Predict(im, &res)) {
-    std::cerr << "Failed to predict." << std::endl;
+        std::cerr << "Failed to predict." << std::endl;
-    return;
+        return;
      }
      // print res
      std::cout << "Thread Id: " << thread_id << std::endl;
      std::cout << res.Str() << std::endl;
  }
  // print res
  std::cout << "Thread Id: " << thread_id << std::endl;
  std::cout << res.Str() << std::endl;
 }
-void CpuInfer(const std::string& model_dir, const std::string& image_file, int thread_num) {
+void GetImageList(std::vector<std::vector<std::string>>* image_list, const std::string& image_file_path, int thread_num){
  std::vector<cv::String> images;
  cv::glob(image_file_path, images, false);
  // number of image files in images folder
  size_t count = images.size(); 
  size_t num = count / thread_num;
  for (int i = 0; i < thread_num; i++) {
    std::vector<std::string> temp_list;
    if (i == thread_num - 1) {
      for (size_t j = i*num; j < count; j++){
        temp_list.push_back(images[j]);
      }
    } else {
      for (size_t j = 0; j < num; j++){
        temp_list.push_back(images[i * num + j]);
      }
    }
    (*image_list)[i] = temp_list;
  }
 }
 void CpuInfer(const std::string& model_dir, const std::string& image_file_path, int thread_num) {
  auto model_file = model_dir + sep + "inference.pdmodel";
  auto params_file = model_dir + sep + "inference.pdiparams";
  auto config_file = model_dir + sep + "inference_cls.yaml";
@@ -39,9 +62,12 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file, int t
    models.emplace_back(std::move(model.Clone()));
  }
  std::vector<std::vector<std::string>> image_list(thread_num);
  GetImageList(&image_list, image_file_path, thread_num);
  std::vector<std::thread> threads;
  for (int i = 0; i < thread_num; ++i) {
-    threads.emplace_back(predict, models[i].get(), i, image_file);
+    threads.emplace_back(Predict, models[i].get(), i, image_list[i]);
  }
  for (int i = 0; i < thread_num; ++i) {
@@ -49,7 +75,7 @@ void CpuInfer(const std::string& model_dir, const std::string& image_file, int t
  }
 }
-void GpuInfer(const std::string& model_dir, const std::string& image_file, int thread_num) {
+void GpuInfer(const std::string& model_dir, const std::string& image_file_path, int thread_num) {
  auto model_file = model_dir + sep + "inference.pdmodel";
  auto params_file = model_dir + sep + "inference.pdiparams";
  auto config_file = model_dir + sep + "inference_cls.yaml";
@@ -68,9 +94,12 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file, int t
    models.emplace_back(std::move(model.Clone()));
  }
  std::vector<std::vector<std::string>> image_list(thread_num);
  GetImageList(&image_list, image_file_path, thread_num);
  std::vector<std::thread> threads;
  for (int i = 0; i < thread_num; ++i) {
-    threads.emplace_back(predict, models[i].get(), i, image_file);
+    threads.emplace_back(Predict, models[i].get(), i, image_list[i]);
  }
  for (int i = 0; i < thread_num; ++i) {
@@ -78,7 +107,7 @@ void GpuInfer(const std::string& model_dir, const std::string& image_file, int t
  }
 }
-void TrtInfer(const std::string& model_dir, const std::string& image_file, int thread_num) {
+void TrtInfer(const std::string& model_dir, const std::string& image_file_path, int thread_num) {
  auto model_file = model_dir + sep + "inference.pdmodel";
  auto params_file = model_dir + sep + "inference.pdiparams";
  auto config_file = model_dir + sep + "inference_cls.yaml";
@@ -99,9 +128,12 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file, int t
    models.emplace_back(std::move(model.Clone()));
  }
  std::vector<std::vector<std::string>> image_list(thread_num);
  GetImageList(&image_list, image_file_path, thread_num);
  std::vector<std::thread> threads;
  for (int i = 0; i < thread_num; ++i) {
-    threads.emplace_back(predict, models[i].get(), i, image_file);
+    threads.emplace_back(Predict, models[i].get(), i, image_list[i]);
  }
  for (int i = 0; i < thread_num; ++i) {
@@ -112,7 +144,7 @@ void TrtInfer(const std::string& model_dir, const std::string& image_file, int t
 int main(int argc, char **argv) {
  if (argc < 5) {
    std::cout << "Usage: infer_demo path/to/model path/to/image run_option thread_num, "
-                 "e.g ./infer_demo ./ResNet50_vd ./test.jpeg 0 3"
+                 "e.g ./multi_thread_demo ./ResNet50_vd ./test.jpeg 0 3"
              << std::endl;
    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
                 "with gpu; 2: run with gpu and use tensorrt backend."
--- a/tutorials/multi_thread/python/README.md
+++ b/tutorials/multi_thread/python/README.md
@@ -0,0 +1,77 @@
 # PaddleClas模型 Python部署示例
 在部署前，需确认以下两个步骤
 - 1. 软硬件环境满足要求，参考[FastDeploy环境要求](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)  
 - 2. FastDeploy Python whl包安装，参考[FastDeploy Python安装](../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
 本目录下提供`infer.py`快速完成ResNet50_vd在CPU/GPU，以及GPU上通过TensorRT加速部署的示例。执行如下脚本即可完成
 ```bash
 #下载部署示例代码
 git clone https://github.com/PaddlePaddle/FastDeploy.git
 cd  FastDeploy/examples/vision/classification/paddleclas/python
 # 下载ResNet50_vd模型文件和测试图片
 wget https://bj.bcebos.com/paddlehub/fastdeploy/ResNet50_vd_infer.tgz
 tar -xvf ResNet50_vd_infer.tgz
 wget https://gitee.com/paddlepaddle/PaddleClas/raw/release/2.4/deploy/images/ImageNet/ILSVRC2012_val_00000010.jpeg
 # CPU推理
 python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device cpu --topk 1
 # GPU推理
 python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --topk 1
 # GPU上使用TensorRT推理 （注意：TensorRT推理第一次运行，有序列化模型的操作，有一定耗时，需要耐心等待）
 python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device gpu --use_trt True --topk 1
 # IPU推理（注意：IPU推理首次运行会有序列化模型的操作，有一定耗时，需要耐心等待）
 python infer.py --model ResNet50_vd_infer --image ILSVRC2012_val_00000010.jpeg --device ipu --topk 1
 ```
 运行完成后返回结果如下所示
 ```bash
 ClassifyResult(
 label_ids: 153,
 scores: 0.686229,
 )
 ```
 ## PaddleClasModel Python接口
 ```python
 fd.vision.classification.PaddleClasModel(model_file, params_file, config_file, runtime_option=None, model_format=ModelFormat.PADDLE)
 ```
 PaddleClas模型加载和初始化，其中model_file, params_file为训练模型导出的Paddle inference文件，具体请参考其文档说明[模型导出](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.4/docs/zh_CN/inference_deployment/export_model.md#2-%E5%88%86%E7%B1%BB%E6%A8%A1%E5%9E%8B%E5%AF%BC%E5%87%BA)
 **参数**
 > * **model_file**(str): 模型文件路径
 > * **params_file**(str): 参数文件路径
 > * **config_file**(str): 推理部署配置文件
 > * **runtime_option**(RuntimeOption): 后端推理配置，默认为None，即采用默认配置
 > * **model_format**(ModelFormat): 模型格式，默认为Paddle格式
 ### predict函数
 > ```python
 > PaddleClasModel.predict(input_image, topk=1)
 > ```
 >
 > 模型预测结口，输入图像直接输出分类topk结果。
 >
 > **参数**
 >
 > > * **input_image**(np.ndarray): 输入数据，注意需为HWC，BGR格式
 > > * **topk**(int):返回预测概率最高的topk个分类结果，默认为1
 > **返回**
 >
 > > 返回`fastdeploy.vision.ClassifyResult`结构体，结构体说明参考文档[视觉模型预测结果](../../../../../docs/api/vision_results/)
 ## 其它文档
 - [PaddleClas 模型介绍](..)
 - [PaddleClas C++部署](../cpp)
 - [模型预测结果说明](../../../../../docs/api/vision_results/)
 - [如何切换模型推理后端引擎](../../../../../docs/cn/faq/how_to_change_backend.md)
--- a/tutorials/multi_thread/python/multi_thread_process.py
+++ b/tutorials/multi_thread/python/multi_thread_process.py
@@ -4,6 +4,7 @@ import fastdeploy as fd
 import cv2
 import os
 import psutil
 from multiprocessing import Pool
 def parse_arguments():
@@ -31,6 +32,13 @@ def parse_arguments():
        default=False,
        help="Wether to use tensorrt.")
    parser.add_argument("--thread_num", type=int, default=1, help="thread num")
    parser.add_argument(
        "--use_multi_process",
        type=ast.literal_eval,
        default=False,
        help="Wether to use multi process.")
    parser.add_argument(
        "--process_num", type=int, default=1, help="process num")
    return parser.parse_args()
@@ -71,7 +79,7 @@ def build_option(args):
 def predict(model, img_list, topk):
    result_list = []
-    # 预测图片分类结果
+    # predict classification result
    for image in img_list:
        im = cv2.imread(image)
        result = model.predict(im, topk)
@@ -79,6 +87,13 @@ def predict(model, img_list, topk):
    return result_list
 def process_predict(image):
    # predict classification result
    im = cv2.imread(image)
    result = model.predict(im, args.topk)
    return result
 class WrapperThread(Thread):
    def __init__(self, func, args):
        super(WrapperThread, self).__init__()
@@ -95,9 +110,8 @@ class WrapperThread(Thread):
 if __name__ == '__main__':
    args = parse_arguments()
    thread_num = args.thread_num
    imgs_list = get_image_list(args.image_path)
-    # 配置runtime，加载模型
+    # configure runtime and load model
    runtime_option = build_option(args)
    model_file = os.path.join(args.model, "inference.pdmodel")
@@ -105,24 +119,38 @@ if __name__ == '__main__':
    config_file = os.path.join(args.model, "inference_cls.yaml")
    model = fd.vision.classification.PaddleClasModel(
        model_file, params_file, config_file, runtime_option=runtime_option)
-    threads = []
+    if args.use_multi_process:
-    image_num_each_thread = int(len(imgs_list) / thread_num)
+        results = []
-    for i in range(thread_num):
+        process_num = args.process_num
-        if i == thread_num - 1:
+        with Pool(process_num) as pool:
-            t = WrapperThread(
+            results = pool.map(process_predict, imgs_list)
-                predict,
+        for result in results:
-                args=(model, imgs_list[i * image_num_each_thread:], i))
+            print(result)
-        else:
+    else:
-            t = WrapperThread(
+        threads = []
-                predict,
+        thread_num = args.thread_num
-                args=(model.clone(), imgs_list[i * image_num_each_thread:(
+        image_num_each_thread = int(len(imgs_list) / thread_num)
-                    i + 1) * image_num_each_thread - 1], i))
+        # unless you want independent model in each thread, actually model.clone()
-        threads.append(t)
+        # is the same as model when creating thead because of the existence of
-        t.start()
+        # GIL(Global Interpreter Lock) in python. In addition, model.clone() will consume
        # additional memory to store independent member variables
        for i in range(thread_num):
            if i == thread_num - 1:
                t = WrapperThread(
                    predict,
                    args=(model.clone(), imgs_list[i * image_num_each_thread:],
                          args.topk))
            else:
                t = WrapperThread(
                    predict,
                    args=(model.clone(), imgs_list[i * image_num_each_thread:(
                        i + 1) * image_num_each_thread - 1], args.topk))
            threads.append(t)
            t.start()
-    for i in range(thread_num):
+        for i in range(thread_num):
-        threads[i].join()
+            threads[i].join()
-    for i in range(thread_num):
+        for i in range(thread_num):
-        for result in threads[i].get_result():
+            for result in threads[i].get_result():
-            print('thread:', i, ', result: ', result)
+                print('thread:', i, ', result: ', result)