From d19510fe770e5cb22982439b2c7dd65af65b48b0 Mon Sep 17 00:00:00 2001
From: yunyaoXYY <109218879+yunyaoXYY@users.noreply.github.com>
Date: Thu, 8 Dec 2022 23:32:14 +0800
Subject: [PATCH] [Other] Update PP-OCRv2/v3 example. (#838)

* Fix links in readme

* Fix links in readme

* Update PPOCRv2/v3 examples
---
 examples/vision/ocr/PP-OCRv2/cpp/infer.cc    | 17 +++++++++++++---
 examples/vision/ocr/PP-OCRv2/python/infer.py | 21 ++++++++++++++++----
 examples/vision/ocr/PP-OCRv3/cpp/infer.cc    | 18 ++++++++++++++---
 examples/vision/ocr/PP-OCRv3/python/infer.py | 21 ++++++++++++++++----
 4 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/examples/vision/ocr/PP-OCRv2/cpp/infer.cc b/examples/vision/ocr/PP-OCRv2/cpp/infer.cc
index 7bac320d5..6cde6390f 100644
--- a/examples/vision/ocr/PP-OCRv2/cpp/infer.cc
+++ b/examples/vision/ocr/PP-OCRv2/cpp/infer.cc
@@ -33,13 +33,18 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   auto cls_option = option;
   auto rec_option = option;
 
+  // The cls and rec models can now run inference on a batch of images.
+  // Users can define the inference batch sizes here and set them after the PPOCR model is created.
+  int cls_batch_size = 1;
+  int rec_batch_size = 6;
+
   // If the TRT backend is used, the dynamic shapes are set as follows.
   // We recommend that users set the length and height of the detection model to a multiple of 32.
   det_option.SetTrtInputShape("x", {1, 3, 64,64}, {1, 3, 640, 640},
                               {1, 3, 960, 960});
-  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320}, {32, 3, 48, 1024});
-  rec_option.SetTrtInputShape("x", {1, 3, 32, 10}, {10, 3, 32, 320},
-                              {32, 3, 32, 2304});
+  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {cls_batch_size, 3, 48, 320}, {cls_batch_size, 3, 48, 1024});
+  rec_option.SetTrtInputShape("x", {1, 3, 32, 10}, {rec_batch_size, 3, 32, 320},
+                              {rec_batch_size, 3, 32, 2304});
 
   // Users can save the TRT cache file to disk as follows.
   // det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
@@ -58,6 +63,12 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   // auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &rec_model);
   auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &cls_model, &rec_model);
 
+  // Set the inference batch size for the cls and rec models; the value can be -1 or any positive integer.
+  // When the batch size is set to -1, the cls and rec models run inference on as many
+  // boxes as the det model detects in one batch.
+  ppocr_v2.SetClsBatchSize(cls_batch_size);
+  ppocr_v2.SetRecBatchSize(rec_batch_size);
+
   if(!ppocr_v2.Initialized()){
     std::cerr << "Failed to initialize PP-OCR." << std::endl;
     return;
diff --git a/examples/vision/ocr/PP-OCRv2/python/infer.py b/examples/vision/ocr/PP-OCRv2/python/infer.py
index af915143a..1487d795f 100644
--- a/examples/vision/ocr/PP-OCRv2/python/infer.py
+++ b/examples/vision/ocr/PP-OCRv2/python/infer.py
@@ -106,6 +106,11 @@ rec_label_file = args.rec_label_file
 # Users can also configure each model's runtime option separately as needed
 runtime_option = build_option(args)
 
+# The cls and rec models of PPOCR now support batch inference
+# The two variables below are used to set the TRT input shapes and, once the PPOCR model is initialized, to configure batch inference
+cls_batch_size = 1
+rec_batch_size = 6
+
 # When using TRT, set the dynamic shape for the runtime of each of the three models, then create the models.
 # Note: set the dynamic input of the cls model and create it only after the det model has been created; the same applies to the rec model.
 # If users want to change the input shape of the det model, we recommend setting its length and height to a multiple of 32.
@@ -118,16 +123,18 @@ det_model = fd.vision.ocr.DBDetector(
     det_model_file, det_params_file, runtime_option=det_option)
 
 cls_option = runtime_option
-cls_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [32, 3, 48, 1024])
+cls_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [cls_batch_size, 3, 48, 320],
+                               [cls_batch_size, 3, 48, 1024])
 # Users can save the TRT engine file to disk
 # cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt")
 cls_model = fd.vision.ocr.Classifier(
     cls_model_file, cls_params_file, runtime_option=cls_option)
 
 rec_option = runtime_option
-rec_option.set_trt_input_shape("x", [1, 3, 32, 10], [10, 3, 32, 320],
-                               [32, 3, 32, 2304])
+rec_option.set_trt_input_shape("x", [1, 3, 32, 10],
+                               [rec_batch_size, 3, 32, 320],
+                               [rec_batch_size, 3, 32, 2304])
 # Users can save the TRT engine file to disk
 # rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
 rec_model = fd.vision.ocr.Recognizer(
@@ -137,6 +144,12 @@ rec_model = fd.vision.ocr.Recognizer(
 
 ppocr_v2 = fd.vision.ocr.PPOCRv2(
     det_model=det_model, cls_model=cls_model, rec_model=rec_model)
 
+# Set the batch size used by the cls and rec models at inference time
+# The value can be -1, or any integer from 1 to positive infinity
+# When the value is -1, the cls and rec batch size defaults to the number of boxes detected by the det model
+ppocr_v2.cls_batch_size = cls_batch_size
+ppocr_v2.rec_batch_size = rec_batch_size
+
 # Prepare the image for prediction
 im = cv2.imread(args.image)
diff --git a/examples/vision/ocr/PP-OCRv3/cpp/infer.cc b/examples/vision/ocr/PP-OCRv3/cpp/infer.cc
index 911b311e3..90b77679f 100644
--- a/examples/vision/ocr/PP-OCRv3/cpp/infer.cc
+++ b/examples/vision/ocr/PP-OCRv3/cpp/infer.cc
@@ -33,13 +33,19 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   auto cls_option = option;
   auto rec_option = option;
 
+  // The cls and rec models can now run inference on a batch of images.
+  // Users can define the inference batch sizes here and set them after the PPOCR model is created.
+  int cls_batch_size = 1;
+  int rec_batch_size = 6;
+
   // If the TRT backend is used, the dynamic shapes are set as follows.
   // We recommend that users set the length and height of the detection model to a multiple of 32.
+  // We also recommend that users set the TRT input shapes as follows.
   det_option.SetTrtInputShape("x", {1, 3, 64,64}, {1, 3, 640, 640},
                               {1, 3, 960, 960});
-  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320}, {64, 3, 48, 1024});
-  rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320},
-                              {64, 3, 48, 2304});
+  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {cls_batch_size, 3, 48, 320}, {cls_batch_size, 3, 48, 1024});
+  rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {rec_batch_size, 3, 48, 320},
+                              {rec_batch_size, 3, 48, 2304});
 
   // Users can save the TRT cache file to disk as follows.
   // det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
@@ -57,6 +63,12 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   // The classification model is optional, so the PP-OCR can also be connected in series as follows
   // auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model);
   auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
+
+  // Set the inference batch size for the cls and rec models; the value can be -1 or any positive integer.
+  // When the batch size is set to -1, the cls and rec models run inference on as many
+  // boxes as the det model detects in one batch.
+  ppocr_v3.SetClsBatchSize(cls_batch_size);
+  ppocr_v3.SetRecBatchSize(rec_batch_size);
   if(!ppocr_v3.Initialized()){
     std::cerr << "Failed to initialize PP-OCR."
               << std::endl;
diff --git a/examples/vision/ocr/PP-OCRv3/python/infer.py b/examples/vision/ocr/PP-OCRv3/python/infer.py
index b6b27b240..1ec962cb5 100644
--- a/examples/vision/ocr/PP-OCRv3/python/infer.py
+++ b/examples/vision/ocr/PP-OCRv3/python/infer.py
@@ -106,6 +106,11 @@ rec_label_file = args.rec_label_file
 # Users can also configure each model's runtime option separately as needed
 runtime_option = build_option(args)
 
+# The cls and rec models of PPOCR now support batch inference
+# The two variables below are used to set the TRT input shapes and, once the PPOCR model is initialized, to configure batch inference
+cls_batch_size = 1
+rec_batch_size = 6
+
 # When using TRT, set the dynamic shape for the runtime of each of the three models, then create the models.
 # Note: set the dynamic input of the cls model and create it only after the det model has been created; the same applies to the rec model.
 # If users want to change the input shape of the det model, we recommend setting its length and height to a multiple of 32.
@@ -118,16 +123,18 @@ det_model = fd.vision.ocr.DBDetector(
     det_model_file, det_params_file, runtime_option=det_option)
 
 cls_option = runtime_option
-cls_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [64, 3, 48, 1024])
+cls_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [cls_batch_size, 3, 48, 320],
+                               [cls_batch_size, 3, 48, 1024])
 # Users can save the TRT engine file to disk
 # cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt")
 cls_model = fd.vision.ocr.Classifier(
     cls_model_file, cls_params_file, runtime_option=cls_option)
 
 rec_option = runtime_option
-rec_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [64, 3, 48, 2304])
+rec_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [rec_batch_size, 3, 48, 320],
+                               [rec_batch_size, 3, 48, 2304])
 # Users can save the TRT engine file to disk
 # rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
 rec_model = fd.vision.ocr.Recognizer(
@@ -137,6 +144,12 @@ rec_model = fd.vision.ocr.Recognizer(
 
 ppocr_v3 = fd.vision.ocr.PPOCRv3(
     det_model=det_model, cls_model=cls_model, rec_model=rec_model)
 
+# Set the batch size used by the cls and rec models at inference time
+# The value can be -1, or any integer from 1 to positive infinity
+# When the value is -1, the cls and rec batch size defaults to the number of boxes detected by the det model
+ppocr_v3.cls_batch_size = cls_batch_size
+ppocr_v3.rec_batch_size = rec_batch_size
+
 # Prepare the image for prediction
 im = cv2.imread(args.image)
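
Note: below is a minimal, self-contained Python sketch (not part of the patch) of how the new cls/rec batch-size settings fit into a complete PP-OCRv2 pipeline. The model and label paths, the GPU + TensorRT setup, the make_option helper, and the image name are illustrative assumptions; only set_trt_input_shape, the model constructors, and the cls_batch_size/rec_batch_size attributes are taken from the change above.

import cv2
import fastdeploy as fd

# Hypothetical local paths to exported PP-OCRv2 inference models and the rec dictionary.
det_dir = "ch_PP-OCRv2_det_infer"
cls_dir = "ch_ppocr_mobile_v2.0_cls_infer"
rec_dir = "ch_PP-OCRv2_rec_infer"
rec_label_file = "ppocr_keys_v1.txt"

cls_batch_size = 1   # inference batch size for the cls model
rec_batch_size = 6   # inference batch size for the rec model

def make_option(batch_size, height, max_width):
    # Build one RuntimeOption per model: GPU + TRT backend, with the opt/max batch
    # dimension of the dynamic shape tied to the chosen inference batch size.
    option = fd.RuntimeOption()
    option.use_gpu(0)
    option.use_trt_backend()
    option.set_trt_input_shape("x", [1, 3, height, 10],
                               [batch_size, 3, height, 320],
                               [batch_size, 3, height, max_width])
    return option

# The det model keeps a batch size of 1; its spatial dims should be multiples of 32.
det_option = fd.RuntimeOption()
det_option.use_gpu(0)
det_option.use_trt_backend()
det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640],
                               [1, 3, 960, 960])

det_model = fd.vision.ocr.DBDetector(
    det_dir + "/inference.pdmodel", det_dir + "/inference.pdiparams",
    runtime_option=det_option)
cls_model = fd.vision.ocr.Classifier(
    cls_dir + "/inference.pdmodel", cls_dir + "/inference.pdiparams",
    runtime_option=make_option(cls_batch_size, 48, 1024))
rec_model = fd.vision.ocr.Recognizer(
    rec_dir + "/inference.pdmodel", rec_dir + "/inference.pdiparams",
    rec_label_file, runtime_option=make_option(rec_batch_size, 32, 2304))

ppocr_v2 = fd.vision.ocr.PPOCRv2(
    det_model=det_model, cls_model=cls_model, rec_model=rec_model)

# The attributes introduced by this patch: -1 makes the cls/rec batch size follow
# the number of boxes returned by the det model.
ppocr_v2.cls_batch_size = cls_batch_size
ppocr_v2.rec_batch_size = rec_batch_size

im = cv2.imread("test_ocr.jpg")  # illustrative image path
result = ppocr_v2.predict(im)
print(result)

The design choice mirrored from the patch: the same cls_batch_size/rec_batch_size variables drive both the opt/max batch dimension of the TRT dynamic-shape profiles and the pipeline's runtime batching, so the two stay consistent.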