[Other] Update PP-OCRv2/v3 example. (#838)

* Fix links in readme

* Fix links in readme

* Update PPOCRv2/v3 examples
yunyaoXYY
2022-12-08 23:32:14 +08:00
committed by GitHub
parent b3bc76e536
commit d19510fe77
4 changed files with 63 additions and 14 deletions
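In short, the change adds two batch-size knobs, cls_batch_size and rec_batch_size, to the C++ and Python PP-OCRv2/v3 examples: they feed the TRT dynamic-shape settings and, once the pipeline is created, its cls/rec batch-size configuration. As a condensed reference, a minimal Python sketch of the updated PP-OCRv2 flow follows; the model/parameter file names, label file, image path, and the GPU/TRT option setup are illustrative assumptions, not part of this diff.

import cv2
import fastdeploy as fd

# Assumed model locations; the real example takes these from command-line arguments.
det_dir = "ch_PP-OCRv2_det_infer"
cls_dir = "ch_ppocr_mobile_v2.0_cls_infer"
rec_dir = "ch_PP-OCRv2_rec_infer"
rec_label_file = "ppocr_keys_v1.txt"

# Base runtime option: GPU with the TRT backend (illustrative).
option = fd.RuntimeOption()
option.use_gpu(0)
option.use_trt_backend()

# Batch-size knobs introduced by this change; -1 would make the cls/rec batch
# size follow the number of boxes detected by the det model.
cls_batch_size = 1
rec_batch_size = 6

# Set the dynamic shape for each model, then create it (det first, then cls, then rec).
det_option = option
det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640], [1, 3, 960, 960])
det_model = fd.vision.ocr.DBDetector(
    det_dir + "/inference.pdmodel", det_dir + "/inference.pdiparams",
    runtime_option=det_option)

cls_option = option
cls_option.set_trt_input_shape("x", [1, 3, 48, 10],
                               [cls_batch_size, 3, 48, 320],
                               [cls_batch_size, 3, 48, 1024])
cls_model = fd.vision.ocr.Classifier(
    cls_dir + "/inference.pdmodel", cls_dir + "/inference.pdiparams",
    runtime_option=cls_option)

rec_option = option
rec_option.set_trt_input_shape("x", [1, 3, 32, 10],
                               [rec_batch_size, 3, 32, 320],
                               [rec_batch_size, 3, 32, 2304])
rec_model = fd.vision.ocr.Recognizer(
    rec_dir + "/inference.pdmodel", rec_dir + "/inference.pdiparams",
    rec_label_file, runtime_option=rec_option)

# Build the pipeline and apply the batch sizes.
ppocr_v2 = fd.vision.ocr.PPOCRv2(
    det_model=det_model, cls_model=cls_model, rec_model=rec_model)
ppocr_v2.cls_batch_size = cls_batch_size
ppocr_v2.rec_batch_size = rec_batch_size

im = cv2.imread("test.jpg")
result = ppocr_v2.predict(im)
print(result)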

View File

@@ -33,13 +33,18 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
 auto cls_option = option;
 auto rec_option = option;
+// The cls and rec models can now run inference on a batch of images.
+// Users can set the inference batch sizes here and apply them after the PP-OCR pipeline is created.
+int cls_batch_size = 1;
+int rec_batch_size = 6;
 // If use TRT backend, the dynamic shape will be set as follow.
 // We recommend that users set the length and height of the detection model to a multiple of 32.
 det_option.SetTrtInputShape("x", {1, 3, 64,64}, {1, 3, 640, 640},
                             {1, 3, 960, 960});
-cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320}, {32, 3, 48, 1024});
-rec_option.SetTrtInputShape("x", {1, 3, 32, 10}, {10, 3, 32, 320},
-                            {32, 3, 32, 2304});
+cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {cls_batch_size, 3, 48, 320}, {cls_batch_size, 3, 48, 1024});
+rec_option.SetTrtInputShape("x", {1, 3, 32, 10}, {rec_batch_size, 3, 32, 320},
+                            {rec_batch_size, 3, 32, 2304});
 // Users could save TRT cache file to disk as follow.
 // det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
@@ -58,6 +63,12 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
 // auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &rec_model);
 auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &cls_model, &rec_model);
+// Set the inference batch size for the cls and rec models; valid values are -1
+// or any positive integer. With -1, the cls/rec batch size equals the number of
+// boxes detected by the det model.
+ppocr_v2.SetClsBatchSize(cls_batch_size);
+ppocr_v2.SetRecBatchSize(rec_batch_size);
 if(!ppocr_v2.Initialized()){
   std::cerr << "Failed to initialize PP-OCR." << std::endl;
   return;

View File

@@ -106,6 +106,11 @@ rec_label_file = args.rec_label_file
 # Users can also configure each model's option separately as needed.
 runtime_option = build_option(args)
+# The PP-OCR cls and rec models now support batch inference.
+# The two variables below are used to set the TRT input shapes and, once the
+# PP-OCR pipeline is created, to configure batch inference.
+cls_batch_size = 1
+rec_batch_size = 6
 # When using TRT, set dynamic shapes on the runtime option of each of the three models, then create the models.
 # Note: set the dynamic input of the cls model and create it only after the det model has been created; the same applies to the rec model.
 # If you want to change the input shape of the det model yourself, we recommend setting its length and height to multiples of 32.
@@ -118,16 +123,18 @@ det_model = fd.vision.ocr.DBDetector(
     det_model_file, det_params_file, runtime_option=det_option)
 cls_option = runtime_option
-cls_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [32, 3, 48, 1024])
+cls_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [cls_batch_size, 3, 48, 320],
+                               [cls_batch_size, 3, 48, 1024])
 # Users can save the TRT engine file to local disk.
 # cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt")
 cls_model = fd.vision.ocr.Classifier(
     cls_model_file, cls_params_file, runtime_option=cls_option)
 rec_option = runtime_option
-rec_option.set_trt_input_shape("x", [1, 3, 32, 10], [10, 3, 32, 320],
-                               [32, 3, 32, 2304])
+rec_option.set_trt_input_shape("x", [1, 3, 32, 10],
+                               [rec_batch_size, 3, 32, 320],
+                               [rec_batch_size, 3, 32, 2304])
 # Users can save the TRT engine file to local disk.
 # rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
 rec_model = fd.vision.ocr.Recognizer(
@@ -137,6 +144,12 @@ rec_model = fd.vision.ocr.Recognizer(
 ppocr_v2 = fd.vision.ocr.PPOCRv2(
     det_model=det_model, cls_model=cls_model, rec_model=rec_model)
+# Set the inference batch size for the cls and rec models.
+# Valid values are -1 or any positive integer.
+# With -1, the cls/rec batch size defaults to the number of boxes detected by the det model.
+ppocr_v2.cls_batch_size = cls_batch_size
+ppocr_v2.rec_batch_size = rec_batch_size
 # Prepare the image for prediction.
 im = cv2.imread(args.image)
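The Python diff stops at reading the image; the prediction step itself is unchanged and not shown here. For context, a short sketch of how the configured pipeline is typically driven with the fastdeploy Python API (the visualization call and the output filename are assumptions, not part of this diff):

# Run the det -> cls -> rec pipeline on the image loaded above; cls and rec
# now process the detected boxes in batches of cls_batch_size / rec_batch_size.
result = ppocr_v2.predict(im)
print(result)

# Optional: draw the recognized boxes and texts and save the visualization.
vis_im = fd.vision.vis_ppocr(im, result)
cv2.imwrite("visualized_result.jpg", vis_im)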

View File

@@ -33,13 +33,19 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
 auto cls_option = option;
 auto rec_option = option;
+// The cls and rec models can now run inference on a batch of images.
+// Users can set the inference batch sizes here and apply them after the PP-OCR pipeline is created.
+int cls_batch_size = 1;
+int rec_batch_size = 6;
 // If use TRT backend, the dynamic shape will be set as follow.
 // We recommend that users set the length and height of the detection model to a multiple of 32.
+// We also recommend that users set the TRT input shapes as follows.
 det_option.SetTrtInputShape("x", {1, 3, 64,64}, {1, 3, 640, 640},
                             {1, 3, 960, 960});
-cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320}, {64, 3, 48, 1024});
-rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320},
-                            {64, 3, 48, 2304});
+cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {cls_batch_size, 3, 48, 320}, {cls_batch_size, 3, 48, 1024});
+rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {rec_batch_size, 3, 48, 320},
+                            {rec_batch_size, 3, 48, 2304});
 // Users could save TRT cache file to disk as follow.
 // det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
@@ -58,6 +64,12 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
 // auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model);
 auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
+// Set the inference batch size for the cls and rec models; valid values are -1
+// or any positive integer. With -1, the cls/rec batch size equals the number of
+// boxes detected by the det model.
+ppocr_v3.SetClsBatchSize(cls_batch_size);
+ppocr_v3.SetRecBatchSize(rec_batch_size);
 if(!ppocr_v3.Initialized()){
   std::cerr << "Failed to initialize PP-OCR." << std::endl;
   return;

View File

@@ -106,6 +106,11 @@ rec_label_file = args.rec_label_file
 # Users can also configure each model's option separately as needed.
 runtime_option = build_option(args)
+# The PP-OCR cls and rec models now support batch inference.
+# The two variables below are used to set the TRT input shapes and, once the
+# PP-OCR pipeline is created, to configure batch inference.
+cls_batch_size = 1
+rec_batch_size = 6
 # When using TRT, set dynamic shapes on the runtime option of each of the three models, then create the models.
 # Note: set the dynamic input of the cls model and create it only after the det model has been created; the same applies to the rec model.
 # If you want to change the input shape of the det model yourself, we recommend setting its length and height to multiples of 32.
@@ -118,16 +123,18 @@ det_model = fd.vision.ocr.DBDetector(
     det_model_file, det_params_file, runtime_option=det_option)
 cls_option = runtime_option
-cls_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [64, 3, 48, 1024])
+cls_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [cls_batch_size, 3, 48, 320],
+                               [cls_batch_size, 3, 48, 1024])
 # Users can save the TRT engine file to local disk.
 # cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt")
 cls_model = fd.vision.ocr.Classifier(
     cls_model_file, cls_params_file, runtime_option=cls_option)
 rec_option = runtime_option
-rec_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [64, 3, 48, 2304])
+rec_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [rec_batch_size, 3, 48, 320],
+                               [rec_batch_size, 3, 48, 2304])
 # Users can save the TRT engine file to local disk.
 # rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
 rec_model = fd.vision.ocr.Recognizer(
@@ -137,6 +144,12 @@ rec_model = fd.vision.ocr.Recognizer(
 ppocr_v3 = fd.vision.ocr.PPOCRv3(
     det_model=det_model, cls_model=cls_model, rec_model=rec_model)
+# Set the inference batch size for the cls and rec models.
+# Valid values are -1 or any positive integer.
+# With -1, the cls/rec batch size defaults to the number of boxes detected by the det model.
+ppocr_v3.cls_batch_size = cls_batch_size
+ppocr_v3.rec_batch_size = rec_batch_size
 # Prepare the image for prediction.
 im = cv2.imread(args.image)
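Both Python hunks rely on a build_option(args) helper that this diff does not touch, so its body is not shown here. As a rough, hypothetical sketch of what such a helper does (the args.device, args.device_id, and args.backend names are assumptions, not taken from this diff):

import fastdeploy as fd

def build_option(args):
    # Hypothetical sketch only; the helper in the real example handles more backends.
    option = fd.RuntimeOption()
    if args.device.lower() == "gpu":
        # Select the GPU to run on.
        option.use_gpu(args.device_id)
    if args.backend.lower() == "trt":
        # Enable the TRT backend; dynamic shapes are set later, per model.
        option.use_trt_backend()
    return option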