From d19510fe770e5cb22982439b2c7dd65af65b48b0 Mon Sep 17 00:00:00 2001
From: yunyaoXYY <109218879+yunyaoXYY@users.noreply.github.com>
Date: Thu, 8 Dec 2022 23:32:14 +0800
Subject: [PATCH] [Other] Update PP-OCRv2/v3 example. (#838)

* Fix links in readme

* Fix links in readme

* Update PPOCRv2/v3 examples
---
 examples/vision/ocr/PP-OCRv2/cpp/infer.cc    | 17 +++++++++++++---
 examples/vision/ocr/PP-OCRv2/python/infer.py | 21 ++++++++++++++++----
 examples/vision/ocr/PP-OCRv3/cpp/infer.cc    | 18 ++++++++++++++---
 examples/vision/ocr/PP-OCRv3/python/infer.py | 21 ++++++++++++++++----
 4 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/examples/vision/ocr/PP-OCRv2/cpp/infer.cc b/examples/vision/ocr/PP-OCRv2/cpp/infer.cc
index 7bac320d5..6cde6390f 100644
--- a/examples/vision/ocr/PP-OCRv2/cpp/infer.cc
+++ b/examples/vision/ocr/PP-OCRv2/cpp/infer.cc
@@ -33,13 +33,18 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   auto cls_option = option;
   auto rec_option = option;
 
+  // The cls and rec models can now run inference on a batch of images.
+  // Users can define the inference batch sizes here and set them after the PPOCR model is created.
+  int cls_batch_size = 1;
+  int rec_batch_size = 6;
+
   // If the TRT backend is used, the dynamic shapes are set as follows.
   // We recommend that users set the length and height of the detection model to a multiple of 32.
   det_option.SetTrtInputShape("x", {1, 3, 64,64}, {1, 3, 640, 640},
                               {1, 3, 960, 960});
-  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320}, {32, 3, 48, 1024});
-  rec_option.SetTrtInputShape("x", {1, 3, 32, 10}, {10, 3, 32, 320},
-                              {32, 3, 32, 2304});
+  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {cls_batch_size, 3, 48, 320}, {cls_batch_size, 3, 48, 1024});
+  rec_option.SetTrtInputShape("x", {1, 3, 32, 10}, {rec_batch_size, 3, 32, 320},
+                              {rec_batch_size, 3, 32, 2304});
 
   // Users can save the TRT cache file to disk as follows.
   // det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
@@ -58,6 +63,12 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   // auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &rec_model);
   auto ppocr_v2 = fastdeploy::pipeline::PPOCRv2(&det_model, &cls_model, &rec_model);
 
+  // Set the inference batch size for the cls and rec models; the value can be -1 or any positive integer.
+  // When the batch size is set to -1, the cls and rec models run inference on as many
+  // boxes as the det model detects in one batch.
+  ppocr_v2.SetClsBatchSize(cls_batch_size);
+  ppocr_v2.SetRecBatchSize(rec_batch_size);
+
   if(!ppocr_v2.Initialized()){
     std::cerr << "Failed to initialize PP-OCR." << std::endl;
     return;
diff --git a/examples/vision/ocr/PP-OCRv2/python/infer.py b/examples/vision/ocr/PP-OCRv2/python/infer.py
index af915143a..1487d795f 100644
--- a/examples/vision/ocr/PP-OCRv2/python/infer.py
+++ b/examples/vision/ocr/PP-OCRv2/python/infer.py
@@ -106,6 +106,11 @@ rec_label_file = args.rec_label_file
 # Users can also configure each model's runtime option separately as needed
 runtime_option = build_option(args)
 
+# The cls and rec models of PPOCR now support batch inference
+# The two variables below are used to set the TRT input shapes and, once the PPOCR model is initialized, to configure batch inference
+cls_batch_size = 1
+rec_batch_size = 6
+
 # When using TRT, set the dynamic shape for the runtime of each of the three models, then create the models.
 # Note: set the dynamic input of the cls model and create it only after the det model has been created; the same applies to the rec model.
 # If users want to change the input shape of the det model, we recommend setting its length and height to a multiple of 32.
@@ -118,16 +123,18 @@ det_model = fd.vision.ocr.DBDetector(
     det_model_file, det_params_file, runtime_option=det_option)
 
 cls_option = runtime_option
-cls_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [32, 3, 48, 1024])
+cls_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [cls_batch_size, 3, 48, 320],
+                               [cls_batch_size, 3, 48, 1024])
 # Users can save the TRT engine file to disk
 # cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt")
 cls_model = fd.vision.ocr.Classifier(
     cls_model_file, cls_params_file, runtime_option=cls_option)
 
 rec_option = runtime_option
-rec_option.set_trt_input_shape("x", [1, 3, 32, 10], [10, 3, 32, 320],
-                               [32, 3, 32, 2304])
+rec_option.set_trt_input_shape("x", [1, 3, 32, 10],
+                               [rec_batch_size, 3, 32, 320],
+                               [rec_batch_size, 3, 32, 2304])
 # Users can save the TRT engine file to disk
 # rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
 rec_model = fd.vision.ocr.Recognizer(
@@ -137,6 +144,12 @@ rec_model = fd.vision.ocr.Recognizer(
 
 ppocr_v2 = fd.vision.ocr.PPOCRv2(
     det_model=det_model, cls_model=cls_model, rec_model=rec_model)
 
+# Set the batch size used by the cls and rec models at inference time
+# The value can be -1, or any integer from 1 to positive infinity
+# When the value is -1, the cls and rec batch size defaults to the number of boxes detected by the det model
+ppocr_v2.cls_batch_size = cls_batch_size
+ppocr_v2.rec_batch_size = rec_batch_size
+
 # Prepare the image for prediction
 im = cv2.imread(args.image)
diff --git a/examples/vision/ocr/PP-OCRv3/cpp/infer.cc b/examples/vision/ocr/PP-OCRv3/cpp/infer.cc
index 911b311e3..90b77679f 100644
--- a/examples/vision/ocr/PP-OCRv3/cpp/infer.cc
+++ b/examples/vision/ocr/PP-OCRv3/cpp/infer.cc
@@ -33,13 +33,19 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   auto cls_option = option;
   auto rec_option = option;
 
+  // The cls and rec models can now run inference on a batch of images.
+  // Users can define the inference batch sizes here and set them after the PPOCR model is created.
+  int cls_batch_size = 1;
+  int rec_batch_size = 6;
+
   // If the TRT backend is used, the dynamic shapes are set as follows.
   // We recommend that users set the length and height of the detection model to a multiple of 32.
+  // We also recommend that users set the TRT input shapes as follows.
   det_option.SetTrtInputShape("x", {1, 3, 64,64}, {1, 3, 640, 640},
                               {1, 3, 960, 960});
-  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320}, {64, 3, 48, 1024});
-  rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {10, 3, 48, 320},
-                              {64, 3, 48, 2304});
+  cls_option.SetTrtInputShape("x", {1, 3, 48, 10}, {cls_batch_size, 3, 48, 320}, {cls_batch_size, 3, 48, 1024});
+  rec_option.SetTrtInputShape("x", {1, 3, 48, 10}, {rec_batch_size, 3, 48, 320},
+                              {rec_batch_size, 3, 48, 2304});
 
   // Users can save the TRT cache file to disk as follows.
   // det_option.SetTrtCacheFile(det_model_dir + sep + "det_trt_cache.trt");
@@ -57,6 +63,12 @@ void InitAndInfer(const std::string& det_model_dir, const std::string& cls_model
   // The classification model is optional, so the PP-OCR can also be connected in series as follows
   // auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &rec_model);
   auto ppocr_v3 = fastdeploy::pipeline::PPOCRv3(&det_model, &cls_model, &rec_model);
+
+  // Set the inference batch size for the cls and rec models; the value can be -1 or any positive integer.
+  // When the batch size is set to -1, the cls and rec models run inference on as many
+  // boxes as the det model detects in one batch.
+  ppocr_v3.SetClsBatchSize(cls_batch_size);
+  ppocr_v3.SetRecBatchSize(rec_batch_size);
   if(!ppocr_v3.Initialized()){
     std::cerr << "Failed to initialize PP-OCR."
               << std::endl;
diff --git a/examples/vision/ocr/PP-OCRv3/python/infer.py b/examples/vision/ocr/PP-OCRv3/python/infer.py
index b6b27b240..1ec962cb5 100644
--- a/examples/vision/ocr/PP-OCRv3/python/infer.py
+++ b/examples/vision/ocr/PP-OCRv3/python/infer.py
@@ -106,6 +106,11 @@ rec_label_file = args.rec_label_file
 # Users can also configure each model's runtime option separately as needed
 runtime_option = build_option(args)
 
+# The cls and rec models of PPOCR now support batch inference
+# The two variables below are used to set the TRT input shapes and, once the PPOCR model is initialized, to configure batch inference
+cls_batch_size = 1
+rec_batch_size = 6
+
 # When using TRT, set the dynamic shape for the runtime of each of the three models, then create the models.
 # Note: set the dynamic input of the cls model and create it only after the det model has been created; the same applies to the rec model.
 # If users want to change the input shape of the det model, we recommend setting its length and height to a multiple of 32.
@@ -118,16 +123,18 @@ det_model = fd.vision.ocr.DBDetector(
     det_model_file, det_params_file, runtime_option=det_option)
 
 cls_option = runtime_option
-cls_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [64, 3, 48, 1024])
+cls_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [cls_batch_size, 3, 48, 320],
+                               [cls_batch_size, 3, 48, 1024])
 # Users can save the TRT engine file to disk
 # cls_option.set_trt_cache_file(args.cls_model + "/cls_trt_cache.trt")
 cls_model = fd.vision.ocr.Classifier(
     cls_model_file, cls_params_file, runtime_option=cls_option)
 
 rec_option = runtime_option
-rec_option.set_trt_input_shape("x", [1, 3, 48, 10], [10, 3, 48, 320],
-                               [64, 3, 48, 2304])
+rec_option.set_trt_input_shape("x", [1, 3, 48, 10],
+                               [rec_batch_size, 3, 48, 320],
+                               [rec_batch_size, 3, 48, 2304])
 # Users can save the TRT engine file to disk
 # rec_option.set_trt_cache_file(args.rec_model + "/rec_trt_cache.trt")
 rec_model = fd.vision.ocr.Recognizer(
@@ -137,6 +144,12 @@ rec_model = fd.vision.ocr.Recognizer(
 
 ppocr_v3 = fd.vision.ocr.PPOCRv3(
     det_model=det_model, cls_model=cls_model, rec_model=rec_model)
 
+# Set the batch size used by the cls and rec models at inference time
+# The value can be -1, or any integer from 1 to positive infinity
+# When the value is -1, the cls and rec batch size defaults to the number of boxes detected by the det model
+ppocr_v3.cls_batch_size = cls_batch_size
+ppocr_v3.rec_batch_size = rec_batch_size
+
 # Prepare the image for prediction
 im = cv2.imread(args.image)
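
Note: below is a minimal, self-contained Python sketch (not part of the patch) of how the new cls/rec batch-size settings fit into a complete PP-OCRv2 pipeline. The model and label paths, the GPU + TensorRT setup, the make_option helper, and the image name are illustrative assumptions; only set_trt_input_shape, the model constructors, and the cls_batch_size/rec_batch_size attributes are taken from the change above.

import cv2
import fastdeploy as fd

# Hypothetical local paths to exported PP-OCRv2 inference models and the rec dictionary.
det_dir = "ch_PP-OCRv2_det_infer"
cls_dir = "ch_ppocr_mobile_v2.0_cls_infer"
rec_dir = "ch_PP-OCRv2_rec_infer"
rec_label_file = "ppocr_keys_v1.txt"

cls_batch_size = 1   # inference batch size for the cls model
rec_batch_size = 6   # inference batch size for the rec model

def make_option(batch_size, height, max_width):
    # Build one RuntimeOption per model: GPU + TRT backend, with the opt/max batch
    # dimension of the dynamic shape tied to the chosen inference batch size.
    option = fd.RuntimeOption()
    option.use_gpu(0)
    option.use_trt_backend()
    option.set_trt_input_shape("x", [1, 3, height, 10],
                               [batch_size, 3, height, 320],
                               [batch_size, 3, height, max_width])
    return option

# The det model keeps a batch size of 1; its spatial dims should be multiples of 32.
det_option = fd.RuntimeOption()
det_option.use_gpu(0)
det_option.use_trt_backend()
det_option.set_trt_input_shape("x", [1, 3, 64, 64], [1, 3, 640, 640],
                               [1, 3, 960, 960])

det_model = fd.vision.ocr.DBDetector(
    det_dir + "/inference.pdmodel", det_dir + "/inference.pdiparams",
    runtime_option=det_option)
cls_model = fd.vision.ocr.Classifier(
    cls_dir + "/inference.pdmodel", cls_dir + "/inference.pdiparams",
    runtime_option=make_option(cls_batch_size, 48, 1024))
rec_model = fd.vision.ocr.Recognizer(
    rec_dir + "/inference.pdmodel", rec_dir + "/inference.pdiparams",
    rec_label_file, runtime_option=make_option(rec_batch_size, 32, 2304))

ppocr_v2 = fd.vision.ocr.PPOCRv2(
    det_model=det_model, cls_model=cls_model, rec_model=rec_model)

# The attributes introduced by this patch: -1 makes the cls/rec batch size follow
# the number of boxes returned by the det model.
ppocr_v2.cls_batch_size = cls_batch_size
ppocr_v2.rec_batch_size = rec_batch_size

im = cv2.imread("test_ocr.jpg")  # illustrative image path
result = ppocr_v2.predict(im)
print(result)

The design choice mirrored from the patch: the same cls_batch_size/rec_batch_size variables drive both the opt/max batch dimension of the TRT dynamic-shape profiles and the pipeline's runtime batching, so the two stay consistent.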