From 60f8f0e11b4ea94d3ca0e44373511a06f61c2d41 Mon Sep 17 00:00:00 2001
From: zhoushunjie
Date: Tue, 27 Dec 2022 15:22:09 +0000
Subject: [PATCH] Add batch size argument for uie

---
 examples/text/uie/cpp/infer.cc         | 28 +++++------
 examples/text/uie/python/infer.py      |  4 +-
 fastdeploy/text/uie/model.cc           | 68 +++++++++++++++++---------
 fastdeploy/text/uie/model.h            | 31 ++++++------
 fastdeploy/text/uie/uie_pybind.cc      | 51 ++++++++++---------
 python/fastdeploy/text/uie/__init__.py |  8 +--
 6 files changed, 114 insertions(+), 76 deletions(-)

diff --git a/examples/text/uie/cpp/infer.cc b/examples/text/uie/cpp/infer.cc
index 7f68bc8c2..4ef66adf4 100644
--- a/examples/text/uie/cpp/infer.cc
+++ b/examples/text/uie/cpp/infer.cc
@@ -49,17 +49,17 @@ int main(int argc, char* argv[]) {
     backend_type = std::atoi(argv[3]);
   }
   switch (backend_type) {
-    case 0:
-      option.UsePaddleInferBackend();
-      break;
-    case 1:
-      option.UseOrtBackend();
-      break;
-    case 2:
-      option.UseOpenVINOBackend();
-      break;
-    default:
-      break;
+  case 0:
+    option.UsePaddleInferBackend();
+    break;
+  case 1:
+    option.UseOrtBackend();
+    break;
+  case 2:
+    option.UseOpenVINOBackend();
+    break;
+  default:
+    break;
   }
   std::string model_dir(argv[1]);
   std::string model_path = model_dir + sep + "inference.pdmodel";
@@ -68,9 +68,9 @@ int main(int argc, char* argv[]) {
   using fastdeploy::text::SchemaNode;
   using fastdeploy::text::UIEResult;
 
-  auto predictor =
-      fastdeploy::text::UIEModel(model_path, param_path, vocab_path, 0.5, 128,
-                                 {"时间", "选手", "赛事名称"}, option);
+  auto predictor = fastdeploy::text::UIEModel(
+      model_path, param_path, vocab_path, 0.5, 128,
+      {"时间", "选手", "赛事名称"}, /* batch_size = */ 1, option);
   std::cout << "After init predictor" << std::endl;
   std::vector<std::unordered_map<std::string, std::vector<UIEResult>>> results;
   // Named Entity Recognition
diff --git a/examples/text/uie/python/infer.py b/examples/text/uie/python/infer.py
index d06e10cbb..10442f95e 100644
--- a/examples/text/uie/python/infer.py
+++ b/examples/text/uie/python/infer.py
@@ -129,6 +129,7 @@ if __name__ == "__main__":
         position_prob=0.5,
         max_length=args.max_length,
         schema=schema,
+        batch_size=args.batch_size,
         runtime_option=runtime_option,
         schema_language=SchemaLanguage.ZH)
 
@@ -181,7 +182,8 @@ if __name__ == "__main__":
     schema = {"评价维度": ["观点词", "情感倾向[正向,负向]"]}
     print(f"The extraction schema: {schema}")
     uie.set_schema(schema)
-    results = uie.predict(["店面干净,很清静"], return_dict=True)
+    results = uie.predict(
+        ["店面干净,很清静,服务员服务热情,性价比很高,发现收银台有排队"], return_dict=True)
     pprint(results)
     print()
 
diff --git a/fastdeploy/text/uie/model.cc b/fastdeploy/text/uie/model.cc
index cd90db4f1..3ae7631fb 100644
--- a/fastdeploy/text/uie/model.cc
+++ b/fastdeploy/text/uie/model.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
#include "fastdeploy/text/uie/model.h" +#include "fastdeploy/function/concat.h" +#include "fastdeploy/function/split.h" #include #include #include @@ -42,8 +44,7 @@ static std::string DBC2SBC(const std::string& content) { result.append(content.data() + content_utf8_len, content_char_width); } else { char dst_char[5] = {0}; - uint32_t utf8_uint32 = - fast_tokenizer::utils::UnicodeToUTF8(content_char); + uint32_t utf8_uint32 = fast_tokenizer::utils::UnicodeToUTF8(content_char); uint32_t utf8_char_count = fast_tokenizer::utils::UnicodeToUTF8Char(utf8_uint32, dst_char); result.append(dst_char, utf8_char_count); @@ -164,12 +165,12 @@ UIEModel::UIEModel(const std::string& model_file, const std::string& params_file, const std::string& vocab_file, float position_prob, size_t max_length, const std::vector& schema, + int batch_size, const fastdeploy::RuntimeOption& custom_option, const fastdeploy::ModelFormat& model_format, SchemaLanguage schema_language) - : max_length_(max_length), - position_prob_(position_prob), - schema_language_(schema_language), + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), tokenizer_(vocab_file) { runtime_option = custom_option; runtime_option.model_format = model_format; @@ -185,12 +186,12 @@ UIEModel::UIEModel(const std::string& model_file, const std::string& params_file, const std::string& vocab_file, float position_prob, size_t max_length, const std::vector& schema, + int batch_size, const fastdeploy::RuntimeOption& custom_option, const fastdeploy::ModelFormat& model_format, SchemaLanguage schema_language) - : max_length_(max_length), - position_prob_(position_prob), - schema_language_(schema_language), + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), tokenizer_(vocab_file) { runtime_option = custom_option; runtime_option.model_format = model_format; @@ -205,13 +206,12 @@ UIEModel::UIEModel(const std::string& model_file, UIEModel::UIEModel(const std::string& model_file, const std::string& params_file, const std::string& vocab_file, float position_prob, - size_t max_length, const SchemaNode& schema, + size_t max_length, const SchemaNode& schema, int batch_size, const fastdeploy::RuntimeOption& custom_option, const fastdeploy::ModelFormat& model_format, SchemaLanguage schema_language) - : max_length_(max_length), - position_prob_(position_prob), - schema_language_(schema_language), + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), tokenizer_(vocab_file) { runtime_option = custom_option; runtime_option.model_format = model_format; @@ -230,7 +230,8 @@ bool UIEModel::Initialize() { void UIEModel::SetValidBackend() { // TODO(zhoushunjie): Add lite backend in future - valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER, Backend::LITE}; + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER, + Backend::LITE}; valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; } @@ -253,8 +254,8 @@ void UIEModel::AutoSplitter(const std::vector& texts, size_t cnt_org = 0; size_t cnt_short = 0; for (auto& text : texts) { - auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8( - text.c_str(), text.length()); + auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8(text.c_str(), + text.length()); if (text_len <= max_length) { short_texts->push_back(text); if (input_mapping->size() <= cnt_org) { @@ -264,8 +265,7 @@ 
       }
       cnt_short += 1;
     } else {
-      fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
-          text);
+      fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(text);
       for (size_t start = 0; start < text_len; start += max_length) {
         size_t end = start + max_length;
         if (end > text_len) {
@@ -742,13 +742,37 @@ void UIEModel::Predict(
   std::vector<fast_tokenizer::core::Encoding> encodings;
   Preprocess(short_input_texts, short_prompts, &encodings, &inputs);
 
-  // 3. Infer
-  std::vector<FDTensor> outputs(NumOutputsOfRuntime());
-  if (!Infer(inputs, &outputs)) {
-    FDERROR << "Failed to inference while using model:" << ModelName()
-            << "." << std::endl;
+  std::vector<std::vector<FDTensor>> inputs_vec(NumInputsOfRuntime());
+  int encoding_size = encodings.size();
+  std::vector<int> num_or_sections;
+  for (int i = 0; i < encoding_size; i += batch_size_) {
+    int actual_batch_size = (std::min)(batch_size_, encoding_size - i);
+    num_or_sections.push_back(actual_batch_size);
+  }
+  for (int i = 0; i < NumInputsOfRuntime(); ++i) {
+    function::Split(inputs[i], num_or_sections, &inputs_vec[i]);
   }
 
+  // 3. Infer
+  std::vector<FDTensor> outputs(NumOutputsOfRuntime());
+  std::vector<FDTensor> outputs0, outputs1;
+
+  for (int i = 0; i < inputs_vec[0].size(); ++i) {
+    std::vector<FDTensor> curr_inputs(NumInputsOfRuntime());
+    std::vector<FDTensor> curr_outputs(NumOutputsOfRuntime());
+    for (int j = 0; j < NumInputsOfRuntime(); ++j) {
+      curr_inputs[j] = std::move(inputs_vec[j][i]);
+      curr_inputs[j].name = inputs[j].name;
+    }
+    if (!Infer(curr_inputs, &curr_outputs)) {
+      FDERROR << "Failed to inference while using model:" << ModelName()
+              << "." << std::endl;
+    }
+    outputs0.push_back(curr_outputs[0]);
+    outputs1.push_back(curr_outputs[1]);
+  }
+  function::Concat(outputs0, &outputs[0]);
+  function::Concat(outputs1, &outputs[1]);
   // 4. Convert FDTensor to UIEResult
   Postprocess(outputs, encodings, short_input_texts, short_prompts,
               input_mapping_with_short_text, &results_list);
diff --git a/fastdeploy/text/uie/model.h b/fastdeploy/text/uie/model.h
index c813369d3..5e2244c08 100644
--- a/fastdeploy/text/uie/model.h
+++ b/fastdeploy/text/uie/model.h
@@ -14,14 +14,14 @@
 
 #pragma once
 
+#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/utils/unique_ptr.h"
 #include
 #include
 #include
 #include
 #include
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/utils/unique_ptr.h"
-#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
 
 using namespace paddlenlp;
 
@@ -99,6 +99,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
   UIEModel(const std::string& model_file, const std::string& params_file,
            const std::string& vocab_file, float position_prob,
            size_t max_length, const std::vector<std::string>& schema,
+           int batch_size,
            const fastdeploy::RuntimeOption& custom_option =
                fastdeploy::RuntimeOption(),
            const fastdeploy::ModelFormat& model_format =
@@ -106,7 +107,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
            SchemaLanguage schema_language = SchemaLanguage::ZH);
   UIEModel(const std::string& model_file, const std::string& params_file,
            const std::string& vocab_file, float position_prob,
-           size_t max_length, const SchemaNode& schema,
+           size_t max_length, const SchemaNode& schema, int batch_size,
            const fastdeploy::RuntimeOption& custom_option =
                fastdeploy::RuntimeOption(),
            const fastdeploy::ModelFormat& model_format =
@@ -115,6 +116,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
   UIEModel(const std::string& model_file, const std::string& params_file,
            const std::string& vocab_file, float position_prob,
            size_t max_length, const std::vector<SchemaNode>& schema,
+           int batch_size,
           const fastdeploy::RuntimeOption& custom_option =
               fastdeploy::RuntimeOption(),
           const fastdeploy::ModelFormat& model_format =
@@ -154,10 +156,10 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
       std::vector<std::vector<
           std::unordered_map<std::string, std::vector<UIEResult>>>>* results,
       std::vector<std::vector<UIEResult*>>* new_relations);
-  void Predict(
-      const std::vector<std::string>& texts,
-      std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
-          results);
+  void
+  Predict(const std::vector<std::string>& texts,
+          std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
+              results);
 
  protected:
   using IDX_PROB = std::pair<int64_t, float>;
@@ -190,15 +192,16 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
       const SPAN_SET& span_set,
       const std::vector<fast_tokenizer::core::Offset>& offset_mapping,
       std::vector<SpanIdx>* span_idxs, std::vector<float>* probs) const;
-  void ConvertSpanToUIEResult(
-      const std::vector<std::string>& texts,
-      const std::vector<std::string>& prompts,
-      const std::vector<std::vector<SpanIdx>>& span_idxs,
-      const std::vector<std::vector<float>>& probs,
-      std::vector<std::vector<UIEResult>>* results) const;
+  void
+  ConvertSpanToUIEResult(const std::vector<std::string>& texts,
+                         const std::vector<std::string>& prompts,
+                         const std::vector<std::vector<SpanIdx>>& span_idxs,
+                         const std::vector<std::vector<float>>& probs,
+                         std::vector<std::vector<UIEResult>>* results) const;
   std::unique_ptr<Schema> schema_;
   size_t max_length_;
   float position_prob_;
+  int batch_size_;
   SchemaLanguage schema_language_;
   fast_tokenizer::tokenizers_impl::ErnieFastTokenizer tokenizer_;
 };
diff --git a/fastdeploy/text/uie/uie_pybind.cc b/fastdeploy/text/uie/uie_pybind.cc
index 146dcc2c6..ad7374229 100644
--- a/fastdeploy/text/uie/uie_pybind.cc
+++ b/fastdeploy/text/uie/uie_pybind.cc
@@ -35,24 +35,29 @@ void BindUIE(pybind11::module& m) {
   py::class_<text::UIEModel, FastDeployModel>(m, "UIEModel")
       .def(py::init<std::string, std::string, std::string, float, size_t,
-                    std::vector<std::string>, RuntimeOption, ModelFormat,
-                    text::SchemaLanguage>(),
+                    std::vector<std::string>, int, RuntimeOption, ModelFormat,
+                    text::SchemaLanguage>(),
           py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"),
py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), py::arg("custom_option") = fastdeploy::RuntimeOption(), py::arg("model_format") = fastdeploy::ModelFormat::PADDLE, py::arg("schema_language") = text::SchemaLanguage::ZH) - .def( - py::init, RuntimeOption, ModelFormat, text::SchemaLanguage>(), - py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), - py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), - py::arg("custom_option") = fastdeploy::RuntimeOption(), - py::arg("model_format") = fastdeploy::ModelFormat::PADDLE, - py::arg("schema_language") = text::SchemaLanguage::ZH) .def(py::init(), + std::vector, int, RuntimeOption, + ModelFormat, text::SchemaLanguage>(), py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), + py::arg("custom_option") = fastdeploy::RuntimeOption(), + py::arg("model_format") = fastdeploy::ModelFormat::PADDLE, + py::arg("schema_language") = text::SchemaLanguage::ZH) + .def(py::init(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), py::arg("custom_option") = fastdeploy::RuntimeOption(), py::arg("model_format") = fastdeploy::ModelFormat::PADDLE, py::arg("schema_language") = text::SchemaLanguage::ZH) @@ -60,23 +65,25 @@ void BindUIE(pybind11::module& m) { static_cast&)>(&text::UIEModel::SetSchema), py::arg("schema")) - .def("set_schema", static_cast&)>( - &text::UIEModel::SetSchema), + .def("set_schema", + static_cast&)>( + &text::UIEModel::SetSchema), py::arg("schema")) .def("set_schema", static_cast( &text::UIEModel::SetSchema), py::arg("schema")) - .def("predict", - [](text::UIEModel& self, const std::vector& texts) { - std::vector< - std::unordered_map>> - results; - self.Predict(texts, &results); - return results; - }, - py::arg("text")); + .def( + "predict", + [](text::UIEModel& self, const std::vector& texts) { + std::vector< + std::unordered_map>> + results; + self.Predict(texts, &results); + return results; + }, + py::arg("text")); } } // namespace fastdeploy diff --git a/python/fastdeploy/text/uie/__init__.py b/python/fastdeploy/text/uie/__init__.py index 00ed67a90..295cea34a 100644 --- a/python/fastdeploy/text/uie/__init__.py +++ b/python/fastdeploy/text/uie/__init__.py @@ -50,6 +50,7 @@ class UIEModel(FastDeployModel): position_prob=0.5, max_length=128, schema=[], + batch_size=64, runtime_option=RuntimeOption(), model_format=ModelFormat.PADDLE, schema_language=SchemaLanguage.ZH): @@ -63,9 +64,10 @@ class UIEModel(FastDeployModel): else: assert "The type of schema should be list or dict." schema_language = C.text.SchemaLanguage(schema_language) - self._model = C.text.UIEModel( - model_file, params_file, vocab_file, position_prob, max_length, - schema, runtime_option._option, model_format, schema_language) + self._model = C.text.UIEModel(model_file, params_file, vocab_file, + position_prob, max_length, schema, + batch_size, runtime_option._option, + model_format, schema_language) assert self.initialized, "UIEModel initialize failed." def set_schema(self, schema):