Add batch size argument for uie

2025-10-06 00:57:33 +08:00 · 2022-12-27 15:22:09 +00:00
parent df940b750f
commit 60f8f0e11b
6 changed files with 114 additions and 76 deletions
--- a/examples/text/uie/cpp/infer.cc
+++ b/examples/text/uie/cpp/infer.cc
@@ -49,17 +49,17 @@ int main(int argc, char* argv[]) {
    backend_type = std::atoi(argv[3]);
  }
  switch (backend_type) {
-    case 0:
-      option.UsePaddleInferBackend();
-      break;
-    case 1:
-      option.UseOrtBackend();
-      break;
-    case 2:
-      option.UseOpenVINOBackend();
-      break;
-    default:
-      break;
+  case 0:
+    option.UsePaddleInferBackend();
+    break;
+  case 1:
+    option.UseOrtBackend();
+    break;
+  case 2:
+    option.UseOpenVINOBackend();
+    break;
+  default:
+    break;
  }
  std::string model_dir(argv[1]);
  std::string model_path = model_dir + sep + "inference.pdmodel";
@@ -68,9 +68,9 @@ int main(int argc, char* argv[]) {
  using fastdeploy::text::SchemaNode;
  using fastdeploy::text::UIEResult;

-  auto predictor =
-      fastdeploy::text::UIEModel(model_path, param_path, vocab_path, 0.5, 128,
-                                 {"时间", "选手", "赛事名称"}, option);
+  auto predictor = fastdeploy::text::UIEModel(
+      model_path, param_path, vocab_path, 0.5, 128,
+      {"时间", "选手", "赛事名称"}, /* batch_size = */ 1, option);
  std::cout << "After init predictor" << std::endl;
  std::vector<std::unordered_map<std::string, std::vector<UIEResult>>> results;
  // Named Entity Recognition
--- a/examples/text/uie/python/infer.py
+++ b/examples/text/uie/python/infer.py
@@ -129,6 +129,7 @@ if __name__ == "__main__":
        position_prob=0.5,
        max_length=args.max_length,
        schema=schema,
+        batch_size=args.batch_size,
        runtime_option=runtime_option,
        schema_language=SchemaLanguage.ZH)

@@ -181,7 +182,8 @@ if __name__ == "__main__":
    schema = {"评价维度": ["观点词", "情感倾向[正向，负向]"]}
    print(f"The extraction schema: {schema}")
    uie.set_schema(schema)
-    results = uie.predict(["店面干净，很清静"], return_dict=True)
+    results = uie.predict(
+        ["店面干净，很清静，服务员服务热情，性价比很高，发现收银台有排队"], return_dict=True)
    pprint(results)
    print()

--- a/fastdeploy/text/uie/model.cc
+++ b/fastdeploy/text/uie/model.cc
@@ -13,6 +13,8 @@
 // limitations under the License.

 #include "fastdeploy/text/uie/model.h"
+#include "fastdeploy/function/concat.h"
+#include "fastdeploy/function/split.h"
 #include <algorithm>
 #include <codecvt>
 #include <locale>
@@ -42,8 +44,7 @@ static std::string DBC2SBC(const std::string& content) {
      result.append(content.data() + content_utf8_len, content_char_width);
    } else {
      char dst_char[5] = {0};
-      uint32_t utf8_uint32 =
-          fast_tokenizer::utils::UnicodeToUTF8(content_char);
+      uint32_t utf8_uint32 = fast_tokenizer::utils::UnicodeToUTF8(content_char);
      uint32_t utf8_char_count =
          fast_tokenizer::utils::UnicodeToUTF8Char(utf8_uint32, dst_char);
      result.append(dst_char, utf8_char_count);
@@ -164,12 +165,12 @@ UIEModel::UIEModel(const std::string& model_file,
                   const std::string& params_file,
                   const std::string& vocab_file, float position_prob,
                   size_t max_length, const std::vector<std::string>& schema,
+                   int batch_size,
                   const fastdeploy::RuntimeOption& custom_option,
                   const fastdeploy::ModelFormat& model_format,
                   SchemaLanguage schema_language)
-    : max_length_(max_length),
-      position_prob_(position_prob),
-      schema_language_(schema_language),
+    : max_length_(max_length), position_prob_(position_prob),
+      schema_language_(schema_language), batch_size_(batch_size),
      tokenizer_(vocab_file) {
  runtime_option = custom_option;
  runtime_option.model_format = model_format;
@@ -185,12 +186,12 @@ UIEModel::UIEModel(const std::string& model_file,
                   const std::string& params_file,
                   const std::string& vocab_file, float position_prob,
                   size_t max_length, const std::vector<SchemaNode>& schema,
+                   int batch_size,
                   const fastdeploy::RuntimeOption& custom_option,
                   const fastdeploy::ModelFormat& model_format,
                   SchemaLanguage schema_language)
-    : max_length_(max_length),
-      position_prob_(position_prob),
-      schema_language_(schema_language),
+    : max_length_(max_length), position_prob_(position_prob),
+      schema_language_(schema_language), batch_size_(batch_size),
      tokenizer_(vocab_file) {
  runtime_option = custom_option;
  runtime_option.model_format = model_format;
@@ -205,13 +206,12 @@ UIEModel::UIEModel(const std::string& model_file,
 UIEModel::UIEModel(const std::string& model_file,
                   const std::string& params_file,
                   const std::string& vocab_file, float position_prob,
-                   size_t max_length, const SchemaNode& schema,
+                   size_t max_length, const SchemaNode& schema, int batch_size,
                   const fastdeploy::RuntimeOption& custom_option,
                   const fastdeploy::ModelFormat& model_format,
                   SchemaLanguage schema_language)
-    : max_length_(max_length),
-      position_prob_(position_prob),
-      schema_language_(schema_language),
+    : max_length_(max_length), position_prob_(position_prob),
+      schema_language_(schema_language), batch_size_(batch_size),
      tokenizer_(vocab_file) {
  runtime_option = custom_option;
  runtime_option.model_format = model_format;
@@ -230,7 +230,8 @@ bool UIEModel::Initialize() {

 void UIEModel::SetValidBackend() {
  // TODO(zhoushunjie): Add lite backend in future
-  valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER, Backend::LITE};
+  valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER,
+                        Backend::LITE};
  valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT};
 }

@@ -253,8 +254,8 @@ void UIEModel::AutoSplitter(const std::vector<std::string>& texts,
  size_t cnt_org = 0;
  size_t cnt_short = 0;
  for (auto& text : texts) {
-    auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8(
-        text.c_str(), text.length());
+    auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8(text.c_str(),
+                                                                 text.length());
    if (text_len <= max_length) {
      short_texts->push_back(text);
      if (input_mapping->size() <= cnt_org) {
@@ -264,8 +265,7 @@ void UIEModel::AutoSplitter(const std::vector<std::string>& texts,
      }
      cnt_short += 1;
    } else {
-      fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
-          text);
+      fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(text);
      for (size_t start = 0; start < text_len; start += max_length) {
        size_t end = start + max_length;
        if (end > text_len) {
@@ -742,13 +742,37 @@ void UIEModel::Predict(
      std::vector<fast_tokenizer::core::Encoding> encodings;
      Preprocess(short_input_texts, short_prompts, &encodings, &inputs);

-      // 3. Infer
-      std::vector<fastdeploy::FDTensor> outputs(NumOutputsOfRuntime());
-      if (!Infer(inputs, &outputs)) {
-        FDERROR << "Failed to inference while using model:" << ModelName()
-                << "." << std::endl;
+      std::vector<std::vector<FDTensor>> inputs_vec(NumInputsOfRuntime());
+      int encoding_size = encodings.size();
+      std::vector<int> num_or_sections;
+      const int step = (std::max)(batch_size_, 1);  // clamp so the loop ends
+      for (int i = 0; i < encoding_size; i += step) {  // one section per batch
+        num_or_sections.push_back((std::min)(step, encoding_size - i));
+      }
+      for (int i = 0; i < NumInputsOfRuntime(); ++i) {
+        function::Split(inputs[i], num_or_sections, &inputs_vec[i]);
      }

+      // 3. Infer
+      std::vector<fastdeploy::FDTensor> outputs(NumOutputsOfRuntime());
+      std::vector<fastdeploy::FDTensor> outputs0, outputs1;
+
+      for (int i = 0; i < inputs_vec[0].size(); ++i) {
+        std::vector<fastdeploy::FDTensor> curr_inputs(NumInputsOfRuntime());
+        std::vector<fastdeploy::FDTensor> curr_outputs(NumOutputsOfRuntime());
+        for (int j = 0; j < NumInputsOfRuntime(); ++j) {
+          curr_inputs[j] = std::move(inputs_vec[j][i]);
+          curr_inputs[j].name = inputs[j].name;
+        }
+        if (!Infer(curr_inputs, &curr_outputs)) {
+          FDERROR << "Failed to inference while using model:" << ModelName()
+                  << "." << std::endl;
+        }
+        outputs0.push_back(curr_outputs[0]);
+        outputs1.push_back(curr_outputs[1]);
+      }
+      function::Concat(outputs0, &outputs[0]);
+      function::Concat(outputs1, &outputs[1]);
      // 4. Convert FDTensor to UIEResult
      Postprocess(outputs, encodings, short_input_texts, short_prompts,
                  input_mapping_with_short_text, &results_list);
--- a/fastdeploy/text/uie/model.h
+++ b/fastdeploy/text/uie/model.h
@@ -14,14 +14,14 @@

 #pragma once

+#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/utils/unique_ptr.h"
 #include <ostream>
 #include <set>
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/utils/unique_ptr.h"
-#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"

 using namespace paddlenlp;

@@ -99,6 +99,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
  UIEModel(const std::string& model_file, const std::string& params_file,
           const std::string& vocab_file, float position_prob,
           size_t max_length, const std::vector<std::string>& schema,
+           int batch_size,
           const fastdeploy::RuntimeOption& custom_option =
               fastdeploy::RuntimeOption(),
           const fastdeploy::ModelFormat& model_format =
@@ -106,7 +107,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
           SchemaLanguage schema_language = SchemaLanguage::ZH);
  UIEModel(const std::string& model_file, const std::string& params_file,
           const std::string& vocab_file, float position_prob,
-           size_t max_length, const SchemaNode& schema,
+           size_t max_length, const SchemaNode& schema, int batch_size,
           const fastdeploy::RuntimeOption& custom_option =
               fastdeploy::RuntimeOption(),
           const fastdeploy::ModelFormat& model_format =
@@ -115,6 +116,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
  UIEModel(const std::string& model_file, const std::string& params_file,
           const std::string& vocab_file, float position_prob,
           size_t max_length, const std::vector<SchemaNode>& schema,
+           int batch_size,
           const fastdeploy::RuntimeOption& custom_option =
               fastdeploy::RuntimeOption(),
           const fastdeploy::ModelFormat& model_format =
@@ -154,10 +156,10 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
      std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
          results,
      std::vector<std::vector<UIEResult*>>* new_relations);
-  void Predict(
-      const std::vector<std::string>& texts,
-      std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
-          results);
+  void
+  Predict(const std::vector<std::string>& texts,
+          std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
+              results);

 protected:
  using IDX_PROB = std::pair<int64_t, float>;
@@ -190,15 +192,16 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
      const SPAN_SET& span_set,
      const std::vector<fast_tokenizer::core::Offset>& offset_mapping,
      std::vector<SpanIdx>* span_idxs, std::vector<float>* probs) const;
-  void ConvertSpanToUIEResult(
-      const std::vector<std::string>& texts,
-      const std::vector<std::string>& prompts,
-      const std::vector<std::vector<SpanIdx>>& span_idxs,
-      const std::vector<std::vector<float>>& probs,
-      std::vector<std::vector<UIEResult>>* results) const;
+  void
+  ConvertSpanToUIEResult(const std::vector<std::string>& texts,
+                         const std::vector<std::string>& prompts,
+                         const std::vector<std::vector<SpanIdx>>& span_idxs,
+                         const std::vector<std::vector<float>>& probs,
+                         std::vector<std::vector<UIEResult>>* results) const;
  std::unique_ptr<Schema> schema_;
  size_t max_length_;
  float position_prob_;
+  int batch_size_;
  SchemaLanguage schema_language_;
  fast_tokenizer::tokenizers_impl::ErnieFastTokenizer tokenizer_;
 };
--- a/fastdeploy/text/uie/uie_pybind.cc
+++ b/fastdeploy/text/uie/uie_pybind.cc
@@ -35,24 +35,29 @@ void BindUIE(pybind11::module& m) {

  py::class_<text::UIEModel, FastDeployModel>(m, "UIEModel")
      .def(py::init<std::string, std::string, std::string, float, size_t,
-                    std::vector<std::string>, RuntimeOption, ModelFormat, text::SchemaLanguage>(),
+                    std::vector<std::string>, int, RuntimeOption, ModelFormat,
+                    text::SchemaLanguage>(),
           py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"),
           py::arg("position_prob"), py::arg("max_length"), py::arg("schema"),
+           py::arg("batch_size"),
           py::arg("custom_option") = fastdeploy::RuntimeOption(),
           py::arg("model_format") = fastdeploy::ModelFormat::PADDLE,
           py::arg("schema_language") = text::SchemaLanguage::ZH)
-      .def(
-          py::init<std::string, std::string, std::string, float, size_t,
-                   std::vector<text::SchemaNode>, RuntimeOption, ModelFormat, text::SchemaLanguage>(),
-          py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"),
-          py::arg("position_prob"), py::arg("max_length"), py::arg("schema"),
-          py::arg("custom_option") = fastdeploy::RuntimeOption(),
-          py::arg("model_format") = fastdeploy::ModelFormat::PADDLE,
-          py::arg("schema_language") = text::SchemaLanguage::ZH)
      .def(py::init<std::string, std::string, std::string, float, size_t,
-                    text::SchemaNode, RuntimeOption, ModelFormat, text::SchemaLanguage>(),
+                    std::vector<text::SchemaNode>, int, RuntimeOption,
+                    ModelFormat, text::SchemaLanguage>(),
           py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"),
           py::arg("position_prob"), py::arg("max_length"), py::arg("schema"),
+           py::arg("batch_size"),
+           py::arg("custom_option") = fastdeploy::RuntimeOption(),
+           py::arg("model_format") = fastdeploy::ModelFormat::PADDLE,
+           py::arg("schema_language") = text::SchemaLanguage::ZH)
+      .def(py::init<std::string, std::string, std::string, float, size_t,
+                    text::SchemaNode, int, RuntimeOption, ModelFormat,
+                    text::SchemaLanguage>(),
+           py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"),
+           py::arg("position_prob"), py::arg("max_length"), py::arg("schema"),
+           py::arg("batch_size"),
           py::arg("custom_option") = fastdeploy::RuntimeOption(),
           py::arg("model_format") = fastdeploy::ModelFormat::PADDLE,
           py::arg("schema_language") = text::SchemaLanguage::ZH)
@@ -60,23 +65,25 @@ void BindUIE(pybind11::module& m) {
           static_cast<void (text::UIEModel::*)(
               const std::vector<std::string>&)>(&text::UIEModel::SetSchema),
           py::arg("schema"))
-      .def("set_schema", static_cast<void (text::UIEModel::*)(
-                             const std::vector<text::SchemaNode>&)>(
-                             &text::UIEModel::SetSchema),
+      .def("set_schema",
+           static_cast<void (text::UIEModel::*)(
+               const std::vector<text::SchemaNode>&)>(
+               &text::UIEModel::SetSchema),
           py::arg("schema"))
      .def("set_schema",
           static_cast<void (text::UIEModel::*)(const text::SchemaNode&)>(
               &text::UIEModel::SetSchema),
           py::arg("schema"))
-      .def("predict",
-           [](text::UIEModel& self, const std::vector<std::string>& texts) {
-             std::vector<
-                 std::unordered_map<std::string, std::vector<text::UIEResult>>>
-                 results;
-             self.Predict(texts, &results);
-             return results;
-           },
-           py::arg("text"));
+      .def(
+          "predict",
+          [](text::UIEModel& self, const std::vector<std::string>& texts) {
+            std::vector<
+                std::unordered_map<std::string, std::vector<text::UIEResult>>>
+                results;
+            self.Predict(texts, &results);
+            return results;
+          },
+          py::arg("text"));
 }

 }  // namespace fastdeploy
--- a/python/fastdeploy/text/uie/init.py
+++ b/python/fastdeploy/text/uie/init.py
@@ -50,6 +50,7 @@ class UIEModel(FastDeployModel):
                 position_prob=0.5,
                 max_length=128,
                 schema=[],
+                 batch_size=64,
                 runtime_option=RuntimeOption(),
                 model_format=ModelFormat.PADDLE,
                 schema_language=SchemaLanguage.ZH):
@@ -63,9 +64,10 @@ class UIEModel(FastDeployModel):
        else:
            assert "The type of schema should be list or dict."
        schema_language = C.text.SchemaLanguage(schema_language)
-        self._model = C.text.UIEModel(
-            model_file, params_file, vocab_file, position_prob, max_length,
-            schema, runtime_option._option, model_format, schema_language)
+        self._model = C.text.UIEModel(model_file, params_file, vocab_file,
+                                      position_prob, max_length, schema,
+                                      batch_size, runtime_option._option,
+                                      model_format, schema_language)
        assert self.initialized, "UIEModel initialize failed."

    def set_schema(self, schema):