Add uie cpp deploy (#120)

* implement PredictUIEInput * remove batch size * Add GetCandidateIdx * add GetCandidateIdx GetSpan GetSpanIdxAndProbs * Add Predict of UIEModel * Add relation schema * Fix uie unicode bug * rename information_extraction/ernie -> uie * Add more uie task * Add cross task extraction * use CharToBytesOffsetConverter * Add faster_tokenizer dir * Add RuntimeOption args * Add todo comments * Add some readme * fix readme * Fix readme Co-authored-by: Jason <jiangjiajun@baidu.com>
2025-10-05 16:48:03 +08:00 · 2022-08-24 20:20:47 +08:00
parent cf4afa4220
commit bae93cebc6
8 changed files with 969 additions and 184 deletions
--- a/FastDeploy.cmake.in
+++ b/FastDeploy.cmake.in
@@ -108,6 +108,8 @@ if (ENABLE_TEXT)
  find_library(FASTER_TOKENIZER_LIB core_tokenizers ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/lib NO_DEFAULT_PATH)
  list(APPEND FASTDEPLOY_LIBS ${FASTER_TOKENIZER_LIB})
  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/include)
+   # TODO (zhoushunjie): Will remove it later.
+  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/include/faster_tokenizer)
  list(APPEND FASTDEPLOY_INCS ${CMAKE_CURRENT_LIST_DIR}/third_libs/install/faster_tokenizer/third_party/include)
 endif()

--- a/examples/text/information_extraction/ernie/cpp/infer.cc
+++ b/examples/text/information_extraction/ernie/cpp/infer.cc
@@ -1,182 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <iostream>
-#include <sstream>
-
-#include "fastdeploy/function/reduce.h"
-#include "fastdeploy/function/softmax.h"
-#include "fastdeploy/text.h"
-#include "tokenizers/ernie_faster_tokenizer.h"
-
-using namespace paddlenlp;
-
-void LoadTransitionFromFile(const std::string& file,
-                            std::vector<float>* transitions, int* num_tags) {
-  std::ifstream fin(file);
-  std::string curr_transition;
-  float transition;
-  int i = 0;
-  while (fin) {
-    std::getline(fin, curr_transition);
-    std::istringstream iss(curr_transition);
-    while (iss) {
-      iss >> transition;
-      transitions->push_back(transition);
-    }
-    if (curr_transition != "") {
-      ++i;
-    }
-  }
-  *num_tags = i;
-}
-
-template <typename T>
-void ViterbiDecode(const fastdeploy::FDTensor& slot_logits,
-                   const fastdeploy::FDTensor& trans,
-                   fastdeploy::FDTensor* best_path) {
-  int batch_size = slot_logits.shape[0];
-  int seq_len = slot_logits.shape[1];
-  int num_tags = slot_logits.shape[2];
-  best_path->Allocate({batch_size, seq_len}, fastdeploy::FDDataType::INT64);
-
-  const T* slot_logits_ptr = reinterpret_cast<const T*>(slot_logits.Data());
-  const T* trans_ptr = reinterpret_cast<const T*>(trans.Data());
-  int64_t* best_path_ptr = reinterpret_cast<int64_t*>(best_path->Data());
-  std::vector<T> scores(num_tags);
-  std::copy(slot_logits_ptr, slot_logits_ptr + num_tags, scores.begin());
-  std::vector<std::vector<T>> M(num_tags, std::vector<T>(num_tags));
-  for (int b = 0; b < batch_size; ++b) {
-    std::vector<std::vector<int>> paths;
-    const T* curr_slot_logits_ptr = slot_logits_ptr + b * seq_len * num_tags;
-    int64_t* curr_best_path_ptr = best_path_ptr + b * seq_len;
-    for (int t = 1; t < seq_len; t++) {
-      for (size_t i = 0; i < num_tags; i++) {
-        for (size_t j = 0; j < num_tags; j++) {
-          auto trans_idx = i * num_tags * num_tags + j * num_tags;
-          auto slot_logit_idx = t * num_tags + j;
-          M[i][j] = scores[i] + trans_ptr[trans_idx] +
-                    curr_slot_logits_ptr[slot_logit_idx];
-        }
-      }
-      std::vector<int> idxs;
-      for (size_t i = 0; i < num_tags; i++) {
-        T max = 0.0f;
-        int idx = 0;
-        for (size_t j = 0; j < num_tags; j++) {
-          if (M[j][i] > max) {
-            max = M[j][i];
-            idx = j;
-          }
-        }
-        scores[i] = max;
-        idxs.push_back(idx);
-      }
-      paths.push_back(idxs);
-    }
-    int scores_max_index = 0;
-    float scores_max = 0.0f;
-    for (size_t i = 0; i < scores.size(); i++) {
-      if (scores[i] > scores_max) {
-        scores_max = scores[i];
-        scores_max_index = i;
-      }
-    }
-    curr_best_path_ptr[seq_len - 1] = scores_max_index;
-    for (int i = seq_len - 2; i >= 0; i--) {
-      int index = curr_best_path_ptr[i + 1];
-      curr_best_path_ptr[i] = paths[i][index];
-    }
-  }
-}
-
-int main() {
-  // 1. Define a ernie faster tokenizer
-  faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer(
-      "ernie_vocab.txt");
-  std::vector<faster_tokenizer::core::EncodeInput> strings_list = {
-      "导航去科技园二号楼", "屏幕亮度为我减小一点吧"};
-  std::vector<faster_tokenizer::core::Encoding> encodings;
-  tokenizer.EncodeBatchStrings(strings_list, &encodings);
-  size_t batch_size = strings_list.size();
-  size_t seq_len = encodings[0].GetLen();
-  for (auto&& encoding : encodings) {
-    std::cout << encoding.DebugString() << std::endl;
-  }
-  // 2. Initialize runtime
-  fastdeploy::RuntimeOption runtime_option;
-  runtime_option.SetModelPath("nano_static/model.pdmodel",
-                              "nano_static/model.pdiparams");
-  fastdeploy::Runtime runtime;
-  runtime.Init(runtime_option);
-
-  // 3. Construct input vector
-  // 3.1 Convert encodings to input_ids, token_type_ids
-  std::vector<int64_t> input_ids, token_type_ids;
-  for (int i = 0; i < encodings.size(); ++i) {
-    auto&& curr_input_ids = encodings[i].GetIds();
-    auto&& curr_type_ids = encodings[i].GetTypeIds();
-    input_ids.insert(input_ids.end(), curr_input_ids.begin(),
-                     curr_input_ids.end());
-    token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(),
-                          curr_type_ids.end());
-  }
-  // 3.2 Set data to input vector
-  std::vector<fastdeploy::FDTensor> inputs(runtime.NumInputs());
-  void* inputs_ptrs[] = {input_ids.data(), token_type_ids.data()};
-  for (int i = 0; i < runtime.NumInputs(); ++i) {
-    inputs[i].SetExternalData({batch_size, seq_len},
-                              fastdeploy::FDDataType::INT64, inputs_ptrs[i]);
-    inputs[i].name = runtime.GetInputInfo(i).name;
-  }
-
-  // 4. Infer
-  std::vector<fastdeploy::FDTensor> outputs(runtime.NumOutputs());
-  runtime.Infer(inputs, &outputs);
-
-  // 5. Postprocess
-  fastdeploy::FDTensor domain_probs, intent_probs;
-  fastdeploy::Softmax(outputs[0], &domain_probs);
-  fastdeploy::Softmax(outputs[1], &intent_probs);
-
-  fastdeploy::FDTensor domain_max_probs, intent_max_probs;
-  fastdeploy::Max(domain_probs, &domain_max_probs, {-1}, true);
-  fastdeploy::Max(intent_probs, &intent_max_probs, {-1}, true);
-
-  std::vector<float> transition;
-  int num_tags;
-  LoadTransitionFromFile("joint_transition.txt", &transition, &num_tags);
-  fastdeploy::FDTensor trans;
-  trans.SetExternalData({num_tags, num_tags}, fastdeploy::FDDataType::FP32,
-                        transition.data());
-
-  fastdeploy::FDTensor best_path;
-  ViterbiDecode<float>(outputs[2], trans, &best_path);
-  // 6. Print result
-  domain_max_probs.PrintInfo();
-  intent_max_probs.PrintInfo();
-
-  batch_size = best_path.shape[0];
-  seq_len = best_path.shape[1];
-  const int64_t* best_path_ptr =
-      reinterpret_cast<const int64_t*>(best_path.Data());
-  for (int i = 0; i < batch_size; ++i) {
-    std::cout << "best_path[" << i << "] = ";
-    for (int j = 0; j < seq_len; ++j) {
-      std::cout << best_path_ptr[i * seq_len + j] << ", ";
-    }
-    std::cout << std::endl;
-  }
-  best_path.PrintInfo();
-  return 0;
-}
--- a/examples/text/information_extraction/ernie/cpp/CMakeLists.txt
+++ b/examples/text/information_extraction/ernie/cpp/CMakeLists.txt
@@ -21,5 +21,5 @@ include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)

 include_directories(${FASTDEPLOY_INCS})

-add_executable(infer_ernie_demo ${PROJECT_SOURCE_DIR}/infer.cc)
-target_link_libraries(infer_ernie_demo ${FASTDEPLOY_LIBS})
+add_executable(infer_demo ${PROJECT_SOURCE_DIR}/infer.cc ${PROJECT_SOURCE_DIR}/uie.cc)
+target_link_libraries(infer_demo ${FASTDEPLOY_LIBS})
--- a/examples/text/uie/cpp/README.md
+++ b/examples/text/uie/cpp/README.md
@@ -0,0 +1,47 @@
+# 通用信息抽取 UIE C++部署示例
+
+本目录下提供`infer.cc`，用于快速完成[UIE模型](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo/uie)在CPU/GPU上部署的示例。
+
+在部署前，需确认以下两个步骤
+
+- 1. 软硬件环境满足要求，参考[FastDeploy环境要求](../../../../docs/quick_start/requirements.md)
+- 2. 根据开发环境，下载预编译部署库和samples代码，参考[FastDeploy预编译库](../../../../docs/compile/prebuilt_libraries.md)
+
+以Linux上uie-base模型推理为例，在本目录执行如下命令即可完成编译测试。
+
+```
+# UIE目前还未发布，当前需开发者自行编译FastDeploy，通过如下脚本编译得到部署库fastdeploy-linux-x64-gpu-dev
+git clone https://github.com/PaddlePaddle/FastDeploy.git
+cd FastDeploy
+mkdir build && cd build
+cmake .. -DENABLE_ORT_BACKEND=ON  \
+               -DENABLE_VISION=ON \
+               -DENABLE_PADDLE_BACKEND=ON \
+               -DENABLE_TEXT=ON \
+               -DWITH_GPU=ON \
+               -DCMAKE_INSTALL_PREFIX=${PWD}/fastdeploy-linux-x64-gpu-dev
+
+make -j8
+make install
+
+# 编译模型examples代码（SDK中包含了examples代码）
+cd ../examples/text/uie/cpp
+mkdir build
+cd build
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/../../../../../build/fastdeploy-linux-x64-gpu-dev
+make -j
+
+# 下载uie-base模型以及词表
+wget https://bj.bcebos.com/fastdeploy/models/uie/uie-base.tgz
+tar -xzvf uie-base.tgz
+
+
+# CPU 推理
+./infer_demo uie-base 0
+
+# GPU 推理
+./infer_demo uie-base 1
+```
+
+## 模型获取
+UIE 模型介绍可以参考https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo/uie 。其中，在完成训练后，需要将训练后的模型导出成推理模型。该步骤可参考该文档完成导出：https://github.com/PaddlePaddle/PaddleNLP/tree/develop/model_zoo/uie#%E6%A8%A1%E5%9E%8B%E9%83%A8%E7%BD%B2 。
--- a/examples/text/uie/cpp/infer.cc
+++ b/examples/text/uie/cpp/infer.cc
@@ -0,0 +1,115 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include <iostream>
+#include <sstream>
+
+#include "fastdeploy/function/reduce.h"
+#include "fastdeploy/function/softmax.h"
+#include "fastdeploy/text.h"
+#include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h"
+#include "uie.h"
+
+using namespace paddlenlp;
+
+// Path separator used to join the model directory with the file names below.
+// `_WIN32` is the macro compilers predefine on Windows; `WIN32` is only set
+// by some build systems / SDK headers, so both are checked.
+#if defined(_WIN32) || defined(WIN32)
+const char sep = '\\';
+#else
+const char sep = '/';
+#endif
+
+// Demo entry point: builds a UIEModel from the directory given in argv[1]
+// and runs a fixed set of example extractions, printing each result set to
+// stdout.
+//
+// argv[1]: directory containing inference.pdmodel, inference.pdiparams and
+//          vocab.txt.
+// argv[2]: integer run option; 0 selects CPU, any other value selects GPU.
+// Returns -1 when fewer than two arguments are supplied, 0 otherwise.
+int main(int argc, char* argv[]) {
+  if (argc < 3) {
+    std::cout << "Usage: infer_demo path/to/model run_option, "
+                 "e.g ./infer_demo uie-base  0"
+              << std::endl;
+    std::cout << "The data type of run_option is int, 0: run with cpu; 1: run "
+                 "with gpu."
+              << std::endl;
+    return -1;
+  }
+  // Select the device from the second command-line argument.
+  auto option = fastdeploy::RuntimeOption();
+  if (std::atoi(argv[2]) == 0) {
+    option.UseCpu();
+  } else {
+    option.UseGpu();
+  }
+  // Resolve the three files expected inside the model directory.
+  std::string model_dir(argv[1]);
+  std::string model_path = model_dir + sep + "inference.pdmodel";
+  std::string param_path = model_dir + sep + "inference.pdiparams";
+  std::string vocab_path = model_dir + sep + "vocab.txt";
+
+  // 0.5 is passed as position_prob and 128 as max_length; the string list is
+  // the initial (entity) schema.
+  auto predictor = UIEModel(model_path, param_path, vocab_path, 0.5, 128,
+                            {"时间", "选手", "赛事名称"}, option);
+  fastdeploy::FDINFO << "After init predictor" << std::endl;
+  // Reused for every task below; cleared after each print.
+  std::vector<std::unordered_map<std::string, std::vector<UIEResult>>> results;
+  // Named Entity Recognition
+  predictor.Predict({"2月8日上午北京冬奥会自由式滑雪女子大跳台决赛中中国选手谷"
+                     "爱凌以188.25分获得金牌！"},
+                    &results);
+  std::cout << results << std::endl;
+  results.clear();
+
+  // Relation Extraction
+  predictor.SetSchema({{"竞赛名称",
+                        {SchemaNode("主办方"), SchemaNode("承办方"),
+                         SchemaNode("已举办次数")}}});
+  predictor.Predict(
+      {"2022语言与智能技术竞赛由中国中文信息学会和中国计算机学会联合主办，百度"
+       "公司、中国中文信息学会评测工作委员会和中国计算机学会自然语言处理专委会"
+       "承办，已连续举办4届，成为全球最热门的中文NLP赛事之一。"},
+      &results);
+  std::cout << results << std::endl;
+  results.clear();
+
+  // Event Extraction
+  predictor.SetSchema({{"地震触发词",
+                        {SchemaNode("地震强度"), SchemaNode("时间"),
+                         SchemaNode("震中位置"), SchemaNode("震源深度")}}});
+  predictor.Predict(
+      {"中国地震台网正式测定：5月16日06时08分在云南临沧市凤庆县(北纬24."
+       "34度，东经99.98度)发生3.5级地震，震源深度10千米。"},
+      &results);
+  std::cout << results << std::endl;
+  results.clear();
+
+  // Opinion Extraction
+  predictor.SetSchema(
+      {{"评价维度",
+        {SchemaNode("观点词"), SchemaNode("情感倾向[正向，负向]")}}});
+  predictor.Predict(
+      {"店面干净，很清静，服务员服务热情，性价比很高，发现收银台有排队"},
+      &results);
+  std::cout << results << std::endl;
+  results.clear();
+
+  // Sequence classification
+  predictor.SetSchema({"情感倾向[正向，负向]"});
+  predictor.Predict({"这个产品用起来真的很流畅，我非常喜欢"}, &results);
+  std::cout << results << std::endl;
+  results.clear();
+
+  // Cross task extraction
+
+  // The schema mixes a plain entity ("法院") with two entities that each
+  // carry a child relation.
+  predictor.SetSchema({{"法院", {}},
+                       {"原告", {SchemaNode("委托代理人")}},
+                       {"被告", {SchemaNode("委托代理人")}}});
+  predictor.Predict({"北京市海淀区人民法院\n民事判决书\n(199x)"
+                     "建初字第xxx号\n原告：张三。\n委托代理人李四，北京市 "
+                     "A律师事务所律师。\n被告：B公司，法定代表人王五，开发公司"
+                     "总经理。\n委托代理人赵六，北京市 C律师事务所律师。"},
+                    &results);
+  std::cout << results << std::endl;
+  results.clear();
+  return 0;
+}
--- a/examples/text/uie/cpp/uie.cc
+++ b/examples/text/uie/cpp/uie.cc
@@ -0,0 +1,646 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "uie.h"
+#include <algorithm>
+#include <codecvt>
+#include <locale>
+#include <numeric>
+#include <queue>
+#include <sstream>
+
+#include "faster_tokenizer/pretokenizers/pretokenizer.h"
+#include "faster_tokenizer/utils/utf8.h"
+
+// Returns a copy of `content` in which every character whose code point,
+// after subtracting 0xfee0, lands in [0x21, 0x7e] is replaced by that
+// shifted character. All other characters are copied through byte-for-byte.
+//
+// NOTE(review): U+3000 is mapped to 0x20 first, but 0x20 is below the
+// accepted range, so that character is copied through unchanged as well —
+// confirm whether this is intended.
+static std::string DBC2SBC(const std::string& content) {
+  std::string converted;
+  size_t byte_pos = 0;
+  while (byte_pos < content.length()) {
+    uint32_t code_point;
+    const auto char_width = faster_tokenizer::utils::UTF8ToUInt32(
+        content.data() + byte_pos, &code_point);
+    code_point = faster_tokenizer::utils::UTF8ToUnicode(code_point);
+    // For code points below 0xfee0 the unsigned subtraction wraps around to
+    // a large value, which the range check below rejects.
+    code_point = (code_point == 0x3000) ? 0x0020 : code_point - 0xfee0;
+
+    const bool in_target_range = code_point >= 0x0021 && code_point <= 0x7e;
+    if (in_target_range) {
+      char utf8_bytes[5] = {0};
+      uint32_t packed_utf8 = faster_tokenizer::utils::UnicodeToUTF8(code_point);
+      uint32_t byte_count =
+          faster_tokenizer::utils::UnicodeToUTF8Char(packed_utf8, utf8_bytes);
+      converted.append(utf8_bytes, byte_count);
+    } else {
+      converted.append(content.data() + byte_pos, char_width);
+    }
+    byte_pos += char_width;
+  }
+  return converted;
+}
+
+// Writes `result` to `os` as "key: value" lines, each prefixed by `tab_size`
+// spaces. The start/end lines are omitted when both are zero. Nested
+// relations are printed recursively, with the relation name indented by one
+// extra TAB_OFFSET and its entries by two. Returns `os`.
+static std::ostream& PrintResult(std::ostream& os, const UIEResult& result,
+                                 int tab_size) {
+  constexpr int TAB_OFFSET = 4;
+  // Emits `width` single spaces (nothing when `width` is not positive).
+  const auto indent = [&os](int width) {
+    for (int n = 0; n < width; ++n) {
+      os << " ";
+    }
+  };
+
+  indent(tab_size);
+  os << "text: " << result.text_ << "\n";
+
+  indent(tab_size);
+  os << "probability: " << result.probability_ << "\n";
+
+  const bool has_span = !(result.start_ == 0 && result.end_ == 0);
+  if (has_span) {
+    indent(tab_size);
+    os << "start: " << result.start_ << "\n";
+
+    indent(tab_size);
+    os << "end: " << result.end_ << "\n";
+  }
+
+  if (result.relation_.size() > 0) {
+    indent(tab_size);
+    os << "relation:\n";
+    const int child_indent = tab_size + TAB_OFFSET + TAB_OFFSET;
+    for (auto&& relation : result.relation_) {
+      indent(tab_size + TAB_OFFSET);
+      os << relation.first << ":\n";
+      for (size_t k = 0; k < relation.second.size(); ++k) {
+        PrintResult(os, relation.second[k], child_indent);
+      }
+    }
+  }
+  os << "\n";
+  return os;
+}
+
+// Streams a single UIEResult with no leading indentation.
+std::ostream& operator<<(std::ostream& os, const UIEResult& result) {
+  PrintResult(os, result, 0);
+  return os;
+}
+
+// Streams a batch of results: a "The result:" header, then for every batch
+// element each map key followed by its UIEResult entries indented by four
+// spaces. The stream is flushed after every batch element.
+std::ostream& operator<<(
+    std::ostream& os,
+    const std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>&
+        results) {
+  os << "The result:\n";
+  for (const auto& text_results : results) {
+    for (const auto& schema_entry : text_results) {
+      os << schema_entry.first << ": \n";
+      for (const auto& item : schema_entry.second) {
+        PrintResult(os, item, 4);
+      }
+    }
+    os << std::endl;
+  }
+  return os;
+}
+
+// Replaces the root with a freshly created node called `name`.
+void Schema::CreateRoot(const std::string& name) {
+  root_ = fastdeploy::utils::make_unique<SchemaNode>(name);
+}
+
+// Builds a schema whose root `name` has the single child `schema`.
+Schema::Schema(const std::string& schema, const std::string& name) {
+  CreateRoot(name);
+  root_->AddChild(schema);
+}
+
+// Builds a schema whose root `name` has one child per entry of
+// `schema_list`, added in list order.
+Schema::Schema(const std::vector<std::string>& schema_list,
+               const std::string& name) {
+  CreateRoot(name);
+  for (const auto& child_name : schema_list) {
+    root_->AddChild(child_name);
+  }
+}
+
+// Builds a schema whose root `name` has one child per map entry; the mapped
+// vector is handed to AddChild together with the key.
+Schema::Schema(
+    const std::unordered_map<std::string, std::vector<SchemaNode>>& schema_map,
+    const std::string& name) {
+  CreateRoot(name);
+  for (const auto& entry : schema_map) {
+    root_->AddChild(entry.first, entry.second);
+  }
+}
+
+// Constructs a model from a flat list of schema names.
+//
+// Stores `max_length` and `position_prob`, builds the tokenizer from
+// `vocab_file`, copies `custom_option`, overrides its model format and model
+// paths, initialises the runtime, installs the schema and finally enables
+// tokenizer truncation at `max_length`.
+// NOTE(review): the return value of runtime_.Init() is not inspected here.
+UIEModel::UIEModel(const std::string& model_file,
+                   const std::string& params_file,
+                   const std::string& vocab_file, float position_prob,
+                   size_t max_length, const std::vector<std::string>& schema,
+                   const fastdeploy::RuntimeOption& custom_option,
+                   const fastdeploy::Frontend& model_format)
+    : max_length_(max_length),
+      position_prob_(position_prob),
+      tokenizer_(vocab_file) {
+  runtime_option_ = custom_option;
+  runtime_option_.model_format = model_format;
+  runtime_option_.SetModelPath(model_file, params_file);
+  runtime_.Init(runtime_option_);
+  SetSchema(schema);
+  // Second argument is presumably the truncation stride — TODO confirm
+  // against the faster_tokenizer API.
+  tokenizer_.EnableTruncMethod(
+      max_length, 0, faster_tokenizer::core::Direction::RIGHT,
+      faster_tokenizer::core::TruncStrategy::LONGEST_FIRST);
+}
+
+// Same as the constructor above, but takes the schema as a map from a name
+// to its child SchemaNode list.
+UIEModel::UIEModel(
+    const std::string& model_file, const std::string& params_file,
+    const std::string& vocab_file, float position_prob, size_t max_length,
+    const std::unordered_map<std::string, std::vector<SchemaNode>>& schema,
+    const fastdeploy::RuntimeOption& custom_option,
+    const fastdeploy::Frontend& model_format)
+    : max_length_(max_length),
+      position_prob_(position_prob),
+      tokenizer_(vocab_file) {
+  runtime_option_ = custom_option;
+  runtime_option_.model_format = model_format;
+  runtime_option_.SetModelPath(model_file, params_file);
+  runtime_.Init(runtime_option_);
+  SetSchema(schema);
+  // Second argument is presumably the truncation stride — TODO confirm
+  // against the faster_tokenizer API.
+  tokenizer_.EnableTruncMethod(
+      max_length, 0, faster_tokenizer::core::Direction::RIGHT,
+      faster_tokenizer::core::TruncStrategy::LONGEST_FIRST);
+}
+
+// Replaces the current schema with one built from a flat list of names.
+void UIEModel::SetSchema(const std::vector<std::string>& schema) {
+  schema_ = fastdeploy::utils::make_unique<Schema>(schema);
+}
+
+// Replaces the current schema with one built from a map of names to their
+// child SchemaNode lists.
+void UIEModel::SetSchema(
+    const std::unordered_map<std::string, std::vector<SchemaNode>>& schema) {
+  schema_ = fastdeploy::utils::make_unique<Schema>(schema);
+}
+
+// Splits every text into pieces of at most `max_length` Unicode characters.
+//
+// texts:         input texts (UTF-8).
+// max_length:    maximum number of Unicode characters per piece. A value of
+//                0 cannot drive the chunking loop, so texts are then passed
+//                through unsplit instead of looping forever.
+// short_texts:   receives the pieces, appended in input order.
+// input_mapping: for the i-th text, receives (appended to any existing
+//                entry) the piece indices produced for it. Indices count
+//                from 0 within this call.
+void UIEModel::AutoSplitter(
+    const std::vector<std::string>& texts, size_t max_length,
+    std::vector<std::string>* short_texts,
+    std::unordered_map<size_t, std::vector<size_t>>* input_mapping) {
+  size_t cnt_org = 0;
+  size_t cnt_short = 0;
+  for (const auto& text : texts) {
+    const size_t text_len = faster_tokenizer::utils::GetUnicodeLenFromUTF8(
+        text.c_str(), text.length());
+    // operator[] creates the entry on first use, so no existence check is
+    // needed before appending.
+    auto& short_ids = (*input_mapping)[cnt_org];
+    if (max_length == 0 || text_len <= max_length) {
+      short_texts->push_back(text);
+      short_ids.push_back(cnt_short);
+      ++cnt_short;
+    } else {
+      // Chunk boundaries are expressed in characters and converted to byte
+      // offsets so multi-byte characters are never cut in half.
+      faster_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
+          text);
+      for (size_t start = 0; start < text_len; start += max_length) {
+        size_t end = start + max_length;
+        if (end > text_len) {
+          end = text_len;
+        }
+        faster_tokenizer::core::Offset byte_offset;
+        converter.convert({start, end}, &byte_offset);
+        short_texts->emplace_back(text.data() + byte_offset.first,
+                                  byte_offset.second - byte_offset.first);
+        short_ids.push_back(cnt_short);
+        ++cnt_short;
+      }
+    }
+    ++cnt_org;
+  }
+}
+
+// Collects, for each of the `batch_size` rows of `probs` (row-major, row
+// length `seq_len`), the (column index, probability) pairs whose probability
+// is strictly greater than `threshold`. One vector per row is appended to
+// `candidate_idx_prob`, possibly empty.
+void UIEModel::GetCandidateIdx(
+    const float* probs, int64_t batch_size, int64_t seq_len,
+    std::vector<std::vector<std::pair<int64_t, float>>>* candidate_idx_prob,
+    float threshold) const {
+  for (int64_t row = 0; row < batch_size; ++row) {
+    const float* row_probs = probs + row * seq_len;
+    std::vector<std::pair<int64_t, float>> candidates;
+    for (int64_t col = 0; col < seq_len; ++col) {
+      const float prob = row_probs[col];
+      if (prob > threshold) {
+        candidates.push_back({col, prob});
+      }
+    }
+    candidate_idx_prob->push_back(std::move(candidates));
+  }
+}
+
+// Orders spans by the index of their first element, breaking ties by the
+// index of their second element. Probabilities take no part in the ordering.
+bool UIEModel::IdxProbCmp::operator()(
+    const std::pair<IDX_PROB, IDX_PROB>& lhs,
+    const std::pair<IDX_PROB, IDX_PROB>& rhs) const {
+  const auto& lhs_start = lhs.first.first;
+  const auto& rhs_start = rhs.first.first;
+  if (lhs_start != rhs_start) {
+    return lhs_start < rhs_start;
+  }
+  return lhs.second.first < rhs.second.first;
+}
+
+// Pairs start candidates with end candidates using two cursors that advance
+// over both lists. Whenever the current start index is not greater than the
+// current end index the pair is inserted into `span_set` and the start
+// cursor advances; the end cursor advances when the indices are equal or
+// when the end index is the smaller one.
+void UIEModel::GetSpan(const std::vector<IDX_PROB>& start_idx_prob,
+                       const std::vector<IDX_PROB>& end_idx_prob,
+                       SPAN_SET* span_set) const {
+  const size_t num_starts = start_idx_prob.size();
+  const size_t num_ends = end_idx_prob.size();
+  size_t start_cursor = 0;
+  size_t end_cursor = 0;
+  while (start_cursor < num_starts && end_cursor < num_ends) {
+    const auto& start_item = start_idx_prob[start_cursor];
+    const auto& end_item = end_idx_prob[end_cursor];
+    if (start_item.first > end_item.first) {
+      // This end position precedes the current start; skip it.
+      ++end_cursor;
+      continue;
+    }
+    const bool same_position = start_item.first == end_item.first;
+    span_set->insert(std::make_pair(start_item, end_item));
+    ++start_cursor;
+    if (same_position) {
+      ++end_cursor;
+    }
+  }
+}
+// Converts token-level spans into offsets taken from `offset_mapping`.
+//
+// For every span in `span_set` the product of its start and end
+// probabilities is appended to `probs`, and a SpanIdx holding
+// (offset of start token).first, (offset of end token).second and an
+// is-prompt flag is appended to `span_idxs`. A span is flagged as prompt
+// when its end token index is positive and not beyond the token preceding
+// the first (0, 0) offset found after position 0.
+void UIEModel::GetSpanIdxAndProbs(
+    const SPAN_SET& span_set,
+    const std::vector<faster_tokenizer::core::Offset>& offset_mapping,
+    std::vector<SpanIdx>* span_idxs, std::vector<float>* probs) const {
+  const faster_tokenizer::core::Offset zero_offset(0, 0);
+  const auto first_sep_iter = std::find(offset_mapping.begin() + 1,
+                                        offset_mapping.end(), zero_offset);
+  const auto prompt_end_token_id =
+      std::distance(offset_mapping.begin(), first_sep_iter) - 1;
+  for (const auto& span : span_set) {
+    const auto& start_token = span.first;
+    const auto& end_token = span.second;
+    probs->push_back(start_token.second * end_token.second);
+    const auto start_id = offset_mapping[start_token.first].first;
+    const auto end_id = offset_mapping[end_token.first].second;
+    const bool is_prompt =
+        end_token.first > 0 && end_token.first <= prompt_end_token_id;
+    span_idxs->push_back({{start_id, end_id}, is_prompt});
+  }
+}
+
+// Turns character-offset spans into UIEResult entries.
+//
+// texts / prompts: the i-th text and the prompt it was paired with.
+// span_idxs:       per text, the spans to extract; a span with is_prompt_
+//                  set is cut out of the prompt, otherwise out of the text.
+// probs:           per text, the probability of each span (same indexing as
+//                  span_idxs).
+// results:         receives one vector per text (empty when the text has no
+//                  spans), appended in order.
+//
+// Spans taken from the prompt are reported with start = end = 0, which the
+// rest of this file uses to recognise a classification result.
+void UIEModel::ConvertSpanToUIEResult(
+    const std::vector<std::string>& texts,
+    const std::vector<std::string>& prompts,
+    const std::vector<std::vector<SpanIdx>>& span_idxs,
+    const std::vector<std::vector<float>>& probs,
+    std::vector<std::vector<UIEResult>>* results) const {
+  const auto batch_size = texts.size();
+  for (size_t i = 0; i < batch_size; ++i) {
+    std::vector<UIEResult> result_list;
+    result_list.reserve(span_idxs[i].size());
+    for (size_t j = 0; j < span_idxs[i].size(); ++j) {
+      const auto& span = span_idxs[i][j];
+      auto start = span.offset_.first;
+      auto end = span.offset_.second;
+      // Both kinds of span are extracted the same way; only the source
+      // string differs.
+      const std::string& source = span.is_prompt_ ? prompts[i] : texts[i];
+      faster_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
+          source);
+      faster_tokenizer::core::Offset byte_offset;
+      converter.convert({start, end}, &byte_offset);
+      std::string span_text = source.substr(
+          byte_offset.first, byte_offset.second - byte_offset.first);
+      if (span.is_prompt_) {
+        // Indicate cls task
+        start = 0;
+        end = 0;
+      }
+      result_list.emplace_back(start, end, probs[i][j], span_text);
+    }
+    results->push_back(std::move(result_list));
+  }
+}
+
+// Merges the per-piece results produced after AutoSplitter back into one
+// result list per original text.
+//
+// short_texts:   the pieces that were predicted on.
+// input_mapping: original text index -> indices of its pieces.
+// results:       in: one result list per piece; out: one result list per
+//                original text, ordered by ascending original text index.
+//
+// The task is treated as classification when the first non-empty piece
+// result starts with an entry whose start_ and end_ are both 0. In that case
+// the label with the largest summed probability over the pieces wins and is
+// reported with its mean probability. Otherwise the piece results are
+// concatenated, with start_/end_ shifted by the character length of the
+// preceding pieces.
+void UIEModel::AutoJoiner(
+    const std::vector<std::string>& short_texts,
+    const std::unordered_map<size_t, std::vector<size_t>>& input_mapping,
+    std::vector<std::vector<UIEResult>>* results) {
+  // 1. Detect if it's a cls task
+  bool is_cls_task = false;
+  for (auto&& short_result : *results) {
+    if (short_result.size() == 0) {
+      continue;
+    }
+    is_cls_task = short_result[0].start_ == 0 && short_result[0].end_ == 0;
+    break;
+  }
+
+  // unordered_map iteration order is unspecified, so walk the original text
+  // indices in ascending order to keep the output aligned with the inputs.
+  std::vector<size_t> org_indices;
+  org_indices.reserve(input_mapping.size());
+  for (const auto& input_mapping_item : input_mapping) {
+    org_indices.push_back(input_mapping_item.first);
+  }
+  std::sort(org_indices.begin(), org_indices.end());
+
+  // 2. Get the final result
+  std::vector<std::vector<UIEResult>> final_result;
+  final_result.reserve(org_indices.size());
+  for (size_t org_idx : org_indices) {
+    const std::vector<size_t>& curr_mapping = input_mapping.at(org_idx);
+    std::vector<UIEResult> result_list;
+    if (is_cls_task) {
+      // label -> (number of pieces voting for it, summed probability)
+      std::unordered_map<std::string, std::pair<int, float>> cls_options;
+      for (size_t result_idx : curr_mapping) {
+        const auto& short_result = (*results)[result_idx];
+        if (short_result.size() == 0) {
+          continue;
+        }
+        auto& option = cls_options[short_result.front().text_];
+        option.first += 1;
+        option.second += short_result.front().probability_;
+      }
+      if (cls_options.size() > 0) {
+        using ClsOption = decltype(cls_options)::value_type;
+        auto max_iter = std::max_element(
+            cls_options.begin(), cls_options.end(),
+            [](const ClsOption& lhs, const ClsOption& rhs) {
+              return lhs.second.second < rhs.second.second;
+            });
+        result_list.emplace_back(
+            0, 0, max_iter->second.second / max_iter->second.first,
+            max_iter->first);
+      }
+    } else {
+      // Character offset of the current piece within its original text.
+      size_t offset = 0;
+      for (size_t result_idx : curr_mapping) {
+        auto& short_result = (*results)[result_idx];
+        for (auto&& curr_result : short_result) {
+          curr_result.start_ += offset;
+          curr_result.end_ += offset;
+        }
+        offset += faster_tokenizer::utils::GetUnicodeLenFromUTF8(
+            short_texts[result_idx].c_str(), short_texts[result_idx].size());
+        result_list.insert(result_list.end(), short_result.begin(),
+                           short_result.end());
+      }
+    }
+    final_result.push_back(std::move(result_list));
+  }
+  *results = std::move(final_result);
+}
+
+// Runs one batched inference over `input_texts`, using prompts[i] as the
+// prompt of input_texts[i], and writes the extracted spans to `results`.
+//
+// Steps: split the texts with AutoSplitter so that text plus prompt fit the
+// max_length_ budget, encode every (prompt, short text) pair, feed the
+// flattened ids to runtime_, turn the two output probability tensors into
+// spans and finally merge the per-piece results with AutoJoiner.
+//
+// `prompts` must hold at least input_texts.size() entries. For an empty batch
+// `results` is cleared and neither the tokenizer nor the runtime is touched.
+void UIEModel::PredictUIEInput(const std::vector<std::string>& input_texts,
+                               const std::vector<std::string>& prompts,
+                               std::vector<std::vector<UIEResult>>* results) {
+  // Nothing to predict. Without this guard max_element() below returns
+  // prompts.end(), which is dereferenced right after, and batch_size is 0 in
+  // the seq_len division. Predict() gets here whenever a parent schema node
+  // produced no span for any text.
+  if (input_texts.empty() || prompts.empty()) {
+    results->clear();
+    return;
+  }
+
+  // 1. Shorten the input texts and prompts
+  auto unicode_len = [](const std::string& str) {
+    return faster_tokenizer::utils::GetUnicodeLenFromUTF8(str.c_str(),
+                                                          str.length());
+  };
+  auto max_prompt_iter = std::max_element(
+      prompts.begin(), prompts.end(),
+      [&unicode_len](const std::string& lhs, const std::string& rhs) {
+        return unicode_len(lhs) < unicode_len(rhs);
+      });
+  auto max_prompt_len = unicode_len(*max_prompt_iter);
+  // NOTE(review): max_length_ is a size_t, so this wraps around when the
+  // longest prompt needs more than max_length_ - 3 characters. The 3 is
+  // presumably the number of special tokens added to a pair - TODO confirm.
+  auto max_predict_len = max_length_ - 3 - max_prompt_len;
+
+  std::vector<std::string> short_texts;
+  std::unordered_map<size_t, std::vector<size_t>> input_mapping;
+  AutoSplitter(input_texts, max_predict_len, &short_texts, &input_mapping);
+  if (short_texts.empty()) {
+    results->clear();
+    return;
+  }
+
+  // Repeat each prompt once per short text produced from its input text.
+  // The mapping is walked by ascending input index instead of iterating the
+  // unordered_map: its iteration order is unspecified, while the prompts are
+  // paired with short_texts purely by position below.
+  std::vector<std::string> short_texts_prompts;
+  short_texts_prompts.reserve(short_texts.size());
+  for (size_t i = 0; i < input_texts.size(); ++i) {
+    auto mapping_iter = input_mapping.find(i);
+    if (mapping_iter == input_mapping.end()) {
+      continue;
+    }
+    short_texts_prompts.insert(short_texts_prompts.end(),
+                               mapping_iter->second.size(), prompts[i]);
+  }
+  std::vector<faster_tokenizer::core::EncodeInput> text_pair_input;
+  text_pair_input.reserve(short_texts.size());
+  for (size_t i = 0; i < short_texts.size(); ++i) {
+    text_pair_input.emplace_back(std::pair<std::string, std::string>(
+        short_texts_prompts[i], short_texts[i]));
+  }
+
+  // 2. Tokenize the short texts and short prompts
+  std::vector<faster_tokenizer::core::Encoding> encodings;
+  tokenizer_.EncodeBatchStrings(text_pair_input, &encodings);
+  // 3. Construct the input vector tensor
+  // 3.1 Convert encodings to input_ids, token_type_ids, position_ids, attn_mask
+  std::vector<int64_t> input_ids, token_type_ids, position_ids, attn_mask;
+  std::vector<std::vector<faster_tokenizer::core::Offset>> offset_mapping;
+  offset_mapping.reserve(encodings.size());
+  for (size_t i = 0; i < encodings.size(); ++i) {
+    auto&& curr_input_ids = encodings[i].GetIds();
+    auto&& curr_type_ids = encodings[i].GetTypeIds();
+    auto&& curr_attn_mask = encodings[i].GetAttentionMask();
+    auto&& curr_offsets = encodings[i].GetOffsets();
+    input_ids.insert(input_ids.end(), curr_input_ids.begin(),
+                     curr_input_ids.end());
+    token_type_ids.insert(token_type_ids.end(), curr_type_ids.begin(),
+                          curr_type_ids.end());
+    attn_mask.insert(attn_mask.end(), curr_attn_mask.begin(),
+                     curr_attn_mask.end());
+    offset_mapping.push_back(curr_offsets);
+    // Position ids are simply 0..len-1 for every sequence.
+    std::vector<int64_t> curr_position_ids(curr_input_ids.size());
+    std::iota(curr_position_ids.begin(), curr_position_ids.end(), 0);
+    position_ids.insert(position_ids.end(), curr_position_ids.begin(),
+                        curr_position_ids.end());
+  }
+
+  // 3.2 Set data to input vector
+  // seq_len is derived from the flattened size, i.e. every encoding is
+  // assumed to have the same length (padded by the tokenizer) - TODO confirm.
+  int64_t batch_size = short_texts.size();
+  int64_t seq_len = input_ids.size() / batch_size;
+  std::vector<fastdeploy::FDTensor> inputs(runtime_.NumInputs());
+  // The tensors borrow the vectors above, which must outlive Infer().
+  int64_t* inputs_ptrs[] = {input_ids.data(), token_type_ids.data(),
+                            position_ids.data(), attn_mask.data()};
+  for (int i = 0; i < runtime_.NumInputs(); ++i) {
+    inputs[i].SetExternalData({batch_size, seq_len},
+                              fastdeploy::FDDataType::INT64, inputs_ptrs[i]);
+    inputs[i].name = runtime_.GetInputInfo(i).name;
+  }
+
+  std::vector<fastdeploy::FDTensor> outputs(runtime_.NumOutputs());
+  // 4. Infer
+  runtime_.Infer(inputs, &outputs);
+  auto* start_prob = reinterpret_cast<float*>(outputs[0].Data());
+  auto* end_prob = reinterpret_cast<float*>(outputs[1].Data());
+
+  // 5. Postprocess
+  std::vector<std::vector<std::pair<int64_t, float>>> start_candidate_idx_prob,
+      end_candidate_idx_prob;
+  GetCandidateIdx(start_prob, outputs[0].shape[0], outputs[0].shape[1],
+                  &start_candidate_idx_prob, position_prob_);
+  GetCandidateIdx(end_prob, outputs[1].shape[0], outputs[1].shape[1],
+                  &end_candidate_idx_prob, position_prob_);
+  SPAN_SET span_set;
+  std::vector<std::vector<float>> probs(batch_size);
+  std::vector<std::vector<SpanIdx>> span_idxs(batch_size);
+  for (int64_t i = 0; i < batch_size; ++i) {
+    GetSpan(start_candidate_idx_prob[i], end_candidate_idx_prob[i], &span_set);
+    GetSpanIdxAndProbs(span_set, offset_mapping[i], &span_idxs[i], &probs[i]);
+    // span_set is reused for the next sequence.
+    span_set.clear();
+  }
+  ConvertSpanToUIEResult(short_texts, short_texts_prompts, span_idxs, probs,
+                         results);
+  AutoJoiner(short_texts, input_mapping, results);
+}
+
+// Extracts every schema entry from `texts`; results->at(i) maps a schema name
+// to the spans found in texts[i].
+//
+// The schema tree is walked breadth-first, starting at the root's children.
+// A node without prefixes prompts every text with its own name. A node with
+// prefixes issues one prompt per prefix (prefix + name) and stores what it
+// finds in the relation_ map of the parent span the prefix was built from.
+void UIEModel::Predict(
+    const std::vector<std::string>& texts,
+    std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
+        results) {
+  const size_t text_num = texts.size();
+  std::queue<SchemaNode> pending;
+  for (auto& top_level : schema_->root_->children_) {
+    pending.push(top_level);
+  }
+  results->resize(text_num);
+
+  while (!pending.empty()) {
+    auto node = pending.front();
+    pending.pop();
+
+    // 1. Construct input data from raw text. slots_per_text[i] lists the
+    //    positions inside input_texts / prompts that belong to texts[i].
+    std::vector<std::string> input_texts;
+    std::vector<std::string> prompts;
+    std::vector<std::vector<size_t>> slots_per_text;
+    size_t next_slot = 0;
+    const bool has_prefix = !node.prefix_.empty();
+    for (size_t i = 0; i < text_num; ++i) {
+      if (!has_prefix) {
+        input_texts.push_back(texts[i]);
+        prompts.push_back(DBC2SBC(node.name_));
+        slots_per_text.push_back(std::vector<size_t>{next_slot});
+        ++next_slot;
+        continue;
+      }
+      const auto& text_prefixes = node.prefix_[i];
+      for (const auto& pre : text_prefixes) {
+        input_texts.push_back(texts[i]);
+        prompts.push_back(DBC2SBC(pre + node.name_));
+      }
+      // One consecutive slot per prefix; no prefix gives an empty slot list.
+      slots_per_text.emplace_back(text_prefixes.size());
+      std::iota(slots_per_text.back().begin(), slots_per_text.back().end(),
+                next_slot);
+      next_slot += text_prefixes.size();
+    }
+
+    // 2. Predict from UIEInput
+    std::vector<std::vector<UIEResult>> slot_results;
+    PredictUIEInput(input_texts, prompts, &slot_results);
+
+    // 3. Postprocess
+    // relations[i] collects pointers to the spans stored for texts[i] by this
+    // node; the children use them to attach their own findings.
+    std::vector<std::vector<UIEResult*>> relations(text_num);
+    if (node.relations_.empty()) {
+      // No parent spans: store directly in the per-text result map.
+      for (size_t i = 0; i < slots_per_text.size(); ++i) {
+        auto& text_result = (*results)[i];
+        for (size_t slot : slots_per_text[i]) {
+          const auto& found = slot_results[slot];
+          if (found.empty()) {
+            continue;
+          }
+          auto& bucket = text_result[node.name_];
+          bucket.insert(bucket.end(), found.begin(), found.end());
+        }
+        // Pointers are taken only after all insertions for this text.
+        auto stored = text_result.find(node.name_);
+        if (stored != text_result.end()) {
+          for (auto& span : stored->second) {
+            relations[i].push_back(&span);
+          }
+        }
+      }
+    } else {
+      // Slot j of text i was generated from parent span parents[i][j].
+      auto& parents = node.relations_;
+      for (size_t i = 0; i < slots_per_text.size(); ++i) {
+        const auto& slots = slots_per_text[i];
+        for (size_t j = 0; j < slots.size(); ++j) {
+          const auto& found = slot_results[slots[j]];
+          if (found.empty()) {
+            continue;
+          }
+          auto& bucket = parents[i][j]->relation_[node.name_];
+          bucket.insert(bucket.end(), found.begin(), found.end());
+        }
+      }
+      for (size_t i = 0; i < parents.size(); ++i) {
+        for (UIEResult* parent : parents[i]) {
+          auto stored = parent->relation_.find(node.name_);
+          if (stored == parent->relation_.end()) {
+            continue;
+          }
+          for (auto& span : stored->second) {
+            relations[i].push_back(&span);
+          }
+        }
+      }
+    }
+
+    // Every span text, followed by the bytes "\xe7\x9a\x84", becomes a prompt
+    // prefix for the child nodes.
+    std::vector<std::vector<std::string>> prefix(text_num);
+    for (size_t i = 0; i < slots_per_text.size(); ++i) {
+      for (size_t slot : slots_per_text[i]) {
+        for (const auto& span : slot_results[slot]) {
+          prefix[i].push_back(span.text_ + "\xe7\x9a\x84");
+        }
+      }
+    }
+    for (auto& child : node.children_) {
+      child.relations_ = relations;
+      child.prefix_ = prefix;
+      pending.push(child);
+    }
+  }
+}
--- a/examples/text/uie/cpp/uie.h
+++ b/examples/text/uie/cpp/uie.h
@@ -0,0 +1,156 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+
+#include <ostream>
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/utils/unique_ptr.h"
+#include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h"
+
+using namespace paddlenlp;
+
+// One extracted span together with the results nested below it.
+struct UIEResult {
+  // The numeric fields carry default member initializers so that a
+  // default-constructed UIEResult never holds indeterminate values.
+  // NOTE(review): start_/end_ presumably delimit the span inside the input
+  // text - confirm the unit (characters vs. bytes) against the .cc file.
+  size_t start_ = 0;
+  size_t end_ = 0;
+  double probability_ = 0.0;
+  std::string text_;
+  // Nested results keyed by schema name; filled by UIEModel::Predict for
+  // child schema nodes.
+  std::unordered_map<std::string, std::vector<UIEResult>> relation_;
+  UIEResult() = default;
+  // `text` is taken by value and moved into place to avoid a second copy.
+  UIEResult(size_t start, size_t end, double probability, std::string text)
+      : start_(start),
+        end_(end),
+        probability_(probability),
+        text_(std::move(text)) {}
+};
+
+// Writes a single UIEResult to `os`. The output format is defined by the
+// implementation, which is not part of this header.
+std::ostream& operator<<(std::ostream& os, const UIEResult& result);
+// Writes a whole batch of per-text results (the container type filled by
+// UIEModel::Predict) to `os`.
+std::ostream& operator<<(
+    std::ostream& os,
+    const std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>&
+        results);
+
+// A node of the extraction schema tree. prefix_ and relations_ are scratch
+// state that UIEModel::Predict assigns to child nodes while it walks the
+// tree; a freshly built node leaves both empty.
+struct SchemaNode {
+  std::string name_;
+  // One list of prompt prefixes per input text.
+  std::vector<std::vector<std::string>> prefix_;
+  // One list of non-owning parent-result pointers per input text.
+  std::vector<std::vector<UIEResult*>> relations_;
+  std::vector<SchemaNode> children_;
+
+  explicit SchemaNode(const std::string& name,
+                      const std::vector<SchemaNode>& children = {})
+      : name_(name), children_(children) {}
+
+  // Appends a childless node called `schema`.
+  void AddChild(const std::string& schema) {
+    children_.push_back(SchemaNode(schema));
+  }
+  // Appends a copy of `schema`.
+  void AddChild(const SchemaNode& schema) { children_.emplace_back(schema); }
+  // Appends a node called `schema` whose children are childless nodes named
+  // after the entries of `children`.
+  void AddChild(const std::string& schema,
+                const std::vector<std::string>& children) {
+    SchemaNode added(schema);
+    added.children_.reserve(children.size());
+    for (const auto& child_name : children) {
+      added.children_.push_back(SchemaNode(child_name));
+    }
+    children_.push_back(added);
+  }
+  // Appends a node called `schema` that owns a copy of `children`.
+  void AddChild(const std::string& schema,
+                const std::vector<SchemaNode>& children) {
+    children_.push_back(SchemaNode(schema, children));
+  }
+};
+
+// Wrapper around the root SchemaNode of an extraction schema. The root is
+// private; UIEModel is the only friend and walks root_->children_ when
+// predicting. All constructors are defined outside this header.
+struct Schema {
+  // Builds a schema from a single entry. `name` defaults to "root" and is
+  // presumably the name given to the root node - TODO confirm in the .cc file.
+  explicit Schema(const std::string& schema, const std::string& name = "root");
+  // Builds a schema from a list of entries.
+  explicit Schema(const std::vector<std::string>& schema_list,
+                  const std::string& name = "root");
+  // Builds a schema from a map of entry name -> child nodes.
+  explicit Schema(const std::unordered_map<std::string,
+                                           std::vector<SchemaNode>>& schema_map,
+                  const std::string& name = "root");
+
+ private:
+  // Helper for the constructors; presumably allocates root_ with `name` -
+  // the definition is not visible here.
+  void CreateRoot(const std::string& name);
+  std::unique_ptr<SchemaNode> root_;
+  // UIEModel accesses root_ directly.
+  friend class UIEModel;
+};
+
+// UIE predictor: couples a fastdeploy::Runtime with an ErnieFasterTokenizer
+// and a Schema tree. All member functions are defined outside this header.
+struct UIEModel {
+ public:
+  // Builds a predictor from the model, parameter and vocabulary files with a
+  // schema given as a list of names. position_prob and max_length presumably
+  // initialise position_prob_ and max_length_ - the constructor body is not
+  // visible here.
+  UIEModel(
+      const std::string& model_file, const std::string& params_file,
+      const std::string& vocab_file, float position_prob, size_t max_length,
+      const std::vector<std::string>& schema,
+      const fastdeploy::RuntimeOption& custom_option =
+          fastdeploy::RuntimeOption(),
+      const fastdeploy::Frontend& model_format = fastdeploy::Frontend::PADDLE);
+  // Same as above, with the schema given as a map of name -> child nodes.
+  UIEModel(
+      const std::string& model_file, const std::string& params_file,
+      const std::string& vocab_file, float position_prob, size_t max_length,
+      const std::unordered_map<std::string, std::vector<SchemaNode>>& schema,
+      const fastdeploy::RuntimeOption& custom_option =
+          fastdeploy::RuntimeOption(),
+      const fastdeploy::Frontend& model_format = fastdeploy::Frontend::PADDLE);
+  // Presumably replaces schema_ with one built from the argument; the
+  // overloads mirror the two Schema forms accepted by the constructors.
+  void SetSchema(const std::vector<std::string>& schema);
+  void SetSchema(
+      const std::unordered_map<std::string, std::vector<SchemaNode>>& schema);
+
+  // Runs one batched inference: input_texts[i] is paired with prompts[i] and
+  // the spans found for it are written to `results`.
+  void PredictUIEInput(const std::vector<std::string>& input_texts,
+                       const std::vector<std::string>& prompts,
+                       std::vector<std::vector<UIEResult>>* results);
+  // Walks the schema tree for `texts`; `results` is resized to texts.size()
+  // and entry i maps a schema name to the spans found in texts[i].
+  void Predict(
+      const std::vector<std::string>& texts,
+      std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
+          results);
+
+ private:
+  // (index, probability) pair as collected by GetCandidateIdx.
+  using IDX_PROB = std::pair<int64_t, float>;
+  // Ordering used by SPAN_SET; the comparison itself is defined out of line.
+  struct IdxProbCmp {
+    bool operator()(const std::pair<IDX_PROB, IDX_PROB>& lhs,
+                    const std::pair<IDX_PROB, IDX_PROB>& rhs) const;
+  };
+  // Set of candidate pairs filled by GetSpan from the start and end
+  // candidates (presumably ordered as (start, end) - TODO confirm).
+  using SPAN_SET = std::set<std::pair<IDX_PROB, IDX_PROB>, IdxProbCmp>;
+  // A span expressed as a tokenizer offset. is_prompt_ presumably marks spans
+  // that fall into the prompt rather than the text - TODO confirm.
+  struct SpanIdx {
+    faster_tokenizer::core::Offset offset_;
+    bool is_prompt_;
+  };
+  // Splits `texts` according to `max_length`, filling `short_texts` and
+  // `input_mapping`. PredictUIEInput keys input_mapping by the index into
+  // `texts` and expects one mapped entry per short text derived from it.
+  void AutoSplitter(
+      const std::vector<std::string>& texts, size_t max_length,
+      std::vector<std::string>* short_texts,
+      std::unordered_map<size_t, std::vector<size_t>>* input_mapping);
+  // Merges the per-short-text entries of `results` back according to
+  // `input_mapping`; `results` is overwritten with the joined list.
+  void AutoJoiner(
+      const std::vector<std::string>& short_texts,
+      const std::unordered_map<size_t, std::vector<size_t>>& input_mapping,
+      std::vector<std::vector<UIEResult>>* results);
+  // Get idx of the last dimension in probability arrays, which is greater than
+  // a limitation.
+  // PredictUIEInput passes position_prob_ as `threshold`.
+  void GetCandidateIdx(const float* probs, int64_t batch_size, int64_t seq_len,
+                       std::vector<std::vector<IDX_PROB>>* candidate_idx_prob,
+                       float threshold = 0.5) const;
+  // Combines the start and end candidates of one sequence into `span_set`.
+  void GetSpan(const std::vector<IDX_PROB>& start_idx_prob,
+               const std::vector<IDX_PROB>& end_idx_prob,
+               SPAN_SET* span_set) const;
+  // Translates the entries of `span_set` through `offset_mapping` (the token
+  // offsets of one encoding) into `span_idxs` and `probs`.
+  void GetSpanIdxAndProbs(
+      const SPAN_SET& span_set,
+      const std::vector<faster_tokenizer::core::Offset>& offset_mapping,
+      std::vector<SpanIdx>* span_idxs, std::vector<float>* probs) const;
+  // Builds the UIEResult lists for each text from its spans and
+  // probabilities.
+  void ConvertSpanToUIEResult(
+      const std::vector<std::string>& texts,
+      const std::vector<std::string>& prompts,
+      const std::vector<std::vector<SpanIdx>>& span_idxs,
+      const std::vector<std::vector<float>>& probs,
+      std::vector<std::vector<UIEResult>>* results) const;
+  fastdeploy::RuntimeOption runtime_option_;
+  fastdeploy::Runtime runtime_;
+  std::unique_ptr<Schema> schema_;
+  // Length budget; PredictUIEInput splits texts at
+  // max_length_ - 3 - (longest prompt length).
+  size_t max_length_;
+  // Probability threshold handed to GetCandidateIdx.
+  float position_prob_;
+  faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer_;
+};
--- a/external/faster_tokenizer.cmake
+++ b/external/faster_tokenizer.cmake
@@ -23,6 +23,7 @@ set(FASTERTOKENIZER_INSTALL_DIR ${THIRD_PARTY_PATH}/install/faster_tokenizer)
 set(FASTERTOKENIZER_INC_DIR
    "${FASTERTOKENIZER_INSTALL_DIR}/include"
    "${FASTERTOKENIZER_INSTALL_DIR}/third_party/include"
+    "${FASTERTOKENIZER_INSTALL_DIR}/third_party/include/faster_tokenizer" # TODO (zhoushunjie): Will remove it later. NOTE(review): FastDeploy.cmake.in appends .../faster_tokenizer/include/faster_tokenizer (no third_party/) - confirm which path is intended.
    CACHE PATH "faster_tokenizer include directory." FORCE)
 set(FASTERTOKENIZER_LIB_DIR
    "${FASTERTOKENIZER_INSTALL_DIR}/lib/"