From 60f8f0e11b4ea94d3ca0e44373511a06f61c2d41 Mon Sep 17 00:00:00 2001
From: zhoushunjie
Date: Tue, 27 Dec 2022 15:22:09 +0000
Subject: [PATCH] Add batch size argument for uie

---
 examples/text/uie/cpp/infer.cc         | 28 +++++------
 examples/text/uie/python/infer.py      |  4 +-
 fastdeploy/text/uie/model.cc           | 68 +++++++++++++++++---------
 fastdeploy/text/uie/model.h            | 31 ++++++------
 fastdeploy/text/uie/uie_pybind.cc      | 51 ++++++++++---------
 python/fastdeploy/text/uie/__init__.py |  8 +--
 6 files changed, 114 insertions(+), 76 deletions(-)

diff --git a/examples/text/uie/cpp/infer.cc b/examples/text/uie/cpp/infer.cc
index 7f68bc8c2..4ef66adf4 100644
--- a/examples/text/uie/cpp/infer.cc
+++ b/examples/text/uie/cpp/infer.cc
@@ -49,17 +49,17 @@ int main(int argc, char* argv[]) {
     backend_type = std::atoi(argv[3]);
   }
   switch (backend_type) {
-    case 0:
-      option.UsePaddleInferBackend();
-      break;
-    case 1:
-      option.UseOrtBackend();
-      break;
-    case 2:
-      option.UseOpenVINOBackend();
-      break;
-    default:
-      break;
+  case 0:
+    option.UsePaddleInferBackend();
+    break;
+  case 1:
+    option.UseOrtBackend();
+    break;
+  case 2:
+    option.UseOpenVINOBackend();
+    break;
+  default:
+    break;
   }
   std::string model_dir(argv[1]);
   std::string model_path = model_dir + sep + "inference.pdmodel";
@@ -68,9 +68,9 @@ int main(int argc, char* argv[]) {
   using fastdeploy::text::SchemaNode;
   using fastdeploy::text::UIEResult;
 
-  auto predictor =
-      fastdeploy::text::UIEModel(model_path, param_path, vocab_path, 0.5, 128,
-                                 {"时间", "选手", "赛事名称"}, option);
+  auto predictor = fastdeploy::text::UIEModel(
+      model_path, param_path, vocab_path, 0.5, 128,
+      {"时间", "选手", "赛事名称"}, /* batch_size = */ 1, option);
   std::cout << "After init predictor" << std::endl;
   std::vector<std::unordered_map<std::string, std::vector<UIEResult>>> results;
   // Named Entity Recognition
diff --git a/examples/text/uie/python/infer.py b/examples/text/uie/python/infer.py
index d06e10cbb..10442f95e 100644
--- a/examples/text/uie/python/infer.py
+++ b/examples/text/uie/python/infer.py
@@ -129,6 +129,7 @@ if __name__ == "__main__":
         position_prob=0.5,
         max_length=args.max_length,
         schema=schema,
+        batch_size=args.batch_size,
         runtime_option=runtime_option,
         schema_language=SchemaLanguage.ZH)
 
@@ -181,7 +182,8 @@ if __name__ == "__main__":
     schema = {"评价维度": ["观点词", "情感倾向[正向,负向]"]}
     print(f"The extraction schema: {schema}")
     uie.set_schema(schema)
-    results = uie.predict(["店面干净,很清静"], return_dict=True)
+    results = uie.predict(
+        ["店面干净,很清静,服务员服务热情,性价比很高,发现收银台有排队"], return_dict=True)
     pprint(results)
     print()
 
diff --git a/fastdeploy/text/uie/model.cc b/fastdeploy/text/uie/model.cc
index cd90db4f1..3ae7631fb 100644
--- a/fastdeploy/text/uie/model.cc
+++ b/fastdeploy/text/uie/model.cc
@@ -13,6 +13,8 @@
 // limitations under the License.
#include "fastdeploy/text/uie/model.h" +#include "fastdeploy/function/concat.h" +#include "fastdeploy/function/split.h" #include #include #include @@ -42,8 +44,7 @@ static std::string DBC2SBC(const std::string& content) { result.append(content.data() + content_utf8_len, content_char_width); } else { char dst_char[5] = {0}; - uint32_t utf8_uint32 = - fast_tokenizer::utils::UnicodeToUTF8(content_char); + uint32_t utf8_uint32 = fast_tokenizer::utils::UnicodeToUTF8(content_char); uint32_t utf8_char_count = fast_tokenizer::utils::UnicodeToUTF8Char(utf8_uint32, dst_char); result.append(dst_char, utf8_char_count); @@ -164,12 +165,12 @@ UIEModel::UIEModel(const std::string& model_file, const std::string& params_file, const std::string& vocab_file, float position_prob, size_t max_length, const std::vector& schema, + int batch_size, const fastdeploy::RuntimeOption& custom_option, const fastdeploy::ModelFormat& model_format, SchemaLanguage schema_language) - : max_length_(max_length), - position_prob_(position_prob), - schema_language_(schema_language), + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), tokenizer_(vocab_file) { runtime_option = custom_option; runtime_option.model_format = model_format; @@ -185,12 +186,12 @@ UIEModel::UIEModel(const std::string& model_file, const std::string& params_file, const std::string& vocab_file, float position_prob, size_t max_length, const std::vector& schema, + int batch_size, const fastdeploy::RuntimeOption& custom_option, const fastdeploy::ModelFormat& model_format, SchemaLanguage schema_language) - : max_length_(max_length), - position_prob_(position_prob), - schema_language_(schema_language), + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), tokenizer_(vocab_file) { runtime_option = custom_option; runtime_option.model_format = model_format; @@ -205,13 +206,12 @@ UIEModel::UIEModel(const std::string& model_file, UIEModel::UIEModel(const std::string& model_file, const std::string& params_file, const std::string& vocab_file, float position_prob, - size_t max_length, const SchemaNode& schema, + size_t max_length, const SchemaNode& schema, int batch_size, const fastdeploy::RuntimeOption& custom_option, const fastdeploy::ModelFormat& model_format, SchemaLanguage schema_language) - : max_length_(max_length), - position_prob_(position_prob), - schema_language_(schema_language), + : max_length_(max_length), position_prob_(position_prob), + schema_language_(schema_language), batch_size_(batch_size), tokenizer_(vocab_file) { runtime_option = custom_option; runtime_option.model_format = model_format; @@ -230,7 +230,8 @@ bool UIEModel::Initialize() { void UIEModel::SetValidBackend() { // TODO(zhoushunjie): Add lite backend in future - valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER, Backend::LITE}; + valid_cpu_backends = {Backend::ORT, Backend::OPENVINO, Backend::PDINFER, + Backend::LITE}; valid_gpu_backends = {Backend::ORT, Backend::PDINFER, Backend::TRT}; } @@ -253,8 +254,8 @@ void UIEModel::AutoSplitter(const std::vector& texts, size_t cnt_org = 0; size_t cnt_short = 0; for (auto& text : texts) { - auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8( - text.c_str(), text.length()); + auto text_len = fast_tokenizer::utils::GetUnicodeLenFromUTF8(text.c_str(), + text.length()); if (text_len <= max_length) { short_texts->push_back(text); if (input_mapping->size() <= cnt_org) { @@ -264,8 +265,7 @@ 
       }
       cnt_short += 1;
     } else {
-      fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(
-          text);
+      fast_tokenizer::pretokenizers::CharToBytesOffsetConverter converter(text);
       for (size_t start = 0; start < text_len; start += max_length) {
         size_t end = start + max_length;
         if (end > text_len) {
@@ -742,13 +742,37 @@ void UIEModel::Predict(
   std::vector<fast_tokenizer::core::Encoding> encodings;
   Preprocess(short_input_texts, short_prompts, &encodings, &inputs);
 
-  // 3. Infer
-  std::vector<FDTensor> outputs(NumOutputsOfRuntime());
-  if (!Infer(inputs, &outputs)) {
-    FDERROR << "Failed to inference while using model:" << ModelName()
-            << "." << std::endl;
+  std::vector<std::vector<FDTensor>> inputs_vec(NumInputsOfRuntime());
+  int encoding_size = encodings.size();
+  std::vector<int> num_or_sections;
+  for (int i = 0; i < encoding_size; i += batch_size_) {
+    int actual_batch_size = (std::min)(batch_size_, encoding_size - i);
+    num_or_sections.push_back(actual_batch_size);
+  }
+  for (int i = 0; i < NumInputsOfRuntime(); ++i) {
+    function::Split(inputs[i], num_or_sections, &inputs_vec[i]);
   }
 
+  // 3. Infer
+  std::vector<FDTensor> outputs(NumOutputsOfRuntime());
+  std::vector<FDTensor> outputs0, outputs1;
+
+  for (int i = 0; i < inputs_vec[0].size(); ++i) {
+    std::vector<FDTensor> curr_inputs(NumInputsOfRuntime());
+    std::vector<FDTensor> curr_outputs(NumOutputsOfRuntime());
+    for (int j = 0; j < NumInputsOfRuntime(); ++j) {
+      curr_inputs[j] = std::move(inputs_vec[j][i]);
+      curr_inputs[j].name = inputs[j].name;
+    }
+    if (!Infer(curr_inputs, &curr_outputs)) {
+      FDERROR << "Failed to inference while using model:" << ModelName()
+              << "." << std::endl;
+    }
+    outputs0.push_back(curr_outputs[0]);
+    outputs1.push_back(curr_outputs[1]);
+  }
+  function::Concat(outputs0, &outputs[0]);
+  function::Concat(outputs1, &outputs[1]);
   // 4. Convert FDTensor to UIEResult
   Postprocess(outputs, encodings, short_input_texts, short_prompts,
               input_mapping_with_short_text, &results_list);
diff --git a/fastdeploy/text/uie/model.h b/fastdeploy/text/uie/model.h
index c813369d3..5e2244c08 100644
--- a/fastdeploy/text/uie/model.h
+++ b/fastdeploy/text/uie/model.h
@@ -14,14 +14,14 @@
 
 #pragma once
 
+#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/utils/unique_ptr.h"
 #include
 #include
 #include
 #include
 #include
-#include "fastdeploy/fastdeploy_model.h"
-#include "fastdeploy/utils/unique_ptr.h"
-#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
 
 using namespace paddlenlp;
 
@@ -99,6 +99,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
   UIEModel(const std::string& model_file, const std::string& params_file,
            const std::string& vocab_file, float position_prob,
            size_t max_length, const std::vector<std::string>& schema,
+           int batch_size,
            const fastdeploy::RuntimeOption& custom_option =
                fastdeploy::RuntimeOption(),
            const fastdeploy::ModelFormat& model_format =
@@ -106,7 +107,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
            SchemaLanguage schema_language = SchemaLanguage::ZH);
   UIEModel(const std::string& model_file, const std::string& params_file,
            const std::string& vocab_file, float position_prob,
-           size_t max_length, const SchemaNode& schema,
+           size_t max_length, const SchemaNode& schema, int batch_size,
            const fastdeploy::RuntimeOption& custom_option =
                fastdeploy::RuntimeOption(),
            const fastdeploy::ModelFormat& model_format =
@@ -115,6 +116,7 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
   UIEModel(const std::string& model_file, const std::string& params_file,
            const std::string& vocab_file, float position_prob,
            size_t max_length, const std::vector<SchemaNode>& schema,
+           int batch_size,
           const fastdeploy::RuntimeOption& custom_option =
               fastdeploy::RuntimeOption(),
           const fastdeploy::ModelFormat& model_format =
@@ -154,10 +156,10 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
       std::vector<std::vector<
           std::unordered_map<std::string, std::vector<UIEResult>>>>* results,
       std::vector<std::vector<UIEResult*>>* new_relations);
-  void Predict(
-      const std::vector<std::string>& texts,
-      std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
-          results);
+  void
+  Predict(const std::vector<std::string>& texts,
+          std::vector<std::unordered_map<std::string, std::vector<UIEResult>>>*
+              results);
 
  protected:
   using IDX_PROB = std::pair<int64_t, float>;
@@ -190,15 +192,16 @@ struct FASTDEPLOY_DECL UIEModel : public FastDeployModel {
       const SPAN_SET& span_set,
       const std::vector<fast_tokenizer::core::Offset>& offset_mapping,
       std::vector<SpanIdx>* span_idxs, std::vector<float>* probs) const;
-  void ConvertSpanToUIEResult(
-      const std::vector<std::string>& texts,
-      const std::vector<std::string>& prompts,
-      const std::vector<std::vector<SpanIdx>>& span_idxs,
-      const std::vector<std::vector<float>>& probs,
-      std::vector<std::vector<UIEResult>>* results) const;
+  void
+  ConvertSpanToUIEResult(const std::vector<std::string>& texts,
+                         const std::vector<std::string>& prompts,
+                         const std::vector<std::vector<SpanIdx>>& span_idxs,
+                         const std::vector<std::vector<float>>& probs,
+                         std::vector<std::vector<UIEResult>>* results) const;
   std::unique_ptr<Schema> schema_;
   size_t max_length_;
   float position_prob_;
+  int batch_size_;
   SchemaLanguage schema_language_;
   fast_tokenizer::tokenizers_impl::ErnieFastTokenizer tokenizer_;
 };
diff --git a/fastdeploy/text/uie/uie_pybind.cc b/fastdeploy/text/uie/uie_pybind.cc
index 146dcc2c6..ad7374229 100644
--- a/fastdeploy/text/uie/uie_pybind.cc
+++ b/fastdeploy/text/uie/uie_pybind.cc
@@ -35,24 +35,29 @@ void BindUIE(pybind11::module& m) {
   py::class_<text::UIEModel, FastDeployModel>(m, "UIEModel")
       .def(py::init<std::string, std::string, std::string, float, size_t,
-                    std::vector<std::string>, RuntimeOption, ModelFormat,
-                    text::SchemaLanguage>(),
+                    std::vector<std::string>, int, RuntimeOption, ModelFormat,
+                    text::SchemaLanguage>(),
           py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"),
py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), py::arg("custom_option") = fastdeploy::RuntimeOption(), py::arg("model_format") = fastdeploy::ModelFormat::PADDLE, py::arg("schema_language") = text::SchemaLanguage::ZH) - .def( - py::init, RuntimeOption, ModelFormat, text::SchemaLanguage>(), - py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), - py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), - py::arg("custom_option") = fastdeploy::RuntimeOption(), - py::arg("model_format") = fastdeploy::ModelFormat::PADDLE, - py::arg("schema_language") = text::SchemaLanguage::ZH) .def(py::init(), + std::vector, int, RuntimeOption, + ModelFormat, text::SchemaLanguage>(), py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), + py::arg("custom_option") = fastdeploy::RuntimeOption(), + py::arg("model_format") = fastdeploy::ModelFormat::PADDLE, + py::arg("schema_language") = text::SchemaLanguage::ZH) + .def(py::init(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("batch_size"), py::arg("custom_option") = fastdeploy::RuntimeOption(), py::arg("model_format") = fastdeploy::ModelFormat::PADDLE, py::arg("schema_language") = text::SchemaLanguage::ZH) @@ -60,23 +65,25 @@ void BindUIE(pybind11::module& m) { static_cast&)>(&text::UIEModel::SetSchema), py::arg("schema")) - .def("set_schema", static_cast&)>( - &text::UIEModel::SetSchema), + .def("set_schema", + static_cast&)>( + &text::UIEModel::SetSchema), py::arg("schema")) .def("set_schema", static_cast( &text::UIEModel::SetSchema), py::arg("schema")) - .def("predict", - [](text::UIEModel& self, const std::vector& texts) { - std::vector< - std::unordered_map>> - results; - self.Predict(texts, &results); - return results; - }, - py::arg("text")); + .def( + "predict", + [](text::UIEModel& self, const std::vector& texts) { + std::vector< + std::unordered_map>> + results; + self.Predict(texts, &results); + return results; + }, + py::arg("text")); } } // namespace fastdeploy diff --git a/python/fastdeploy/text/uie/__init__.py b/python/fastdeploy/text/uie/__init__.py index 00ed67a90..295cea34a 100644 --- a/python/fastdeploy/text/uie/__init__.py +++ b/python/fastdeploy/text/uie/__init__.py @@ -50,6 +50,7 @@ class UIEModel(FastDeployModel): position_prob=0.5, max_length=128, schema=[], + batch_size=64, runtime_option=RuntimeOption(), model_format=ModelFormat.PADDLE, schema_language=SchemaLanguage.ZH): @@ -63,9 +64,10 @@ class UIEModel(FastDeployModel): else: assert "The type of schema should be list or dict." schema_language = C.text.SchemaLanguage(schema_language) - self._model = C.text.UIEModel( - model_file, params_file, vocab_file, position_prob, max_length, - schema, runtime_option._option, model_format, schema_language) + self._model = C.text.UIEModel(model_file, params_file, vocab_file, + position_prob, max_length, schema, + batch_size, runtime_option._option, + model_format, schema_language) assert self.initialized, "UIEModel initialize failed." def set_schema(self, schema):