diff --git a/csrc/fastdeploy/pybind/main.cc.in b/csrc/fastdeploy/pybind/main.cc.in index 8280fdebf..5aaac049c 100644 --- a/csrc/fastdeploy/pybind/main.cc.in +++ b/csrc/fastdeploy/pybind/main.cc.in @@ -19,6 +19,7 @@ namespace fastdeploy { void BindRuntime(pybind11::module&); void BindFDModel(pybind11::module&); void BindVision(pybind11::module&); +void BindText(pybind11::module&); pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) { pybind11::dtype dt; @@ -143,6 +144,11 @@ PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) { m.def_submodule("vision", "Vision module of FastDeploy."); BindVision(vision_module); #endif +#ifdef ENABLE_TEXT + auto text_module = + m.def_submodule("text", "Text module of FastDeploy."); + BindText(text_module); +#endif } } // namespace fastdeploy diff --git a/csrc/fastdeploy/pybind/main.h b/csrc/fastdeploy/pybind/main.h index 09c42f876..6c19edb99 100644 --- a/csrc/fastdeploy/pybind/main.h +++ b/csrc/fastdeploy/pybind/main.h @@ -26,10 +26,15 @@ #include "fastdeploy/vision.h" #endif +#ifdef ENABLE_TEXT +#include "fastdeploy/text.h" +#endif + namespace fastdeploy { void BindBackend(pybind11::module&); void BindVision(pybind11::module&); +void BindText(pybind11::module& m); pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype); diff --git a/csrc/fastdeploy/text/text_pybind.cc b/csrc/fastdeploy/text/text_pybind.cc index 564892f16..55696e236 100644 --- a/csrc/fastdeploy/text/text_pybind.cc +++ b/csrc/fastdeploy/text/text_pybind.cc @@ -10,4 +10,25 @@ // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and -// limitations under the License. \ No newline at end of file +// limitations under the License. + +#include "fastdeploy/pybind/main.h" + +namespace fastdeploy { + +void BindUIE(pybind11::module& m); + +void BindText(pybind11::module& m) { + pybind11::class_(m, "UIEResult") + .def(pybind11::init()) + .def_readwrite("start", &text::UIEResult::start_) + .def_readwrite("end", &text::UIEResult::end_) + .def_readwrite("probability_", &text::UIEResult::probability_) + .def_readwrite("text", &text::UIEResult::text_) + .def_readwrite("relation", &text::UIEResult::relation_) + .def("__repr__", &text::UIEResult::Str) + .def("__str__", &text::UIEResult::Str); + BindUIE(m); +} + +} // namespace fastdeploy diff --git a/csrc/fastdeploy/text/uie/model.cc b/csrc/fastdeploy/text/uie/model.cc index 6dbeac5b0..682380871 100644 --- a/csrc/fastdeploy/text/uie/model.cc +++ b/csrc/fastdeploy/text/uie/model.cc @@ -124,6 +124,12 @@ std::ostream& operator<<( return os; } +std::string UIEResult::Str() const { + std::ostringstream oss; + oss << *this; + return oss.str(); +} + void Schema::CreateRoot(const std::string& name) { root_ = fastdeploy::utils::make_unique(name); } @@ -141,15 +147,19 @@ Schema::Schema(const std::vector& schema_list, } } -Schema::Schema( - const std::unordered_map>& schema_map, - const std::string& name) { +Schema::Schema(const std::vector& schema_list, + const std::string& name) { CreateRoot(name); - for (auto& schema_item : schema_map) { - root_->AddChild(schema_item.first, schema_item.second); + for (const auto& schema : schema_list) { + root_->AddChild(schema); } } +Schema::Schema(const SchemaNode& schema, const std::string& name) { + CreateRoot(name); + root_->AddChild(schema); +} + UIEModel::UIEModel(const std::string& model_file, const std::string& params_file, const std::string& vocab_file, float position_prob, @@ -169,12 +179,31 @@ UIEModel::UIEModel(const std::string& model_file, faster_tokenizer::core::TruncStrategy::LONGEST_FIRST); } -UIEModel::UIEModel( - const std::string& model_file, const std::string& params_file, - const std::string& vocab_file, float position_prob, size_t max_length, - const std::unordered_map>& schema, - const fastdeploy::RuntimeOption& custom_option, - const fastdeploy::Frontend& model_format) +UIEModel::UIEModel(const std::string& model_file, + const std::string& params_file, + const std::string& vocab_file, float position_prob, + size_t max_length, const std::vector& schema, + const fastdeploy::RuntimeOption& custom_option, + const fastdeploy::Frontend& model_format) + : max_length_(max_length), + position_prob_(position_prob), + tokenizer_(vocab_file) { + runtime_option_ = custom_option; + runtime_option_.model_format = model_format; + runtime_option_.SetModelPath(model_file, params_file); + runtime_.Init(runtime_option_); + SetSchema(schema); + tokenizer_.EnableTruncMethod( + max_length, 0, faster_tokenizer::core::Direction::RIGHT, + faster_tokenizer::core::TruncStrategy::LONGEST_FIRST); +} + +UIEModel::UIEModel(const std::string& model_file, + const std::string& params_file, + const std::string& vocab_file, float position_prob, + size_t max_length, const SchemaNode& schema, + const fastdeploy::RuntimeOption& custom_option, + const fastdeploy::Frontend& model_format) : max_length_(max_length), position_prob_(position_prob), tokenizer_(vocab_file) { @@ -192,8 +221,11 @@ void UIEModel::SetSchema(const std::vector& schema) { schema_ = fastdeploy::utils::make_unique(schema); } -void UIEModel::SetSchema( - const std::unordered_map>& schema) { +void UIEModel::SetSchema(const std::vector& schema) { + schema_ = fastdeploy::utils::make_unique(schema); +} + +void UIEModel::SetSchema(const SchemaNode& schema) { schema_ = fastdeploy::utils::make_unique(schema); } diff --git a/csrc/fastdeploy/text/uie/model.h b/csrc/fastdeploy/text/uie/model.h index 8c48d487a..5cbc9f43d 100644 --- a/csrc/fastdeploy/text/uie/model.h +++ b/csrc/fastdeploy/text/uie/model.h @@ -37,6 +37,7 @@ struct FASTDEPLOY_DECL UIEResult { UIEResult() = default; UIEResult(size_t start, size_t end, double probability, std::string text) : start_(start), end_(end), probability_(probability), text_(text) {} + std::string Str() const; }; FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& os, @@ -51,7 +52,8 @@ struct FASTDEPLOY_DECL SchemaNode { std::vector> prefix_; std::vector> relations_; std::vector children_; - + SchemaNode() = default; + SchemaNode(const SchemaNode&) = default; explicit SchemaNode(const std::string& name, const std::vector& children = {}) : name_(name), children_(children) {} @@ -77,9 +79,9 @@ struct Schema { explicit Schema(const std::string& schema, const std::string& name = "root"); explicit Schema(const std::vector& schema_list, const std::string& name = "root"); - explicit Schema(const std::unordered_map>& schema_map, + explicit Schema(const std::vector& schema_list, const std::string& name = "root"); + explicit Schema(const SchemaNode& schema, const std::string& name = "root"); private: void CreateRoot(const std::string& name); @@ -99,13 +101,19 @@ struct FASTDEPLOY_DECL UIEModel { UIEModel( const std::string& model_file, const std::string& params_file, const std::string& vocab_file, float position_prob, size_t max_length, - const std::unordered_map>& schema, + const SchemaNode& schema, const fastdeploy::RuntimeOption& custom_option = + fastdeploy::RuntimeOption(), + const fastdeploy::Frontend& model_format = fastdeploy::Frontend::PADDLE); + UIEModel( + const std::string& model_file, const std::string& params_file, + const std::string& vocab_file, float position_prob, size_t max_length, + const std::vector& schema, const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption(), const fastdeploy::Frontend& model_format = fastdeploy::Frontend::PADDLE); void SetSchema(const std::vector& schema); - void SetSchema( - const std::unordered_map>& schema); + void SetSchema(const std::vector& schema); + void SetSchema(const SchemaNode& schema); void ConstructTextsAndPrompts( const std::vector& raw_texts, const std::string& node_name, diff --git a/csrc/fastdeploy/text/uie/uie_pybind.cc b/csrc/fastdeploy/text/uie/uie_pybind.cc new file mode 100644 index 000000000..2186f7a13 --- /dev/null +++ b/csrc/fastdeploy/text/uie/uie_pybind.cc @@ -0,0 +1,73 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "fastdeploy/pybind/main.h" + +namespace py = pybind11; + +namespace fastdeploy { + +void BindUIE(pybind11::module& m) { + py::class_(m, "SchemaNode") + .def(py::init<>()) + .def(py::init>(), + py::arg("name"), py::arg("children") = {}) + .def_readwrite("name", &text::SchemaNode::name_) + .def_readwrite("prefix", &text::SchemaNode::prefix_) + .def_readwrite("relations", &text::SchemaNode::relations_) + .def_readwrite("children", &text::SchemaNode::children_); + + py::class_(m, "UIEModel") + .def(py::init, RuntimeOption, Frontend>(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("custom_option") = fastdeploy::RuntimeOption(), + py::arg("model_format") = fastdeploy::Frontend::PADDLE) + .def(py::init, RuntimeOption, Frontend>(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("custom_option") = fastdeploy::RuntimeOption(), + py::arg("model_format") = fastdeploy::Frontend::PADDLE) + .def(py::init(), + py::arg("model_file"), py::arg("params_file"), py::arg("vocab_file"), + py::arg("position_prob"), py::arg("max_length"), py::arg("schema"), + py::arg("custom_option") = fastdeploy::RuntimeOption(), + py::arg("model_format") = fastdeploy::Frontend::PADDLE) + .def("set_schema", + static_cast&)>(&text::UIEModel::SetSchema), + py::arg("schema")) + .def("set_schema", static_cast&)>( + &text::UIEModel::SetSchema), + py::arg("schema")) + .def("set_schema", + static_cast( + &text::UIEModel::SetSchema), + py::arg("schema")) + .def("predict", + [](text::UIEModel& self, const std::vector& texts) { + std::vector< + std::unordered_map>> + results; + self.Predict(texts, &results); + return results; + }, + py::arg("text")); +} + +} // namespace fastdeploy diff --git a/examples/text/uie/cpp/infer.cc b/examples/text/uie/cpp/infer.cc index 64e250ca3..47b186e51 100644 --- a/examples/text/uie/cpp/infer.cc +++ b/examples/text/uie/cpp/infer.cc @@ -81,9 +81,9 @@ int main(int argc, char* argv[]) { results.clear(); // Relation Extraction - predictor.SetSchema({{"竞赛名称", - {SchemaNode("主办方"), SchemaNode("承办方"), - SchemaNode("已举办次数")}}}); + predictor.SetSchema( + {SchemaNode("竞赛名称", {SchemaNode("主办方"), SchemaNode("承办方"), + SchemaNode("已举办次数")})}); predictor.Predict( {"2022语言与智能技术竞赛由中国中文信息学会和中国计算机学会联合主办,百度" "公司、中国中文信息学会评测工作委员会和中国计算机学会自然语言处理专委会" @@ -93,9 +93,9 @@ int main(int argc, char* argv[]) { results.clear(); // Event Extraction - predictor.SetSchema({{"地震触发词", - {SchemaNode("地震强度"), SchemaNode("时间"), - SchemaNode("震中位置"), SchemaNode("震源深度")}}}); + predictor.SetSchema({SchemaNode( + "地震触发词", {SchemaNode("地震强度"), SchemaNode("时间"), + SchemaNode("震中位置"), SchemaNode("震源深度")})}); predictor.Predict( {"中国地震台网正式测定:5月16日06时08分在云南临沧市凤庆县(北纬24." "34度,东经99.98度)发生3.5级地震,震源深度10千米。"}, @@ -104,14 +104,14 @@ int main(int argc, char* argv[]) { results.clear(); // Opinion Extraction - predictor.SetSchema( - {{"评价维度", - // NOTE(zhoushunjie): It's necessary to explicitly use - // std::vector to convert initializer list of SchemaNode whose size is - // two. If not to do so, an ambiguous compliation error will occur in - // mac x64 platform. - std::vector{SchemaNode("观点词"), - SchemaNode("情感倾向[正向,负向]")}}}); + predictor.SetSchema({SchemaNode( + "评价维度", + // NOTE(zhoushunjie): It's necessary to explicitly use + // std::vector to convert initializer list of SchemaNode whose size is + // two. If not to do so, an ambiguous compliation error will occur in + // mac x64 platform. + std::vector{SchemaNode("观点词"), + SchemaNode("情感倾向[正向,负向]")})}); predictor.Predict( {"店面干净,很清静,服务员服务热情,性价比很高,发现收银台有排队"}, &results); @@ -119,16 +119,16 @@ int main(int argc, char* argv[]) { results.clear(); // Sequence classification - predictor.SetSchema({"情感倾向[正向,负向]"}); + predictor.SetSchema(SchemaNode("情感倾向[正向,负向]")); predictor.Predict({"这个产品用起来真的很流畅,我非常喜欢"}, &results); std::cout << results << std::endl; results.clear(); // Cross task extraction - predictor.SetSchema({{"法院", {}}, - {"原告", {SchemaNode("委托代理人")}}, - {"被告", {SchemaNode("委托代理人")}}}); + predictor.SetSchema({SchemaNode("法院", {}), + SchemaNode("原告", {SchemaNode("委托代理人")}), + SchemaNode("被告", {SchemaNode("委托代理人")})}); predictor.Predict({"北京市海淀区人民法院\n民事判决书\n(199x)" "建初字第xxx号\n原告:张三。\n委托代理人李四,北京市 " "A律师事务所律师。\n被告:B公司,法定代表人王五,开发公司" diff --git a/fastdeploy/__init__.py b/fastdeploy/__init__.py index 211b05043..b27c6e5c8 100644 --- a/fastdeploy/__init__.py +++ b/fastdeploy/__init__.py @@ -24,4 +24,5 @@ from .runtime import Runtime, RuntimeOption from .model import FastDeployModel from . import c_lib_wrap as C from . import vision +from . import text from .download import download, download_and_decompress diff --git a/fastdeploy/text/__init__.py b/fastdeploy/text/__init__.py index 7d175762c..11fb284d0 100644 --- a/fastdeploy/text/__init__.py +++ b/fastdeploy/text/__init__.py @@ -12,3 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. from __future__ import absolute_import + +from . import uie diff --git a/fastdeploy/text/uie/__init__.py b/fastdeploy/text/uie/__init__.py new file mode 100644 index 000000000..08958dd56 --- /dev/null +++ b/fastdeploy/text/uie/__init__.py @@ -0,0 +1,73 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import + +import logging +from ... import Frontend +from ... import RuntimeOption +from ... import c_lib_wrap as C + + +class SchemaNode(object): + def __init__(self, name, children=[]): + schema_node_children = [] + for child in children: + if isinstance(child, str): + schema_node_children += [C.text.SchemaNode(child, [])] + elif isinstance(child, dict): + for key, val in child.item(): + schema_node_child = SchemaNode(key, val) + schema_node_children += [schema_node_child._schema_node] + else: + assert "The type of child of SchemaNode should be str or dict." + self._schema_node = C.text.SchemaNode(name, schema_node_children) + self._schema_node_children = schema_node_children + + +class UIEModel(object): + def __init__(self, + model_file, + params_file, + vocab_file, + position_prob=0.5, + max_length=128, + schema=[], + runtime_option=RuntimeOption(), + model_format=Frontend.PADDLE): + if isinstance(schema, list): + schema = SchemaNode("", schema)._schema_node_children + elif isinstance(schema, dict): + schema_tmp = [] + for key, val in schema.items(): + schema_tmp += [SchemaNode(key, val)._schema_node] + schema = schema_tmp + else: + assert "The type of schema should be list or dict." + self._model = C.text.UIEModel(model_file, params_file, vocab_file, + position_prob, max_length, schema, + runtime_option._option, model_format) + + def set_schema(self, schema): + if isinstance(schema, list): + schema = SchemaNode("", schema)._schema_node_children + elif isinstance(schema, dict): + schema_tmp = [] + for key, val in schema.items(): + schema_tmp += [SchemaNode(key, val)._schema_node] + schema = schema_tmp + self._model.set_schema(schema) + + def predict(self, texts): + return self._model.predict(texts) diff --git a/setup.py b/setup.py index 65732c751..4547f0445 100644 --- a/setup.py +++ b/setup.py @@ -53,6 +53,7 @@ setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND", setup_configs["ENABLE_PADDLE_BACKEND"] = os.getenv("ENABLE_PADDLE_BACKEND", "OFF") setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "ON") +setup_configs["ENABLE_TEXT"] = os.getenv("ENABLE_TEXT", "ON") setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF") setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF") setup_configs["TRT_DIRECTORY"] = os.getenv("TRT_DIRECTORY", "UNDEFINED") @@ -325,7 +326,8 @@ ext_modules = [ # no need to do fancy stuff so far if PACKAGE_NAME != "fastdeploy": - packages = setuptools.find_packages(exclude=['fastdeploy*', 'build_scripts']) + packages = setuptools.find_packages( + exclude=['fastdeploy*', 'build_scripts']) else: packages = setuptools.find_packages(exclude=['build_scripts']) @@ -344,10 +346,16 @@ if sys.version_info[0] == 3: package_data = {PACKAGE_NAME: ["LICENSE", "ThirdPartyNotices.txt"]} if sys.argv[1] == "install" or sys.argv[1] == "bdist_wheel": - shutil.copy(os.path.join(TOP_DIR, "ThirdPartyNotices.txt"), os.path.join(TOP_DIR, PACKAGE_NAME)) - shutil.copy(os.path.join(TOP_DIR, "LICENSE"), os.path.join(TOP_DIR, PACKAGE_NAME)) - if not os.path.exists(os.path.join(TOP_DIR, "fastdeploy", "libs", "third_libs")): - print("Didn't detect path: fastdeploy/libs/third_libs exist, please execute `python setup.py build` first") + shutil.copy( + os.path.join(TOP_DIR, "ThirdPartyNotices.txt"), + os.path.join(TOP_DIR, PACKAGE_NAME)) + shutil.copy( + os.path.join(TOP_DIR, "LICENSE"), os.path.join(TOP_DIR, PACKAGE_NAME)) + if not os.path.exists( + os.path.join(TOP_DIR, "fastdeploy", "libs", "third_libs")): + print( + "Didn't detect path: fastdeploy/libs/third_libs exist, please execute `python setup.py build` first" + ) sys.exit(0) sys.path.append(os.path.split(os.path.abspath(__file__))[0]) from build_scripts.process_libraries import process_libraries