Add uie python api (#214)

* add uie pybind

* Add uie result pybind

* Add uie python class

* fix UIEModel pythonargs

* Add schema node pybind

* remove uie print

* Fix cpp build ci
This commit is contained in:
Jack Zhou
2022-09-13 19:03:06 +08:00
committed by GitHub
parent 82580ac11e
commit 54bea3160d
11 changed files with 272 additions and 43 deletions

View File

@@ -19,6 +19,7 @@ namespace fastdeploy {
void BindRuntime(pybind11::module&);
void BindFDModel(pybind11::module&);
void BindVision(pybind11::module&);
void BindText(pybind11::module&);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype) {
pybind11::dtype dt;
@@ -143,6 +144,11 @@ PYBIND11_MODULE(@PY_LIBRARY_NAME@, m) {
m.def_submodule("vision", "Vision module of FastDeploy.");
BindVision(vision_module);
#endif
#ifdef ENABLE_TEXT
auto text_module =
m.def_submodule("text", "Text module of FastDeploy.");
BindText(text_module);
#endif
}
} // namespace fastdeploy

View File

@@ -26,10 +26,15 @@
#include "fastdeploy/vision.h"
#endif
#ifdef ENABLE_TEXT
#include "fastdeploy/text.h"
#endif
namespace fastdeploy {
void BindBackend(pybind11::module&);
void BindVision(pybind11::module&);
void BindText(pybind11::module& m);
pybind11::dtype FDDataTypeToNumpyDataType(const FDDataType& fd_dtype);

View File

@@ -11,3 +11,24 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
namespace fastdeploy {

// Registers the UIE-specific bindings on the given module. Only declared
// here; the definition lives in another translation unit.
void BindUIE(pybind11::module& m);

// Registers the text-module Python bindings on `m`.
//
// Exposes text::UIEResult as the Python class "UIEResult" with read/write
// attributes for its members, uses UIEResult::Str() for both `repr()` and
// `str()`, and then delegates to BindUIE(m) for the remaining bindings.
void BindText(pybind11::module& m) {
  pybind11::class_<text::UIEResult>(m, "UIEResult")
      .def(pybind11::init())
      .def_readwrite("start", &text::UIEResult::start_)
      .def_readwrite("end", &text::UIEResult::end_)
      // Attribute name without the C++ member suffix, consistent with the
      // sibling attributes ("start", "end", "text", "relation").
      .def_readwrite("probability", &text::UIEResult::probability_)
      // Original spelling, kept as an alias so that existing Python code
      // reading `result.probability_` keeps working.
      .def_readwrite("probability_", &text::UIEResult::probability_)
      .def_readwrite("text", &text::UIEResult::text_)
      .def_readwrite("relation", &text::UIEResult::relation_)
      .def("__repr__", &text::UIEResult::Str)
      .def("__str__", &text::UIEResult::Str);

  BindUIE(m);
}

}  // namespace fastdeploy

View File

@@ -124,6 +124,12 @@ std::ostream& operator<<(
return os;
}
// Returns the textual form of this result, produced by streaming `*this`
// into an in-memory string stream.
std::string UIEResult::Str() const {
  std::ostringstream buffer;
  buffer << *this;
  return buffer.str();
}
// Replaces `root_` with a freshly allocated SchemaNode named `name`.
void Schema::CreateRoot(const std::string& name) {
  auto fresh_root = fastdeploy::utils::make_unique<SchemaNode>(name);
  root_ = std::move(fresh_root);
}
@@ -141,15 +147,19 @@ Schema::Schema(const std::vector<std::string>& schema_list,
}
}
Schema::Schema(
const std::unordered_map<std::string, std::vector<SchemaNode>>& schema_map,
Schema::Schema(const std::vector<SchemaNode>& schema_list,
const std::string& name) {
CreateRoot(name);
for (auto& schema_item : schema_map) {
root_->AddChild(schema_item.first, schema_item.second);
for (const auto& schema : schema_list) {
root_->AddChild(schema);
}
}
// Builds a schema whose root is named `name` and has `schema` attached as a
// child node.
Schema::Schema(const SchemaNode& schema, const std::string& name) {
  // The root has to exist before anything can be attached to it.
  CreateRoot(name);
  auto& root = *root_;
  root.AddChild(schema);
}
UIEModel::UIEModel(const std::string& model_file,
const std::string& params_file,
const std::string& vocab_file, float position_prob,
@@ -169,10 +179,29 @@ UIEModel::UIEModel(const std::string& model_file,
faster_tokenizer::core::TruncStrategy::LONGEST_FIRST);
}
UIEModel::UIEModel(
const std::string& model_file, const std::string& params_file,
const std::string& vocab_file, float position_prob, size_t max_length,
const std::unordered_map<std::string, std::vector<SchemaNode>>& schema,
// Constructs a UIE model whose schema is given as a list of SchemaNode.
//
// model_file / params_file: passed to RuntimeOption::SetModelPath.
// vocab_file:               used to construct the tokenizer.
// position_prob:            stored in position_prob_.
// max_length:               stored in max_length_ and used as the tokenizer
//                           truncation length.
// schema:                   forwarded to SetSchema().
// custom_option:            copied into runtime_option_ before the model
//                           format and model path are applied on top of it.
// model_format:             overrides runtime_option_.model_format.
UIEModel::UIEModel(const std::string& model_file,
const std::string& params_file,
const std::string& vocab_file, float position_prob,
size_t max_length, const std::vector<SchemaNode>& schema,
const fastdeploy::RuntimeOption& custom_option,
const fastdeploy::Frontend& model_format)
: max_length_(max_length),
position_prob_(position_prob),
tokenizer_(vocab_file) {
// Start from the caller's option, then force the format and paths given
// explicitly to this constructor.
runtime_option_ = custom_option;
runtime_option_.model_format = model_format;
runtime_option_.SetModelPath(model_file, params_file);
// NOTE(review): whatever Init() returns is not inspected here -- confirm
// whether an initialization failure needs to be surfaced to the caller.
runtime_.Init(runtime_option_);
SetSchema(schema);
// Truncate tokenized input to max_length using the RIGHT direction and the
// LONGEST_FIRST strategy.
tokenizer_.EnableTruncMethod(
max_length, 0, faster_tokenizer::core::Direction::RIGHT,
faster_tokenizer::core::TruncStrategy::LONGEST_FIRST);
}
UIEModel::UIEModel(const std::string& model_file,
const std::string& params_file,
const std::string& vocab_file, float position_prob,
size_t max_length, const SchemaNode& schema,
const fastdeploy::RuntimeOption& custom_option,
const fastdeploy::Frontend& model_format)
: max_length_(max_length),
@@ -192,8 +221,11 @@ void UIEModel::SetSchema(const std::vector<std::string>& schema) {
schema_ = fastdeploy::utils::make_unique<Schema>(schema);
}
void UIEModel::SetSchema(
const std::unordered_map<std::string, std::vector<SchemaNode>>& schema) {
// Replaces the current schema with one built from a list of schema nodes.
void UIEModel::SetSchema(const std::vector<SchemaNode>& schema) {
  auto rebuilt = fastdeploy::utils::make_unique<Schema>(schema);
  schema_ = std::move(rebuilt);
}
// Replaces the current schema with one built from a single schema node.
void UIEModel::SetSchema(const SchemaNode& schema) {
  auto rebuilt = fastdeploy::utils::make_unique<Schema>(schema);
  schema_ = std::move(rebuilt);
}

View File

@@ -37,6 +37,7 @@ struct FASTDEPLOY_DECL UIEResult {
UIEResult() = default;
UIEResult(size_t start, size_t end, double probability, std::string text)
: start_(start), end_(end), probability_(probability), text_(text) {}
std::string Str() const;
};
FASTDEPLOY_DECL std::ostream& operator<<(std::ostream& os,
@@ -51,7 +52,8 @@ struct FASTDEPLOY_DECL SchemaNode {
std::vector<std::vector<std::string>> prefix_;
std::vector<std::vector<UIEResult*>> relations_;
std::vector<SchemaNode> children_;
SchemaNode() = default;
SchemaNode(const SchemaNode&) = default;
explicit SchemaNode(const std::string& name,
const std::vector<SchemaNode>& children = {})
: name_(name), children_(children) {}
@@ -77,9 +79,9 @@ struct Schema {
explicit Schema(const std::string& schema, const std::string& name = "root");
explicit Schema(const std::vector<std::string>& schema_list,
const std::string& name = "root");
explicit Schema(const std::unordered_map<std::string,
std::vector<SchemaNode>>& schema_map,
explicit Schema(const std::vector<SchemaNode>& schema_list,
const std::string& name = "root");
explicit Schema(const SchemaNode& schema, const std::string& name = "root");
private:
void CreateRoot(const std::string& name);
@@ -99,13 +101,19 @@ struct FASTDEPLOY_DECL UIEModel {
UIEModel(
const std::string& model_file, const std::string& params_file,
const std::string& vocab_file, float position_prob, size_t max_length,
const std::unordered_map<std::string, std::vector<SchemaNode>>& schema,
const SchemaNode& schema, const fastdeploy::RuntimeOption& custom_option =
fastdeploy::RuntimeOption(),
const fastdeploy::Frontend& model_format = fastdeploy::Frontend::PADDLE);
UIEModel(
const std::string& model_file, const std::string& params_file,
const std::string& vocab_file, float position_prob, size_t max_length,
const std::vector<SchemaNode>& schema,
const fastdeploy::RuntimeOption& custom_option =
fastdeploy::RuntimeOption(),
const fastdeploy::Frontend& model_format = fastdeploy::Frontend::PADDLE);
void SetSchema(const std::vector<std::string>& schema);
void SetSchema(
const std::unordered_map<std::string, std::vector<SchemaNode>>& schema);
void SetSchema(const std::vector<SchemaNode>& schema);
void SetSchema(const SchemaNode& schema);
void ConstructTextsAndPrompts(
const std::vector<std::string>& raw_texts, const std::string& node_name,

View File

@@ -0,0 +1,73 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "fastdeploy/pybind/main.h"
namespace py = pybind11;
namespace fastdeploy {

// Registers the UIE Python bindings on `m`.
//
// Exposes two classes:
//   * "SchemaNode" (text::SchemaNode): default-constructible, or built from a
//     name plus an optional list of child nodes; its members are exposed as
//     the read/write attributes name / prefix / relations / children.
//   * "UIEModel" (text::UIEModel): three constructor overloads that differ
//     only in the type of `schema` (list of strings, list of SchemaNode, or a
//     single SchemaNode), matching `set_schema` overloads, and `predict`.
void BindUIE(pybind11::module& m) {
  py::class_<text::SchemaNode>(m, "SchemaNode")
      .def(py::init<>())
      // The default must carry an explicit type: a bare `= {}` gives
      // py::arg's templated operator= nothing to deduce from, so the
      // "children" keyword would not receive an empty-list default.
      .def(py::init<std::string, std::vector<text::SchemaNode>>(),
           py::arg("name"),
           py::arg("children") = std::vector<text::SchemaNode>{})
      .def_readwrite("name", &text::SchemaNode::name_)
      .def_readwrite("prefix", &text::SchemaNode::prefix_)
      .def_readwrite("relations", &text::SchemaNode::relations_)
      .def_readwrite("children", &text::SchemaNode::children_);

  py::class_<text::UIEModel>(m, "UIEModel")
      // schema given as a list of strings.
      .def(py::init<std::string, std::string, std::string, float, size_t,
                    std::vector<std::string>, RuntimeOption, Frontend>(),
           py::arg("model_file"), py::arg("params_file"),
           py::arg("vocab_file"), py::arg("position_prob"),
           py::arg("max_length"), py::arg("schema"),
           py::arg("custom_option") = fastdeploy::RuntimeOption(),
           py::arg("model_format") = fastdeploy::Frontend::PADDLE)
      // schema given as a list of SchemaNode.
      .def(py::init<std::string, std::string, std::string, float, size_t,
                    std::vector<text::SchemaNode>, RuntimeOption, Frontend>(),
           py::arg("model_file"), py::arg("params_file"),
           py::arg("vocab_file"), py::arg("position_prob"),
           py::arg("max_length"), py::arg("schema"),
           py::arg("custom_option") = fastdeploy::RuntimeOption(),
           py::arg("model_format") = fastdeploy::Frontend::PADDLE)
      // schema given as a single SchemaNode.
      .def(py::init<std::string, std::string, std::string, float, size_t,
                    text::SchemaNode, RuntimeOption, Frontend>(),
           py::arg("model_file"), py::arg("params_file"),
           py::arg("vocab_file"), py::arg("position_prob"),
           py::arg("max_length"), py::arg("schema"),
           py::arg("custom_option") = fastdeploy::RuntimeOption(),
           py::arg("model_format") = fastdeploy::Frontend::PADDLE)
      // SetSchema is overloaded in C++, so each binding selects its overload
      // with an explicit member-function-pointer cast.
      .def("set_schema",
           static_cast<void (text::UIEModel::*)(
               const std::vector<std::string>&)>(&text::UIEModel::SetSchema),
           py::arg("schema"))
      .def("set_schema",
           static_cast<void (text::UIEModel::*)(
               const std::vector<text::SchemaNode>&)>(
               &text::UIEModel::SetSchema),
           py::arg("schema"))
      .def("set_schema",
           static_cast<void (text::UIEModel::*)(const text::SchemaNode&)>(
               &text::UIEModel::SetSchema),
           py::arg("schema"))
      // Predict fills an output parameter in C++; return it by value so the
      // Python caller receives the results directly.
      .def("predict",
           [](text::UIEModel& self, const std::vector<std::string>& texts) {
             std::vector<
                 std::unordered_map<std::string, std::vector<text::UIEResult>>>
                 results;
             self.Predict(texts, &results);
             return results;
           },
           py::arg("text"));
}

}  // namespace fastdeploy

View File

@@ -81,9 +81,9 @@ int main(int argc, char* argv[]) {
results.clear();
// Relation Extraction
predictor.SetSchema({{"竞赛名称",
{SchemaNode("主办方"), SchemaNode("承办方"),
SchemaNode("已举办次数")}}});
predictor.SetSchema(
{SchemaNode("竞赛名称", {SchemaNode("主办方"), SchemaNode("承办方"),
SchemaNode("已举办次数")})});
predictor.Predict(
{"2022语言与智能技术竞赛由中国中文信息学会和中国计算机学会联合主办百度"
"公司、中国中文信息学会评测工作委员会和中国计算机学会自然语言处理专委会"
@@ -93,9 +93,9 @@ int main(int argc, char* argv[]) {
results.clear();
// Event Extraction
predictor.SetSchema({{"地震触发词",
{SchemaNode("地震强度"), SchemaNode("时间"),
SchemaNode("震中位置"), SchemaNode("震源深度")}}});
predictor.SetSchema({SchemaNode(
"地震触发词", {SchemaNode("地震强度"), SchemaNode("时间"),
SchemaNode("震中位置"), SchemaNode("震源深度")})});
predictor.Predict(
{"中国地震台网正式测定5月16日06时08分在云南临沧市凤庆县(北纬24."
"34度东经99.98度)发生3.5级地震震源深度10千米。"},
@@ -104,14 +104,14 @@ int main(int argc, char* argv[]) {
results.clear();
// Opinion Extraction
predictor.SetSchema(
{{"评价维度",
predictor.SetSchema({SchemaNode(
"评价维度",
// NOTE(zhoushunjie): A two-element initializer list of SchemaNode must be
// wrapped in an explicit std::vector<SchemaNode>. Otherwise the call is
// ambiguous and fails to compile on the macOS x64 platform.
std::vector<SchemaNode>{SchemaNode("观点词"),
SchemaNode("情感倾向[正向,负向]")}}});
SchemaNode("情感倾向[正向,负向]")})});
predictor.Predict(
{"店面干净,很清静,服务员服务热情,性价比很高,发现收银台有排队"},
&results);
@@ -119,16 +119,16 @@ int main(int argc, char* argv[]) {
results.clear();
// Sequence classification
predictor.SetSchema({"情感倾向[正向,负向]"});
predictor.SetSchema(SchemaNode("情感倾向[正向,负向]"));
predictor.Predict({"这个产品用起来真的很流畅,我非常喜欢"}, &results);
std::cout << results << std::endl;
results.clear();
// Cross task extraction
predictor.SetSchema({{"法院", {}},
{"原告", {SchemaNode("委托代理人")}},
{"被告", {SchemaNode("委托代理人")}}});
predictor.SetSchema({SchemaNode("法院", {}),
SchemaNode("原告", {SchemaNode("委托代理人")}),
SchemaNode("被告", {SchemaNode("委托代理人")})});
predictor.Predict({"北京市海淀区人民法院\n民事判决书\n(199x)"
"建初字第xxx号\n原告:张三。\n委托代理人李四,北京市 "
"A律师事务所律师。\n被告B公司法定代表人王五开发公司"

View File

@@ -24,4 +24,5 @@ from .runtime import Runtime, RuntimeOption
from .model import FastDeployModel
from . import c_lib_wrap as C
from . import vision
from . import text
from .download import download, download_and_decompress

View File

@@ -12,3 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from . import uie

View File

@@ -0,0 +1,73 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import logging
from ... import Frontend
from ... import RuntimeOption
from ... import c_lib_wrap as C
class SchemaNode(object):
    """Python-side builder for a ``C.text.SchemaNode`` tree.

    Attributes:
        _schema_node: the ``C.text.SchemaNode`` built for ``name`` with all
            converted children attached.
        _schema_node_children: the list of converted ``C.text.SchemaNode``
            children on their own.
    """

    def __init__(self, name, children=[]):
        """Convert ``children`` and build the underlying C++ node.

        Args:
            name: name of this node.
            children: iterable whose items are either ``str`` (a leaf node
                with that name) or ``dict`` (each ``key: value`` pair becomes
                a child named ``key`` whose own children are ``value``).
                The default list is never mutated.

        Raises:
            TypeError: if an item of ``children`` is neither ``str`` nor
                ``dict``.
        """
        schema_node_children = []
        for child in children:
            if isinstance(child, str):
                schema_node_children.append(C.text.SchemaNode(child, []))
            elif isinstance(child, dict):
                # dict.items(), not .item(): the latter does not exist and
                # raised AttributeError for every dict child.
                for key, val in child.items():
                    schema_node_child = SchemaNode(key, val)
                    schema_node_children.append(schema_node_child._schema_node)
            else:
                # `assert "<message>"` is always true, so unsupported children
                # used to be dropped silently; report them instead.
                raise TypeError(
                    "The type of child of SchemaNode should be str or dict.")
        self._schema_node = C.text.SchemaNode(name, schema_node_children)
        self._schema_node_children = schema_node_children
class UIEModel(object):
    """Python wrapper around the ``C.text.UIEModel`` binding."""

    def __init__(self,
                 model_file,
                 params_file,
                 vocab_file,
                 position_prob=0.5,
                 max_length=128,
                 schema=[],
                 runtime_option=RuntimeOption(),
                 model_format=Frontend.PADDLE):
        """Create the underlying ``C.text.UIEModel``.

        Args:
            model_file, params_file, vocab_file, position_prob, max_length:
                forwarded unchanged to ``C.text.UIEModel``.
            schema: see ``_to_c_schema`` for the accepted forms. The default
                list is never mutated.
            runtime_option: its ``_option`` attribute is passed to the
                binding.
            model_format: forwarded unchanged to ``C.text.UIEModel``.
        """
        self._model = C.text.UIEModel(model_file, params_file, vocab_file,
                                      position_prob, max_length,
                                      self._to_c_schema(schema),
                                      runtime_option._option, model_format)

    @staticmethod
    def _to_c_schema(schema):
        """Convert a Python-side schema into what the binding accepts.

        * ``list``: items are converted as the children of a ``SchemaNode``
          (``str`` leaves or ``dict`` sub-trees) and the resulting list of
          ``C.text.SchemaNode`` is returned.
        * ``dict``: every ``key: value`` pair becomes one
          ``C.text.SchemaNode`` named ``key`` with children ``value``.
        * ``SchemaNode`` wrapper: its ``C.text.SchemaNode`` is returned.
        * anything else is returned unchanged, so the binding's own overload
          resolution decides whether it is acceptable. The previous
          ``assert "<message>"`` in ``__init__`` was always true and never
          rejected anything, so this keeps the observable behaviour.
        """
        if isinstance(schema, list):
            return SchemaNode("", schema)._schema_node_children
        if isinstance(schema, dict):
            return [
                SchemaNode(key, val)._schema_node
                for key, val in schema.items()
            ]
        if isinstance(schema, SchemaNode):
            return schema._schema_node
        return schema

    def set_schema(self, schema):
        """Replace the model's schema; accepts the same forms as ``__init__``."""
        self._model.set_schema(self._to_c_schema(schema))

    def predict(self, texts):
        """Run prediction on ``texts`` and return the binding's result."""
        return self._model.predict(texts)

View File

@@ -53,6 +53,7 @@ setup_configs["ENABLE_OPENVINO_BACKEND"] = os.getenv("ENABLE_OPENVINO_BACKEND",
setup_configs["ENABLE_PADDLE_BACKEND"] = os.getenv("ENABLE_PADDLE_BACKEND",
"OFF")
setup_configs["ENABLE_VISION"] = os.getenv("ENABLE_VISION", "ON")
setup_configs["ENABLE_TEXT"] = os.getenv("ENABLE_TEXT", "ON")
setup_configs["ENABLE_TRT_BACKEND"] = os.getenv("ENABLE_TRT_BACKEND", "OFF")
setup_configs["WITH_GPU"] = os.getenv("WITH_GPU", "OFF")
setup_configs["TRT_DIRECTORY"] = os.getenv("TRT_DIRECTORY", "UNDEFINED")
@@ -325,7 +326,8 @@ ext_modules = [
# no need to do fancy stuff so far
if PACKAGE_NAME != "fastdeploy":
packages = setuptools.find_packages(exclude=['fastdeploy*', 'build_scripts'])
packages = setuptools.find_packages(
exclude=['fastdeploy*', 'build_scripts'])
else:
packages = setuptools.find_packages(exclude=['build_scripts'])
@@ -344,10 +346,16 @@ if sys.version_info[0] == 3:
package_data = {PACKAGE_NAME: ["LICENSE", "ThirdPartyNotices.txt"]}
if sys.argv[1] == "install" or sys.argv[1] == "bdist_wheel":
shutil.copy(os.path.join(TOP_DIR, "ThirdPartyNotices.txt"), os.path.join(TOP_DIR, PACKAGE_NAME))
shutil.copy(os.path.join(TOP_DIR, "LICENSE"), os.path.join(TOP_DIR, PACKAGE_NAME))
if not os.path.exists(os.path.join(TOP_DIR, "fastdeploy", "libs", "third_libs")):
print("Didn't detect path: fastdeploy/libs/third_libs exist, please execute `python setup.py build` first")
shutil.copy(
os.path.join(TOP_DIR, "ThirdPartyNotices.txt"),
os.path.join(TOP_DIR, PACKAGE_NAME))
shutil.copy(
os.path.join(TOP_DIR, "LICENSE"), os.path.join(TOP_DIR, PACKAGE_NAME))
if not os.path.exists(
os.path.join(TOP_DIR, "fastdeploy", "libs", "third_libs")):
print(
"Didn't detect path: fastdeploy/libs/third_libs exist, please execute `python setup.py build` first"
)
sys.exit(0)
sys.path.append(os.path.split(os.path.abspath(__file__))[0])
from build_scripts.process_libraries import process_libraries