// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #pragma once #include #include #include #include #include #include "fastdeploy/fastdeploy_model.h" #include "fastdeploy/utils/unique_ptr.h" #include "faster_tokenizer/tokenizers/ernie_faster_tokenizer.h" using namespace paddlenlp; struct UIEResult { size_t start_; size_t end_; double probability_; std::string text_; std::unordered_map> relation_; UIEResult() = default; UIEResult(size_t start, size_t end, double probability, std::string text) : start_(start), end_(end), probability_(probability), text_(text) {} }; std::ostream& operator<<(std::ostream& os, const UIEResult& result); std::ostream& operator<<( std::ostream& os, const std::vector>>& results); struct SchemaNode { std::string name_; std::vector> prefix_; std::vector> relations_; std::vector children_; explicit SchemaNode(const std::string& name, const std::vector& children = {}) : name_(name), children_(children) {} void AddChild(const std::string& schema) { children_.emplace_back(schema); } void AddChild(const SchemaNode& schema) { children_.push_back(schema); } void AddChild(const std::string& schema, const std::vector& children) { SchemaNode schema_node(schema); for (auto& child : children) { schema_node.children_.emplace_back(child); } children_.emplace_back(schema_node); } void AddChild(const std::string& schema, const std::vector& children) { SchemaNode schema_node(schema); schema_node.children_ = children; children_.emplace_back(schema_node); } }; struct Schema { explicit Schema(const std::string& schema, const std::string& name = "root"); explicit Schema(const std::vector& schema_list, const std::string& name = "root"); explicit Schema(const std::unordered_map>& schema_map, const std::string& name = "root"); private: void CreateRoot(const std::string& name); std::unique_ptr root_; friend class UIEModel; }; struct UIEModel { public: UIEModel( const std::string& model_file, const std::string& params_file, const std::string& vocab_file, float position_prob, size_t max_length, const std::vector& schema, const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption(), const fastdeploy::Frontend& model_format = fastdeploy::Frontend::PADDLE); UIEModel( const std::string& model_file, const std::string& params_file, const std::string& vocab_file, float position_prob, size_t max_length, const std::unordered_map>& schema, const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption(), const fastdeploy::Frontend& model_format = fastdeploy::Frontend::PADDLE); void SetSchema(const std::vector& schema); void SetSchema( const std::unordered_map>& schema); void PredictUIEInput(const std::vector& input_texts, const std::vector& prompts, std::vector>* results); void Predict( const std::vector& texts, std::vector>>* results); private: using IDX_PROB = std::pair; struct IdxProbCmp { bool operator()(const std::pair& lhs, const std::pair& rhs) const; }; using SPAN_SET = std::set, IdxProbCmp>; struct SpanIdx { faster_tokenizer::core::Offset offset_; bool is_prompt_; }; void AutoSplitter( const std::vector& texts, size_t max_length, std::vector* short_texts, std::unordered_map>* input_mapping); void AutoJoiner( const std::vector& short_texts, const std::unordered_map>& input_mapping, std::vector>* results); // Get idx of the last dimension in probability arrays, which is greater than // a limitation. void GetCandidateIdx(const float* probs, int64_t batch_size, int64_t seq_len, std::vector>* candidate_idx_prob, float threshold = 0.5) const; void GetSpan(const std::vector& start_idx_prob, const std::vector& end_idx_prob, SPAN_SET* span_set) const; void GetSpanIdxAndProbs( const SPAN_SET& span_set, const std::vector& offset_mapping, std::vector* span_idxs, std::vector* probs) const; void ConvertSpanToUIEResult( const std::vector& texts, const std::vector& prompts, const std::vector>& span_idxs, const std::vector>& probs, std::vector>* results) const; fastdeploy::RuntimeOption runtime_option_; fastdeploy::Runtime runtime_; std::unique_ptr schema_; size_t max_length_; float position_prob_; faster_tokenizer::tokenizers_impl::ErnieFasterTokenizer tokenizer_; };