// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <algorithm>
#include <iostream>
#include <sstream>
#include <utility>
#include <vector>

#include "fastdeploy/function/reduce.h"
#include "fastdeploy/function/softmax.h"
#include "fastdeploy/runtime.h"
#include "fastdeploy/utils/path.h"
#include "fast_tokenizer/tokenizers/ernie_fast_tokenizer.h"
#include "gflags/gflags.h"

using namespace paddlenlp;
using namespace fast_tokenizer::tokenizers_impl;

#ifdef WIN32
const char sep = '\\';
#else
const char sep = '/';
#endif

DEFINE_string(model_dir, "", "Directory of the inference model.");
DEFINE_string(vocab_path, "", "Path of the vocab file.");
DEFINE_string(device, "cpu",
              "Type of inference device, support 'cpu', 'kunlunxin' or 'gpu'.");
DEFINE_string(backend, "onnx_runtime",
              "The inference runtime backend, support: ['onnx_runtime', "
              "'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt']");
DEFINE_int32(batch_size, 1, "The batch size of data.");
DEFINE_int32(max_length, 128, "The maximum length of the input sequences.");
DEFINE_bool(use_fp16, false, "Whether to use FP16 mode.");
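
// Example invocation (the model directory below is only a placeholder; point
// --model_dir at your own exported inference model):
//   ./seq_cls_infer_demo --model_dir ernie-3.0-medium-zh-afqmc \
//       --vocab_path ernie-3.0-medium-zh-afqmc/vocab.txt \
//       --device gpu --backend onnx_runtime --batch_size 2 --max_length 128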

void PrintUsage() {
  fastdeploy::FDINFO
      << "Usage: seq_cls_infer_demo --model_dir dir --device "
         "[cpu|gpu|kunlunxin] "
         "--backend "
         "[onnx_runtime|paddle|openvino|tensorrt|paddle_tensorrt] "
         "--batch_size size --max_length len --use_fp16 false"
      << std::endl;
  fastdeploy::FDINFO << "Default value of device: cpu" << std::endl;
  fastdeploy::FDINFO << "Default value of backend: onnx_runtime" << std::endl;
  fastdeploy::FDINFO << "Default value of batch_size: 1" << std::endl;
  fastdeploy::FDINFO << "Default value of max_length: 128" << std::endl;
  fastdeploy::FDINFO << "Default value of use_fp16: false" << std::endl;
}

bool CreateRuntimeOption(fastdeploy::RuntimeOption* option) {
  std::string model_path = FLAGS_model_dir + sep + "infer.pdmodel";
  std::string param_path = FLAGS_model_dir + sep + "infer.pdiparams";
  fastdeploy::FDINFO << "model_path = " << model_path
                     << ", param_path = " << param_path << std::endl;
  option->SetModelPath(model_path, param_path);

  if (FLAGS_device == "kunlunxin") {
    option->UseKunlunXin();
    option->UsePaddleLiteBackend();
    return true;
  } else if (FLAGS_device == "gpu") {
    option->UseGpu();
  } else if (FLAGS_device == "cpu") {
    option->UseCpu();
  } else {
    fastdeploy::FDERROR << "The available device should be one of the list "
                           "['cpu', 'gpu', 'kunlunxin']. But received '"
                        << FLAGS_device << "'" << std::endl;
    return false;
  }

  if (FLAGS_backend == "onnx_runtime") {
    option->UseOrtBackend();
  } else if (FLAGS_backend == "paddle") {
    option->UsePaddleInferBackend();
  } else if (FLAGS_backend == "openvino") {
    option->UseOpenVINOBackend();
  } else if (FLAGS_backend == "tensorrt" ||
             FLAGS_backend == "paddle_tensorrt") {
    option->UseTrtBackend();
    if (FLAGS_backend == "paddle_tensorrt") {
      option->EnablePaddleToTrt();
      option->EnablePaddleTrtCollectShape();
    }
    std::string trt_file = FLAGS_model_dir + sep + "infer.trt";
    // Set the dynamic shape range (min, opt, max) for both input tensors.
    option->SetTrtInputShape("input_ids", {1, FLAGS_max_length},
                             {FLAGS_batch_size, FLAGS_max_length},
                             {FLAGS_batch_size, FLAGS_max_length});
    option->SetTrtInputShape("token_type_ids", {1, FLAGS_max_length},
                             {FLAGS_batch_size, FLAGS_max_length},
                             {FLAGS_batch_size, FLAGS_max_length});
    if (FLAGS_use_fp16) {
      option->EnableTrtFP16();
      trt_file = trt_file + ".fp16";
    }
    // Cache the serialized TensorRT engine so it is not rebuilt on every run.
    option->SetTrtCacheFile(trt_file);
  } else {
    fastdeploy::FDERROR << "The available backend should be one of the list "
                           "['onnx_runtime', 'paddle', 'openvino', "
                           "'tensorrt', 'paddle_tensorrt']. But received '"
                        << FLAGS_backend << "'" << std::endl;
    return false;
  }

  return true;
}

// Split `texts` into consecutive batches of at most `batch_size` elements.
bool BatchFyTexts(const std::vector<std::string>& texts, int batch_size,
                  std::vector<std::vector<std::string>>* batch_texts) {
  for (int idx = 0; idx < texts.size(); idx += batch_size) {
    int rest = texts.size() - idx;
    int curr_size = std::min(batch_size, rest);
    std::vector<std::string> batch_text(curr_size);
    std::copy_n(texts.begin() + idx, curr_size, batch_text.begin());
    batch_texts->emplace_back(std::move(batch_text));
  }
  return true;
}
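
// For example, with batch_size = 2:
//   BatchFyTexts({"a", "b", "c"}, 2, &batches)  =>  batches = {{"a", "b"}, {"c"}}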

// Prediction for a single example: the argmax label id and its softmax
// probability.
struct SeqClsResult {
  int label;
  float confidence;
};

struct ErnieForSequenceClassificationPredictor {
  fastdeploy::Runtime runtime_;
  ErnieFastTokenizer tokenizer_;
  ErnieForSequenceClassificationPredictor(
      const fastdeploy::RuntimeOption& option,
      const ErnieFastTokenizer& tokenizer)
      : tokenizer_(tokenizer) {
    runtime_.Init(option);
  }

  bool Preprocess(const std::vector<std::string>& texts,
                  const std::vector<std::string>& texts_pair,
                  std::vector<fastdeploy::FDTensor>* inputs) {
    std::vector<fast_tokenizer::core::Encoding> encodings;
    std::vector<fast_tokenizer::core::EncodeInput> text_pair_input;
    // 1. Tokenize the text or the (text, text_pair) tuples
    if (texts_pair.empty()) {
      for (int i = 0; i < texts.size(); ++i) {
        text_pair_input.emplace_back(texts[i]);
      }
    } else {
      if (texts.size() != texts_pair.size()) {
        return false;
      }
      for (int i = 0; i < texts.size(); ++i) {
        text_pair_input.emplace_back(
            std::pair<std::string, std::string>(texts[i], texts_pair[i]));
      }
    }
    tokenizer_.EncodeBatchStrings(text_pair_input, &encodings);
    // 2. Construct the input tensors
    // 2.1 Allocate the input tensors. Note: every encoding in the batch is
    // assumed to have the same length as the first one.
    int64_t batch_size = texts.size();
    int64_t seq_len = 0;
    if (batch_size > 0) {
      seq_len = encodings[0].GetIds().size();
    }
    inputs->resize(runtime_.NumInputs());
    for (int i = 0; i < runtime_.NumInputs(); ++i) {
      (*inputs)[i].Allocate({batch_size, seq_len},
                            fastdeploy::FDDataType::INT64,
                            runtime_.GetInputInfo(i).name);
    }
    // 2.2 Copy the token ids and type ids of each encoding into the flat
    // row-major [batch_size, seq_len] buffers
    size_t start = 0;
    int64_t* input_ids_ptr =
        reinterpret_cast<int64_t*>((*inputs)[0].MutableData());
    int64_t* type_ids_ptr =
        reinterpret_cast<int64_t*>((*inputs)[1].MutableData());
    for (int i = 0; i < encodings.size(); ++i) {
      auto&& curr_input_ids = encodings[i].GetIds();
      auto&& curr_type_ids = encodings[i].GetTypeIds();
      std::copy(curr_input_ids.begin(), curr_input_ids.end(),
                input_ids_ptr + start);
      std::copy(curr_type_ids.begin(), curr_type_ids.end(),
                type_ids_ptr + start);
      start += seq_len;
    }
    return true;
  }

  bool Postprocess(const std::vector<fastdeploy::FDTensor>& outputs,
                   std::vector<SeqClsResult>* seq_cls_results) {
    const auto& logits = outputs[0];
    fastdeploy::FDTensor probs;
    fastdeploy::function::Softmax(logits, &probs);

    // Reduce over the last axis: the highest probability per example and the
    // index at which it occurs.
    fastdeploy::FDTensor labels, confidences;
    fastdeploy::function::Max(probs, &confidences, {-1});
    fastdeploy::function::ArgMax(probs, &labels, -1);
    if (labels.Numel() != confidences.Numel()) {
      return false;
    }

    seq_cls_results->resize(labels.Numel());
    int64_t* label_ptr = reinterpret_cast<int64_t*>(labels.Data());
    float* confidence_ptr = reinterpret_cast<float*>(confidences.Data());
    for (int i = 0; i < labels.Numel(); ++i) {
      (*seq_cls_results)[i].label = label_ptr[i];
      (*seq_cls_results)[i].confidence = confidence_ptr[i];
    }
    return true;
  }
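
  // e.g. for two examples with probs = [[0.1, 0.9], [0.8, 0.2]], Postprocess
  // fills {label: 1, confidence: 0.9} and {label: 0, confidence: 0.8}.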

  bool Predict(const std::vector<std::string>& texts,
               const std::vector<std::string>& texts_pair,
               std::vector<SeqClsResult>* seq_cls_results) {
    std::vector<fastdeploy::FDTensor> inputs;
    if (!Preprocess(texts, texts_pair, &inputs)) {
      return false;
    }

    std::vector<fastdeploy::FDTensor> outputs(runtime_.NumOutputs());
    if (!runtime_.Infer(inputs, &outputs)) {
      return false;
    }

    if (!Postprocess(outputs, seq_cls_results)) {
      return false;
    }
    return true;
  }
};
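
// Minimal usage sketch (assumes `option` and `tokenizer` have been built as
// in main() below):
//   ErnieForSequenceClassificationPredictor predictor(option, tokenizer);
//   std::vector<SeqClsResult> results;
//   predictor.Predict({"text_a"}, {"text_b"}, &results);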

int main(int argc, char* argv[]) {
  google::ParseCommandLineFlags(&argc, &argv, true);
  auto option = fastdeploy::RuntimeOption();
  if (!CreateRuntimeOption(&option)) {
    PrintUsage();
    return -1;
  }

  // Fall back to <model_dir>/vocab.txt when --vocab_path is not given or
  // does not exist.
  std::string vocab_path = FLAGS_vocab_path;
  if (!fastdeploy::CheckFileExists(vocab_path)) {
    vocab_path = fastdeploy::PathJoin(FLAGS_model_dir, "vocab.txt");
    if (!fastdeploy::CheckFileExists(vocab_path)) {
      fastdeploy::FDERROR << "The path of vocab " << vocab_path
                          << " doesn't exist" << std::endl;
      PrintUsage();
      return -1;
    }
  }
  ErnieFastTokenizer tokenizer(vocab_path);

  ErnieForSequenceClassificationPredictor predictor(option, tokenizer);

  std::vector<SeqClsResult> seq_cls_results;
  // Sample sentence pairs from a Chinese text-matching task, roughly:
  //   ("Huabei payment-collection limit",
  //    "Does the money-collection code limit the amount paid with Huabei?")
  //   ("Does Huabei support paying for high-speed rail tickets?",
  //    "Why doesn't 友付宝 support Huabei payment?")
  std::vector<std::string> texts_ds = {"花呗收款额度限制",
                                       "花呗支持高铁票支付吗"};
  std::vector<std::string> texts_pair_ds = {"收钱码,对花呗支付的金额有限制吗",
                                            "为什么友付宝不支持花呗付款"};
  std::vector<std::vector<std::string>> batch_texts, batch_texts_pair;
  BatchFyTexts(texts_ds, FLAGS_batch_size, &batch_texts);
  BatchFyTexts(texts_pair_ds, FLAGS_batch_size, &batch_texts_pair);
  for (int bs = 0; bs < batch_texts.size(); ++bs) {
    predictor.Predict(batch_texts[bs], batch_texts_pair[bs], &seq_cls_results);
    for (int i = 0; i < batch_texts[bs].size(); ++i) {
      std::cout << "Batch id: " << bs << ", example id: " << i
                << ", sentence 1: " << batch_texts[bs][i]
                << ", sentence 2: " << batch_texts_pair[bs][i]
                << ", label: " << seq_cls_results[i].label
                << ", confidence: " << seq_cls_results[i].confidence
                << std::endl;
    }
  }
  return 0;
}