# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import distutils.util

import numpy as np
import fast_tokenizer
from paddlenlp.transformers import AutoTokenizer

import fastdeploy as fd


def parse_arguments():
    import argparse
    import ast
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_dir", required=True, help="The directory of model.")
    parser.add_argument(
        "--vocab_path",
        type=str,
        default="",
        help="The path of tokenizer vocab.")
    parser.add_argument(
        "--device",
        type=str,
        default='cpu',
        choices=['gpu', 'cpu'],
        help="Type of inference device, support 'cpu' or 'gpu'.")
    parser.add_argument(
        "--backend",
        type=str,
        default='onnx_runtime',
        choices=[
            'onnx_runtime', 'paddle', 'openvino', 'tensorrt', 'paddle_tensorrt'
        ],
        help="The inference runtime backend.")
    parser.add_argument(
        "--batch_size", type=int, default=1, help="The batch size of data.")
    parser.add_argument(
        "--max_length",
        type=int,
        default=128,
        help="The max length of sequence.")
    parser.add_argument(
        "--log_interval",
        type=int,
        default=10,
        help="The interval of logging.")
    parser.add_argument(
        "--use_fp16",
        type=distutils.util.strtobool,
        default=False,
        help="Whether to use FP16 mode.")
    parser.add_argument(
        "--use_fast",
        type=distutils.util.strtobool,
        default=False,
        help="Whether to use fast_tokenizer to accelerate the tokenization.")
    return parser.parse_args()


def batchfy_text(texts, batch_size):
    # Split the text list into batches of at most batch_size examples.
    batch_texts = []
    batch_start = 0
    while batch_start < len(texts):
        batch_texts += [
            texts[batch_start:min(batch_start + batch_size, len(texts))]
        ]
        batch_start += batch_size
    return batch_texts


class ErnieForSequenceClassificationPredictor(object):
    def __init__(self, args):
        self.tokenizer = AutoTokenizer.from_pretrained(
            'ernie-3.0-medium-zh', use_faster=args.use_fast)
        self.runtime = self.create_fd_runtime(args)
        self.batch_size = args.batch_size
        self.max_length = args.max_length

    def create_fd_runtime(self, args):
        option = fd.RuntimeOption()
        model_path = os.path.join(args.model_dir, "infer.pdmodel")
        params_path = os.path.join(args.model_dir, "infer.pdiparams")
        option.set_model_path(model_path, params_path)
        # Select the inference device.
        if args.device == 'cpu':
            option.use_cpu()
        else:
            option.use_gpu()
        # Select the inference backend.
        if args.backend == 'paddle':
            option.use_paddle_backend()
        elif args.backend == 'onnx_runtime':
            option.use_ort_backend()
        elif args.backend == 'openvino':
            option.use_openvino_backend()
        else:
            option.use_trt_backend()
            if args.backend == 'paddle_tensorrt':
                option.enable_paddle_to_trt()
                option.enable_paddle_trt_collect_shape()
            # Declare the dynamic-shape range of both inputs for TensorRT and
            # cache the built engine so later runs skip the engine build.
            trt_file = os.path.join(args.model_dir, "infer.trt")
            option.set_trt_input_shape(
                'input_ids',
                min_shape=[1, args.max_length],
                opt_shape=[args.batch_size, args.max_length],
                max_shape=[args.batch_size, args.max_length])
            option.set_trt_input_shape(
                'token_type_ids',
                min_shape=[1, args.max_length],
                opt_shape=[args.batch_size, args.max_length],
                max_shape=[args.batch_size, args.max_length])
            if args.use_fp16:
                option.enable_trt_fp16()
                trt_file = trt_file + ".fp16"
            option.set_trt_cache_file(trt_file)
        return fd.Runtime(option)

    def preprocess(self, texts, texts_pair):
        # Tokenize the (sentence1, sentence2) pairs and build the input feed
        # keyed by the runtime's actual input tensor names.
        data = self.tokenizer(
            texts,
            texts_pair,
            max_length=self.max_length,
            padding=True,
            truncation=True)
        input_ids_name = self.runtime.get_input_info(0).name
        token_type_ids_name = self.runtime.get_input_info(1).name
        input_map = {
            input_ids_name: np.array(
                data["input_ids"], dtype="int64"),
            token_type_ids_name: np.array(
                data["token_type_ids"], dtype="int64")
        }
        return input_map

    def infer(self, input_map):
        results = self.runtime.infer(input_map)
        return results

    def postprocess(self, infer_data):
        # Numerically stable softmax over the logits, then report the argmax
        # label and its probability as the confidence.
        logits = np.array(infer_data[0])
        max_value = np.max(logits, axis=1, keepdims=True)
        exp_data = np.exp(logits - max_value)
        probs = exp_data / np.sum(exp_data, axis=1, keepdims=True)
        out_dict = {
            "label": probs.argmax(axis=-1),
            "confidence": probs.max(axis=-1)
        }
        return out_dict

    def predict(self, texts, texts_pair=None):
        input_map = self.preprocess(texts, texts_pair)
        infer_result = self.infer(input_map)
        output = self.postprocess(infer_result)
        return output


if __name__ == "__main__":
    args = parse_arguments()
    predictor = ErnieForSequenceClassificationPredictor(args)
    # Chinese sentence pairs for the text-similarity task, roughly:
    #   ("Huabei payment collection limit",
    #    "Does the money-collection QR code limit Huabei payment amounts?")
    #   ("Does Huabei support paying for high-speed rail tickets?",
    #    "Why doesn't 友付宝 support Huabei payment?")
    texts_ds = ["花呗收款额度限制", "花呗支持高铁票支付吗"]
    texts_pair_ds = ["收钱码,对花呗支付的金额有限制吗", "为什么友付宝不支持花呗付款"]
    batch_texts = batchfy_text(texts_ds, args.batch_size)
    batch_texts_pair = batchfy_text(texts_pair_ds, args.batch_size)
    for bs, (texts,
             texts_pair) in enumerate(zip(batch_texts, batch_texts_pair)):
        outputs = predictor.predict(texts, texts_pair)
        for i, (sentence1, sentence2) in enumerate(zip(texts, texts_pair)):
            print(
                f"Batch id:{bs}, example id:{i}, sentence1:{sentence1}, "
                f"sentence2:{sentence2}, label:{outputs['label'][i]}, "
                f"similarity:{outputs['confidence'][i]:.4f}")
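
# A minimal invocation sketch. The script filename and model directory below
# are illustrative assumptions, not fixed by this file; --model_dir must
# contain the exported infer.pdmodel and infer.pdiparams.
#
#   python seq_cls_infer.py --model_dir ./ernie-3.0-medium-zh-export \
#       --device gpu --backend paddle_tensorrt --use_fp16 True --batch_size 2
#
# Each sentence pair then prints one line with its predicted label and
# softmax confidence, as produced by postprocess() above.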