@@ -65,6 +65,19 @@ The extraction schema: ['肿瘤的大小', '肿瘤的个数', '肝癌级别', '
 ......
 ```
 
+### Description of command line arguments
+
+In addition to the command line arguments shown in the example above, `infer.py` supports more. The following table describes each of them.
+
+| Argument | Description |
+|----------|--------------|
+|--model_dir | The directory of the deployed model. |
+|--batch_size | The batch size of inputs. Default to 1. |
+|--max_length | The max length of the input sequence. Default to 128. |
+|--device | The device of runtime, choices: ['cpu', 'gpu']. Default to 'cpu'. |
+|--backend | The backend of runtime, choices: ['onnx_runtime', 'paddle_inference', 'openvino', 'tensorrt', 'paddle_tensorrt']. Default to 'paddle_inference'. |
+|--use_fp16 | Whether to use FP16 precision for inference. It can be turned on when the 'tensorrt' or 'paddle_tensorrt' backend is selected. Default to False. |
+
 ## The way to use the UIE model in each extraction task
 
 In the UIE model, schema represents the structured information to be extracted, so the UIE model can support different information extraction tasks by setting different schemas.
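The schema can be swapped at runtime to switch tasks. A minimal sketch, reusing only schemas and calls that appear in this diff, and assuming `uie` is a `UIEModel` already built as in `infer.py` below:

```python
from pprint import pprint

# `uie` is assumed to be a fastdeploy.text.UIEModel built as in infer.py.
# Each set_schema call replaces the previous schema.

# Entity extraction: a flat list schema extracts independent spans.
uie.set_schema(['肿瘤的大小', '肿瘤的个数', '肝癌级别'])

# Opinion extraction: a nested dict schema pairs each aspect with its
# opinion word and sentiment polarity.
uie.set_schema({"评价维度": ["观点词", "情感倾向[正向,负向]"]})
results = uie.predict(["店面干净,很清静"], return_dict=True)
pprint(results)
```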
@@ -65,6 +65,19 @@ The extraction schema: ['肿瘤的大小', '肿瘤的个数', '肝癌级别', '
 ......
 ```
 
+### 参数说明
+
+`infer.py` 除了以上示例的命令行参数,还支持更多命令行参数的设置。以下为各命令行参数的说明。
+
+| 参数 | 参数说明 |
+|----------|--------------|
+|--model_dir | 指定部署模型的目录 |
+|--batch_size | 输入的batch size,默认为 1 |
+|--max_length | 最大序列长度,默认为 128 |
+|--device | 运行的设备,可选范围: ['cpu', 'gpu'],默认为'cpu' |
+|--backend | 支持的推理后端,可选范围: ['onnx_runtime', 'paddle_inference', 'openvino', 'tensorrt', 'paddle_tensorrt'],默认为'paddle_inference' |
+|--use_fp16 | 是否使用FP16模式进行推理。使用'tensorrt'或'paddle_tensorrt'后端时可开启,默认为False |
+
 ## UIE模型各抽取任务使用方式
 
 在UIE模型中,schema代表要抽取的结构化信息,所以UIE模型可通过设置不同的schema支持不同信息抽取任务。
@@ -15,6 +15,7 @@ import fastdeploy
 from fastdeploy.text import UIEModel, SchemaLanguage
 import os
 from pprint import pprint
+import distutils.util
 
 
 def parse_arguments():
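`import distutils.util` is added only for `strtobool`, which later parses the `--use_fp16` flag. Note that `distutils` was removed from the standard library in Python 3.12, so on newer interpreters a small local replacement is needed. A minimal sketch (this helper is illustrative, not part of the diff):

```python
def strtobool(value: str) -> bool:
    # Accept the same spellings as distutils.util.strtobool,
    # returning a bool instead of the original 0/1 int.
    value = value.lower()
    if value in ("y", "yes", "t", "true", "on", "1"):
        return True
    if value in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError(f"invalid truth value {value!r}")
```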
@@ -31,17 +32,34 @@ def parse_arguments():
         default='cpu',
         choices=['cpu', 'gpu'],
         help="Type of inference device, support 'cpu' or 'gpu'.")
+    parser.add_argument(
+        "--batch_size", type=int, default=1, help="The batch size of data.")
+    parser.add_argument(
+        "--device_id", type=int, default=0, help="device(gpu) id")
+    parser.add_argument(
+        "--max_length",
+        type=int,
+        default=128,
+        help="The max length of sequence.")
     parser.add_argument(
         "--backend",
         type=str,
-        default='onnx_runtime',
-        choices=['onnx_runtime', 'paddle_inference', 'openvino'],
+        default='paddle_inference',
+        choices=[
+            'onnx_runtime', 'paddle_inference', 'openvino', 'paddle_tensorrt',
+            'tensorrt'
+        ],
         help="The inference runtime backend.")
     parser.add_argument(
         "--cpu_num_threads",
         type=int,
         default=8,
         help="The number of threads to execute inference in cpu device.")
+    parser.add_argument(
+        "--use_fp16",
+        type=distutils.util.strtobool,
+        default=False,
+        help="Use FP16 mode")
     return parser.parse_args()
 
 
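`--use_fp16` uses `distutils.util.strtobool` as its `type` because `type=bool` would treat any non-empty string, including "False", as truthy. A quick self-contained illustration (runs on Python versions that still ship distutils):

```python
import argparse
import distutils.util

parser = argparse.ArgumentParser()
parser.add_argument("--use_fp16", type=distutils.util.strtobool, default=False)

print(parser.parse_args(["--use_fp16", "False"]).use_fp16)  # 0 -> falsy
print(parser.parse_args(["--use_fp16", "True"]).use_fp16)   # 1 -> truthy
print(bool("False"))  # True: why type=bool would silently misparse the flag
```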
@@ -50,8 +68,9 @@ def build_option(args):
     # Set device
     if args.device == 'cpu':
         runtime_option.use_cpu()
+        runtime_option.set_cpu_thread_num(args.cpu_num_threads)
     else:
-        runtime_option.use_gpu()
+        runtime_option.use_gpu(args.device_id)
 
     # Set backend
     if args.backend == 'onnx_runtime':
@@ -60,7 +79,37 @@ def build_option(args):
         runtime_option.use_paddle_infer_backend()
     elif args.backend == 'openvino':
         runtime_option.use_openvino_backend()
-        runtime_option.set_cpu_thread_num(args.cpu_num_threads)
+    else:
+        runtime_option.use_trt_backend()
+        if args.backend == 'paddle_tensorrt':
+            runtime_option.enable_paddle_to_trt()
+            runtime_option.enable_paddle_trt_collect_shape()
+        # Only useful for single stage predict
+        runtime_option.set_trt_input_shape(
+            'input_ids',
+            min_shape=[1, 1],
+            opt_shape=[args.batch_size, args.max_length // 2],
+            max_shape=[args.batch_size, args.max_length])
+        runtime_option.set_trt_input_shape(
+            'token_type_ids',
+            min_shape=[1, 1],
+            opt_shape=[args.batch_size, args.max_length // 2],
+            max_shape=[args.batch_size, args.max_length])
+        runtime_option.set_trt_input_shape(
+            'pos_ids',
+            min_shape=[1, 1],
+            opt_shape=[args.batch_size, args.max_length // 2],
+            max_shape=[args.batch_size, args.max_length])
+        runtime_option.set_trt_input_shape(
+            'att_mask',
+            min_shape=[1, 1],
+            opt_shape=[args.batch_size, args.max_length // 2],
+            max_shape=[args.batch_size, args.max_length])
+        trt_file = os.path.join(args.model_dir, "inference.trt")
+        if args.use_fp16:
+            runtime_option.enable_trt_fp16()
+            trt_file = trt_file + ".fp16"
+        runtime_option.set_trt_cache_file(trt_file)
     return runtime_option
 
 
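All four UIE input tensors share one dynamic-shape profile (min batch 1 with a single token, opt at half of `--max_length`, max at the full sequence length), so the four calls above can be expressed as one loop. A behavior-preserving sketch, assuming the same `runtime_option` and `args` as in the hunk:

```python
# Same profile for every UIE input tensor; equivalent to the four
# explicit set_trt_input_shape calls in the hunk above.
for name in ('input_ids', 'token_type_ids', 'pos_ids', 'att_mask'):
    runtime_option.set_trt_input_shape(
        name,
        min_shape=[1, 1],
        opt_shape=[args.batch_size, args.max_length // 2],
        max_shape=[args.batch_size, args.max_length])
```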
@@ -78,7 +127,7 @@ if __name__ == "__main__":
         param_path,
         vocab_path,
         position_prob=0.5,
-        max_length=128,
+        max_length=args.max_length,
         schema=schema,
         runtime_option=runtime_option,
         schema_language=SchemaLanguage.ZH)
@@ -132,8 +181,7 @@ if __name__ == "__main__":
     schema = {"评价维度": ["观点词", "情感倾向[正向,负向]"]}
     print(f"The extraction schema: {schema}")
     uie.set_schema(schema)
-    results = uie.predict(
-        ["店面干净,很清静,服务员服务热情,性价比很高,发现收银台有排队"], return_dict=True)
+    results = uie.predict(["店面干净,很清静"], return_dict=True)
     pprint(results)
     print()
 
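With `return_dict=True`, `predict` returns one dict per input text, keyed by the schema name. A hedged sketch of walking the opinion-extraction output; the `text`/`probability`/`relations` field names are assumed from the usual UIE result layout rather than taken from this diff:

```python
# Assumed layout: [{schema_key: [{"text": ..., "probability": ..., "relations": {...}}]}]
for result in results:
    for aspect in result.get("评价维度", []):
        print(aspect["text"], aspect["probability"])
```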