[Serving] PaddleSeg add triton serving && simple serving example (#1171)

* Update keypointdetection result docs

* Update im.copy() to im in examples

* Update new Api, fastdeploy::vision::Visualize to fastdeploy::vision

* Update SwapBackgroundSegmentation && SwapBackgroundMatting to SwapBackground

* Update README_CN.md

* Update README_CN.md

* Update preprocessor.h

* PaddleSeg supports triton serving

* Add PaddleSeg simple serving example

* Add PaddleSeg triton serving client code

* Update triton serving runtime config.pbtxt

* Update paddleseg grpc client

* Add paddle serving README
This commit is contained in:
huangjianhui
2023-01-30 09:34:38 +08:00
committed by GitHub
parent 62e051e21d
commit 294607fc4a
17 changed files with 820 additions and 1 deletions

View File

@@ -0,0 +1,36 @@
English | [简体中文](README_CN.md)
# PaddleSegmentation Python Simple Serving Demo
## Environment
- 1. Prepare environment and install FastDeploy Python whl, refer to [download_prebuilt_libraries](../../../../../../docs/en/build_and_install/download_prebuilt_libraries.md)
Server:
```bash
# Download demo code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving
# Download PP_LiteSeg model
wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
# Launch server, change the configurations in server.py to select hardware, backend, etc.
# and use --host, --port to specify IP and port
fastdeploy simple_serving --app server:app
```
Client:
```bash
# Download demo code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving
# Download test image
wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png
# Send request and get inference result (Please adapt the IP and port if necessary)
python client.py
```

View File

@@ -0,0 +1,36 @@
简体中文 | [English](README.md)
# PaddleSegmentation Python轻量服务化部署示例
在部署前,需确认以下两个步骤
- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. FastDeploy Python whl包安装参考[FastDeploy Python安装](../../../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
服务端:
```bash
# 下载部署示例代码
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/segmentation/paddleseg/python/serving
# 下载PP_LiteSeg模型文件
wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
# 启动服务可修改server.py中的配置项来指定硬件、后端等
# 可通过--host、--port指定IP和端口号
fastdeploy simple_serving --app server:app
```
客户端:
```bash
# 下载部署示例代码
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/detection/paddledetection/python/serving
# 下载测试图片
wget https://gitee.com/paddlepaddle/PaddleDetection/raw/release/2.4/demo/000000014439.jpg
# 请求服务获取推理结果如有必要请修改脚本中的IP和端口号
python client.py
```

View File

@@ -0,0 +1,23 @@
import requests
import json
import cv2
import fastdeploy as fd
from fastdeploy.serving.utils import cv2_to_base64
if __name__ == '__main__':
url = "http://127.0.0.1:8000/fd/ppliteseg"
headers = {"Content-Type": "application/json"}
im = cv2.imread("cityscapes_demo.png")
data = {"data": {"image": cv2_to_base64(im)}, "parameters": {}}
resp = requests.post(url=url, headers=headers, data=json.dumps(data))
if resp.status_code == 200:
r_json = json.loads(resp.json()["result"])
result = fd.vision.utils.json_to_segmentation(r_json)
vis_im = fd.vision.vis_segmentation(im, result, weight=0.5)
cv2.imwrite("visualized_result.jpg", vis_im)
print("Visualized result save in ./visualized_result.jpg")
else:
print("Error code:", resp.status_code)
print(resp.text)

View File

@@ -0,0 +1,38 @@
import fastdeploy as fd
from fastdeploy.serving.server import SimpleServer
import os
import logging
logging.getLogger().setLevel(logging.INFO)
# Configurations
model_dir = 'PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer'
device = 'cpu'
use_trt = False
# Prepare model
model_file = os.path.join(model_dir, "model.pdmodel")
params_file = os.path.join(model_dir, "model.pdiparams")
config_file = os.path.join(model_dir, "deploy.yaml")
# Setup runtime option to select hardware, backend, etc.
option = fd.RuntimeOption()
if device.lower() == 'gpu':
option.use_gpu()
if use_trt:
option.use_trt_backend()
option.set_trt_cache_file('pp_lite_seg.trt')
# Create model instance
model_instance = fd.vision.segmentation.PaddleSegModel(
model_file=model_file,
params_file=params_file,
config_file=config_file,
runtime_option=option)
# Create server, setup REST API
app = SimpleServer()
app.register(
task_name="fd/ppliteseg",
model_handler=fd.serving.handler.VisionModelHandler,
predictor=model_instance)

View File

@@ -0,0 +1,62 @@
English | [简体中文](README_CN.md)
# PaddleSegmentation Serving Deployment Demo
## Launch Serving
```bash
# Download demo code
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/segmentation/paddleseg/serving
#Download PP_LiteSeg model file
wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
# Move the model files to models/infer/1
mv yolov5s.onnx models/infer/1/
# Pull fastdeploy image, x.y.z is FastDeploy version, example 1.0.2.
docker pull paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
# Run the docker. The docker name is fd_serving, and the current directory is mounted as the docker's /serving directory
nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
# Start the service (Without setting the CUDA_VISIBLE_DEVICES environment variable, it will have scheduling privileges for all GPU cards)
CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models --backend-config=python,shm-default-byte-size=10485760
```
Output the following contents if serving is launched
```
......
I0928 04:51:15.784517 206 grpc_server.cc:4117] Started GRPCInferenceService at 0.0.0.0:8001
I0928 04:51:15.785177 206 http_server.cc:2815] Started HTTPService at 0.0.0.0:8000
I0928 04:51:15.826578 206 http_server.cc:167] Started Metrics Service at 0.0.0.0:8002
```
## Client Requests
Execute the following command in the physical machine to send a grpc request and output the result
```
#Download test images
wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png
#Installing client-side dependencies
python3 -m pip install tritonclient\[all\]
# Send requests
python3 paddleseg_grpc_client.py
```
When the request is sent successfully, the results are returned in json format and printed out:
```
```
## Modify Configs
The default is to run ONNXRuntime on CPU. If developers need to run it on GPU or other inference engines, please see the [Configs File](../../../../../serving/docs/EN/model_configuration-en.md) to modify the configs in `models/runtime/config.pbtxt`.

View File

@@ -0,0 +1,68 @@
[English](README.md) | 简体中文
# PaddleSegmentation 服务化部署示例
在服务化部署前,需确认
- 1. 服务化镜像的软硬件环境要求和镜像拉取命令请参考[FastDeploy服务化部署](../../../../../serving/README_CN.md)
## 启动服务
```bash
#下载部署示例代码
git clone https://github.com/PaddlePaddle/FastDeploy.git
cd FastDeploy/examples/vision/segmentation/paddleseg/serving
#下载yolov5模型文件
wget https://bj.bcebos.com/paddlehub/fastdeploy/PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
tar -xvf PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer.tgz
# 将模型文件放入 models/runtime/1目录下
mv PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer/model.pdmodel models/runtime/1/
mv PP_LiteSeg_B_STDC2_cityscapes_with_argmax_infer/model.pdiparams models/runtime/1/
# 拉取fastdeploy镜像(x.y.z为镜像版本号需参照serving文档替换为数字)
# GPU镜像
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10
# CPU镜像
docker pull registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-cpu-only-21.10
# 运行容器.容器名字为 fd_serving, 并挂载当前目录为容器的 /serving 目录
nvidia-docker run -it --net=host --name fd_serving -v `pwd`/:/serving registry.baidubce.com/paddlepaddle/fastdeploy:x.y.z-gpu-cuda11.4-trt8.4-21.10 bash
# 启动服务(不设置CUDA_VISIBLE_DEVICES环境变量会拥有所有GPU卡的调度权限)
CUDA_VISIBLE_DEVICES=0 fastdeployserver --model-repository=/serving/models --backend-config=python,shm-default-byte-size=10485760
```
>> **注意**: 当出现"Address already in use", 请使用`--grpc-port`指定端口号来启动服务同时更改paddleseg_grpc_client.py中的请求端口号
服务启动成功后, 会有以下输出:
```
......
I0928 04:51:15.784517 206 grpc_server.cc:4117] Started GRPCInferenceService at 0.0.0.0:8001
I0928 04:51:15.785177 206 http_server.cc:2815] Started HTTPService at 0.0.0.0:8000
I0928 04:51:15.826578 206 http_server.cc:167] Started Metrics Service at 0.0.0.0:8002
```
## 客户端请求
在物理机器中执行以下命令发送grpc请求并输出结果
```
#下载测试图片
wget https://paddleseg.bj.bcebos.com/dygraph/demo/cityscapes_demo.png
#安装客户端依赖
python3 -m pip install tritonclient[all]
# 发送请求
python3 paddleseg_grpc_client.py
```
发送请求成功后会返回json格式的检测结果并打印输出:
```
```
## 配置修改
当前默认配置在CPU上运行ONNXRuntime引擎 如果要在GPU或其他推理引擎上运行。 需要修改`models/runtime/config.pbtxt`中配置,详情请参考[配置文档](../../../../../serving/docs/zh_CN/model_configuration.md)

View File

@@ -0,0 +1,3 @@
# PaddleSeg Pipeline
The pipeline directory does not have model files, but a version number directory needs to be maintained.

View File

@@ -0,0 +1,67 @@
platform: "ensemble"
input [
{
name: "INPUT"
data_type: TYPE_UINT8
dims: [-1, -1, -1, 3 ]
}
]
output [
{
name: "SEG_RESULT"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
ensemble_scheduling {
step [
{
model_name: "preprocess"
model_version: 1
input_map {
key: "preprocess_input"
value: "INPUT"
}
output_map {
key: "preprocess_output_1"
value: "RUNTIME_INPUT_1"
}
output_map {
key: "preprocess_output_2"
value: "POSTPROCESS_INPUT_2"
}
},
{
model_name: "runtime"
model_version: 1
input_map {
key: "x"
value: "RUNTIME_INPUT_1"
}
output_map {
key: "argmax_0.tmp_0"
value: "RUNTIME_OUTPUT"
}
},
{
model_name: "postprocess"
model_version: 1
input_map {
key: "post_input_1"
value: "RUNTIME_OUTPUT"
}
input_map {
key: "post_input_2"
value: "POSTPROCESS_INPUT_2"
}
output_map {
key: "post_output"
value: "SEG_RESULT"
}
}
]
}

View File

@@ -0,0 +1,115 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import numpy as np
import time
import os
import fastdeploy as fd
# triton_python_backend_utils is available in every Triton Python model. You
# need to use this module to create inference requests and responses. It also
# contains some utility functions for extracting information from model_config
# and converting Triton input/output types to numpy types.
import triton_python_backend_utils as pb_utils
class TritonPythonModel:
"""Your Python model must use the same class name. Every Python model
that is created must have "TritonPythonModel" as the class name.
"""
def initialize(self, args):
"""`initialize` is called only once when the model is being loaded.
Implementing `initialize` function is optional. This function allows
the model to intialize any state associated with this model.
Parameters
----------
args : dict
Both keys and values are strings. The dictionary keys and values are:
* model_config: A JSON string containing the model configuration
* model_instance_kind: A string containing model instance kind
* model_instance_device_id: A string containing model instance device ID
* model_repository: Model repository path
* model_version: Model version
* model_name: Model name
"""
# You must parse model_config. JSON string is not parsed here
self.model_config = json.loads(args['model_config'])
print("model_config:", self.model_config)
self.input_names = []
for input_config in self.model_config["input"]:
self.input_names.append(input_config["name"])
print("postprocess input names:", self.input_names)
self.output_names = []
self.output_dtype = []
for output_config in self.model_config["output"]:
self.output_names.append(output_config["name"])
dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
self.output_dtype.append(dtype)
print("postprocess output names:", self.output_names)
yaml_path = os.path.abspath(os.path.dirname(__file__)) + "/deploy.yaml"
self.postprocess_ = fd.vision.segmentation.PaddleSegPostprocessor(
yaml_path)
def execute(self, requests):
"""`execute` must be implemented in every Python model. `execute`
function receives a list of pb_utils.InferenceRequest as the only
argument. This function is called when an inference is requested
for this model. Depending on the batching configuration (e.g. Dynamic
Batching) used, `requests` may contain multiple requests. Every
Python model, must create one pb_utils.InferenceResponse for every
pb_utils.InferenceRequest in `requests`. If there is an error, you can
set the error argument when creating a pb_utils.InferenceResponse.
Parameters
----------
requests : list
A list of pb_utils.InferenceRequest
Returns
-------
list
A list of pb_utils.InferenceResponse. The length of this list must
be the same as `requests`
"""
responses = []
for request in requests:
infer_outputs = pb_utils.get_input_tensor_by_name(
request, self.input_names[0])
im_info = pb_utils.get_input_tensor_by_name(request,
self.input_names[1])
infer_outputs = infer_outputs.as_numpy()
im_info = im_info.as_numpy()
for i in range(im_info.shape[0]):
im_info[i] = json.loads(im_info[i].decode('utf-8').replace(
"'", '"'))
results = self.postprocess_.run([infer_outputs], im_info[0])
r_str = fd.vision.utils.fd_result_to_json(results)
r_np = np.array(r_str, dtype=np.object_)
out_tensor = pb_utils.Tensor(self.output_names[0], r_np)
inference_response = pb_utils.InferenceResponse(
output_tensors=[out_tensor, ])
responses.append(inference_response)
return responses
def finalize(self):
"""`finalize` is called only once when the model is being unloaded.
Implementing `finalize` function is optional. This function allows
the model to perform any necessary clean ups before exit.
"""
print('Cleaning up...')

View File

@@ -0,0 +1,30 @@
name: "postprocess"
backend: "python"
input [
{
name: "post_input_1"
data_type: TYPE_INT32
dims: [-1, -1, -1]
},
{
name: "post_input_2"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
output [
{
name: "post_output"
data_type: TYPE_STRING
dims: [ -1 ]
}
]
instance_group [
{
count: 1
kind: KIND_CPU
}
]

View File

@@ -0,0 +1,12 @@
Deploy:
input_shape:
- -1
- 3
- -1
- -1
model: model.pdmodel
output_dtype: int32
output_op: argmax
params: model.pdiparams
transforms:
- type: Normalize

View File

@@ -0,0 +1,117 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import numpy as np
import os
import fastdeploy as fd
# triton_python_backend_utils is available in every Triton Python model. You
# need to use this module to create inference requests and responses. It also
# contains some utility functions for extracting information from model_config
# and converting Triton input/output types to numpy types.
import triton_python_backend_utils as pb_utils
class TritonPythonModel:
"""Your Python model must use the same class name. Every Python model
that is created must have "TritonPythonModel" as the class name.
"""
def initialize(self, args):
"""`initialize` is called only once when the model is being loaded.
Implementing `initialize` function is optional. This function allows
the model to intialize any state associated with this model.
Parameters
----------
args : dict
Both keys and values are strings. The dictionary keys and values are:
* model_config: A JSON string containing the model configuration
* model_instance_kind: A string containing model instance kind
* model_instance_device_id: A string containing model instance device ID
* model_repository: Model repository path
* model_version: Model version
* model_name: Model name
"""
# You must parse model_config. JSON string is not parsed here
self.model_config = json.loads(args['model_config'])
print("model_config:", self.model_config)
self.input_names = []
for input_config in self.model_config["input"]:
self.input_names.append(input_config["name"])
print("preprocess input names:", self.input_names)
self.output_names = []
self.output_dtype = []
for output_config in self.model_config["output"]:
self.output_names.append(output_config["name"])
# dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
# self.output_dtype.append(dtype)
self.output_dtype.append(output_config["data_type"])
print("preprocess output names:", self.output_names)
# init PaddleSegPreprocess class
yaml_path = os.path.abspath(os.path.dirname(__file__)) + "/deploy.yaml"
self.preprocess_ = fd.vision.segmentation.PaddleSegPreprocessor(
yaml_path)
#if args['model_instance_kind'] == 'GPU':
# device_id = int(args['model_instance_device_id'])
# self.preprocess_.use_gpu(device_id)
def execute(self, requests):
"""`execute` must be implemented in every Python model. `execute`
function receives a list of pb_utils.InferenceRequest as the only
argument. This function is called when an inference is requested
for this model. Depending on the batching configuration (e.g. Dynamic
Batching) used, `requests` may contain multiple requests. Every
Python model, must create one pb_utils.InferenceResponse for every
pb_utils.InferenceRequest in `requests`. If there is an error, you can
set the error argument when creating a pb_utils.InferenceResponse.
Parameters
----------
requests : list
A list of pb_utils.InferenceRequest
Returns
-------
list
A list of pb_utils.InferenceResponse. The length of this list must
be the same as `requests`
"""
responses = []
for request in requests:
data = pb_utils.get_input_tensor_by_name(request,
self.input_names[0])
data = data.as_numpy()
outputs, im_info = self.preprocess_.run(data)
# PaddleSeg preprocess has two outputs
dlpack_tensor = outputs[0].to_dlpack()
output_tensor_0 = pb_utils.Tensor.from_dlpack(self.output_names[0],
dlpack_tensor)
output_tensor_1 = pb_utils.Tensor(
self.output_names[1], np.array(
[im_info], dtype=np.object_))
inference_response = pb_utils.InferenceResponse(
output_tensors=[output_tensor_0, output_tensor_1])
responses.append(inference_response)
return responses
def finalize(self):
"""`finalize` is called only once when the model is being unloaded.
Implementing `finalize` function is optional. This function allows
the model to perform any necessary clean ups before exit.
"""
print('Cleaning up...')

View File

@@ -0,0 +1,34 @@
name: "preprocess"
backend: "python"
input [
{
name: "preprocess_input"
data_type: TYPE_UINT8
dims: [-1, -1, -1, 3 ]
}
]
output [
{
name: "preprocess_output_1"
data_type: TYPE_FP32
dims: [-1, 3, -1, -1 ]
},
{
name: "preprocess_output_2"
data_type: TYPE_STRING
dims: [ -1]
}
]
instance_group [
{
# The number of instances is 1
count: 1
# Use CPU, GPU inference option is:KIND_GPU
kind: KIND_CPU
# The instance is deployed on the 0th GPU card
# gpus: [0]
}
]

View File

@@ -0,0 +1,5 @@
# Runtime Directory
This directory holds the model files.
Paddle models must be model.pdmodel and model.pdiparams files.
ONNX models must be model.onnx files.

View File

@@ -0,0 +1,60 @@
# optional, If name is specified it must match the name of the model repository directory containing the model.
name: "runtime"
backend: "fastdeploy"
max_batch_size: 1
# Input configuration of the model
input [
{
# input name
name: "x"
# input type such as TYPE_FP32、TYPE_UINT8、TYPE_INT8、TYPE_INT16、TYPE_INT32、TYPE_INT64、TYPE_FP16、TYPE_STRING
data_type: TYPE_FP32
# input shape
dims: [3, -1, -1 ]
}
]
# The output of the model is configured in the same format as the input
output [
{
name: "argmax_0.tmp_0"
data_type: TYPE_INT32
dims: [ -1, -1 ]
}
]
# Number of instances of the model
instance_group [
{
# The number of instances is 1
count: 1
# Use GPU, CPU inference option is:KIND_CPU
kind: KIND_GPU
# The instance is deployed on the 0th GPU card
gpus: [0]
}
]
optimization {
execution_accelerators {
gpu_execution_accelerator : [ {
# use TRT engine
name: "tensorrt",
# use fp32 on TRT engine
parameters { key: "precision" value: "trt_fp32" }
},
{
name: "min_shape"
parameters { key: "x" value: "1 3 256 256" }
},
{
name: "opt_shape"
parameters { key: "x" value: "1 3 1024 1024" }
},
{
name: "max_shape"
parameters { key: "x" value: "1 3 2048 2048" }
}
]
}}

View File

@@ -0,0 +1,112 @@
import logging
import numpy as np
import time
from typing import Optional
import cv2
import json
from tritonclient import utils as client_utils
from tritonclient.grpc import InferenceServerClient, InferInput, InferRequestedOutput, service_pb2_grpc, service_pb2
LOGGER = logging.getLogger("run_inference_on_triton")
class SyncGRPCTritonRunner:
DEFAULT_MAX_RESP_WAIT_S = 120
def __init__(
self,
server_url: str,
model_name: str,
model_version: str,
*,
verbose=False,
resp_wait_s: Optional[float]=None, ):
self._server_url = server_url
self._model_name = model_name
self._model_version = model_version
self._verbose = verbose
self._response_wait_t = self.DEFAULT_MAX_RESP_WAIT_S if resp_wait_s is None else resp_wait_s
self._client = InferenceServerClient(
self._server_url, verbose=self._verbose)
error = self._verify_triton_state(self._client)
if error:
raise RuntimeError(
f"Could not communicate to Triton Server: {error}")
LOGGER.debug(
f"Triton server {self._server_url} and model {self._model_name}:{self._model_version} "
f"are up and ready!")
model_config = self._client.get_model_config(self._model_name,
self._model_version)
model_metadata = self._client.get_model_metadata(self._model_name,
self._model_version)
LOGGER.info(f"Model config {model_config}")
LOGGER.info(f"Model metadata {model_metadata}")
for tm in model_metadata.inputs:
print("tm:", tm)
self._inputs = {tm.name: tm for tm in model_metadata.inputs}
self._input_names = list(self._inputs)
self._outputs = {tm.name: tm for tm in model_metadata.outputs}
self._output_names = list(self._outputs)
self._outputs_req = [
InferRequestedOutput(name) for name in self._outputs
]
def Run(self, inputs):
"""
Args:
inputs: list, Each value corresponds to an input name of self._input_names
Returns:
results: dict, {name : numpy.array}
"""
infer_inputs = []
for idx, data in enumerate(inputs):
infer_input = InferInput(self._input_names[idx], data.shape,
"UINT8")
infer_input.set_data_from_numpy(data)
infer_inputs.append(infer_input)
results = self._client.infer(
model_name=self._model_name,
model_version=self._model_version,
inputs=infer_inputs,
outputs=self._outputs_req,
client_timeout=self._response_wait_t, )
results = {name: results.as_numpy(name) for name in self._output_names}
return results
def _verify_triton_state(self, triton_client):
if not triton_client.is_server_live():
return f"Triton server {self._server_url} is not live"
elif not triton_client.is_server_ready():
return f"Triton server {self._server_url} is not ready"
elif not triton_client.is_model_ready(self._model_name,
self._model_version):
return f"Model {self._model_name}:{self._model_version} is not ready"
return None
if __name__ == "__main__":
model_name = "paddleseg"
model_version = "1"
url = "localhost:8001"
runner = SyncGRPCTritonRunner(url, model_name, model_version)
im = cv2.imread("cityscapes_demo.png")
im = np.array([im, ])
# batch input
# im = np.array([im, im, im])
for i in range(1):
result = runner.Run([im, ])
for name, values in result.items():
print("output_name:", name)
# values is batch
for value in values:
value = json.loads(value)
print(
"Only print the first 20 labels in label_map of SEG_RESULT")
value["label_map"] = value["label_map"][:20]
print(value)

View File

@@ -31,7 +31,8 @@ class FASTDEPLOY_DECL PaddleSegPreprocessor {
/** \brief Process the input image and prepare input tensors for runtime
*
* \param[in] images The input image data list, all the elements are returned by cv::imread()
* \param[in] outputs The output tensors which will feed in runtime, include image
* \param[in] outputs The output tensors which will feed in runtime
* \param[in] imgs_info The original input images shape info map, key is "shape_info", value is vector<array<int, 2>> a{{height, width}}
* \return true if the preprocess successed, otherwise false
*/
virtual bool Run(