# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json

import numpy as np
import fastdeploy as fd

# triton_python_backend_utils is available in every Triton Python model. You
# need to use this module to create inference requests and responses. It also
# contains some utility functions for extracting information from model_config
# and converting Triton input/output types to numpy types.
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Your Python model must use the same class name.

    Every Python model that is created must have "TritonPythonModel" as the
    class name.
    """

    def initialize(self, args):
        """`initialize` is called only once when the model is being loaded.

        Implementing `initialize` is optional. This function allows the model
        to initialize any state associated with it.

        Parameters
        ----------
        args : dict
          Both keys and values are strings. The dictionary keys and values are:
          * model_config: A JSON string containing the model configuration
          * model_instance_kind: A string containing model instance kind
          * model_instance_device_id: A string containing model instance
            device ID
          * model_repository: Model repository path
          * model_version: Model version
          * model_name: Model name
        """
        # `model_config` arrives as a JSON string and must be parsed manually.
        self.model_config = json.loads(args['model_config'])
        print("model_config:", self.model_config)

        self.input_names = []
        for input_config in self.model_config["input"]:
            self.input_names.append(input_config["name"])
        print("preprocess input names:", self.input_names)

        self.output_names = []
        self.output_dtype = []
        for output_config in self.model_config["output"]:
            self.output_names.append(output_config["name"])
            dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
            self.output_dtype.append(dtype)
        print("preprocess output names:", self.output_names)

        self.preprocessor_ = fd.vision.detection.YOLOv5Preprocessor()

    def execute(self, requests):
        """`execute` must be implemented in every Python model.

        `execute` receives a list of pb_utils.InferenceRequest as its only
        argument. This function is called when inference is requested for this
        model. Depending on the batching configuration (e.g. dynamic
        batching), `requests` may contain multiple requests. Every Python
        model must create one pb_utils.InferenceResponse for every
        pb_utils.InferenceRequest in `requests`. If there is an error, you can
        set the error argument when creating a pb_utils.InferenceResponse.

        Parameters
        ----------
        requests : list
          A list of pb_utils.InferenceRequest

        Returns
        -------
        list
          A list of pb_utils.InferenceResponse. The length of this list must
          be the same as `requests`.
        """
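        # `requests` may hold more than one request when dynamic batching is
        # enabled; each request is answered below with exactly one
        # pb_utils.InferenceResponse, in order.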
        responses = []
        for request in requests:
            data = pb_utils.get_input_tensor_by_name(request,
                                                     self.input_names[0])
            data = data.as_numpy()

            # The YOLOv5 preprocessor produces two outputs: the preprocessed
            # image tensor and the per-image resize/scale records that the
            # postprocessor needs to map boxes back to the original images.
            outputs, im_infos = self.preprocessor_.run(data)

            # Hand the FastDeploy tensor to Triton via DLPack to avoid a copy.
            dlpack_tensor = outputs[0].to_dlpack()
            output_tensor_0 = pb_utils.Tensor.from_dlpack(self.output_names[0],
                                                          dlpack_tensor)
            output_tensor_1 = pb_utils.Tensor(
                self.output_names[1], np.array(im_infos, dtype=np.object_))

            inference_response = pb_utils.InferenceResponse(
                output_tensors=[output_tensor_0, output_tensor_1])
            responses.append(inference_response)
        return responses

    def finalize(self):
        """`finalize` is called only once when the model is being unloaded.

        Implementing `finalize` is optional. This function allows the model to
        perform any necessary cleanup before exit.
        """
        print('Cleaning up...')
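
# ---------------------------------------------------------------------------
# Minimal client-side sketch (not part of the Triton model itself): one way a
# client might call this preprocessing model over HTTP. The model name
# "preprocess" and the tensor names "preprocess_input" / "preprocess_output"
# are assumptions for illustration; the real names come from this model's
# config.pbtxt. The __main__ guard keeps the sketch from running when Triton
# imports this module.
if __name__ == "__main__":
    import cv2
    import tritonclient.http as httpclient

    # Read one image and add a batch dimension: shape [1, H, W, 3], UINT8.
    batch = np.expand_dims(cv2.imread("test.jpg"), axis=0)

    client = httpclient.InferenceServerClient(url="localhost:8000")
    infer_input = httpclient.InferInput("preprocess_input",
                                        list(batch.shape), "UINT8")
    infer_input.set_data_from_numpy(batch)

    result = client.infer(model_name="preprocess", inputs=[infer_input])
    print("preprocessed tensor shape:",
          result.as_numpy("preprocess_output").shape)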