From de72162af9a5bed221e1ec3034f8affe5410bcdb Mon Sep 17 00:00:00 2001
From: WJJ1995
Date: Wed, 14 Dec 2022 10:14:29 +0800
Subject: [PATCH] [Serving] Fixed preprocess&&postprocess in YOLOv5 Serving
 (#874)

* add onnx_ort_runtime demo
* rm in requirements
* support batch eval
* fixed MattingResults bug
* move assignment for DetectionResult
* integrated x2paddle
* add model convert readme
* update readme
* re-lint
* add processor api
* Add MattingResult Free
* change valid_cpu_backends order
* add ppocr benchmark
* mv bs from 64 to 32
* fixed quantize.md
* fixed quantize bugs
* Add Monitor for benchmark
* update mem monitor
* Set trt_max_batch_size default 1
* fixed ocr benchmark bug
* support yolov5 in serving
* Fixed yolov5 serving
* Fixed postprocess

Co-authored-by: Jason
---
 .../serving/models/postprocess/1/model.py |  1 -
 .../serving/models/postprocess/1/model.py | 36 +++++--------
 .../serving/models/preprocess/1/model.py  | 33 ++++++-----------
 3 files changed, 19 insertions(+), 51 deletions(-)
 mode change 100644 => 100755 examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py

diff --git a/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py b/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py
old mode 100644
new mode 100755
index b328b249b..0ab7dcdc4
--- a/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py
+++ b/examples/vision/classification/paddleclas/serving/models/postprocess/1/model.py
@@ -84,7 +84,6 @@ class TritonPythonModel:
           be the same as `requests`
         """
         responses = []
-        # print("num:", len(requests), flush=True)
         for request in requests:
             infer_outputs = pb_utils.get_input_tensor_by_name(
                 request, self.input_names[0])
diff --git a/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py b/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py
index 30a744b68..7c608db43 100644
--- a/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py
+++ b/examples/vision/detection/yolov5/serving/models/postprocess/1/model.py
@@ -61,31 +61,7 @@ class TritonPythonModel:
             dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
             self.output_dtype.append(dtype)
         print("postprocess output names:", self.output_names)
-
-    def yolov5_postprocess(self, infer_outputs, im_infos):
-        """
-        Parameters
-        ----------
-        infer_outputs : numpy.array
-            Contains the batch of inference results
-        im_infos : numpy.array(b'{}')
-        Returns
-        -------
-        numpy.array
-            yolov5 postprocess result
-        """
-        results = []
-        for i_batch in range(len(im_infos)):
-            new_infer_output = infer_outputs[i_batch:i_batch + 1]
-            new_im_info = im_infos[i_batch].decode('utf-8').replace("'", '"')
-            new_im_info = json.loads(new_im_info)
-
-            result = fd.vision.detection.YOLOv5.postprocess(
-                [new_infer_output, ], new_im_info)
-
-            r_str = fd.vision.utils.fd_result_to_json(result)
-            results.append(r_str)
-        return np.array(results, dtype=np.object)
+        self.postprocessor_ = fd.vision.detection.YOLOv5Postprocessor()
 
     def execute(self, requests):
         """`execute` must be implemented in every Python model. `execute`
@@ -107,7 +83,6 @@ class TritonPythonModel:
           be the same as `requests`
         """
         responses = []
-        # print("num:", len(requests), flush=True)
         for request in requests:
             infer_outputs = pb_utils.get_input_tensor_by_name(
                 request, self.input_names[0])
@@ -115,10 +90,15 @@ class TritonPythonModel:
                 self.input_names[1])
             infer_outputs = infer_outputs.as_numpy()
             im_infos = im_infos.as_numpy()
+            for i in range(im_infos.shape[0]):
+                im_infos[i] = json.loads(im_infos[i].decode('utf-8').replace(
+                    "'", '"'))
 
-            results = self.yolov5_postprocess(infer_outputs, im_infos)
+            results = self.postprocessor_.run([infer_outputs], im_infos)
+            r_str = fd.vision.utils.fd_result_to_json(results)
+            r_np = np.array(r_str, dtype=np.object)
 
-            out_tensor = pb_utils.Tensor(self.output_names[0], results)
+            out_tensor = pb_utils.Tensor(self.output_names[0], r_np)
             inference_response = pb_utils.InferenceResponse(
                 output_tensors=[out_tensor, ])
             responses.append(inference_response)
diff --git a/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py b/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py
index cd22aa37b..cf4f7e8e8 100644
--- a/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py
+++ b/examples/vision/detection/yolov5/serving/models/preprocess/1/model.py
@@ -61,21 +61,7 @@ class TritonPythonModel:
             dtype = pb_utils.triton_string_to_numpy(output_config["data_type"])
             self.output_dtype.append(dtype)
         print("preprocess output names:", self.output_names)
-
-    def yolov5_preprocess(self, input_data):
-        """
-        According to Triton input, the preprocessing results of YoloV5 model are obtained.
-        """
-        im_infos = []
-        pre_outputs = []
-        for i_batch in input_data:
-            pre_output, im_info = fd.vision.detection.YOLOv5.preprocess(
-                i_batch)
-            pre_outputs.append(pre_output)
-            im_infos.append(im_info)
-        im_infos = np.array(im_infos, dtype=np.object)
-        pre_outputs = np.concatenate(pre_outputs, axis=0)
-        return pre_outputs, im_infos
+        self.preprocessor_ = fd.vision.detection.YOLOv5Preprocessor()
 
     def execute(self, requests):
         """`execute` must be implemented in every Python model. `execute`
@@ -97,18 +83,21 @@ class TritonPythonModel:
           be the same as `requests`
         """
         responses = []
-        # print("num:", len(requests), flush=True)
         for request in requests:
            data = pb_utils.get_input_tensor_by_name(request,
                                                     self.input_names[0])
            data = data.as_numpy()
-           outputs = self.yolov5_preprocess(data)
-           output_tensors = []
-           for idx, output in enumerate(outputs):
-               output_tensors.append(
-                   pb_utils.Tensor(self.output_names[idx], output))
+           outputs, im_infos = self.preprocessor_.run(data)
+
+           # YOLOv5 preprocess has two output
+           dlpack_tensor = outputs[0].to_dlpack()
+           output_tensor_0 = pb_utils.Tensor.from_dlpack(self.output_names[0],
+                                                         dlpack_tensor)
+           output_tensor_1 = pb_utils.Tensor(
+               self.output_names[1], np.array(
+                   im_infos, dtype=np.object))
            inference_response = pb_utils.InferenceResponse(
-               output_tensors=output_tensors)
+               output_tensors=[output_tensor_0, output_tensor_1])
            responses.append(inference_response)
 
        return responses
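
Note on the new processor API: the patch replaces the per-image
`fd.vision.detection.YOLOv5.preprocess`/`postprocess` static helpers with
stateful `YOLOv5Preprocessor`/`YOLOv5Postprocessor` objects that handle a
whole batch per call. Below is a minimal sketch of that API outside Triton;
it assumes the preprocessor accepts a list of BGR numpy images (the Triton
model hands it a numpy batch), "demo.jpg" is a placeholder path, and a
random tensor stands in for a real runtime's infer output (its shape
assumes a stock 640x640, 80-class YOLOv5 model).

    import cv2
    import numpy as np
    import fastdeploy as fd

    preprocessor = fd.vision.detection.YOLOv5Preprocessor()
    postprocessor = fd.vision.detection.YOLOv5Postprocessor()

    # Letterbox/normalize the images; also returns per-image scale records
    # that the postprocessor needs to undo the resize later.
    im = cv2.imread("demo.jpg")  # placeholder image
    inputs, ims_info = preprocessor.run([im])

    # A real deployment feeds inputs[0] to the runtime (via DLPack in the
    # Triton model); here a random tensor stands in for the infer output.
    infer_output = np.random.rand(1, 25200, 85).astype("float32")

    # Map raw predictions back to original-image coordinates.
    results = postprocessor.run([infer_output], ims_info)
    print(fd.vision.utils.fd_result_to_json(results))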
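
Note on the im_info round trip: the `json.loads(... .replace("'", '"'))`
in the postprocess model exists because the preprocess model ships each
`im_info` record through a Triton BYTES tensor, where the dict is
stringified with Python's single-quote repr. A small illustration of the
round trip (the keys shown are illustrative, not taken from the patch):

    import json

    # preprocess side: the record reaches the wire as the dict's repr,
    # which uses single quotes and therefore is not valid JSON yet
    im_info = {"input_shape": [640.0, 640.0], "output_shape": [720.0, 1280.0]}
    wire = str(im_info).encode("utf-8")

    # postprocess side: decode, swap quotes to get valid JSON, parse back
    restored = json.loads(wire.decode("utf-8").replace("'", '"'))
    assert restored == im_info

The quote swap is safe here only because the values are numeric; a string
value containing an apostrophe would break the parse.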