diff --git a/MobileStereoNet/README.md b/MobileStereoNet/README.md index af3c73a..f029779 100644 --- a/MobileStereoNet/README.md +++ b/MobileStereoNet/README.md @@ -6,7 +6,7 @@ 样例输入:双目相机拍摄的两张jpg图片 -样例输出:物体的三维坐标 +样例输出:物体的三维坐标和距离 ### 1.1 支持的产品 @@ -19,7 +19,7 @@ ### 1.3 软件方案介绍 -本方案中,利用Opencv对输入图片进行畸变矫正,然后采用yolov3预训练模型对矫正图片进行物体识别,最终根据识别得到的生成框坐标输出物体的三维坐标。 +本方案中,利用Opencv对输入图片进行畸变矫正,然后采用yolov3预训练模型对矫正图片进行物体识别,最终根据识别得到的生成框坐标输出物体的三维坐标和距离。 表1.1 系统方案中各模块功能: @@ -28,9 +28,9 @@ | 1 | 图像输入 | 使用Opencv的imread读入图片 | | 2 | 图像放缩 | 使用Opencv的resize放缩检测图片到416*416大小 | | 3 | 畸变矫正 | 利用Opencv和相机参数重构图片 | -| 4 | 视察计算 | 使用Opencv的SGBM方法计算图片视差 | +| 4 | 视差计算 | 使用Opencv的SGBM方法计算图片视差 | | 5 | 物体检测 | 利用Yolov3的检测模型,检测图片中物体并得到检测框坐标 | -| 6 | 结果输出 | 根据检测框坐标输出物体的三维坐标 | +| 6 | 结果输出 | 根据检测框坐标输出物体的三维坐标和距离 | @@ -44,9 +44,12 @@ │ ├── yolov3_tf_bs1_fp16.cfg # yolov3后处理配置 │ └── yolov3_tf_aipp.om ├── image # 存放测试图片 -|── camera_configs.py # 相机参数文件 -|── yolov3_infer.py # 获取坐标文件 -|── yolov3_infer.py # yolov3模型推理文件 +├── RESOURCES +│ └── flow.png +├── camera_configs.py # 获取相机参数 +├── camera.xml # 相机参数文件 +├── main.py # 获取坐标文件 +├── yolov3_infer.py # yolov3模型推理文件 ├── ExportStereoParams.m # matlab导出xml函数 └── README.md ``` @@ -64,9 +67,9 @@ 1、由于yolov3模型限制,仅支持获取在`./models/yolov3.names`文件中的**80**种物体的三维坐标。 -2、对超出相机标定范围的物体定位,结果不是很理想。 +2、对超出相机标定范围的物体定位,误差可能会很大。 -3、不同标定相机需要对`yolo_deep.py`文件中函数**stereo_match**的**numDisparities**和**uniquenessRatio**这两个主要参数进行调整。 +3、不同标定相机需要对`main.py`文件中函数**stereo_match**的**numDisparities**和**uniquenessRatio**这两个主要参数进行调整。注意**numDisparities**参数必须可以被16整除,**uniquenessRatio**参数值通常在5-15范围内。 ## 2 环境依赖 @@ -90,7 +93,7 @@ ## 依赖安装 -创建虚拟环境后运行如下命令: +创建虚拟环境并激活后运行如下命令: ``` pip install opencv-python @@ -103,23 +106,31 @@ pip install opencv-python **步骤1** -[下载YOLOv3模型](https://www.hiascend.com/zh/software/modelzoo/models/detail/C/210261e64adc42d2b3d84c447844e4c7/1)放入`./model`文件夹中。 
+[下载YOLOv3模型](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/c-version/YoloV3_for_TensorFlow/zh/1.6/m/YOLOv3_TensorFlow_1.6_model.zip)并解压,将其中的`yolov3_tf_aipp.om`放入`./model`文件夹中。 **步骤2** -将matlab标定导出的xml相机参数文件放入项目根目录中。 +使用Matlab标定双目相机,然后用提供的`ExportStereoParams.m`文件将相机参数导出成`camera.xml`文件,放入项目根目录中。 + +双目相机标定方法可以参考此处:[Link](https://blog.csdn.net/qq_38236355/article/details/89280633) **步骤3** -自行选择两张jpg文件,放入`./image`文件夹中,再执行 +使用步骤2标定的相机拍摄双目照片,放入`./image`文件夹中。注意如果相机拍出来的左右图片在一张图上,则需要自行分割为两张大小一样的图片,确保左右图片张数相等。根据放入的图片名称修改`main.py`中`LEFTIMG`和`RIGHTIMG`的图片命名格式,再执行: ``` -python yolo_deep.py +python main.py ``` 结果展示: -![pic](RESOURCES/result.png) +``` +The result of case 10: + +Pixel coordinates x = 500, y = 248 +3D coordinates (305.618927, 90.307648, 2773.036865) mm +umbrella's actual distance: 2.791289 m +``` ## 4 常见问题 @@ -135,4 +146,4 @@ E20221122 15:43:32.366075 9866 MxOmModelDesc.cpp:138] Infer failed, result is n **解决方案:** -同时使用dvpp和`yolov3_tf_aipp.om`模型时会报上述错误。解决方式一,将B_USEDVPP设置为False,使用opencv处理图片。解决方式二,将模型换成`yolov3_tf_bs1_fp16.om` \ No newline at end of file +同时设置`B_USEDVPP=True`和使用`yolov3_tf_aipp.om`模型时会报上述错误。解决方式一,将B_USEDVPP设置为False,使用opencv处理图片。解决方式二,将模型换成`yolov3_tf_bs1_fp16.om` \ No newline at end of file diff --git a/MobileStereoNet/RESOURCES/result.png b/MobileStereoNet/RESOURCES/result.png deleted file mode 100644 index 69e1fe2..0000000 Binary files a/MobileStereoNet/RESOURCES/result.png and /dev/null differ diff --git a/MobileStereoNet/camera_configs.py b/MobileStereoNet/camera_configs.py index 78b07a4..b5696a7 100644 --- a/MobileStereoNet/camera_configs.py +++ b/MobileStereoNet/camera_configs.py @@ -37,10 +37,10 @@ right_camera_matrix = np.array([camera_list[2][:3], camera_list[2][6:]]) right_distortion = np.array([camera_list[3]]) -# 旋转关系向量 +# Rotation vector R = np.array([camera_list[4][:3], camera_list[4][3:6], camera_list[4][6:]]) -# 平移关系向量 +# Translation vector T = np.array(camera_list[5]) diff --git a/MobileStereoNet/main.py 
b/MobileStereoNet/main.py
new file mode 100644
index 0000000..ad61cff
--- /dev/null
+++ b/MobileStereoNet/main.py
@@ -0,0 +1,209 @@
+# Copyright(C) 2022. Huawei Technologies Co.,Ltd. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import glob
+import math
+import collections
+import cv2
+import numpy as np
+import camera_configs
+from yolov3_infer import yolo_infer
+
+LEFTIMG = "left_*.jpg"
+RIGHTIMG = "right_*.jpg"
+YOLO_RESIZELEN = 416
+IMAGE_DIR = "image"
+# yolo_infer takes an image path, so the rectified left image is passed through this file
+TMP_RECTIFIED = "SGBM_left.jpg"
+
+# Remap tables of both cameras plus the reprojection matrix Q from cv2.stereoRectify
+Camera = collections.namedtuple('Camera', ['left_map1', 'left_map2', 'right_map1', 'right_map2', 'Q'])
+
+
+def get_rectify(height, width):
+    """Build the rectification maps for stereo images of the given size.
+
+    Args:
+        height: image height in pixels.
+        width: image width in pixels.
+
+    Returns:
+        A Camera namedtuple with the remap tables of the left and right camera
+        and the reprojection matrix Q.
+    """
+    left_matrix = camera_configs.left_camera_matrix
+    right_matrix = camera_configs.right_camera_matrix
+    left_distortion = camera_configs.left_distortion
+    right_distortion = camera_configs.right_distortion
+    R = camera_configs.R
+    T = camera_configs.T
+
+    # OpenCV expects the image size as (width, height)
+    size = (width, height)
+
+    # Calculate correction transformation
+    R1, R2, P1, P2, Q, _, _ = cv2.stereoRectify(left_matrix, left_distortion,
+                                                right_matrix, right_distortion, size, R, T)
+    # Calculate correction map
+    left_map1, left_map2 = cv2.initUndistortRectifyMap(left_matrix, left_distortion, R1, P1, size, cv2.CV_16SC2)
+    right_map1, right_map2 = cv2.initUndistortRectifyMap(right_matrix, right_distortion, R2, P2, size, cv2.CV_16SC2)
+
+    return Camera(left_map1, left_map2, right_map1, right_map2, Q)
+
+
+def stereo_match(imgleft, imgright):
+    """Compute the SGBM disparity map of a rectified grayscale image pair.
+
+    numDisparities and uniquenessRatio are the main parameters to tune for a
+    differently calibrated camera; numDisparities must be divisible by 16.
+
+    Args:
+        imgleft: rectified left image in grayscale.
+        imgright: rectified right image in grayscale.
+
+    Returns:
+        The raw result of StereoSGBM.compute; process_pair() divides it by 16
+        before reprojecting it to 3D.
+    """
+    stereo = cv2.StereoSGBM_create(minDisparity=0,
+                                   numDisparities=16 * 6,
+                                   blockSize=5,
+                                   P1=216,
+                                   P2=864,
+                                   disp12MaxDiff=1,
+                                   uniquenessRatio=10,
+                                   speckleWindowSize=0,
+                                   speckleRange=1,
+                                   preFilterCap=60,
+                                   mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)
+    return stereo.compute(imgleft, imgright)
+
+
+def collect_pairs():
+    """Return the sorted (left, right) image path pairs found in IMAGE_DIR.
+
+    Exits with a non-zero status when no image is found or when the number of
+    left and right images differs.
+    """
+    left_paths = sorted(glob.glob(os.path.join(IMAGE_DIR, LEFTIMG)))
+    right_paths = sorted(glob.glob(os.path.join(IMAGE_DIR, RIGHTIMG)))
+
+    if not left_paths or not right_paths:
+        print("The dataset is empty!.Please check the dataset and files.")
+        sys.exit(1)
+
+    if len(left_paths) != len(right_paths):
+        print("Picture missing!.Please check the dataset and files.")
+        sys.exit(1)
+
+    return list(zip(left_paths, right_paths))
+
+
+def report_detection(coor, three_d):
+    """Print pixel position, 3D position and distance of one detected object.
+
+    Args:
+        coor: one detection returned by yolo_infer (x1, y1 and className are read).
+        three_d: per-pixel 3D coordinates from cv2.reprojectImageTo3D.
+    """
+    rows, cols = three_d.shape[:2]
+    x = int(coor.x1)
+    y = int(coor.y1)
+    name = coor.className
+
+    print('\nPixel coordinates x = {}, y = {}'.format(x, y))
+
+    # Keep the one-pixel offset, but clamp: without it a box touching the image
+    # border yields index -1, which silently reads the opposite border
+    col = min(max(x - 1, 0), cols - 1)
+    row = min(max(y - 1, 0), rows - 1)
+    point = three_d[row, col]
+
+    # Pixels without a usable disparity may reproject to inf/nan
+    if not np.isfinite(point).all():
+        print("{}: no valid depth at this pixel\n".format(name))
+        return
+
+    print("3D coordinates ({:f}, {:f}, {:f}) mm ".format(point[0], point[1], point[2]))
+    distance = math.sqrt(point[0] ** 2 + point[1] ** 2 + point[2] ** 2)
+    distance = distance / 1000.0  # mm -> m
+    print("{}'s actual distance: {:f} m\n".format(name, distance))
+
+
+def process_pair(num, left, right, rectify_cache):
+    """Rectify one stereo pair, run detection on it and print the results.
+
+    Args:
+        num: 1-based case number used in the printed header.
+        left: path of the left image.
+        right: path of the right image.
+        rectify_cache: dict mapping (height, width) to the Camera maps, so the
+            maps are computed once per image size instead of once per pair.
+    """
+    img1 = cv2.imread(left)
+    img2 = cv2.imread(right)
+
+    # cv2.imread returns None instead of raising when a file cannot be decoded
+    if img1 is None or img2 is None:
+        print("Case {}: cannot read {} or {}, skipped.".format(num, left, right))
+        return
+    if img1.shape[:2] != img2.shape[:2]:
+        print("Case {}: {} and {} differ in size, skipped.".format(num, left, right))
+        return
+
+    img_size = img1.shape[0:2]
+    if img_size not in rectify_cache:
+        rectify_cache[img_size] = get_rectify(*img_size)
+    configs = rectify_cache[img_size]
+
+    # Distortion correction
+    img1_rectified = cv2.remap(img1, configs.left_map1, configs.left_map2, cv2.INTER_LINEAR)
+    img2_rectified = cv2.remap(img2, configs.right_map1, configs.right_map2, cv2.INTER_LINEAR)
+    cv2.imwrite(TMP_RECTIFIED, img1_rectified)
+
+    # Set the picture as a grayscale image to prepare for SGBM
+    imgL = cv2.cvtColor(img1_rectified, cv2.COLOR_BGR2GRAY)
+    imgR = cv2.cvtColor(img2_rectified, cv2.COLOR_BGR2GRAY)
+
+    # Generate parallax map according to SGBM/Semi-Global Block Matching
+    parallax = stereo_match(imgL, imgR)
+
+    # Expand the picture to 3d space, and the value in z direction is the current distance
+    threeD = cv2.reprojectImageTo3D(parallax.astype(np.float32) / 16., configs.Q)
+
+    coordinate = yolo_infer(TMP_RECTIFIED, YOLO_RESIZELEN)
+
+    print("The result of case %d :" % num)
+
+    for coor in coordinate:
+        report_detection(coor, threeD)
+
+
+def main():
+    """Process every stereo pair in IMAGE_DIR and print the detections' 3D positions."""
+    pairs = collect_pairs()
+    rectify_cache = {}
+    try:
+        for num, (left, right) in enumerate(pairs, start=1):
+            process_pair(num, left, right, rectify_cache)
+    finally:
+        # Remove the temporary image even when a pair fails half-way
+        if os.path.exists(TMP_RECTIFIED):
+            os.remove(TMP_RECTIFIED)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/MobileStereoNet/yolo_deep.py b/MobileStereoNet/yolo_deep.py
deleted file mode 100644
index 43b5485..0000000
--- a/MobileStereoNet/yolo_deep.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# Copyright(C) 2022. Huawei Technologies Co.,Ltd. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
- -import math -import collections -import cv2 -import numpy as np -import camera_configs -from yolov3_infer import yolo_infer - -LEFTIMG_PATH = "./image/left_0.jpg" -RIGHTIMG_PATH = "./image/right_0.jpg" -YOLO_RESIZELEN = 416 - - -def get_rectify(height, width): - left_matrix = camera_configs.left_camera_matrix - right_matrix = camera_configs.right_camera_matrix - left_distortion = camera_configs.left_distortion - right_distortion = camera_configs.right_distortion - R = camera_configs.R - T = camera_configs.T - - # 图像尺寸 - size = (width, height) - - # 进行立体更正 - R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(left_matrix, left_distortion, - right_matrix, right_distortion, size, R, T) - # 计算更正map - left_map1, left_map2 = cv2.initUndistortRectifyMap(left_matrix, left_distortion, R1, P1, size, cv2.CV_16SC2) - right_map1, right_map2 = cv2.initUndistortRectifyMap(right_matrix, right_distortion, R2, P2, size, cv2.CV_16SC2) - - Camera = collections.namedtuple('Camera', ['left_map1', 'left_map2', 'right_map1', 'right_map2', 'Q']) - - camera = Camera(left_map1, left_map2, right_map1, right_map2, Q) - return camera - - -def stereo_match(imgleft, imgright): - stereo = cv2.StereoSGBM_create(minDisparity=0, - numDisparities=16 * 6, - blockSize=5, - P1=216, - P2=864, - disp12MaxDiff=1, - uniquenessRatio=10, - speckleWindowSize=0, - speckleRange=1, - preFilterCap=60, - mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY) - disparity = stereo.compute(imgleft, imgright) - return disparity - - -if __name__ == '__main__': - img1 = cv2.imread(LEFTIMG_PATH) - img2 = cv2.imread(RIGHTIMG_PATH) - - img_height, img_width = img1.shape[0:2] - - configs = get_rectify(img_height, img_width) - - # 根据更正map对图片进行重构 - img1_rectified = cv2.remap(img1, configs.left_map1, configs.left_map2, cv2.INTER_LINEAR) - img2_rectified = cv2.remap(img2, configs.right_map1, configs.right_map2, cv2.INTER_LINEAR) - cv2.imwrite("SGBM_left.jpg", img1_rectified) - - # 将图片置为灰度图,为StereoSGBM作准备 - imgL = cv2.cvtColor(img1_rectified, 
cv2.COLOR_BGR2GRAY) - imgR = cv2.cvtColor(img2_rectified, cv2.COLOR_BGR2GRAY) - - # 根据SGBM/Semi-Global Block Matching方法生成差异图 - left_match = stereo_match(imgL, imgR) - - # 将图片扩展至3d空间中,其z方向的值则为当前的距离 - threeD = cv2.reprojectImageTo3D(left_match.astype(np.float32) / 16., configs.Q) - - # 因为om模型读取要YUV格式,前面cv读取处理是BGR,我暂时没找到直接定义Image类的方法,所以重读一遍重构后的图片 - coordinate = yolo_infer("SGBM_left.jpg", YOLO_RESIZELEN) - - for coor in coordinate: - x = coor.x1 - y = coor.y1 - - x = int(x) - y = int(y) - - print('\n像素坐标 x = %d, y = %d' % (x, y)) - - x = x - 1 - y = y - 1 - print("世界坐标xyz 是:", threeD[y][x][0] / 1000.0, threeD[y][x][1] / 1000.0, threeD[y][x][2] / 1000.0, "m") - distance = math.sqrt(threeD[y][x][0] ** 2 + threeD[y][x][1] ** 2 + threeD[y][x][2] ** 2) - distance = distance / 1000.0 # mm -> m - print("距离是:", distance, "m") diff --git a/MobileStereoNet/yolov3_infer.py b/MobileStereoNet/yolov3_infer.py index 56e46c5..235eaf0 100644 --- a/MobileStereoNet/yolov3_infer.py +++ b/MobileStereoNet/yolov3_infer.py @@ -18,18 +18,17 @@ import cv2 from mindx.sdk import base from mindx.sdk.base import Tensor, Model, Size, log, ImageProcessor, post, BTensor -DEVICE_ID = 0 # 芯片ID -MODEL_PATH = "./model/yolov3_tf_aipp.om" # 模型的路径 -CONFIG_PATH = "./model/yolov3_tf_bs1_fp16.cfg" # 模型配置文件的路径 -LABEL_PATH = "./model/yolov3.names" # 分类标签文件的路径 -B_USEDVPP = False # 使用dvpp图像处理器启用,使用opencv时False +DEVICE_ID = 0 +MODEL_PATH = "./model/yolov3_tf_aipp.om" +CONFIG_PATH = "./model/yolov3_tf_bs1_fp16.cfg" +LABEL_PATH = "./model/yolov3.names" +B_USEDVPP = False # Enabled with dvpp image processor, false with opencv def yolo_infer(image_path, yolo_resize): - yolo = Model(MODEL_PATH, DEVICE_ID) # 创造模型对象 + yolo = Model(MODEL_PATH, DEVICE_ID) image_tensor = [] if B_USEDVPP: - # 创造图像处理器对象!!!!!使用该方法处理后数据已在device侧 image_processor0 = ImageProcessor(DEVICE_ID) decode_img = image_processor0.decode(image_path, base.nv12) @@ -39,7 +38,7 @@ def yolo_infer(image_path, yolo_resize): image_processor1 = 
ImageProcessor(DEVICE_ID) size_cof = Size(yolo_resize, yolo_resize) resize_img = image_processor1.resize(decode_img, size_cof) - image_tensor = [resize_img.to_tensor()] # 推理前需要转换为tensor的List,数据已在device侧无需转移 + image_tensor = [resize_img.to_tensor()] else: image = np.array(cv2.imread(image_path)) @@ -50,16 +49,15 @@ def yolo_infer(image_path, yolo_resize): resize_img = cv2.resize(image, size_cof, interpolation=cv2.INTER_LINEAR) - yuv_img = cv2.cvtColor(resize_img, cv2.COLOR_BGR2YUV) - yuv_img = yuv_img[np.newaxis, :, :] - image_tensor = Tensor(yuv_img) # 推理前需要转换为tensor的List,使用Tensor类来构建。 + yuv_img = resize_img[np.newaxis, :, :] + image_tensor = Tensor(yuv_img) - image_tensor.to_device(DEVICE_ID) # 重要,需要转移至device侧 + image_tensor.to_device(DEVICE_ID) image_tensor = [image_tensor] outputs = yolo.infer(image_tensor) - print("-----------YOLO Infer Success!----------------") + yolov3_post = post.Yolov3PostProcess( - config_path=CONFIG_PATH, label_path=LABEL_PATH) # 构造对应的后处理对象 + config_path=CONFIG_PATH, label_path=LABEL_PATH) resize_info = base.ResizedImageInfo() resize_info.heightResize = yolo_resize