Modify comments and readme

This commit is contained in:
cchenghao
2022-12-05 20:22:00 +08:00
parent e506795d04
commit 4b2cd2afc2
6 changed files with 176 additions and 139 deletions

View File

@@ -6,7 +6,7 @@
 Sample input: two jpg images captured by a stereo (binocular) camera
-Sample output: the object's 3D coordinates
+Sample output: the object's 3D coordinates and distance
 ### 1.1 Supported Products
@@ -19,7 +19,7 @@
 ### 1.3 Software Solution Overview
-In this solution, OpenCV is used to correct distortion in the input images, a pretrained YOLOv3 model then detects objects in the rectified images, and finally the 3D coordinates of each object are output from the detected bounding-box coordinates.
+In this solution, OpenCV is used to correct distortion in the input images, a pretrained YOLOv3 model then detects objects in the rectified images, and finally the 3D coordinates and distance of each object are output from the detected bounding-box coordinates.
 Table 1.1 Functions of each module in the system:
@@ -28,9 +28,9 @@
 | 1 | Image input | Read the images with OpenCV imread |
 | 2 | Image resizing | Resize the detection image to 416*416 with OpenCV resize |
 | 3 | Distortion correction | Rectify the images using OpenCV and the camera parameters |
 | 4 | Disparity computation | Compute the image disparity with OpenCV's SGBM method |
 | 5 | Object detection | Detect objects in the image with the YOLOv3 model and obtain bounding-box coordinates |
-| 6 | Result output | Output the object's 3D coordinates from the bounding-box coordinates |
+| 6 | Result output | Output the object's 3D coordinates and distance from the bounding-box coordinates |
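To make the flow in Table 1.1 concrete, here is a minimal sketch of how modules 3-6 chain together; it mirrors the logic of `main.py` added in this commit, and the function name `locate_objects` and the `maps`/`q_matrix` arguments are illustrative stand-ins for the rectification maps and reprojection matrix produced during calibration:

```
import cv2
import numpy as np


def locate_objects(img_left, img_right, maps, q_matrix):
    """Sketch of modules 3-6: rectify both views, compute disparity, back-project to 3D."""
    left_rect = cv2.remap(img_left, maps[0], maps[1], cv2.INTER_LINEAR)    # distortion correction
    right_rect = cv2.remap(img_right, maps[2], maps[3], cv2.INTER_LINEAR)
    sgbm = cv2.StereoSGBM_create(minDisparity=0, numDisparities=16 * 6, blockSize=5)
    disparity = sgbm.compute(cv2.cvtColor(left_rect, cv2.COLOR_BGR2GRAY),  # disparity computation
                             cv2.cvtColor(right_rect, cv2.COLOR_BGR2GRAY))
    # Back-project each pixel to (X, Y, Z) in millimetres in the left camera frame
    points_3d = cv2.reprojectImageTo3D(disparity.astype(np.float32) / 16.0, q_matrix)
    # YOLOv3 detection (module 5) runs on left_rect; module 6 reads points_3d at each box corner
    return left_rect, points_3d
```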
@@ -44,9 +44,12 @@
 │ ├── yolov3_tf_bs1_fp16.cfg # yolov3 post-processing config
 │ └── yolov3_tf_aipp.om
 ├── image # test images
-|── camera_configs.py # camera parameter file
-|── yolov3_infer.py # coordinate extraction script
-|── yolov3_infer.py # yolov3 model inference script
+├── RESOURCES
+│ └── flow.png
+├── camera_configs.py # reads the camera parameters
+├── camera.xml # camera parameter file
+├── main.py # coordinate extraction script
+├── yolov3_infer.py # yolov3 model inference script
 ├── ExportStereoParams.m # Matlab function to export xml
 └── README.md
 ```
@@ -64,9 +67,9 @@
 1. Due to the YOLOv3 model's limitations, 3D coordinates can only be obtained for the **80** object classes listed in `./models/yolov3.names`.
-2. Localization results are not very good for objects outside the camera's calibration range.
-3. For a differently calibrated camera, the two main parameters **numDisparities** and **uniquenessRatio** of the **stereo_match** function in `yolo_deep.py` need to be adjusted.
+2. Localization error can be large for objects outside the camera's calibration range.
+3. For a differently calibrated camera, the two main parameters **numDisparities** and **uniquenessRatio** of the **stereo_match** function in `main.py` need to be adjusted. Note that **numDisparities** must be divisible by 16, and **uniquenessRatio** usually falls in the range 5-15 (see the sketch below).
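Building on note 3, the tuning happens in the `cv2.StereoSGBM_create` call inside `stereo_match`; a minimal sketch with example values only (the values used in `main.py` are 16 * 6 and 10):

```
import cv2

# numDisparities must be a positive multiple of 16; a larger value covers a wider depth range.
# uniquenessRatio typically lies in 5-15; a larger value rejects more ambiguous matches.
stereo = cv2.StereoSGBM_create(minDisparity=0,
                               numDisparities=16 * 8,  # example: 128, divisible by 16
                               blockSize=5,
                               uniquenessRatio=12,     # example value inside the 5-15 range
                               mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)
```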
 ## 2 Environment Dependencies
@@ -90,7 +93,7 @@
 ## Installing Dependencies
-After creating a virtual environment, run the following commands:
+After creating and activating a virtual environment, run the following commands:
 ```
 pip install opencv-python
@@ -103,23 +106,31 @@ pip install opencv-python
 **Step 1**
-[Download the YOLOv3 model](https://www.hiascend.com/zh/software/modelzoo/models/detail/C/210261e64adc42d2b3d84c447844e4c7/1) and place it in the `./model` folder.
+[Download the YOLOv3 model](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/c-version/YoloV3_for_TensorFlow/zh/1.6/m/YOLOv3_TensorFlow_1.6_model.zip), unzip it, and place the `yolov3_tf_aipp.om` file it contains in the `./model` folder.
 **Step 2**
-Place the xml camera-parameter file exported from the Matlab calibration in the project root directory.
+Calibrate the stereo camera with Matlab, then use the provided `ExportStereoParams.m` script to export the camera parameters as a `camera.xml` file and place it in the project root directory.
+The stereo camera calibration procedure can be found here: [Link](https://blog.csdn.net/qq_38236355/article/details/89280633)
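`camera_configs.py` is responsible for reading the exported parameters. Purely as an illustration, if `camera.xml` were written in OpenCV's `FileStorage` layout it could be read as below; the node name `left_camera_matrix` is a placeholder, and the real node names and parsing are defined by `ExportStereoParams.m` and `camera_configs.py`:

```
import cv2

# Hypothetical read of one calibration matrix from camera.xml (node name is illustrative)
fs = cv2.FileStorage("camera.xml", cv2.FILE_STORAGE_READ)
left_camera_matrix = fs.getNode("left_camera_matrix").mat()
fs.release()
print(left_camera_matrix)
```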
 **Step 3**
-Choose any two jpg files, place them in the `./image` folder, and then run
+Take stereo photos with the camera calibrated in Step 2 and place them in the `./image` folder. Note that if the camera captures the left and right views in a single image, you need to split it yourself into two images of equal size, and make sure the numbers of left and right images match (a splitting sketch follows the command below). Adjust the `LEFTIMG` and `RIGHTIMG` naming patterns in `main.py` to match the names of the images you added, and then run
 ```
-python yolo_deep.py
+python main.py
 ```
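If your camera saves the left and right views side by side in a single frame, one simple way to split it into the two equally sized images mentioned above is sketched here, assuming a horizontal left-right layout; the file names are examples matching the default `left_*.jpg` / `right_*.jpg` patterns:

```
import cv2

combined = cv2.imread("stereo_0.jpg")  # example combined frame, left view on the left half
half = combined.shape[1] // 2          # split point at half the width
cv2.imwrite("image/left_0.jpg", combined[:, :half])
cv2.imwrite("image/right_0.jpg", combined[:, half:])
```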
 Result display:
-![pic](RESOURCES/result.png)
+```
+The result of case 10:
+Pixel coordinates x = 500, y = 248
+3D coordinates (305.618927, 90.307648, 2773.036865) mm
+umbrella's actual distance: 2.791289 m
+```
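For reference, the distance on the last line is simply the Euclidean norm of the printed 3D coordinates converted from millimetres to metres, exactly as computed in `main.py`: sqrt(305.618927² + 90.307648² + 2773.036865²) ≈ 2791.29 mm ≈ 2.791289 m.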
 ## 4 FAQ
@@ -135,4 +146,4 @@ E20221122 15:43:32.366075 9866 MxOmModelDesc.cpp:138] Infer failed, result is n
 **Solution:**
-The above error occurs when dvpp is used together with the `yolov3_tf_aipp.om` model. Solution 1: set B_USEDVPP to False and process the images with OpenCV. Solution 2: switch the model to `yolov3_tf_bs1_fp16.om`.
+The above error occurs when `B_USEDVPP=True` is set while the `yolov3_tf_aipp.om` model is used. Solution 1: set B_USEDVPP to False and process the images with OpenCV. Solution 2: switch the model to `yolov3_tf_bs1_fp16.om`.
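In code terms, the two fixes amount to editing the constants near the top of `yolov3_infer.py` (a sketch; pick one of the two combinations):

```
# Option 1: keep the aipp model but preprocess with OpenCV instead of dvpp
MODEL_PATH = "./model/yolov3_tf_aipp.om"
B_USEDVPP = False

# Option 2: keep dvpp preprocessing but switch to the non-aipp model
MODEL_PATH = "./model/yolov3_tf_bs1_fp16.om"
B_USEDVPP = True
```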

Binary file not shown.


View File

@@ -37,10 +37,10 @@ right_camera_matrix = np.array([camera_list[2][:3],
                                 camera_list[2][6:]])
 right_distortion = np.array([camera_list[3]])
-# 旋转关系向量
+# Rotation vector
 R = np.array([camera_list[4][:3],
               camera_list[4][3:6],
               camera_list[4][6:]])
-# 平移关系向量
+# Translation vector
 T = np.array(camera_list[5])

MobileStereoNet/main.py (new file, 135 lines)
View File

@@ -0,0 +1,135 @@
# Copyright(C) 2022. Huawei Technologies Co.,Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
import math
import collections
import cv2
import numpy as np
import camera_configs
from yolov3_infer import yolo_infer
LEFTIMG = "left_*.jpg"
RIGHTIMG = "right_*.jpg"
YOLO_RESIZELEN = 416
def get_rectify(height, width):
    left_matrix = camera_configs.left_camera_matrix
    right_matrix = camera_configs.right_camera_matrix
    left_distortion = camera_configs.left_distortion
    right_distortion = camera_configs.right_distortion
    R = camera_configs.R
    T = camera_configs.T
    # Image size
    size = (width, height)
    # Calculate correction transformation
    R1, R2, P1, P2, Q, validPixROI1, validPixROI2 = cv2.stereoRectify(left_matrix, left_distortion,
                                                                      right_matrix, right_distortion, size, R, T)
    # Calculate correction map
    left_map1, left_map2 = cv2.initUndistortRectifyMap(left_matrix, left_distortion, R1, P1, size, cv2.CV_16SC2)
    right_map1, right_map2 = cv2.initUndistortRectifyMap(right_matrix, right_distortion, R2, P2, size, cv2.CV_16SC2)
    Camera = collections.namedtuple('Camera', ['left_map1', 'left_map2', 'right_map1', 'right_map2', 'Q'])
    camera = Camera(left_map1, left_map2, right_map1, right_map2, Q)
    return camera


def stereo_match(imgleft, imgright):
    stereo = cv2.StereoSGBM_create(minDisparity=0,
                                   numDisparities=16 * 6,
                                   blockSize=5,
                                   P1=216,
                                   P2=864,
                                   disp12MaxDiff=1,
                                   uniquenessRatio=10,
                                   speckleWindowSize=0,
                                   speckleRange=1,
                                   preFilterCap=60,
                                   mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)
    disparity = stereo.compute(imgleft, imgright)
    return disparity


if __name__ == '__main__':
    left_paths = []
    left_paths.extend(glob.glob(os.path.join("image", LEFTIMG)))
    left_paths.sort()
    right_paths = []
    right_paths.extend(glob.glob(os.path.join("image", RIGHTIMG)))
    right_paths.sort()
    if len(left_paths) == 0 or len(right_paths) == 0:
        print("The dataset is empty! Please check the dataset and files.")
        exit()
    if len(left_paths) != len(right_paths):
        print("Picture missing! Please check the dataset and files.")
        exit()
    paths = zip(left_paths, right_paths)
    NUM = 0
    for left, right in paths:
        img1 = cv2.imread(left)
        img2 = cv2.imread(right)
        img_height, img_width = img1.shape[0:2]
        configs = get_rectify(img_height, img_width)
        # Distortion correction
        img1_rectified = cv2.remap(img1, configs.left_map1, configs.left_map2, cv2.INTER_LINEAR)
        img2_rectified = cv2.remap(img2, configs.right_map1, configs.right_map2, cv2.INTER_LINEAR)
        cv2.imwrite("SGBM_left.jpg", img1_rectified)
        # Convert the pictures to grayscale in preparation for SGBM
        imgL = cv2.cvtColor(img1_rectified, cv2.COLOR_BGR2GRAY)
        imgR = cv2.cvtColor(img2_rectified, cv2.COLOR_BGR2GRAY)
        # Generate the disparity map with SGBM (Semi-Global Block Matching)
        parallax = stereo_match(imgL, imgR)
        # Reproject the picture to 3D space; the value along z is the distance
        threeD = cv2.reprojectImageTo3D(parallax.astype(np.float32) / 16., configs.Q)
        coordinate = yolo_infer("SGBM_left.jpg", YOLO_RESIZELEN)
        NUM += 1
        print("The result of case %d:" % NUM)
        for coor in coordinate:
            x = coor.x1
            y = coor.y1
            name = coor.className
            x = int(x)
            y = int(y)
            print('\nPixel coordinates x = {}, y = {}'.format(x, y))
            x = x - 1
            y = y - 1
            print("3D coordinates ({:f}, {:f}, {:f}) mm ".format(threeD[y][x][0], threeD[y][x][1], threeD[y][x][2]))
            distance = math.sqrt(threeD[y][x][0] ** 2 + threeD[y][x][1] ** 2 + threeD[y][x][2] ** 2)
            distance = distance / 1000.0  # mm -> m
            print("{}'s actual distance: {:f} m\n".format(name, distance))
        os.remove("SGBM_left.jpg")

View File

@@ -1,107 +0,0 @@
# Copyright(C) 2022. Huawei Technologies Co.,Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import collections
import cv2
import numpy as np
import camera_configs
from yolov3_infer import yolo_infer
LEFTIMG_PATH = "./image/left_0.jpg"
RIGHTIMG_PATH = "./image/right_0.jpg"
YOLO_RESIZELEN = 416
def get_rectify(height, width):
    left_matrix = camera_configs.left_camera_matrix
    right_matrix = camera_configs.right_camera_matrix
    left_distortion = camera_configs.left_distortion
    right_distortion = camera_configs.right_distortion
    R = camera_configs.R
    T = camera_configs.T
    # Image size
    size = (width, height)
    # Compute the stereo rectification transforms
    R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(left_matrix, left_distortion,
                                                      right_matrix, right_distortion, size, R, T)
    # Compute the rectification maps
    left_map1, left_map2 = cv2.initUndistortRectifyMap(left_matrix, left_distortion, R1, P1, size, cv2.CV_16SC2)
    right_map1, right_map2 = cv2.initUndistortRectifyMap(right_matrix, right_distortion, R2, P2, size, cv2.CV_16SC2)
    Camera = collections.namedtuple('Camera', ['left_map1', 'left_map2', 'right_map1', 'right_map2', 'Q'])
    camera = Camera(left_map1, left_map2, right_map1, right_map2, Q)
    return camera


def stereo_match(imgleft, imgright):
    stereo = cv2.StereoSGBM_create(minDisparity=0,
                                   numDisparities=16 * 6,
                                   blockSize=5,
                                   P1=216,
                                   P2=864,
                                   disp12MaxDiff=1,
                                   uniquenessRatio=10,
                                   speckleWindowSize=0,
                                   speckleRange=1,
                                   preFilterCap=60,
                                   mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)
    disparity = stereo.compute(imgleft, imgright)
    return disparity


if __name__ == '__main__':
    img1 = cv2.imread(LEFTIMG_PATH)
    img2 = cv2.imread(RIGHTIMG_PATH)
    img_height, img_width = img1.shape[0:2]
    configs = get_rectify(img_height, img_width)
    # Rebuild the images from the rectification maps (distortion correction)
    img1_rectified = cv2.remap(img1, configs.left_map1, configs.left_map2, cv2.INTER_LINEAR)
    img2_rectified = cv2.remap(img2, configs.right_map1, configs.right_map2, cv2.INTER_LINEAR)
    cv2.imwrite("SGBM_left.jpg", img1_rectified)
    # Convert the images to grayscale in preparation for StereoSGBM
    imgL = cv2.cvtColor(img1_rectified, cv2.COLOR_BGR2GRAY)
    imgR = cv2.cvtColor(img2_rectified, cv2.COLOR_BGR2GRAY)
    # Generate the disparity map with SGBM (Semi-Global Block Matching)
    left_match = stereo_match(imgL, imgR)
    # Reproject the image to 3D space; the value along z is the distance
    threeD = cv2.reprojectImageTo3D(left_match.astype(np.float32) / 16., configs.Q)
    # The om model expects YUV input while the earlier OpenCV processing is BGR; lacking a direct way
    # to construct the Image object, the rectified image is simply re-read from disk
    coordinate = yolo_infer("SGBM_left.jpg", YOLO_RESIZELEN)
    for coor in coordinate:
        x = coor.x1
        y = coor.y1
        x = int(x)
        y = int(y)
        print('\nPixel coordinates x = %d, y = %d' % (x, y))
        x = x - 1
        y = y - 1
        print("World coordinates xyz:", threeD[y][x][0] / 1000.0, threeD[y][x][1] / 1000.0, threeD[y][x][2] / 1000.0, "m")
        distance = math.sqrt(threeD[y][x][0] ** 2 + threeD[y][x][1] ** 2 + threeD[y][x][2] ** 2)
        distance = distance / 1000.0  # mm -> m
        print("Distance:", distance, "m")

View File

@@ -18,18 +18,17 @@ import cv2
 from mindx.sdk import base
 from mindx.sdk.base import Tensor, Model, Size, log, ImageProcessor, post, BTensor
-DEVICE_ID = 0 # 芯片ID
-MODEL_PATH = "./model/yolov3_tf_aipp.om" # 模型的路径
-CONFIG_PATH = "./model/yolov3_tf_bs1_fp16.cfg" # 模型配置文件的路径
-LABEL_PATH = "./model/yolov3.names" # 分类标签文件的路径
-B_USEDVPP = False # 使用dvpp图像处理器启用;使用opencv时False
+DEVICE_ID = 0
+MODEL_PATH = "./model/yolov3_tf_aipp.om"
+CONFIG_PATH = "./model/yolov3_tf_bs1_fp16.cfg"
+LABEL_PATH = "./model/yolov3.names"
+B_USEDVPP = False # Enabled with dvpp image processor, false with opencv
 def yolo_infer(image_path, yolo_resize):
-    yolo = Model(MODEL_PATH, DEVICE_ID) # 创造模型对象
+    yolo = Model(MODEL_PATH, DEVICE_ID)
     image_tensor = []
     if B_USEDVPP:
-        # 创造图像处理器对象!!!!!使用该方法处理后数据已在device侧
         image_processor0 = ImageProcessor(DEVICE_ID)
         decode_img = image_processor0.decode(image_path, base.nv12)
@@ -39,7 +38,7 @@ def yolo_infer(image_path, yolo_resize):
         image_processor1 = ImageProcessor(DEVICE_ID)
         size_cof = Size(yolo_resize, yolo_resize)
         resize_img = image_processor1.resize(decode_img, size_cof)
-        image_tensor = [resize_img.to_tensor()] # 推理前需要转换为tensor的List,数据已在device侧,无需转移
+        image_tensor = [resize_img.to_tensor()]
     else:
         image = np.array(cv2.imread(image_path))
@@ -50,16 +49,15 @@ def yolo_infer(image_path, yolo_resize):
         resize_img = cv2.resize(image, size_cof, interpolation=cv2.INTER_LINEAR)
-        yuv_img = cv2.cvtColor(resize_img, cv2.COLOR_BGR2YUV)
-        yuv_img = yuv_img[np.newaxis, :, :]
-        image_tensor = Tensor(yuv_img) # 推理前需要转换为tensor的List,使用Tensor类来构建。
-        image_tensor.to_device(DEVICE_ID) # 重要,需要转移至device侧
+        yuv_img = resize_img[np.newaxis, :, :]
+        image_tensor = Tensor(yuv_img)
+        image_tensor.to_device(DEVICE_ID)
         image_tensor = [image_tensor]
     outputs = yolo.infer(image_tensor)
-    print("-----------YOLO Infer Success!----------------")
     yolov3_post = post.Yolov3PostProcess(
-        config_path=CONFIG_PATH, label_path=LABEL_PATH) # 构造对应的后处理对象
+        config_path=CONFIG_PATH, label_path=LABEL_PATH)
     resize_info = base.ResizedImageInfo()
     resize_info.heightResize = yolo_resize