Modify comments and readme
@@ -6,7 +6,7 @@

Sample input: two jpg images captured by a binocular (stereo) camera

-Sample output: the 3D coordinates of the object
+Sample output: the 3D coordinates and distance of the object

### 1.1 Supported Products
@@ -19,7 +19,7 @@

### 1.3 Software Solution Overview

-In this solution, OpenCV is used to undistort the input images, a pre-trained yolov3 model then detects objects in the rectified images, and finally the 3D coordinates of each object are output from the coordinates of the resulting detection boxes.
+In this solution, OpenCV is used to undistort the input images, a pre-trained yolov3 model then detects objects in the rectified images, and finally the 3D coordinates and distance of each object are output from the coordinates of the resulting detection boxes.

Table 1.1 Functions of each module in the system:
@@ -28,9 +28,9 @@
| 1 | Image input | Read the images with OpenCV's imread |
| 2 | Image resizing | Resize the detection image to 416*416 with OpenCV's resize |
| 3 | Distortion correction | Rectify the images using OpenCV and the camera parameters |
-| 4 | 视察计算 | 使用Opencv的SGBM方法计算图片视差 |
+| 4 | Disparity calculation | Compute the image disparity with OpenCV's SGBM method |
| 5 | Object detection | Detect objects in the image with the yolov3 detection model and obtain the detection-box coordinates |
-| 6 | Result output | Output the object's 3D coordinates from the detection-box coordinates |
+| 6 | Result output | Output the object's 3D coordinates and distance from the detection-box coordinates |
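A condensed sketch of how the modules in Table 1.1 chain together, reusing the helpers defined in `main.py` and `yolov3_infer.py` later in this diff (the image paths are examples; the MindX SDK, the .om model and `camera.xml` must already be in place):

```
import math
import cv2
import numpy as np
from main import get_rectify, stereo_match     # modules 3-4: rectification maps + SGBM disparity
from yolov3_infer import yolo_infer            # modules 2 and 5: resize to 416 and yolov3 detection

img_left = cv2.imread("image/left_0.jpg")      # module 1: image input
img_right = cv2.imread("image/right_0.jpg")
cam = get_rectify(*img_left.shape[:2])         # rectification maps and reprojection matrix Q

# module 3: distortion correction
left_r = cv2.remap(img_left, cam.left_map1, cam.left_map2, cv2.INTER_LINEAR)
right_r = cv2.remap(img_right, cam.right_map1, cam.right_map2, cv2.INTER_LINEAR)
cv2.imwrite("SGBM_left.jpg", left_r)

# module 4: SGBM disparity, then reprojection to 3D (SGBM returns disparity scaled by 16)
disp = stereo_match(cv2.cvtColor(left_r, cv2.COLOR_BGR2GRAY),
                    cv2.cvtColor(right_r, cv2.COLOR_BGR2GRAY))
threeD = cv2.reprojectImageTo3D(disp.astype(np.float32) / 16., cam.Q)   # millimetres

# modules 5-6: detect objects on the rectified left image and report coordinates and distance
for det in yolo_infer("SGBM_left.jpg", 416):
    x, y = int(det.x1) - 1, int(det.y1) - 1
    point = threeD[y][x]
    print(det.className, point, math.sqrt(point[0] ** 2 + point[1] ** 2 + point[2] ** 2) / 1000.0, "m")
```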
@@ -44,9 +44,12 @@
│ ├── yolov3_tf_bs1_fp16.cfg # yolov3 post-processing config
│ └── yolov3_tf_aipp.om
├── image # test images
-|── camera_configs.py # camera parameter file
-|── yolov3_infer.py # file that obtains the coordinates
-|── yolov3_infer.py # yolov3 model inference file
+├── RESOURCES
+│ └── flow.png
+├── camera_configs.py # reads the camera parameters
+├── camera.xml # camera parameter file
+├── main.py # obtains the object coordinates
+├── yolov3_infer.py # yolov3 model inference
+├── ExportStereoParams.m # Matlab function that exports the xml parameters
└── README.md
```
@@ -64,9 +67,9 @@

1. Due to the limitations of the yolov3 model, 3D coordinates can only be obtained for the **80** object classes listed in the `./model/yolov3.names` file.

-2. Localization of objects outside the camera's calibrated range is not very accurate.
+2. For objects outside the camera's calibrated range, the localization error may be large.

-3. For a differently calibrated camera, the two main parameters **numDisparities** and **uniquenessRatio** of the **stereo_match** function in `yolo_deep.py` need to be adjusted.
+3. For a differently calibrated camera, the two main parameters **numDisparities** and **uniquenessRatio** of the **stereo_match** function in `main.py` need to be adjusted. Note that **numDisparities** must be divisible by 16, and **uniquenessRatio** usually lies in the range 5-15.
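A minimal tuning sketch for these two parameters, keeping the other `cv2.StereoSGBM_create` arguments as they appear in `stereo_match` in `main.py` (the two values below are placeholders to adapt to your camera):

```
import cv2

NUM_DISPARITIES = 16 * 8   # must stay a multiple of 16; larger values cover closer objects
UNIQUENESS_RATIO = 10      # typically chosen in the 5-15 range

stereo = cv2.StereoSGBM_create(minDisparity=0,
                               numDisparities=NUM_DISPARITIES,
                               blockSize=5,
                               P1=216,
                               P2=864,
                               disp12MaxDiff=1,
                               uniquenessRatio=UNIQUENESS_RATIO,
                               speckleWindowSize=0,
                               speckleRange=1,
                               preFilterCap=60,
                               mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)
```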
## 2 Environment Dependencies

@@ -90,7 +93,7 @@

## Installing Dependencies

-After creating a virtual environment, run the following commands:
+After creating and activating a virtual environment, run the following commands:

```
pip install opencv-python
@@ -103,23 +106,31 @@ pip install opencv-python

**Step 1**

-[Download the YOLOv3 model](https://www.hiascend.com/zh/software/modelzoo/models/detail/C/210261e64adc42d2b3d84c447844e4c7/1) and put it into the `./model` folder.
+[Download the YOLOv3 model](https://ascend-repo-modelzoo.obs.cn-east-2.myhuaweicloud.com/c-version/YoloV3_for_TensorFlow/zh/1.6/m/YOLOv3_TensorFlow_1.6_model.zip), unzip it, and put the `yolov3_tf_aipp.om` file it contains into the `./model` folder.

**Step 2**

-Put the xml camera-parameter file exported from the Matlab calibration into the project root directory.
+Calibrate the stereo camera with Matlab, export the camera parameters to a `camera.xml` file with the provided `ExportStereoParams.m` script, and put that file into the project root directory.

The stereo camera calibration procedure is described here: [Link](https://blog.csdn.net/qq_38236355/article/details/89280633)
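After exporting, a quick way to confirm that `camera.xml` is being parsed as expected is to import the project's `camera_configs` module from the project root (a minimal check, not part of the documented steps):

```
# Minimal sanity check of the exported camera parameters (run from the project root).
import camera_configs

print(camera_configs.left_camera_matrix)   # expect a 3x3 intrinsic matrix
print(camera_configs.R)                    # rotation between the two cameras
print(camera_configs.T)                    # translation between the two cameras
```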
**Step 3**

-Choose two jpg files yourself, put them into the `./image` folder, and then run
+Take stereo photos with the camera calibrated in Step 2 and put them into the `./image` folder. Note that if the camera captures the left and right views in a single image, you need to split it yourself into two images of the same size, and the numbers of left and right images must be equal. Adjust the `LEFTIMG` and `RIGHTIMG` naming patterns in `main.py` to match the names of the images you added, then run:

```
-python yolo_deep.py
+python main.py
```
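If the camera writes both views into a single side-by-side frame (the case mentioned in Step 3), a minimal splitting sketch; the file names here are only examples:

```
import cv2

# Split one side-by-side stereo capture into two equally sized halves.
# "pair_0.jpg" is an example input; the outputs match the default LEFTIMG/RIGHTIMG patterns in main.py.
pair = cv2.imread("pair_0.jpg")
height, width = pair.shape[:2]
half = width // 2
cv2.imwrite("image/left_0.jpg", pair[:, :half])
cv2.imwrite("image/right_0.jpg", pair[:, half:])
```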
Result display:

-
+```
+The result of case 10:
+
+Pixel coordinates x = 500, y = 248
+3D coordinates (305.618927, 90.307648, 2773.036865) mm
+umbrella's actual distance: 2.791289 m
+```
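The reported distance is just the Euclidean norm of the reprojected 3D point (see the distance computation in `main.py`); for the sample case above:

```
import math

# 3D coordinates from the sample output, in millimetres
x, y, z = 305.618927, 90.307648, 2773.036865
distance = math.sqrt(x ** 2 + y ** 2 + z ** 2) / 1000.0   # mm -> m
print("{:f} m".format(distance))                          # 2.791289 m
```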
## 4 Common Problems

@@ -135,4 +146,4 @@ E20221122 15:43:32.366075 9866 MxOmModelDesc.cpp:138] Infer failed, result is n

**Solution:**

-The above error is reported when dvpp and the `yolov3_tf_aipp.om` model are used at the same time. Workaround 1: set B_USEDVPP to False and process the images with opencv. Workaround 2: switch the model to `yolov3_tf_bs1_fp16.om`.
+The above error is reported when `B_USEDVPP=True` is set and the `yolov3_tf_aipp.om` model is used at the same time. Workaround 1: set B_USEDVPP to False and process the images with opencv. Workaround 2: switch the model to `yolov3_tf_bs1_fp16.om`.
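Both workarounds are one-line edits to the constants at the top of `yolov3_infer.py` (shown in the diff below); a sketch:

```
# In yolov3_infer.py -- pick one of the two workarounds:
B_USEDVPP = False                              # workaround 1: preprocess with opencv instead of dvpp
MODEL_PATH = "./model/yolov3_tf_aipp.om"
# or keep B_USEDVPP = True and switch the model instead:
# MODEL_PATH = "./model/yolov3_tf_bs1_fp16.om"
```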
Binary file not shown (before: 38 KiB).
@@ -37,10 +37,10 @@ right_camera_matrix = np.array([camera_list[2][:3],
                                camera_list[2][6:]])
right_distortion = np.array([camera_list[3]])

-# 旋转关系向量
+# Rotation vector
R = np.array([camera_list[4][:3],
              camera_list[4][3:6],
              camera_list[4][6:]])

-# 平移关系向量
+# Translation vector
T = np.array(camera_list[5])
MobileStereoNet/main.py (new file, 135 lines)
@@ -0,0 +1,135 @@
# Copyright(C) 2022. Huawei Technologies Co.,Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import glob
import math
import collections
import cv2
import numpy as np
import camera_configs
from yolov3_infer import yolo_infer

LEFTIMG = "left_*.jpg"
RIGHTIMG = "right_*.jpg"
YOLO_RESIZELEN = 416


def get_rectify(height, width):
    left_matrix = camera_configs.left_camera_matrix
    right_matrix = camera_configs.right_camera_matrix
    left_distortion = camera_configs.left_distortion
    right_distortion = camera_configs.right_distortion
    R = camera_configs.R
    T = camera_configs.T

    # Image size
    size = (width, height)

    # Calculate correction transformation
    R1, R2, P1, P2, Q, validPixROI1, validPixROI2 = cv2.stereoRectify(left_matrix, left_distortion,
                                                                      right_matrix, right_distortion, size, R, T)
    # Calculate correction map
    left_map1, left_map2 = cv2.initUndistortRectifyMap(left_matrix, left_distortion, R1, P1, size, cv2.CV_16SC2)
    right_map1, right_map2 = cv2.initUndistortRectifyMap(right_matrix, right_distortion, R2, P2, size, cv2.CV_16SC2)

    Camera = collections.namedtuple('Camera', ['left_map1', 'left_map2', 'right_map1', 'right_map2', 'Q'])

    camera = Camera(left_map1, left_map2, right_map1, right_map2, Q)
    return camera


def stereo_match(imgleft, imgright):
    stereo = cv2.StereoSGBM_create(minDisparity=0,
                                   numDisparities=16 * 6,
                                   blockSize=5,
                                   P1=216,
                                   P2=864,
                                   disp12MaxDiff=1,
                                   uniquenessRatio=10,
                                   speckleWindowSize=0,
                                   speckleRange=1,
                                   preFilterCap=60,
                                   mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)
    disparity = stereo.compute(imgleft, imgright)
    return disparity


if __name__ == '__main__':

    left_paths = []
    left_paths.extend(glob.glob(os.path.join("image", LEFTIMG)))
    left_paths.sort()

    right_paths = []
    right_paths.extend(glob.glob(os.path.join("image", RIGHTIMG)))
    right_paths.sort()

    if len(left_paths) == 0 or len(right_paths) == 0:
        print("The dataset is empty! Please check the dataset and files.")
        exit()

    if len(left_paths) != len(right_paths):
        print("Picture missing! Please check the dataset and files.")
        exit()

    paths = zip(left_paths, right_paths)

    NUM = 0
    for left, right in paths:
        img1 = cv2.imread(left)
        img2 = cv2.imread(right)

        img_height, img_width = img1.shape[0:2]

        configs = get_rectify(img_height, img_width)

        # Distortion correction
        img1_rectified = cv2.remap(img1, configs.left_map1, configs.left_map2, cv2.INTER_LINEAR)
        img2_rectified = cv2.remap(img2, configs.right_map1, configs.right_map2, cv2.INTER_LINEAR)
        cv2.imwrite("SGBM_left.jpg", img1_rectified)

        # Set the picture as a grayscale image to prepare for SGBM
        imgL = cv2.cvtColor(img1_rectified, cv2.COLOR_BGR2GRAY)
        imgR = cv2.cvtColor(img2_rectified, cv2.COLOR_BGR2GRAY)

        # Generate parallax map according to SGBM/Semi-Global Block Matching
        parallax = stereo_match(imgL, imgR)

        # Expand the picture to 3d space, and the value in z direction is the current distance
        threeD = cv2.reprojectImageTo3D(parallax.astype(np.float32) / 16., configs.Q)

        coordinate = yolo_infer("SGBM_left.jpg", YOLO_RESIZELEN)

        NUM += 1
        print("The result of case %d:" % NUM)

        for coor in coordinate:
            x = coor.x1
            y = coor.y1
            name = coor.className

            x = int(x)
            y = int(y)

            print('\nPixel coordinates x = {}, y = {}'.format(x, y))

            x = x - 1
            y = y - 1
            print("3D coordinates ({:f}, {:f}, {:f}) mm ".format(threeD[y][x][0], threeD[y][x][1], threeD[y][x][2]))
            distance = math.sqrt(threeD[y][x][0] ** 2 + threeD[y][x][1] ** 2 + threeD[y][x][2] ** 2)
            distance = distance / 1000.0  # mm -> m
            print("{}'s actual distance: {:f} m\n".format(name, distance))

        os.remove("SGBM_left.jpg")
@@ -1,107 +0,0 @@
# Copyright(C) 2022. Huawei Technologies Co.,Ltd. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import collections
import cv2
import numpy as np
import camera_configs
from yolov3_infer import yolo_infer

LEFTIMG_PATH = "./image/left_0.jpg"
RIGHTIMG_PATH = "./image/right_0.jpg"
YOLO_RESIZELEN = 416


def get_rectify(height, width):
    left_matrix = camera_configs.left_camera_matrix
    right_matrix = camera_configs.right_camera_matrix
    left_distortion = camera_configs.left_distortion
    right_distortion = camera_configs.right_distortion
    R = camera_configs.R
    T = camera_configs.T

    # Image size
    size = (width, height)

    # Perform stereo rectification
    R1, R2, P1, P2, Q, roi1, roi2 = cv2.stereoRectify(left_matrix, left_distortion,
                                                      right_matrix, right_distortion, size, R, T)
    # Compute the rectification maps
    left_map1, left_map2 = cv2.initUndistortRectifyMap(left_matrix, left_distortion, R1, P1, size, cv2.CV_16SC2)
    right_map1, right_map2 = cv2.initUndistortRectifyMap(right_matrix, right_distortion, R2, P2, size, cv2.CV_16SC2)

    Camera = collections.namedtuple('Camera', ['left_map1', 'left_map2', 'right_map1', 'right_map2', 'Q'])

    camera = Camera(left_map1, left_map2, right_map1, right_map2, Q)
    return camera


def stereo_match(imgleft, imgright):
    stereo = cv2.StereoSGBM_create(minDisparity=0,
                                   numDisparities=16 * 6,
                                   blockSize=5,
                                   P1=216,
                                   P2=864,
                                   disp12MaxDiff=1,
                                   uniquenessRatio=10,
                                   speckleWindowSize=0,
                                   speckleRange=1,
                                   preFilterCap=60,
                                   mode=cv2.STEREO_SGBM_MODE_SGBM_3WAY)
    disparity = stereo.compute(imgleft, imgright)
    return disparity


if __name__ == '__main__':
    img1 = cv2.imread(LEFTIMG_PATH)
    img2 = cv2.imread(RIGHTIMG_PATH)

    img_height, img_width = img1.shape[0:2]

    configs = get_rectify(img_height, img_width)

    # Rebuild the images according to the rectification maps
    img1_rectified = cv2.remap(img1, configs.left_map1, configs.left_map2, cv2.INTER_LINEAR)
    img2_rectified = cv2.remap(img2, configs.right_map1, configs.right_map2, cv2.INTER_LINEAR)
    cv2.imwrite("SGBM_left.jpg", img1_rectified)

    # Convert the images to grayscale in preparation for StereoSGBM
    imgL = cv2.cvtColor(img1_rectified, cv2.COLOR_BGR2GRAY)
    imgR = cv2.cvtColor(img2_rectified, cv2.COLOR_BGR2GRAY)

    # Generate the disparity map with SGBM (Semi-Global Block Matching)
    left_match = stereo_match(imgL, imgR)

    # Reproject the image into 3D space; the value in the z direction is the current distance
    threeD = cv2.reprojectImageTo3D(left_match.astype(np.float32) / 16., configs.Q)

    # The om model expects YUV input while the cv2 pipeline above is BGR; lacking a direct way to build an Image object, re-read the rectified image from disk
    coordinate = yolo_infer("SGBM_left.jpg", YOLO_RESIZELEN)

    for coor in coordinate:
        x = coor.x1
        y = coor.y1

        x = int(x)
        y = int(y)

        print('\nPixel coordinates x = %d, y = %d' % (x, y))

        x = x - 1
        y = y - 1
        print("World coordinates xyz:", threeD[y][x][0] / 1000.0, threeD[y][x][1] / 1000.0, threeD[y][x][2] / 1000.0, "m")
        distance = math.sqrt(threeD[y][x][0] ** 2 + threeD[y][x][1] ** 2 + threeD[y][x][2] ** 2)
        distance = distance / 1000.0  # mm -> m
        print("Distance:", distance, "m")
@@ -18,18 +18,17 @@ import cv2
from mindx.sdk import base
from mindx.sdk.base import Tensor, Model, Size, log, ImageProcessor, post, BTensor

-DEVICE_ID = 0  # 芯片ID
-MODEL_PATH = "./model/yolov3_tf_aipp.om"  # 模型的路径
-CONFIG_PATH = "./model/yolov3_tf_bs1_fp16.cfg"  # 模型配置文件的路径
-LABEL_PATH = "./model/yolov3.names"  # 分类标签文件的路径
-B_USEDVPP = False  # 使用dvpp图像处理器启用,使用opencv时False
+DEVICE_ID = 0
+MODEL_PATH = "./model/yolov3_tf_aipp.om"
+CONFIG_PATH = "./model/yolov3_tf_bs1_fp16.cfg"
+LABEL_PATH = "./model/yolov3.names"
+B_USEDVPP = False  # Enabled with dvpp image processor, false with opencv


def yolo_infer(image_path, yolo_resize):
-    yolo = Model(MODEL_PATH, DEVICE_ID)  # 创造模型对象
+    yolo = Model(MODEL_PATH, DEVICE_ID)
    image_tensor = []
    if B_USEDVPP:
-        # 创造图像处理器对象!!!!!使用该方法处理后数据已在device侧
        image_processor0 = ImageProcessor(DEVICE_ID)
        decode_img = image_processor0.decode(image_path, base.nv12)

@@ -39,7 +38,7 @@ def yolo_infer(image_path, yolo_resize):
        image_processor1 = ImageProcessor(DEVICE_ID)
        size_cof = Size(yolo_resize, yolo_resize)
        resize_img = image_processor1.resize(decode_img, size_cof)
-        image_tensor = [resize_img.to_tensor()]  # 推理前需要转换为tensor的List,数据已在device侧无需转移
+        image_tensor = [resize_img.to_tensor()]

    else:
        image = np.array(cv2.imread(image_path))

@@ -50,16 +49,15 @@ def yolo_infer(image_path, yolo_resize):

        resize_img = cv2.resize(image, size_cof, interpolation=cv2.INTER_LINEAR)

-        yuv_img = cv2.cvtColor(resize_img, cv2.COLOR_BGR2YUV)
-        yuv_img = yuv_img[np.newaxis, :, :]
-        image_tensor = Tensor(yuv_img)  # 推理前需要转换为tensor的List,使用Tensor类来构建。
+        yuv_img = resize_img[np.newaxis, :, :]
+        image_tensor = Tensor(yuv_img)

-        image_tensor.to_device(DEVICE_ID)  # 重要,需要转移至device侧
+        image_tensor.to_device(DEVICE_ID)
        image_tensor = [image_tensor]
    outputs = yolo.infer(image_tensor)
    print("-----------YOLO Infer Success!----------------")

    yolov3_post = post.Yolov3PostProcess(
-        config_path=CONFIG_PATH, label_path=LABEL_PATH)  # 构造对应的后处理对象
+        config_path=CONFIG_PATH, label_path=LABEL_PATH)

    resize_info = base.ResizedImageInfo()
    resize_info.heightResize = yolo_resize