mirror of
https://github.com/Ascend/ascend-opencv.git
synced 2025-10-08 01:30:18 +08:00
update
This commit is contained in:
139
acl/README_CN.md
Executable file
139
acl/README_CN.md
Executable file
@@ -0,0 +1,139 @@
|
||||
# Opencv ACL模块安装及使用<a name="ZH-CN_TOPIC_0302083215"></a>
|
||||
|
||||
## 功能描述<a name="section1421916179418"></a>
|
||||
|
||||
该模块实现了OpenCV部分模块对AscendCL的支持,包括aclMat类及部分矩阵操作函数,具体见API接口文档。
|
||||
|
||||
|
||||
|
||||
## 目录结构<a name="section8733528154320"></a>
|
||||
|
||||
```
|
||||
├── CMakeLists.txt //Cmake配置
|
||||
├── include //头文件目录
|
||||
│ └── opencv2
|
||||
│ └── acl
|
||||
│ ├── acl.hpp //ACL头文件
|
||||
│ ├── acl_init.hpp //ACL初始化模块类的声明
|
||||
│ ├── acl_mat.hpp //aclMat类的声明
|
||||
│ ├── acl_type.hpp //ACL类型声明
|
||||
│ ├── gemm.hpp //gemm模块
|
||||
│ ├── init_core.hpp //ACL初始化环境核心实现
|
||||
│ ├── mat_core.hpp //Mat类核心实现
|
||||
│ ├── mathfuncs.hpp //math函数模块
|
||||
│ ├── matrices.hpp //矩阵操作模块
|
||||
│ └── operator_desc.hpp //算子描述模块
|
||||
├── README_CN.md
|
||||
├── run.sh //自动化部署脚本
|
||||
├── src //源文件目录,对应声明
|
||||
│ ├── acl_init.cpp
|
||||
│ ├── acl_mat.cpp
|
||||
│ ├── gemm.cpp
|
||||
│ ├── mathfuncs.cpp
|
||||
│ ├── matrices.cpp
|
||||
│ ├── operator_desc.cpp
|
||||
│ └── precomp.hpp //头文件总包含
|
||||
└── test //单元测试目录
|
||||
├── acl.cpp //总测试模块
|
||||
├── acl.json
|
||||
├── test_acl.cpp //aclMat类重载运算符测试
|
||||
├── test_common.cpp //测试公用模块
|
||||
├── test_common.hpp //测试公用模块声明
|
||||
├── test_correctness.cpp //函数正确性验证
|
||||
├── test_correctness.hpp
|
||||
├── test_gemm.cpp //gemm模块性能验证
|
||||
├── test_main.cpp
|
||||
├── test_mathfuncs.cpp //math函数模块性能验证
|
||||
├── test_matrices.cpp //矩阵操作模块性能验证
|
||||
├── test_perf.hpp
|
||||
└── test_precomp.hpp //测试头文件总包含
|
||||
```
|
||||
|
||||
## 环境要求<a name="zh-cn_topic_0230709958_section1256019267915"></a>
|
||||
|
||||
- 操作系统及架构:CentOS x86\_64、CentOS aarch64、Ubuntu 18.04 x86\_64、EulerOS x86、EulerOS aarch64
|
||||
- 编译器:
|
||||
- 运行环境操作系统架构为x86时,编译器为g++
|
||||
- 运行环境操作系统架构为arm64时,编译器为aarch64-linux-gnu-g++
|
||||
- python及依赖的库:Python3.7.*x*(3.7.0 ~ 3.7.11)、Python3.8.*x*(3.8.0 ~ 3.8.11)
|
||||
- 已完成昇腾AI软件栈的部署。
|
||||
|
||||
|
||||
## 配置环境变量
|
||||
|
||||
- 开发环境上环境变量配置
|
||||
|
||||
1. CANN-Toolkit包提供进程级环境变量配置脚本,供用户在进程中引用,以自动完成CANN基础环境变量的配置,配置示例如下所示
|
||||
|
||||
```
|
||||
. ${HOME}/Ascend/ascend-toolkit/set_env.sh
|
||||
```
|
||||
|
||||
“$HOME/Ascend”请替换为“Ascend-cann-toolkit”包的实际安装路径。
|
||||
|
||||
2. 算子编译依赖Python,以Python3.7.5为例,请以运行用户执行如下命令设置Python3.7.5的相关环境变量。
|
||||
|
||||
```
|
||||
#用于设置python3.7.5库文件路径
|
||||
export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib:$LD_LIBRARY_PATH
|
||||
#如果用户环境存在多个python3版本,则指定使用python3.7.5版本
|
||||
export PATH=/usr/local/python3.7.5/bin:$PATH
|
||||
```
|
||||
|
||||
Python3.7.5安装路径请根据实际情况进行替换,您也可以将以上命令写入~/.bashrc文件中,然后执行source ~/.bashrc命令使其立即生效。
|
||||
|
||||
3. 开发环境上,设置环境变量,配置AscendCL单算子验证程序编译依赖的头文件与库文件路径。
|
||||
|
||||
编译脚本会按环境变量指向的路径查找编译依赖的头文件和库文件,“$HOME/Ascend”请替换为“Ascend-cann-toolkit”包的实际安装路径。
|
||||
|
||||
- 当运行环境操作系统架构是x86时,配置示例如下所示:
|
||||
|
||||
```
|
||||
export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest/x86_64-linux
|
||||
export NPU_HOST_LIB=$DDK_PATH/acllib/lib64/stub
|
||||
```
|
||||
|
||||
- 当运行环境操作系统架构是AArch64时,配置示例如下所示:
|
||||
|
||||
```
|
||||
export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest/arm64-linux
|
||||
export NPU_HOST_LIB=$DDK_PATH/acllib/lib64/stub
|
||||
```
|
||||
|
||||
- 运行环境上环境变量配置
|
||||
|
||||
- 若运行环境上安装的是“Ascend-cann-toolkit”包,环境变量设置如下:
|
||||
|
||||
```
|
||||
. ${HOME}/Ascend/ascend-toolkit/set_env.sh
|
||||
```
|
||||
|
||||
- 若运行环境上安装的是“Ascend-cann-nnrt”包,环境变量设置如下:
|
||||
|
||||
```
|
||||
. ${HOME}/Ascend/nnrt/set_env.sh
|
||||
```
|
||||
|
||||
- 若运行环境上安装的是“Ascend-cann-nnae”包,环境变量设置如下:
|
||||
|
||||
```
|
||||
. ${HOME}/Ascend/nnae/set_env.sh
|
||||
```
|
||||
|
||||
“$HOME/Ascend”请替换为相关软件包的实际安装路径。
|
||||
|
||||
|
||||
|
||||
|
||||
## 安装说明
|
||||
1. 在配置好Ascend环境之后,用户需要从官网下载opencv库和本模块(acl),并保证acl目录和opencv目录在同一级目录下
|
||||
2. 进入acl目录将run.sh脚本拷贝或者移动到acl和opencv的同级目录
|
||||
3. 如果acl路径不在系统默认路径,修改acl/CMakeLists.txt文件,修改acl_lib,acl_inc路径
|
||||
4. 给脚本文件加权限: chmod +x run.sh
|
||||
5. 运行脚本: ./run.sh
|
||||
6. 如果需要安装之后运行单元测试模块,可在脚本后加命令: ./run.sh ACLTEST
|
||||
|
||||
|
||||
|
||||
|
||||
|
@@ -137,15 +137,16 @@ namespace cv
|
||||
|
||||
inline aclMat::~aclMat()
|
||||
{
|
||||
release();
|
||||
if (refcount)
|
||||
release();
|
||||
}
|
||||
|
||||
inline aclMat &aclMat::operator=(const aclMat &m)
|
||||
{
|
||||
if (this != &m)
|
||||
{
|
||||
if (m.refcount)
|
||||
CV_XADD(m.refcount, 1);
|
||||
if (refcount)
|
||||
CV_XADD(refcount, -1);
|
||||
flags = m.flags;
|
||||
rows = m.rows;
|
||||
cols = m.cols;
|
||||
@@ -159,6 +160,8 @@ namespace cv
|
||||
acl_context = m.acl_context;
|
||||
totalSize = m.totalSize;
|
||||
data = m.data;
|
||||
if (m.refcount)
|
||||
CV_XADD(m.refcount, 1);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
33
acl/run.sh
Executable file
33
acl/run.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/bin/bash
# Build OpenCV together with the ACL module and optionally run the ACL tests.
#
# Usage: ./run.sh [ACLTEST]
#   Run from the directory that contains both ./opencv and ./acl.
#   ACLTEST  after a successful build, run bin/opencv_test_acl.

opencv_directory=./opencv
acl_directory=./acl
# Upper bound on build attempts so a genuine compile error cannot loop forever.
max_make_attempts=3

# Move the ACL module into the OpenCV source tree. Both directories must exist;
# on a re-run the module has already been moved, so the step is skipped.
if [ -d "$opencv_directory" ] && [ -d "$acl_directory" ]
then
    mv "$acl_directory" "$opencv_directory/modules/" || exit 1
fi

# Never configure or build from the wrong directory.
cd "$opencv_directory" || { echo "cannot enter $opencv_directory" >&2; exit 1; }

mkdir -p build || exit 1
cd build || exit 1

cmake .. || { echo "cmake configuration failed" >&2; exit 1; }

# Retry the parallel build a bounded number of times, then give up.
attempt=1
until make -j
do
    if [ "$attempt" -ge "$max_make_attempts" ]
    then
        echo "make failed after $attempt attempts" >&2
        exit 1
    fi
    attempt=$((attempt + 1))
done

for var in "$@"
do
    if [ "$var" = "ACLTEST" ]
    then
        # Run from bin/ in a subshell so the working directory of this
        # script is unchanged; one run is enough even if the flag repeats.
        (cd bin && ./opencv_test_acl) || exit 1
        break
    fi
done
|
@@ -48,7 +48,7 @@ namespace cv
|
||||
case ACL_FLOAT:
|
||||
{
|
||||
aclrtMalloc(&dev_ptr, powersize, ACL_MEM_MALLOC_NORMAL_ONLY);
|
||||
float32_t power_32f = float32_t(power);
|
||||
float power_32f = float(power);
|
||||
aclrtMemcpy(dev_ptr, powersize, static_cast<void *>(&power_32f), powersize, ACL_MEMCPY_HOST_TO_DEVICE);
|
||||
return dev_ptr;
|
||||
}
|
||||
|
@@ -90,7 +90,23 @@ namespace cv
|
||||
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
|
||||
}
|
||||
*/
|
||||
|
||||
static int merge_type(int depth, int channels)
|
||||
{
|
||||
switch (depth)
|
||||
{
|
||||
case CV_8U:
|
||||
return CV_8UC(channels);
|
||||
case CV_8S:
|
||||
return CV_8SC(channels);
|
||||
case CV_32F:
|
||||
return CV_32FC(channels);
|
||||
case CV_32S:
|
||||
return CV_32SC(channels);
|
||||
case CV_64F:
|
||||
return CV_64FC(channels);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void merge(const vector<aclMat>& mv, aclMat& dest)
|
||||
{
|
||||
@@ -110,17 +126,22 @@ namespace cv
|
||||
opDesc.AddInputTensorDesc(dataType, inputShape.size(), inputShape.data(), ACL_FORMAT_NHWC);
|
||||
}
|
||||
|
||||
int cols = dest.step/dest.elemSize();
|
||||
vector<int64_t> outputShape{1, dest.rows, cols, dest.channels()};
|
||||
int cols = mv[0].step/mv[0].elemSize();
|
||||
int channels = mv.size();
|
||||
vector<int64_t> outputShape{1, mv[0].rows, cols, channels};
|
||||
opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_NHWC);
|
||||
|
||||
ino64_t N = mv.size();
|
||||
aclopSetAttrInt(opDesc.opAttr, "N", N);
|
||||
|
||||
aclSetTensorDescName(opDesc.inputDesc[0], "concat_dim");
|
||||
|
||||
aclSetTensorDescName(opDesc.inputDesc[1], "x0");
|
||||
aclSetTensorDescName(opDesc.inputDesc[2], "x1");
|
||||
aclSetTensorDescName(opDesc.inputDesc[3], "x2");
|
||||
if (mv.size() == 3)
|
||||
aclSetTensorDescName(opDesc.inputDesc[3], "x2");
|
||||
else if(mv.size() == 4)
|
||||
aclSetTensorDescName(opDesc.inputDesc[4], "x3");
|
||||
aclSetTensorDescName(opDesc.outputDesc[0], "y");
|
||||
|
||||
void *dev;
|
||||
@@ -133,6 +154,9 @@ namespace cv
|
||||
for (size_t i = 0; i < mv.size(); ++i)
|
||||
inputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize));
|
||||
|
||||
int type = merge_type(mv[0].depth(), channels);
|
||||
aclMat temp(mv[0].rows, mv[0].cols, type, mv[0].acl_context);
|
||||
dest = temp;
|
||||
outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize));
|
||||
|
||||
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context);
|
||||
@@ -257,6 +281,23 @@ namespace cv
|
||||
}
|
||||
*/
|
||||
|
||||
static int split_type(int depth)
|
||||
{
|
||||
switch (depth)
|
||||
{
|
||||
case CV_8U:
|
||||
return CV_8UC1;
|
||||
case CV_8S:
|
||||
return CV_8SC1;
|
||||
case CV_32F:
|
||||
return CV_32FC1;
|
||||
case CV_32S:
|
||||
return CV_32SC1;
|
||||
case CV_64F:
|
||||
return CV_64FC1;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
void split(const aclMat& src, vector<aclMat>& mv)
|
||||
{
|
||||
@@ -274,8 +315,7 @@ namespace cv
|
||||
|
||||
for (int i = 0; i < num_split; ++i)
|
||||
{
|
||||
int cols = mv[i].step/mv[i].elemSize();
|
||||
vector<int64_t> outputShape{1, mv[i].rows, cols, mv[i].channels()};
|
||||
vector<int64_t> outputShape{1, src.rows, cols, 1};
|
||||
opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_ND);
|
||||
}
|
||||
|
||||
@@ -285,11 +325,16 @@ namespace cv
|
||||
|
||||
inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize));
|
||||
|
||||
int type = split_type(src.depth());
|
||||
for (int i = 0; i < num_split; ++i)
|
||||
{
|
||||
aclMat tmp(src.rows, src.cols, type, src.acl_context);
|
||||
mv[i] = tmp;
|
||||
outputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize));
|
||||
}
|
||||
|
||||
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context);
|
||||
|
||||
|
||||
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
|
||||
for (int i = 0; i < num_split; ++i)
|
||||
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
|
||||
|
@@ -114,13 +114,10 @@ void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context)
|
||||
for (val = 8; val <= valmax; val *= 2)
|
||||
{
|
||||
int n = 100;
|
||||
Mat mat_src(val, val, type[i]);
|
||||
Mat mat_dest(val, val, type[i]);
|
||||
Mat mat_src(val, val, type[i], Scalar(1, 2, 4));
|
||||
Mat mat_dest(val, val, type[i], Scalar(2, 4, 8));
|
||||
Mat mat_dest1(val, val, type[i]);
|
||||
|
||||
test.SetDataRange(mat_src, 2);
|
||||
test.SetDataRange(mat_dest, 1);
|
||||
|
||||
aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
|
||||
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
|
||||
|
||||
|
Reference in New Issue
Block a user