This commit is contained in:
luoliang
2022-07-26 15:33:02 +08:00
parent 0180fc6cda
commit e51a341661
7 changed files with 234 additions and 17 deletions

139
acl/README_CN.md Executable file
View File

@@ -0,0 +1,139 @@
# Opencv ACL模块安装及使用<a name="ZH-CN_TOPIC_0302083215"></a>
## 功能描述<a name="section1421916179418"></a>
该模块实现了OpenCV部分模块对AscendCL的支持,包括MAT类及部分矩阵操作函数,具体见API接口文档。
## 目录结构<a name="section8733528154320"></a>
```
├── CMakeLists.txt //Cmake配置
├── include //头文件目录
│ └── opencv2
│ └── acl
│ ├── acl.hpp //ACL头文件
│ ├── acl_init.hpp //ACL初始化模块类的声明
│ ├── acl_mat.hpp //aclMat类的声明
│ ├── acl_type.hpp //ACL类型声明
│ ├── gemm.hpp //gemm模块
│ ├── init_core.hpp //ACL初始化环境核心实现
│ ├── mat_core.hpp //Mat类核心实现
│ ├── mathfuncs.hpp //math函数模块
│ ├── matrices.hpp //矩阵操作模块
│ └── operator_desc.hpp //算子描述模块
├── README_CN.md
├── run.sh //自动化部署脚本
├── src //源文件目录,对应声明
│ ├── acl_init.cpp
│ ├── acl_mat.cpp
│ ├── gemm.cpp
│ ├── mathfuncs.cpp
│ ├── matrices.cpp
│ ├── operator_desc.cpp
│ └── precomp.hpp //头文件总包含
└── test //单元测试目录
├── acl.cpp //总测试模块
├── acl.json
├── test_acl.cpp //aclMat类重载运算符测试
├── test_common.cpp //测试公用模块
├── test_common.hpp //测试公用模块声明
├── test_correctness.cpp //函数正确性验证
├── test_correctness.hpp
├── test_gemm.cpp //gemm模块性能验证
├── test_main.cpp
├── test_mathfuncs.cpp //math函数模块性能验证
├── test_matrices.cpp //矩阵操作模块性能验证
├── test_perf.hpp
└── test_precomp.hpp //测试头文件总包含
```
## 环境要求<a name="zh-cn_topic_0230709958_section1256019267915"></a>
- 操作系统及架构:CentOS x86\_64、CentOS aarch64、Ubuntu 18.04 x86\_64、EulerOS x86、EulerOS aarch64
- 编译器:
- 运行环境操作系统架构为x86时编译器为g++
- 运行环境操作系统架构为arm64时编译器为aarch64-linux-gnu-g++
- Python及依赖的库:Python3.7.*x*(3.7.0 ~ 3.7.11)、Python3.8.*x*(3.8.0 ~ 3.8.11)
- 已完成昇腾AI软件栈的部署。
## 配置环境变量
- 开发环境上环境变量配置
1. CANN-Toolkit包提供进程级环境变量配置脚本供用户在进程中引用以自动完成CANN基础环境变量的配置配置示例如下所示
```
. ${HOME}/Ascend/ascend-toolkit/set_env.sh
```
“$HOME/Ascend”请替换“Ascend-cann-toolkit”包的实际安装路径。
2. 算子编译依赖Python以Python3.7.5为例请以运行用户执行如下命令设置Python3.7.5的相关环境变量。
```
#用于设置python3.7.5库文件路径
export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib:$LD_LIBRARY_PATH
#如果用户环境存在多个python3版本则指定使用python3.7.5版本
export PATH=/usr/local/python3.7.5/bin:$PATH
```
Python3.7.5安装路径请根据实际情况进行替换,您也可以将以上命令写入~/.bashrc文件中然后执行source ~/.bashrc命令使其立即生效。
3. 开发环境上设置环境变量配置AscendCL单算子验证程序编译依赖的头文件与库文件路径。
编译脚本会按环境变量指向的路径查找编译依赖的头文件和库文件,“$HOME/Ascend”请替换“Ascend-cann-toolkit”包的实际安装路径。
- 当运行环境操作系统架构是x86时配置示例如下所示
```
export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest/x86_64-linux
export NPU_HOST_LIB=$DDK_PATH/acllib/lib64/stub
```
- 当运行环境操作系统架构是AArch64时,配置示例如下所示:
```
export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest/arm64-linux
export NPU_HOST_LIB=$DDK_PATH/acllib/lib64/stub
```
- 运行环境上环境变量配置
- 若运行环境上安装的“Ascend-cann-toolkit”包环境变量设置如下
```
. ${HOME}/Ascend/ascend-toolkit/set_env.sh
```
- 若运行环境上安装的“Ascend-cann-nnrt”包环境变量设置如下
```
. ${HOME}/Ascend/nnrt/set_env.sh
```
- 若运行环境上安装的“Ascend-cann-nnae”包环境变量设置如下
```
. ${HOME}/Ascend/nnae/set_env.sh
```
“$HOME/Ascend”请替换相关软件包的实际安装路径。
## 安装说明
1. 在配置好Ascend之后,用户需要从官网下载好opencv库和本模块(acl),保证acl模块和opencv在同一级目录下
2. 进入acl目录将run.sh脚本拷贝或者移动到acl和opencv的同级目录
3. 如果acl路径不在系统默认路径,修改acl/CMakeLists.txt文件,修改acl_lib、acl_inc路径
4. 给脚本文件加权限: chmod +x run.sh
5. 运行脚本: ./run.sh
6. 如果需要安装之后运行单元测试模块,可在脚本后加命令: ./run.sh ACLTEST

View File

@@ -137,15 +137,16 @@ namespace cv
// Destructor: calls release() exactly once, and only when a reference
// counter is attached (refcount is non-null). An unconditional release()
// followed by a guarded one would release the same matrix twice.
inline aclMat::~aclMat()
{
    if (refcount)
        release();
}
inline aclMat &aclMat::operator=(const aclMat &m)
{
if (this != &m)
{
if (m.refcount)
CV_XADD(m.refcount, 1);
if (refcount)
CV_XADD(refcount, -1);
flags = m.flags;
rows = m.rows;
cols = m.cols;
@@ -159,6 +160,8 @@ namespace cv
acl_context = m.acl_context;
totalSize = m.totalSize;
data = m.data;
if (m.refcount)
CV_XADD(m.refcount, 1);
}
return *this;
}

33
acl/run.sh Executable file
View File

@@ -0,0 +1,33 @@
#! /bin/bash
# Build helper for the OpenCV ACL module.
#
# Run it from the directory that contains both ./opencv and ./acl. It moves
# the acl module into opencv/modules/, configures and builds OpenCV in
# opencv/build, and runs the ACL unit tests when "ACLTEST" is passed as an
# argument.

opencv_directory=./opencv
acl_directory=./acl
# Upper bound on build attempts so a persistent error cannot loop forever.
max_build_attempts=5

# Move the module only when BOTH directories exist. On a re-run the module
# has already been moved, so ./acl is gone and this step is skipped.
if [ -d "$opencv_directory" ] && [ -d "$acl_directory" ]
then
    if ! mv "$acl_directory" "$opencv_directory/modules/"
    then
        echo "run.sh: failed to move $acl_directory into $opencv_directory/modules/" >&2
        exit 1
    fi
fi

# Stop immediately if the source tree is missing; otherwise cmake/make would
# run in whatever directory the script was started from.
if ! cd "$opencv_directory"
then
    echo "run.sh: cannot enter $opencv_directory" >&2
    exit 1
fi

# mkdir -p is a no-op when the directory already exists.
mkdir -p build || exit 1
cd build || exit 1

if ! cmake ..
then
    echo "run.sh: cmake configuration failed" >&2
    exit 1
fi

# Retry the build a bounded number of times. The job count is limited to the
# number of CPUs; if nproc is unavailable the flag degrades to a bare -j.
attempt=1
until make -j"$(nproc)"
do
    if [ "$attempt" -ge "$max_build_attempts" ]
    then
        echo "run.sh: build failed after $attempt attempts" >&2
        exit 1
    fi
    attempt=$((attempt + 1))
done

# Run the ACL unit tests for every ACLTEST argument. The subshell keeps the
# working directory unchanged, so a repeated ACLTEST still finds ./bin.
for var in "$@"
do
    if [ "$var" = "ACLTEST" ]
    then
        (cd bin && ./opencv_test_acl)
    fi
done

View File

@@ -48,7 +48,7 @@ namespace cv
case ACL_FLOAT:
{
aclrtMalloc(&dev_ptr, powersize, ACL_MEM_MALLOC_NORMAL_ONLY);
float32_t power_32f = float32_t(power);
float power_32f = float(power);
aclrtMemcpy(dev_ptr, powersize, static_cast<void *>(&power_32f), powersize, ACL_MEMCPY_HOST_TO_DEVICE);
return dev_ptr;
}

View File

@@ -90,7 +90,23 @@ namespace cv
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
}
*/
/**
 * Maps an element depth and a channel count to the multi-channel type code.
 *
 * @param depth    element depth code; CV_8U, CV_8S, CV_32F, CV_32S and
 *                 CV_64F are recognised.
 * @param channels channel count passed to the CV_<depth>C(n) macro.
 * @return the matching CV_<depth>C(channels) value, or -1 for any other depth.
 */
static int merge_type(int depth, int channels)
{
    if (depth == CV_8U)
        return CV_8UC(channels);
    if (depth == CV_8S)
        return CV_8SC(channels);
    if (depth == CV_32F)
        return CV_32FC(channels);
    if (depth == CV_32S)
        return CV_32SC(channels);
    if (depth == CV_64F)
        return CV_64FC(channels);

    // Depth is not one of the recognised codes.
    return -1;
}
void merge(const vector<aclMat>& mv, aclMat& dest)
{
@@ -110,17 +126,22 @@ namespace cv
opDesc.AddInputTensorDesc(dataType, inputShape.size(), inputShape.data(), ACL_FORMAT_NHWC);
}
int cols = dest.step/dest.elemSize();
vector<int64_t> outputShape{1, dest.rows, cols, dest.channels()};
int cols = mv[0].step/mv[0].elemSize();
int channels = mv.size();
vector<int64_t> outputShape{1, mv[0].rows, cols, channels};
opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_NHWC);
ino64_t N = mv.size();
aclopSetAttrInt(opDesc.opAttr, "N", N);
aclSetTensorDescName(opDesc.inputDesc[0], "concat_dim");
aclSetTensorDescName(opDesc.inputDesc[1], "x0");
aclSetTensorDescName(opDesc.inputDesc[2], "x1");
aclSetTensorDescName(opDesc.inputDesc[3], "x2");
if (mv.size() == 3)
aclSetTensorDescName(opDesc.inputDesc[3], "x2");
else if(mv.size() == 4)
aclSetTensorDescName(opDesc.inputDesc[4], "x3");
aclSetTensorDescName(opDesc.outputDesc[0], "y");
void *dev;
@@ -133,6 +154,9 @@ namespace cv
for (size_t i = 0; i < mv.size(); ++i)
inputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize));
int type = merge_type(mv[0].depth(), channels);
aclMat temp(mv[0].rows, mv[0].cols, type, mv[0].acl_context);
dest = temp;
outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context);
@@ -257,6 +281,23 @@ namespace cv
}
*/
/**
 * Maps an element depth to the corresponding single-channel type code.
 *
 * @param depth element depth code; CV_8U, CV_8S, CV_32F, CV_32S and CV_64F
 *              are recognised.
 * @return the matching CV_<depth>C1 value, or -1 for any other depth.
 */
static int split_type(int depth)
{
    int single_channel_type = -1; // stays -1 when the depth is not recognised

    if (depth == CV_8U)
        single_channel_type = CV_8UC1;
    else if (depth == CV_8S)
        single_channel_type = CV_8SC1;
    else if (depth == CV_32F)
        single_channel_type = CV_32FC1;
    else if (depth == CV_32S)
        single_channel_type = CV_32SC1;
    else if (depth == CV_64F)
        single_channel_type = CV_64FC1;

    return single_channel_type;
}
void split(const aclMat& src, vector<aclMat>& mv)
{
@@ -274,8 +315,7 @@ namespace cv
for (int i = 0; i < num_split; ++i)
{
int cols = mv[i].step/mv[i].elemSize();
vector<int64_t> outputShape{1, mv[i].rows, cols, mv[i].channels()};
vector<int64_t> outputShape{1, src.rows, cols, 1};
opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_ND);
}
@@ -285,11 +325,16 @@ namespace cv
inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize));
int type = split_type(src.depth());
for (int i = 0; i < num_split; ++i)
{
aclMat tmp(src.rows, src.cols, type, src.acl_context);
mv[i] = tmp;
outputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize));
}
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context);
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
for (int i = 0; i < num_split; ++i)
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));

View File

@@ -114,13 +114,10 @@ void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context)
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_src(val, val, type[i], Scalar(1, 2, 4));
Mat mat_dest(val, val, type[i], Scalar(2, 4, 8));
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src, 2);
test.SetDataRange(mat_dest, 1);
aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);

2
run.sh
View File

@@ -30,4 +30,4 @@ do
cd bin
./opencv_test_acl
fi
done
done