mirror of
https://github.com/Ascend/ascend-opencv.git
synced 2025-10-08 01:30:18 +08:00
update
This commit is contained in:
139
acl/README_CN.md
Executable file
139
acl/README_CN.md
Executable file
@@ -0,0 +1,139 @@
|
|||||||
|
# Opencv ACL模块安装及使用<a name="ZH-CN_TOPIC_0302083215"></a>
|
||||||
|
|
||||||
|
## 功能描述<a name="section1421916179418"></a>
|
||||||
|
|
||||||
|
该模块实现了Opencv部分模块对AscendCL的支持,包括MAT类及部分矩阵操作函数,具体见API接口文档
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## 目录结构<a name="section8733528154320"></a>
|
||||||
|
|
||||||
|
```
|
||||||
|
├── CMakeLists.txt //Cmake配置
|
||||||
|
├── include //头文件目录
|
||||||
|
│ └── opencv2
|
||||||
|
│ └── acl
|
||||||
|
│ ├── acl.hpp //ACL头文件
|
||||||
|
│ ├── acl_init.hpp //ACL初始化模块类的声明
|
||||||
|
│ ├── acl_mat.hpp //aclMat类的声明
|
||||||
|
│ ├── acl_type.hpp //ACL类型声明
|
||||||
|
│ ├── gemm.hpp //gemm模块
|
||||||
|
│ ├── init_core.hpp //ACL初始化环境核心实现
|
||||||
|
│ ├── mat_core.hpp //Mat类核心实现
|
||||||
|
│ ├── mathfuncs.hpp //math函数模块
|
||||||
|
│ ├── matrices.hpp //矩阵操作模块
|
||||||
|
│ └── operator_desc.hpp //算子描述模块
|
||||||
|
├── README_CN.md
|
||||||
|
├── run.sh //自动化部署脚本
|
||||||
|
├── src //源文件目录,对应声明
|
||||||
|
│ ├── acl_init.cpp
|
||||||
|
│ ├── acl_mat.cpp
|
||||||
|
│ ├── gemm.cpp
|
||||||
|
│ ├── mathfuncs.cpp
|
||||||
|
│ ├── matrices.cpp
|
||||||
|
│ ├── operator_desc.cpp
|
||||||
|
│ └── precomp.hpp //头文件总包含
|
||||||
|
└── test //单元测试目录
|
||||||
|
├── acl.cpp //总测试模块
|
||||||
|
├── acl.json
|
||||||
|
├── test_acl.cpp //aclMat类重载运算符测试
|
||||||
|
├── test_common.cpp //测试公用模块
|
||||||
|
├── test_common.hpp //测试公用模块声明
|
||||||
|
├── test_correctness.cpp //函数正确性验证
|
||||||
|
├── test_correctness.hpp
|
||||||
|
├── test_gemm.cpp //gemm模块性能验证
|
||||||
|
├── test_main.cpp
|
||||||
|
├── test_mathfuncs.cpp //math函数模块性能验证
|
||||||
|
├── test_matrices.cpp //矩阵操作模块性能验证
|
||||||
|
├── test_perf.hpp
|
||||||
|
└── test_precomp.hpp //测试头文件总包含
|
||||||
|
```
|
||||||
|
|
||||||
|
## 环境要求<a name="zh-cn_topic_0230709958_section1256019267915"></a>
|
||||||
|
|
||||||
|
- 操作系统及架构:CentOS x86\_64、CentOS aarch64、Ubuntu 18.04 x86\_64、EulerOS x86、EulerOS aarch64
|
||||||
|
- 编译器:
|
||||||
|
- 运行环境操作系统架构为x86时,编译器为g++
|
||||||
|
- 运行环境操作系统架构为arm64时,编译器为aarch64-linux-gnu-g++
|
||||||
|
- python及依赖的库:Python3.7.*x*(3.7.0 ~ 3.7.11)、Python3.8.*x*(3.8.0 ~ 3.8.11)
|
||||||
|
- 已完成昇腾AI软件栈的部署。
|
||||||
|
|
||||||
|
|
||||||
|
## 配置环境变量
|
||||||
|
|
||||||
|
- 开发环境上环境变量配置
|
||||||
|
|
||||||
|
1. CANN-Toolkit包提供进程级环境变量配置脚本,供用户在进程中引用,以自动完成CANN基础环境变量的配置,配置示例如下所示
|
||||||
|
|
||||||
|
```
|
||||||
|
. ${HOME}/Ascend/ascend-toolkit/set_env.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
请将“$HOME/Ascend”替换为“Ascend-cann-toolkit”包的实际安装路径。
|
||||||
|
|
||||||
|
2. 算子编译依赖Python,以Python3.7.5为例,请以运行用户执行如下命令设置Python3.7.5的相关环境变量。
|
||||||
|
|
||||||
|
```
|
||||||
|
#用于设置python3.7.5库文件路径
|
||||||
|
export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib:$LD_LIBRARY_PATH
|
||||||
|
#如果用户环境存在多个python3版本,则指定使用python3.7.5版本
|
||||||
|
export PATH=/usr/local/python3.7.5/bin:$PATH
|
||||||
|
```
|
||||||
|
|
||||||
|
Python3.7.5安装路径请根据实际情况进行替换,您也可以将以上命令写入~/.bashrc文件中,然后执行source ~/.bashrc命令使其立即生效。
|
||||||
|
|
||||||
|
3. 开发环境上,设置环境变量,配置AscendCL单算子验证程序编译依赖的头文件与库文件路径。
|
||||||
|
|
||||||
|
编译脚本会按环境变量指向的路径查找编译依赖的头文件和库文件,请将“$HOME/Ascend”替换为“Ascend-cann-toolkit”包的实际安装路径。
|
||||||
|
|
||||||
|
- 当运行环境操作系统架构是x86时,配置示例如下所示:
|
||||||
|
|
||||||
|
```
|
||||||
|
export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest/x86_64-linux
|
||||||
|
export NPU_HOST_LIB=$DDK_PATH/acllib/lib64/stub
|
||||||
|
```
|
||||||
|
|
||||||
|
- 当运行环境操作系统架构是AArch64时,配置示例如下所示:
|
||||||
|
|
||||||
|
```
|
||||||
|
export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest/arm64-linux
|
||||||
|
export NPU_HOST_LIB=$DDK_PATH/acllib/lib64/stub
|
||||||
|
```
|
||||||
|
|
||||||
|
- 运行环境上环境变量配置
|
||||||
|
|
||||||
|
- 若运行环境上安装的是“Ascend-cann-toolkit”包,环境变量设置如下:
|
||||||
|
|
||||||
|
```
|
||||||
|
. ${HOME}/Ascend/ascend-toolkit/set_env.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
- 若运行环境上安装的是“Ascend-cann-nnrt”包,环境变量设置如下:
|
||||||
|
|
||||||
|
```
|
||||||
|
. ${HOME}/Ascend/nnrt/set_env.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
- 若运行环境上安装的是“Ascend-cann-nnae”包,环境变量设置如下:
|
||||||
|
|
||||||
|
```
|
||||||
|
. ${HOME}/Ascend/nnae/set_env.sh
|
||||||
|
```
|
||||||
|
|
||||||
|
请将“$HOME/Ascend”替换为相关软件包的实际安装路径。
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## 安装说明
|
||||||
|
1. 在配置好Ascend环境之后,用户需要从官网下载opencv库和本模块(acl),并保证acl目录和opencv目录位于同一级目录下
|
||||||
|
2. 进入acl目录将run.sh脚本拷贝或者移动到acl和opencv的同级目录
|
||||||
|
3. 如果acl路径不在系统默认路径下,请修改acl/CMakeLists.txt文件中的acl_lib、acl_inc路径
|
||||||
|
4. 给脚本文件加权限: chmod +x run.sh
|
||||||
|
5. 运行脚本: ./run.sh
|
||||||
|
6. 如果需要在安装之后运行单元测试模块,可在运行脚本时追加参数ACLTEST: ./run.sh ACLTEST
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -137,15 +137,16 @@ namespace cv
|
|||||||
|
|
||||||
inline aclMat::~aclMat()
|
inline aclMat::~aclMat()
|
||||||
{
|
{
|
||||||
release();
|
if (refcount)
|
||||||
|
release();
|
||||||
}
|
}
|
||||||
|
|
||||||
inline aclMat &aclMat::operator=(const aclMat &m)
|
inline aclMat &aclMat::operator=(const aclMat &m)
|
||||||
{
|
{
|
||||||
if (this != &m)
|
if (this != &m)
|
||||||
{
|
{
|
||||||
if (m.refcount)
|
if (refcount)
|
||||||
CV_XADD(m.refcount, 1);
|
CV_XADD(refcount, -1);
|
||||||
flags = m.flags;
|
flags = m.flags;
|
||||||
rows = m.rows;
|
rows = m.rows;
|
||||||
cols = m.cols;
|
cols = m.cols;
|
||||||
@@ -159,6 +160,8 @@ namespace cv
|
|||||||
acl_context = m.acl_context;
|
acl_context = m.acl_context;
|
||||||
totalSize = m.totalSize;
|
totalSize = m.totalSize;
|
||||||
data = m.data;
|
data = m.data;
|
||||||
|
if (m.refcount)
|
||||||
|
CV_XADD(m.refcount, 1);
|
||||||
}
|
}
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
33
acl/run.sh
Executable file
33
acl/run.sh
Executable file
@@ -0,0 +1,33 @@
|
|||||||
|
#! /bin/bash
# Deployment helper for the acl module.
#
# Steps: move ./acl into ./opencv/modules, configure and build OpenCV in
# ./opencv/build, then optionally run the ACL unit tests.
#
# Usage: ./run.sh [ACLTEST]
#   ACLTEST  run bin/opencv_test_acl from the build directory after the build.
#
# Must be started from the directory that contains both ./opencv and ./acl.

opencv_directory=./opencv
acl_directory=./acl

# Move the module only when BOTH directories exist. The previous "-o" test
# also fired when ./acl was absent (for example on a second run, after it had
# already been moved), which made mv fail.
if [ -d "$opencv_directory" ] && [ -d "$acl_directory" ]
then
    if ! mv "$acl_directory" "$opencv_directory/modules/"
    then
        echo "run.sh: failed to move $acl_directory into $opencv_directory/modules/" >&2
        exit 1
    fi
fi

# Abort instead of configuring and building in whatever directory we are in.
cd "$opencv_directory" || exit 1

build_directory=$PWD/build
# mkdir -p is a no-op when the directory already exists.
mkdir -p "$build_directory" || exit 1
cd "$build_directory" || exit 1

# A failed configure can never produce a successful build, so stop early.
cmake .. || exit 1

# Retry the build a bounded number of times. The original loop retried
# forever, which hangs on any persistent compile error.
# NOTE(review): the retry presumably exists to ride out transient failures of
# the unbounded "make -j" (e.g. memory exhaustion) - confirm.
max_attempts=3
attempt=1
until make -j
do
    if [ "$attempt" -ge "$max_attempts" ]
    then
        echo "run.sh: make failed after $max_attempts attempts" >&2
        exit 1
    fi
    attempt=$((attempt + 1))
done

# Quote "$@" and "$var" so empty or space-containing arguments do not break
# the test expression.
for var in "$@"
do
    if [ "$var" = "ACLTEST" ]
    then
        # Subshell: the cd does not leak into later loop iterations.
        (cd bin && ./opencv_test_acl)
    fi
done
|
@@ -48,7 +48,7 @@ namespace cv
|
|||||||
case ACL_FLOAT:
|
case ACL_FLOAT:
|
||||||
{
|
{
|
||||||
aclrtMalloc(&dev_ptr, powersize, ACL_MEM_MALLOC_NORMAL_ONLY);
|
aclrtMalloc(&dev_ptr, powersize, ACL_MEM_MALLOC_NORMAL_ONLY);
|
||||||
float32_t power_32f = float32_t(power);
|
float power_32f = float(power);
|
||||||
aclrtMemcpy(dev_ptr, powersize, static_cast<void *>(&power_32f), powersize, ACL_MEMCPY_HOST_TO_DEVICE);
|
aclrtMemcpy(dev_ptr, powersize, static_cast<void *>(&power_32f), powersize, ACL_MEMCPY_HOST_TO_DEVICE);
|
||||||
return dev_ptr;
|
return dev_ptr;
|
||||||
}
|
}
|
||||||
|
@@ -90,7 +90,23 @@ namespace cv
|
|||||||
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
|
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
static int merge_type(int depth, int channels)
|
||||||
|
{
|
||||||
|
switch (depth)
|
||||||
|
{
|
||||||
|
case CV_8U:
|
||||||
|
return CV_8UC(channels);
|
||||||
|
case CV_8S:
|
||||||
|
return CV_8SC(channels);
|
||||||
|
case CV_32F:
|
||||||
|
return CV_32FC(channels);
|
||||||
|
case CV_32S:
|
||||||
|
return CV_32SC(channels);
|
||||||
|
case CV_64F:
|
||||||
|
return CV_64FC(channels);
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
void merge(const vector<aclMat>& mv, aclMat& dest)
|
void merge(const vector<aclMat>& mv, aclMat& dest)
|
||||||
{
|
{
|
||||||
@@ -110,17 +126,22 @@ namespace cv
|
|||||||
opDesc.AddInputTensorDesc(dataType, inputShape.size(), inputShape.data(), ACL_FORMAT_NHWC);
|
opDesc.AddInputTensorDesc(dataType, inputShape.size(), inputShape.data(), ACL_FORMAT_NHWC);
|
||||||
}
|
}
|
||||||
|
|
||||||
int cols = dest.step/dest.elemSize();
|
int cols = mv[0].step/mv[0].elemSize();
|
||||||
vector<int64_t> outputShape{1, dest.rows, cols, dest.channels()};
|
int channels = mv.size();
|
||||||
|
vector<int64_t> outputShape{1, mv[0].rows, cols, channels};
|
||||||
opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_NHWC);
|
opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_NHWC);
|
||||||
|
|
||||||
ino64_t N = mv.size();
|
ino64_t N = mv.size();
|
||||||
aclopSetAttrInt(opDesc.opAttr, "N", N);
|
aclopSetAttrInt(opDesc.opAttr, "N", N);
|
||||||
|
|
||||||
aclSetTensorDescName(opDesc.inputDesc[0], "concat_dim");
|
aclSetTensorDescName(opDesc.inputDesc[0], "concat_dim");
|
||||||
|
|
||||||
aclSetTensorDescName(opDesc.inputDesc[1], "x0");
|
aclSetTensorDescName(opDesc.inputDesc[1], "x0");
|
||||||
aclSetTensorDescName(opDesc.inputDesc[2], "x1");
|
aclSetTensorDescName(opDesc.inputDesc[2], "x1");
|
||||||
aclSetTensorDescName(opDesc.inputDesc[3], "x2");
|
if (mv.size() == 3)
|
||||||
|
aclSetTensorDescName(opDesc.inputDesc[3], "x2");
|
||||||
|
else if(mv.size() == 4)
|
||||||
|
aclSetTensorDescName(opDesc.inputDesc[4], "x3");
|
||||||
aclSetTensorDescName(opDesc.outputDesc[0], "y");
|
aclSetTensorDescName(opDesc.outputDesc[0], "y");
|
||||||
|
|
||||||
void *dev;
|
void *dev;
|
||||||
@@ -133,6 +154,9 @@ namespace cv
|
|||||||
for (size_t i = 0; i < mv.size(); ++i)
|
for (size_t i = 0; i < mv.size(); ++i)
|
||||||
inputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize));
|
inputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize));
|
||||||
|
|
||||||
|
int type = merge_type(mv[0].depth(), channels);
|
||||||
|
aclMat temp(mv[0].rows, mv[0].cols, type, mv[0].acl_context);
|
||||||
|
dest = temp;
|
||||||
outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize));
|
outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize));
|
||||||
|
|
||||||
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context);
|
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context);
|
||||||
@@ -257,6 +281,23 @@ namespace cv
|
|||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
static int split_type(int depth)
|
||||||
|
{
|
||||||
|
switch (depth)
|
||||||
|
{
|
||||||
|
case CV_8U:
|
||||||
|
return CV_8UC1;
|
||||||
|
case CV_8S:
|
||||||
|
return CV_8SC1;
|
||||||
|
case CV_32F:
|
||||||
|
return CV_32FC1;
|
||||||
|
case CV_32S:
|
||||||
|
return CV_32SC1;
|
||||||
|
case CV_64F:
|
||||||
|
return CV_64FC1;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
void split(const aclMat& src, vector<aclMat>& mv)
|
void split(const aclMat& src, vector<aclMat>& mv)
|
||||||
{
|
{
|
||||||
@@ -274,8 +315,7 @@ namespace cv
|
|||||||
|
|
||||||
for (int i = 0; i < num_split; ++i)
|
for (int i = 0; i < num_split; ++i)
|
||||||
{
|
{
|
||||||
int cols = mv[i].step/mv[i].elemSize();
|
vector<int64_t> outputShape{1, src.rows, cols, 1};
|
||||||
vector<int64_t> outputShape{1, mv[i].rows, cols, mv[i].channels()};
|
|
||||||
opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_ND);
|
opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_ND);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -285,11 +325,16 @@ namespace cv
|
|||||||
|
|
||||||
inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize));
|
inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize));
|
||||||
|
|
||||||
|
int type = split_type(src.depth());
|
||||||
for (int i = 0; i < num_split; ++i)
|
for (int i = 0; i < num_split; ++i)
|
||||||
|
{
|
||||||
|
aclMat tmp(src.rows, src.cols, type, src.acl_context);
|
||||||
|
mv[i] = tmp;
|
||||||
outputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize));
|
outputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize));
|
||||||
|
}
|
||||||
|
|
||||||
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context);
|
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context);
|
||||||
|
|
||||||
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
|
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
|
||||||
for (int i = 0; i < num_split; ++i)
|
for (int i = 0; i < num_split; ++i)
|
||||||
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
|
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
|
||||||
|
@@ -114,13 +114,10 @@ void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context)
|
|||||||
for (val = 8; val <= valmax; val *= 2)
|
for (val = 8; val <= valmax; val *= 2)
|
||||||
{
|
{
|
||||||
int n = 100;
|
int n = 100;
|
||||||
Mat mat_src(val, val, type[i]);
|
Mat mat_src(val, val, type[i], Scalar(1, 2, 4));
|
||||||
Mat mat_dest(val, val, type[i]);
|
Mat mat_dest(val, val, type[i], Scalar(2, 4, 8));
|
||||||
Mat mat_dest1(val, val, type[i]);
|
Mat mat_dest1(val, val, type[i]);
|
||||||
|
|
||||||
test.SetDataRange(mat_src, 2);
|
|
||||||
test.SetDataRange(mat_dest, 1);
|
|
||||||
|
|
||||||
aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
|
aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
|
||||||
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
|
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user