commit b5c1dcf4d8500dc6fd092e47b78bf508ff96f086 Author: luoliang Date: Fri Jul 22 17:14:48 2022 +0800 up diff --git a/README_CN.md b/README_CN.md new file mode 100755 index 0000000..46baed7 --- /dev/null +++ b/README_CN.md @@ -0,0 +1,139 @@ +# Opencv ACL模块安装及使用 + +## 功能描述 + +该模块实现了Opencv部分模块对AscendCL的支持,包括MAT类及部分矩阵操作函数,具体见API接口文档 + + + +## 目录结构 + +``` +├── CMakeLists.txt //Cmake配置 +├── include //头文件目录 +│ └── opencv2 +│ └── acl +│ ├── acl.hpp //ACL头文件 +│ ├── acl_init.hpp //ACL初始化模块类的声明 +│ ├── acl_mat.hpp //aclMat类的声明 +│ ├── acl_type.hpp //ACL类型声明 +│ ├── gemm.hpp //gemm模块 +│ ├── init_core.hpp //ACL初始化环境核心实现 +│ ├── mat_core.hpp //Mat类核心实现 +│ ├── mathfuncs.hpp //math函数模块 +│ ├── matrices.hpp //矩阵操作模块 +│ └── operator_desc.hpp //算子描述模块 +├── README_CN.md +├── run.sh //自动化部署脚本 +├── src //源文件目录,对应声明 +│ ├── acl_init.cpp +│ ├── acl_mat.cpp +│ ├── gemm.cpp +│ ├── mathfuncs.cpp +│ ├── matrices.cpp +│ ├── operator_desc.cpp +│ └── precomp.hpp //头文件总包含 +└── test //单元测试目录 + ├── acl.cpp //总测试模块 + ├── acl.json + ├── test_acl.cpp //aclMat类重载运算符测试 + ├── test_common.cpp //测试公用模块 + ├── test_common.hpp //测试公用模块声明 + ├── test_correctness.cpp //函数正确性验证 + ├── test_correctness.hpp + ├── test_gemm.cpp //gemm模块性能验证 + ├── test_main.cpp + ├── test_mathfuncs.cpp //math函数模块性能验证 + ├── test_matrices.cpp //矩阵操作模块性能验证 + ├── test_perf.hpp + └── test_precomp.hpp //测试头文件总包含 +``` + +## 环境要求 + +- 操作系统及架构:CentOS x86\_64、CentOS aarch64、Ubuntu 18.04 x86\_64、EulerOS x86、EulerOS aarch64 +- 编译器: + - 运行环境操作系统架构为x86时,编译器为g++ + - 运行环境操作系统架构为arm64时,编译器为aarch64-linux-gnu-g++ +- python及依赖的库:Python3.7.*x*(3.7.0 ~ 3.7.11)、Python3.8.*x*(3.8.0 ~ 3.8.11) +- 已完成昇腾AI软件栈的部署。 + + +## 配置环境变量 + +- 开发环境上环境变量配置 + + 1. CANN-Toolkit包提供进程级环境变量配置脚本,供用户在进程中引用,以自动完成CANN基础环境变量的配置,配置示例如下所示 + + ``` + . ${HOME}/Ascend/ascend-toolkit/set_env.sh + ``` + + “$HOME/Ascend”请替换“Ascend-cann-toolkit”包的实际安装路径。 + + 2. 
算子编译依赖Python,以Python3.7.5为例,请以运行用户执行如下命令设置Python3.7.5的相关环境变量。 + + ``` + #用于设置python3.7.5库文件路径 + export LD_LIBRARY_PATH=/usr/local/python3.7.5/lib:$LD_LIBRARY_PATH + #如果用户环境存在多个python3版本,则指定使用python3.7.5版本 + export PATH=/usr/local/python3.7.5/bin:$PATH + ``` + + Python3.7.5安装路径请根据实际情况进行替换,您也可以将以上命令写入~/.bashrc文件中,然后执行source ~/.bashrc命令使其立即生效。 + + 3. 开发环境上,设置环境变量,配置AscendCL单算子验证程序编译依赖的头文件与库文件路径。 + + 编译脚本会按环境变量指向的路径查找编译依赖的头文件和库文件,“$HOME/Ascend”请替换为“Ascend-cann-toolkit”包的实际安装路径。 + + - 当运行环境操作系统架构是x86时,配置示例如下所示: + + ``` + export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest/x86_64-linux + export NPU_HOST_LIB=$DDK_PATH/acllib/lib64/stub + ``` + + - 当运行环境操作系统架构是AArch64时,配置示例如下所示: + + ``` + export DDK_PATH=$HOME/Ascend/ascend-toolkit/latest/arm64-linux + export NPU_HOST_LIB=$DDK_PATH/acllib/lib64/stub + ``` + +- 运行环境上环境变量配置 + + - 若运行环境上安装的是“Ascend-cann-toolkit”包,环境变量设置如下: + + ``` + . ${HOME}/Ascend/ascend-toolkit/set_env.sh + ``` + + - 若运行环境上安装的是“Ascend-cann-nnrt”包,环境变量设置如下: + + ``` + . ${HOME}/Ascend/nnrt/set_env.sh + ``` + + - 若运行环境上安装的是“Ascend-cann-nnae”包,环境变量设置如下: + + ``` + . ${HOME}/Ascend/nnae/set_env.sh + ``` + + “$HOME/Ascend”请替换为相关软件包的实际安装路径。 + + + + +## 安装说明 +1. 在配置好Ascend之后,用户需要从官网下载好opencv库和本模块(acl),保证acl模块和opencv在同一级目录下 +2. 进入acl目录将run.sh脚本拷贝或者移动到acl和opencv的同级目录 +3. 如果acl路径不在系统默认路径,修改acl/CMakeLists.txt文件,修改acl_lib,acl_inc路径 +4. 给脚本文件加权限: chmod +x run.sh +5. 运行脚本: ./run.sh +6. 
如果需要安装之后运行单元测试模块,可在脚本后加命令: ./run.sh ACLTEST + + + + + diff --git a/acl/CMakeLists.txt b/acl/CMakeLists.txt new file mode 100644 index 0000000..dc33fbf --- /dev/null +++ b/acl/CMakeLists.txt @@ -0,0 +1,23 @@ +#if(NOT HAVE_ACL) +# ocv_module_disable(acl) +# return() +#endif() + +#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" "-DENABLE_DVPP_INTERFACE") + +set(acl_lib "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/") +set(acl_lib "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/stub/") +link_directories(${acl_lib}) + +set(acl_inc "/usr/local/Ascend/ascend-toolkit/latest/acllib/include/") +set(acl_inc "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/include/") +ocv_include_directories(${acl_inc}) + +set(ASCEND_LIBRARIES "ascendcl" "stdc++" "acl_op_compiler") + +set(the_description "ACL-accelerated Computer Vision") +ocv_define_module(acl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d opencv_ml "${ASCEND_LIBRARIES}") + +ocv_target_link_libraries(${the_module} "${ASCEND_LIBRARIES}") + +ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow -Woverloaded-virtual -Wunused-private-field) diff --git a/acl/include/opencv2/acl/acl.hpp b/acl/include/opencv2/acl/acl.hpp new file mode 100644 index 0000000..dcaa2fe --- /dev/null +++ b/acl/include/opencv2/acl/acl.hpp @@ -0,0 +1,57 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. 
+// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_ACL_HPP +#define OPENCV_ACL_HPP + +#include "acl_type.hpp" +#include "acl_init.hpp" +#include "acl_mat.hpp" +#include "operator_desc.hpp" +#include "mathfuncs.hpp" +#include "matrices.hpp" +#include "gemm.hpp" +#include "mat_core.hpp" +#include "init_core.hpp" + +#endif diff --git a/acl/include/opencv2/acl/acl_init.hpp b/acl/include/opencv2/acl/acl_init.hpp new file mode 100644 index 0000000..30df086 --- /dev/null +++ b/acl/include/opencv2/acl/acl_init.hpp @@ -0,0 +1,58 @@ +#ifndef OPENCV_ACL_INIT_HPP +#define OPENCV_ACL_INIT_HPP + +#include +#include + +#include "opencv2/core.hpp" +#include "acl_type.hpp" + +using namespace std; + +namespace cv +{ + namespace acl + { + CV_EXPORTS Mutex &getInitMutex(); + //////////////////////////////// aclEnv //////////////////////////////// + class CV_EXPORTS aclEnv + { + public: + aclEnv(); + aclEnv(const char* config_path); + static aclEnv* get_acl_env(const char* config_path); + int get_device_count(); + int *refcount; + ~aclEnv(); + private: + uint32_t _device_count; + }; + + //////////////////////////////// aclCxt //////////////////////////////// + class CV_EXPORTS aclCxt + { + public: + aclCxt(); + aclCxt(int device_id); + + aclrtContext* get_context(); + void set_current_context(); + + void create_stream(int count = 1); + aclStream get_stream(const size_t index = 0); + ~aclCxt(); + private: + int32_t _device_id; + aclrtContext* _context; + std::vector _acl_streams; + }; + + //////////////////////////////// device //////////////////////////////// + CV_EXPORTS aclCxt *set_device(const char* config_path, int device_id = 0, int stream_count = 1); + CV_EXPORTS void release_device(aclCxt* context); + + } /* end of namespace acl */ + +} /* end of namespace cv */ + +#endif \ No newline at end of file diff --git a/acl/include/opencv2/acl/acl_mat.hpp b/acl/include/opencv2/acl/acl_mat.hpp new file mode 100644 index 0000000..c1e8bec --- /dev/null +++ b/acl/include/opencv2/acl/acl_mat.hpp @@ -0,0 +1,173 @@ 
+#ifndef OPENCV_ACL_MAT_HPP +#define OPENCV_ACL_MAT_HPP + +#include "acl/acl.h" +#include "opencv2/core.hpp" +#include "acl_type.hpp" +#include "acl_init.hpp" + + +namespace cv +{ + namespace acl + { + //////////////////////////////// aclMat //////////////////////////////// + class CV_EXPORTS aclMat + { + public: + //! default constructor + aclMat(); + //! constructs aclMatrix of the specified size and type (_type is CV_8UC1, CV_16FC1 etc.) + aclMat(int rows, int cols, int type, aclCxt *acl_context, ALIGNMENT config = MEMORY_UNALIGNED, MemMallocPolicy policy = MALLOC_HUGE_FIRST); + aclMat(Size size, int type, aclCxt *acl_context, ALIGNMENT config = MEMORY_UNALIGNED, MemMallocPolicy policy = MALLOC_HUGE_FIRST); + //! copy constructor + aclMat(const aclMat &m); + //! constructor for aclMatrix headers pointing to user-allocated data + aclMat(int rows, int cols, int type, void *data, aclCxt* acl_context, ALIGNMENT config = MEMORY_UNALIGNED, size_t step = Mat::AUTO_STEP); + aclMat(Size size, int type, void *data, aclCxt* acl_context, ALIGNMENT config = MEMORY_UNALIGNED, size_t step = Mat::AUTO_STEP); + //! creates a matrix header for a part of the bigger matrix + aclMat(const aclMat &m, const Range &rowRange, const Range &colRange = Range::all()); + aclMat(const aclMat &m, const Rect &roi); + //! builds aclMat from Mat. Perfom blocking upload to device. + aclMat (const Mat &m, aclCxt* acl_context, ALIGNMENT config = MEMORY_UNALIGNED, MemMallocPolicy policy = MALLOC_HUGE_FIRST); + //! destructor - calls release() + ~aclMat(); + + //! assignment operators shallow copy + aclMat &operator=(const aclMat &m); + //! assignment operator. Perfom blocking upload to device. + aclMat &operator=(const Mat &m); + + //! pefroms blocking upload data to aclMat. + void upload(const Mat &m, ALIGNMENT config = MEMORY_UNALIGNED); + void upload(const Mat &m, aclStream stream, ALIGNMENT config = MEMORY_UNALIGNED); + //! downloads data from device to host memory. Blocking calls. 
+ void download(Mat &m, ALIGNMENT config = MEMORY_UNALIGNED) const; + void download(Mat &m, aclStream stream, ALIGNMENT config = MEMORY_UNALIGNED) const; + + operator Mat() const; + aclMat clone() const; + void copyTo(aclMat& dest) const; + + //! returns a new aclMatrix header for the specified row + aclMat row(int y) const; + //! returns a new aclMatrix header for the specified column + aclMat col(int x) const; + //! ... for the specified row span + aclMat rowRange(int startrow, int endrow) const; + aclMat rowRange(const Range &r) const; + //! ... for the specified column span + aclMat colRange(int startcol, int endcol) const; + aclMat colRange(const Range &r) const; + + //! locates aclMatrix header within a parent aclMatrix. See below + void locateROI(Size &wholeSize, Point &ofs) const; + //! moves/resizes the current aclMatrix ROI inside the parent aclMatrix. + aclMat &adjustROI(int dtop, int dbottom, int dleft, int dright); + + //! allocates new aclMatrix data unless the aclMatrix already has specified size and type. + // previous data is unreferenced if needed. + void create(int rows, int cols, int type, ALIGNMENT config = MEMORY_UNALIGNED, MemMallocPolicy policy = MALLOC_HUGE_FIRST); + void create(Size size, int type, ALIGNMENT config = MEMORY_UNALIGNED, MemMallocPolicy policy = MALLOC_HUGE_FIRST); + + //! allocates new aclMatrix with specified device memory type. + void createEx(int rows, int cols, int type, ALIGNMENT config = MEMORY_UNALIGNED, MemMallocPolicy policy = MALLOC_HUGE_FIRST); + void createEx(Size size, int type, ALIGNMENT config = MEMORY_UNALIGNED, MemMallocPolicy policy = MALLOC_HUGE_FIRST); + + //! decreases reference counter; + // deallocate the data when reference counter reaches 0. + void release(); + + //! swaps with other smart pointer + void swap(aclMat &mat); + + //! extracts a rectangular sub-aclMatrix + // (this is a generalized form of row, rowRange etc.) 
+ aclMat operator()( Range rowRange, Range colRange ) const; + aclMat operator()( const Rect &roi ) const; + + aclMat& operator+=( const aclMat& m ); + aclMat& operator-=( const aclMat& m ); + aclMat& operator/=( const aclMat& m ); + aclMat& operator*=( const aclMat& m ); + + + //! returns true if the aclMatrix data is continuous + // (i.e. when there are no gaps between successive rows). + // similar to CV_IS_aclMat_CONT(cvaclMat->type) + bool isContinuous() const; + + //! returns element size in bytes, + // similar to CV_ELEM_SIZE(cvMat->type) + size_t elemSize() const; + //! returns the size of element channel in bytes. + size_t elemSize1() const; + + //! returns element type, similar to CV_MAT_TYPE(cvMat->type) + int type() const; + //! returns element type, i.e. 8UC3 returns 8UC4 because in acl + //! 3 channels element actually use 4 channel space + int acltype() const; + //! returns element type, similar to CV_MAT_DEPTH(cvMat->type) + int depth() const; + + //! returns element type, similar to CV_MAT_CN(cvMat->type) + int channels() const; + //! returns element type, return 4 for 3 channels element, + //!becuase 3 channels element actually use 4 channel space + int aclchannels() const; + + //! returns step/elemSize1() + size_t step1() const; + //! returns aclMatrix size: + // width == number of columns, height == number of rows + Size size() const; + //! returns true if aclMatrix data is NULL + bool empty() const; + + friend void swap(aclMat &a, aclMat &b); + friend void ensureSizeIsEnough(int rows, int cols, int type, aclMat &m, ALIGNMENT config = MEMORY_UNALIGNED); + friend void ensureSizeIsEnough(Size size, int type, aclMat &m, ALIGNMENT config = MEMORY_UNALIGNED); + + /*! includes several bit-fields: + - the magic signature + - continuity flag + - depth + - number of channels + */ + int flags; + //! the number of rows and columns + int rows, cols; + //! a distance between successive rows in bytes; includes the gap if any + size_t step; + //! 
pointer to the data(ACL memory object) + //uchar *data; + + //! OpenCL context associated with the aclMat object. + void *data; // TODO + + //! pointer to the reference counter; + // when aclMatrix points to user-allocated data, the pointer is NULL + int *refcount; + + //! helper fields used in locateROI and adjustROI + //datastart and dataend are not used in current version + uchar *datastart; + uchar *dataend; + + //add offset for handle ROI, calculated in byte + int offset; + //add wholerows and wholecols for the whole matrix, datastart and dataend are no longer used + int wholerows; + int wholecols; + + aclCxt *acl_context; + size_t totalSize; + + }; + } /* end of namespace acl */ + +} /* end of namespace cv */ + + +#endif \ No newline at end of file diff --git a/acl/include/opencv2/acl/acl_type.hpp b/acl/include/opencv2/acl/acl_type.hpp new file mode 100644 index 0000000..f2f9648 --- /dev/null +++ b/acl/include/opencv2/acl/acl_type.hpp @@ -0,0 +1,158 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + +#ifndef OPENCV_ACL_TYPE_HPP +#define OPENCV_ACL_TYPE_HPP + +#define AclSafeCall(expr) __aclSafeCall(expr, __FILE__, __LINE__, __func__) +#define AclVerifyCall(expr) __aclSafeCall(res, __FILE__, __LINE__, __func__) + +#include +#include "opencv2/core.hpp" +#include "acl/acl.h" + +namespace cv +{ + namespace acl + { + /** + * An error is reported if the expression value is not 0 + */ + static inline void __aclSafeCall(int err, const char* file, const int line, const char *func="") + { + if(0 != err) + { + const char* function = func ? func : "unknown function"; + std::cerr << "Acl Called Error: " << "file " << file << ", func " << function << ", line " << line << " errorCode: " << err << std::endl; + std::cerr.flush(); + } + } + + /* Memory alignment */ + enum ALIGNMENT { MEMORY_UNALIGNED = 0, MEMORY_ALIGN = 1}; + + enum { MAGIC_VAL = 0x42FF0000, AUTO_STEP = 0, CONTINUOUS_FLAG = CV_MAT_CONT_FLAG, SUBMATRIX_FLAG = CV_SUBMAT_FLAG }; + enum { MAGIC_MASK = 0xFFFF0000, TYPE_MASK = 0x00000FFF, DEPTH_MASK = 7 }; + + typedef aclrtStream aclStream; + + typedef enum Opdims { TWO_DIMS = 1, FOUR_DIMS } Opdims; + + enum DeviceType + { + ACL_DEVICE_TYPE_DEFAULT = (1 << 0), + ACL_DEVICE_TYPE_200 = (1 << 1), + ACL_DEVICE_TYPE_ACCELERATOR = (1 << 3), + }; + + enum AttrType + { + OP_BOOL = 1, + OP_INT, + OP_FLOAT, + OP_STRING + }; + + typedef enum MemMallocPolicy + { + MALLOC_HUGE_FIRST = 1, + MALLOC_HUGE_ONLY, + MALLOC_NORMAL_ONLY, + MALLOC_HUGE_FIRST_P2P, + MALLOC_HUGE_ONLY_P2P, + MALLOC_NORMAL_ONLY_P2P + } MemMallocPolicy; + + + CV_EXPORTS aclDataType type_transition(int depth); + CV_EXPORTS aclrtMemMallocPolicy type_transition(MemMallocPolicy type); + + inline aclDataType type_transition(int depth) + { + switch (depth) + { + case CV_8U: + return ACL_UINT8; + case CV_8S: + return ACL_INT8; + case CV_16U: + return ACL_UINT16; + case CV_16S: + return ACL_INT16; + case CV_16F: + return ACL_FLOAT16; + case CV_32S: + return ACL_INT32; + case CV_32F: + return ACL_FLOAT; + 
case CV_64F: + return ACL_DOUBLE; + } + return ACL_DT_UNDEFINED; + } + + inline aclrtMemMallocPolicy type_transition(MemMallocPolicy type) + { + switch (type) + { + case MALLOC_HUGE_FIRST: + return ACL_MEM_MALLOC_HUGE_FIRST; + case MALLOC_HUGE_ONLY: + return ACL_MEM_MALLOC_HUGE_ONLY; + case MALLOC_NORMAL_ONLY: + return ACL_MEM_MALLOC_NORMAL_ONLY; + case MALLOC_HUGE_FIRST_P2P: + return ACL_MEM_MALLOC_HUGE_FIRST_P2P; + case MALLOC_HUGE_ONLY_P2P: + return ACL_MEM_MALLOC_HUGE_ONLY_P2P; + case MALLOC_NORMAL_ONLY_P2P: + return ACL_MEM_MALLOC_NORMAL_ONLY_P2P; + } + return ACL_MEM_MALLOC_HUGE_FIRST; + } + + } /* end of namespace acl */ + +} /* end of namespace cv */ + +#endif /* __OPENCV_ACL_HPP__ */ diff --git a/acl/include/opencv2/acl/gemm.hpp b/acl/include/opencv2/acl/gemm.hpp new file mode 100644 index 0000000..1a39c4b --- /dev/null +++ b/acl/include/opencv2/acl/gemm.hpp @@ -0,0 +1,21 @@ +#ifndef OPENCV_GEMM_HPP +#define OPENCV_GEMM_HPP + +#include "acl_mat.hpp" + +namespace cv +{ + namespace acl + { + // matrix multiplication + CV_EXPORTS void MatMul(const aclMat& src1, const aclMat& src2, aclMat& dest); + // convolution + CV_EXPORTS void Convolution(const aclMat& src, const aclMat& kernel, aclMat& dest, \ + const vector& stridesList = vector {1, 1, 1, 1}, const vector& padsList = vector {0, 0, 0, 0}); + + } /* end of namespace acl */ + +} /* end of namespace cv */ + + +#endif \ No newline at end of file diff --git a/acl/include/opencv2/acl/init_core.hpp b/acl/include/opencv2/acl/init_core.hpp new file mode 100644 index 0000000..8ba5445 --- /dev/null +++ b/acl/include/opencv2/acl/init_core.hpp @@ -0,0 +1,118 @@ +#ifndef OPENCV_INIT_CORE_HPP +#define OPENCV_INIT_CORE_HPP + +#include "acl_init.hpp" + +namespace cv +{ + namespace acl + { + ///////////////////////////aclEnv////////////////////////////////// + /** + * acl init + */ + inline aclEnv::aclEnv() + {} + + inline aclEnv::aclEnv(const char* config_path) + { + uint32_t device_count; + + 
AclSafeCall(aclInit(config_path)); + + AclSafeCall(aclrtGetDeviceCount(&device_count)); + + _device_count = device_count; + // Reference Counting + refcount = static_cast(fastMalloc(sizeof(*refcount))); + *refcount = 0; + + clog << "aclInit() is success" << endl; + } + + inline int aclEnv::get_device_count() + { + return _device_count; + } + + inline aclEnv::~aclEnv() + { + AclSafeCall(aclFinalize()); + clog << "aclFinalize() is success" << endl; + } + + + /////////////////////////////////////////aclCxt//////////////////////////// + inline aclCxt::aclCxt() {}; + + inline aclCxt::aclCxt(int device_id) : _device_id(device_id) + { + _context = static_cast(fastMalloc(sizeof(*_context))); + AclSafeCall(aclrtCreateContext(_context, _device_id)); + + clog << "aclrtCreateContext() is success" << endl; + } + + inline aclrtContext* aclCxt::get_context() + { + return _context; + } + + /** + * set current context + */ + inline void aclCxt::set_current_context() + { + AclSafeCall(aclrtSetCurrentContext(*_context)); + } + + inline void aclCxt::create_stream(int count) + { + CV_Assert(count > 0); + + int i; + for(i = 0; i ().swap(_acl_streams); + AclSafeCall(aclrtDestroyContext(*_context)); + + clog << "aclrtDestroyContext() is success" << endl; + } + + } /* end of namespace acl */ + +} /* end of namespace cv */ + +#endif \ No newline at end of file diff --git a/acl/include/opencv2/acl/mat_core.hpp b/acl/include/opencv2/acl/mat_core.hpp new file mode 100644 index 0000000..23bebc5 --- /dev/null +++ b/acl/include/opencv2/acl/mat_core.hpp @@ -0,0 +1,383 @@ +#ifndef OPENCV_MAT_CORE_HPP +#define OPENCV_MAT_CORE_HPP + +#include "acl_type.hpp" + +namespace cv +{ + namespace acl + { + //////////////////////////////////////////////////////////////////////// + //////////////////////////////// aclMat //////////////////////////////// + //////////////////////////////////////////////////////////////////////// + + inline aclMat::aclMat() + : flags(0), rows(0), cols(0), step(0), data(nullptr), 
refcount(nullptr), + datastart(nullptr), dataend(nullptr), offset(0), wholerows(0), wholecols(0), acl_context(0), totalSize(0) + { + } + + /** + * @param [in] _acl_context: Acl context + * @param [in] config: Byte aligned or not, Default MEMORY_UNALIGNED + * @param [in] policy: Acl Memory Application mode, Default ACL_MEM_MALLOC_HUGE_FIRST + */ + inline aclMat::aclMat(int _rows, int _cols, int _type, aclCxt *_acl_context, ALIGNMENT config, MemMallocPolicy policy) + : flags(0), rows(0), cols(0), step(0), data(nullptr), refcount(nullptr), datastart(nullptr), + dataend(nullptr), offset(0), wholerows(0), wholecols(0), acl_context(_acl_context), totalSize(0) + { + if (_rows > 0 && _cols > 0) + create(_rows, _cols, _type, config, policy); + } + + inline aclMat::aclMat(Size _size, int _type, aclCxt *_acl_context, ALIGNMENT config, MemMallocPolicy policy) + : flags(0), rows(0), cols(0), step(0), data(nullptr), refcount(nullptr), datastart(nullptr), + dataend(nullptr), offset(0), wholerows(0), wholecols(0), acl_context(_acl_context), totalSize(0) + { + if (_size.height > 0 && _size.width > 0) + create(_size, _type, config, policy); + } + + inline aclMat::aclMat(const aclMat &m) + : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), + datastart(m.datastart), dataend(m.dataend), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), + acl_context(m.acl_context), totalSize(m.totalSize) + { + if (refcount) + CV_XADD(refcount, 1); + } + + inline aclMat::aclMat(int _rows, int _cols, int _type, void *_data, aclCxt *_acl_context, ALIGNMENT config, size_t _step) + : flags(0), rows(0), cols(0), step(0), data(nullptr), refcount(nullptr), datastart(nullptr), + dataend(nullptr), offset(0), wholerows(0), wholecols(0), acl_context(_acl_context), totalSize(0) + { + cv::Mat m(_rows, _cols, _type, _data, _step); + if (m.rows > 0 && m.cols > 0) + create(m.rows, m.cols, m.type(), config); + upload(m, config); + } + + inline 
aclMat::aclMat(Size _size, int _type, void *_data, aclCxt *_acl_context, ALIGNMENT config, size_t _step) + : flags(0), rows(0), cols(0), step(0), data(nullptr), refcount(nullptr), datastart(nullptr), + dataend(nullptr), offset(0), wholerows(0), wholecols(0), acl_context(_acl_context), totalSize(0) + { + cv::Mat m(_size, _type, _data, _step); + if (m.rows > 0 && m.cols > 0) + create(m.rows, m.cols, m.type(), config); + upload(m, config); + } + + /** + * @param [in] rRange: rows begin to end, Range(start, end) + * @param [in] cRange: cols begin to end, Range(start, end) + */ + inline aclMat::aclMat(const aclMat &m, const Range &rRange, const Range &cRange) + : flags(m.flags), step(m.step), refcount(m.refcount), datastart(m.datastart), dataend(m.dataend), + offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), acl_context(m.acl_context), totalSize(m.totalSize) + { + if (rRange == Range::all()) + rows = m.rows; + else + { + CV_Assert(0 <= rRange.start && rRange.start <= rRange.end && rRange.end <= m.rows); + rows = rRange.size(); + offset += step * rRange.start; + } + + if (cRange == Range::all()) + cols = m.cols; + else + { + CV_Assert(0 <= cRange.start && cRange.start <= cRange.end && cRange.end <= m.cols); + cols = cRange.size(); + offset += cRange.start * elemSize(); + flags &= cols < m.cols ? ~Mat::CONTINUOUS_FLAG : -1; + } + + if (rows == 1) + flags |= Mat::CONTINUOUS_FLAG; + + if (refcount) + CV_XADD(refcount, 1); + if (rows <= 0 || cols <= 0) + rows = cols = 0; + + data = static_cast((static_cast(m.data) + offset)); + } + + /** + * @param [in] roi: Matrix position, Rect(x, y, width, height) + * + */ + inline aclMat::aclMat(const aclMat &m, const Rect &roi) + : flags(m.flags), rows(roi.height), cols(roi.width), step(m.step), refcount(m.refcount), datastart(m.datastart), + dataend(m.dataend), offset(m.offset), wholerows(m.wholerows), wholecols(m.wholecols), acl_context(m.acl_context), + totalSize(m.totalSize) + { + flags &= roi.width < m.cols ? 
~Mat::CONTINUOUS_FLAG : -1; + offset += roi.y * step + roi.x * elemSize(); + CV_Assert(0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.wholecols && + 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.wholerows); + if (refcount) + CV_XADD(refcount, 1); + if (rows <= 0 || cols <= 0) + rows = cols = 0; + + data = static_cast((static_cast(m.data) + offset)); + } + + inline aclMat::aclMat(const Mat &m, aclCxt *_acl_context, ALIGNMENT config, MemMallocPolicy policy) + : flags(0), rows(m.rows), cols(m.cols), step(0), data(nullptr), refcount(nullptr), datastart(nullptr), + dataend(nullptr), offset(0), wholerows(0), wholecols(0), acl_context(_acl_context), totalSize(0) + { + if (m.rows > 0 && m.cols > 0) + create(m.rows, m.cols, m.type(), config, policy); + upload(m, config); + } + + inline aclMat::~aclMat() + { + release(); + } + + inline aclMat &aclMat::operator=(const aclMat &m) + { + if (this != &m) + { + if (m.refcount) + CV_XADD(m.refcount, 1); + flags = m.flags; + rows = m.rows; + cols = m.cols; + step = m.step; + datastart = m.datastart; + dataend = m.dataend; + offset = m.offset; + wholerows = m.wholerows; + wholecols = m.wholecols; + refcount = m.refcount; + acl_context = m.acl_context; + totalSize = m.totalSize; + data = m.data; + } + return *this; + } + + inline aclMat& aclMat::operator=(const Mat &m) + { + upload(m); + return *this; + } + + inline aclMat::operator Mat() const + { + Mat m(rows, cols, type()); + download(m); + return m; + } + + inline aclMat aclMat::clone() const + { + aclMat m; + copyTo(m); + return m; + } + + inline void aclMat::copyTo(aclMat& dest) const + { + if (this != &dest) + { + dest.rows = rows; + dest.cols = cols; + dest.step = step; + dest.wholerows = wholerows; + dest.wholecols = wholecols; + dest.refcount = refcount; + dest.acl_context = acl_context; + dest.totalSize = totalSize; + + void *dev_ptr; + AclSafeCall(aclrtMalloc(&dev_ptr, totalSize, type_transition(MALLOC_HUGE_FIRST))); + AclSafeCall(aclrtMemcpy(dev_ptr, 
totalSize, data, totalSize, ACL_MEMCPY_DEVICE_TO_DEVICE)); + + dest.data = dev_ptr; + dest.datastart = static_cast(dev_ptr); // bounds of the new copy, not of the source buffer (adjustROI rebuilds data from datastart) + dest.dataend = static_cast(dev_ptr) + totalSize; + dest.refcount = static_cast(fastMalloc(sizeof(*refcount))); // the copy owns its buffer, so it gets its own counter + *dest.refcount = 0; + CV_XADD(dest.refcount, 1); + dest.flags = flags | Mat::CONTINUOUS_FLAG; // carry over depth/channels so clone() keeps the element type + } + } + + inline aclMat aclMat::row(int y) const + { + return aclMat(*this, Range(y, y + 1), Range::all()); + } + + inline aclMat aclMat::col(int x) const + { + return aclMat(*this, Range::all(), Range(x, x + 1)); + } + + inline aclMat aclMat::rowRange(int startrow, int endrow) const + { + return aclMat(*this, Range(startrow, endrow), Range::all()); + } + + inline aclMat aclMat::rowRange(const Range &r) const + { + return aclMat(*this, r, Range::all()); + } + + inline aclMat aclMat::colRange(int startcol, int endcol) const + { + return aclMat(*this, Range::all(), Range(startcol, endcol)); + } + + inline aclMat aclMat::colRange(const Range &r) const + { + return aclMat(*this, Range::all(), r); + } + + inline void aclMat::locateROI( Size &wholeSize, Point &ofs ) const + { + size_t esz = elemSize(); + CV_DbgAssert(step > 0); + if(offset == 0) + ofs.x = ofs.y = 0; + else + { + ofs.y = (int)(offset / step); + ofs.x = (int)((offset - step * ofs.y) / esz); + CV_DbgAssert(data == (datastart + ofs.y * step + ofs.x * esz)); + } + wholeSize.height = wholerows; + wholeSize.width = wholecols; + } + + inline aclMat &aclMat::adjustROI( int dtop, int dbottom, int dleft, int dright ) + { + Size wholeSize; + Point ofs; + size_t esz = elemSize(); + locateROI( wholeSize, ofs ); + int row1 = std::max(ofs.y - dtop, 0), row2 = std::min(ofs.y + rows + dbottom, wholeSize.height); + int col1 = std::max(ofs.x - dleft, 0), col2 = std::min(ofs.x + cols + dright, wholeSize.width); + offset += (row1 - ofs.y) * step + (col1 - ofs.x) * esz; + rows = row2 - row1; + cols = col2 - col1; + if( esz * cols == step || rows == 1 ) + flags |= Mat::CONTINUOUS_FLAG; + else + flags &= 
~Mat::CONTINUOUS_FLAG; + + data = static_cast((static_cast(datastart) + offset)); + return *this; + } + + inline void aclMat::swap(aclMat &b) + { + std::swap( flags, b.flags ); + std::swap( rows, b.rows ); + std::swap( cols, b.cols ); + std::swap( step, b.step ); + std::swap( data, b.data ); + std::swap( datastart, b.datastart ); + std::swap( dataend, b.dataend ); + std::swap( refcount, b.refcount ); + std::swap( offset, b.offset ); + std::swap( wholerows, b.wholerows ); + std::swap( wholecols, b.wholecols ); + std::swap( acl_context, b.acl_context); + std::swap( totalSize, b.totalSize); + } + + inline aclMat aclMat::operator()( Range rRange, Range cRange ) const + { + return aclMat(*this, rRange, cRange); + } + + inline aclMat aclMat::operator()( const Rect &roi ) const + { + return aclMat(*this, roi); + } + + inline bool aclMat::isContinuous() const + { + return (flags & Mat::CONTINUOUS_FLAG) != 0; + } + + inline size_t aclMat::elemSize() const + { + return CV_ELEM_SIZE((CV_MAKE_TYPE(type(), channels()))); + } + + inline size_t aclMat::elemSize1() const + { + return CV_ELEM_SIZE1(flags); + } + + inline int aclMat::type() const + { + return CV_MAT_TYPE(flags); + } + + inline int aclMat::acltype() const + { + return CV_MAKE_TYPE(depth(), aclchannels()); + } + + inline int aclMat::depth() const + { + return CV_MAT_DEPTH(flags); + } + + inline int aclMat::channels() const + { + return CV_MAT_CN(flags); + } + + inline int aclMat::aclchannels() const + { + return (CV_MAT_CN(flags)) == 3 ? 
4 : (CV_MAT_CN(flags)); + } + + inline size_t aclMat::step1() const + { + return step / elemSize1(); + } + + inline Size aclMat::size() const + { + return Size(cols, rows); + } + + inline bool aclMat::empty() const + { + return data == 0; + } + + inline void swap( aclMat &a, aclMat &b ) + { + a.swap(b); + } + + inline void ensureSizeIsEnough(int rows, int cols, int type, aclMat &m, ALIGNMENT config) + { + if (m.type() == type && m.rows >= rows && m.cols >= cols) + m = m(Rect(0, 0, cols, rows)); + else + m.create(rows, cols, type, config); + } + + inline void ensureSizeIsEnough(Size size, int type, ALIGNMENT config, aclMat &m) + { + ensureSizeIsEnough(size.height, size.width, type, m, config); + } + + } /* end of namespace acl */ + +} /* end of namespace cv */ + +#endif \ No newline at end of file diff --git a/acl/include/opencv2/acl/mathfuncs.hpp b/acl/include/opencv2/acl/mathfuncs.hpp new file mode 100644 index 0000000..0ff9089 --- /dev/null +++ b/acl/include/opencv2/acl/mathfuncs.hpp @@ -0,0 +1,27 @@ +#ifndef OPENCV_MATHFUNCS_HPP +#define OPENCV_MATHFUNCS_HPP + +#include "acl_mat.hpp" + +/** + * mathfunctions; + */ + +namespace cv +{ + namespace acl + { + CV_EXPORTS aclMat abs(const aclMat &src); + CV_EXPORTS void pow(const aclMat &src, double power, aclMat &dest); + CV_EXPORTS void sqrt(const aclMat &src, aclMat &dest); + CV_EXPORTS void add(const aclMat &src, const aclMat &other_src, aclMat &dest); + CV_EXPORTS void divide(const aclMat &src, const aclMat &other_src, aclMat &dest); + CV_EXPORTS void exp(const aclMat &src, aclMat &dest); + CV_EXPORTS void log(const aclMat &src, aclMat &dest); + CV_EXPORTS void max(const aclMat &src, const aclMat &other_src, aclMat &dest); + CV_EXPORTS void min(const aclMat &src, const aclMat &other_src, aclMat &dest); + } /* end of namespace acl */ + +} /* end of namespace cv */ + +#endif \ No newline at end of file diff --git a/acl/include/opencv2/acl/matrices.hpp b/acl/include/opencv2/acl/matrices.hpp new file mode 100644 index 
0000000..fe6724d --- /dev/null +++ b/acl/include/opencv2/acl/matrices.hpp @@ -0,0 +1,23 @@ +#ifndef OPENCV_MATRICES_HPP +#define OPENCV_MATRICES_HPP + +#include "acl_mat.hpp" + +namespace cv +{ + namespace acl + { + // Matrix lookup table + //CV_EXPORTS void lookUpTable(const aclMat& src, const aclMat& lut, aclMat& dst); + // Multiple channel merge + CV_EXPORTS void merge(const vector& mv, aclMat& dst); + // Split into channels + CV_EXPORTS void split(const aclMat& src, vector& mv); + // Matrix transpose + CV_EXPORTS void transpose(const aclMat& src, aclMat& dest); + CV_EXPORTS void flip(const aclMat& src, aclMat& dest, int flipCode = 0); + } /* end of namespace acl */ + +} /* end of namespace cv */ + +#endif \ No newline at end of file diff --git a/acl/include/opencv2/acl/operator_desc.hpp b/acl/include/opencv2/acl/operator_desc.hpp new file mode 100644 index 0000000..0e4e94e --- /dev/null +++ b/acl/include/opencv2/acl/operator_desc.hpp @@ -0,0 +1,93 @@ +#ifndef OPENCV_OPERATOR_DESC_HPP +#define OPENCV_OPERATOR_DESC_HPP + +#include +#include + +#include "acl_type.hpp" +#include "acl_init.hpp" +#include "acl_mat.hpp" +#include "acl/acl.h" + +namespace cv +{ + namespace acl + { + class CV_EXPORTS OperatorDesc + { + public: + /** + * Constructor + * @param [in] opType: op type + */ + OperatorDesc(std::string opType); + + /** + * Destructor + */ + virtual ~OperatorDesc(); + + /** + * Add an input tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddInputTensorDesc(aclDataType dataType, int numDims, const int64_t *dims, aclFormat format); + + /** + * Add an output tensor description + * @param [in] dataType: data type + * @param [in] numDims: number of dims + * @param [in] dims: dims + * @param [in] format: format + * @return OperatorDesc + */ + OperatorDesc &AddOutputTensorDesc(aclDataType dataType, int numDims, const 
int64_t *dims, aclFormat format); + + template + bool AddTensorAttr(const char *attrName, AttrType type, T vaule) + { + if (opAttr == nullptr) + return false; + switch (type) + { + case OP_BOOL: + aclopSetAttrBool(opAttr, attrName, vaule); + break; + case OP_INT: + aclopSetAttrInt(opAttr, attrName, vaule); + break; + case OP_FLOAT: + aclopSetAttrFloat(opAttr, attrName, vaule); + break; + default: + break; + } + return true; + } + std::string opType; + std::vector inputDesc; + std::vector outputDesc; + aclopAttr *opAttr; + }; + + + // Create operator description + CV_EXPORTS OperatorDesc CreateOpDesc(const string opType, const vector &input_Mat, vector &output_Mat, aclFormat format = ACL_FORMAT_NHWC, Opdims config = FOUR_DIMS); + // Compile and run the operator + CV_EXPORTS void compileAndRunop(OperatorDesc &opDesc, vector &inputBuffers_, vector &outputBuffers_, aclCxt *acl_context); + // Suitable for one input and one output + CV_EXPORTS void OneInAndOneOut(const aclMat &input, aclMat &output, const string opType); + // Suitable for tow input and one output + CV_EXPORTS void TwoInAndOneOut(const aclMat &inputMat, const aclMat &inputMatOther, aclMat &outputMat, const string opType); + // run the operator + CV_EXPORTS void Runop(vector &input, vector &output, OperatorDesc &opDesc); + + } /* end of namespace acl */ + +} /* end of namespace cv */ + +#endif // OPERATOR_DESC_HPP diff --git a/acl/src/acl_init.cpp b/acl/src/acl_init.cpp new file mode 100644 index 0000000..776a9b0 --- /dev/null +++ b/acl/src/acl_init.cpp @@ -0,0 +1,119 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. 
+// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ +#include "precomp.hpp" + + + +namespace cv +{ + namespace acl + { + ///////////////////////////aclEnv////////////////////////////////// + static Mutex *__initmutex = NULL; + Mutex &getInitMutex() + { + if (__initmutex == NULL) + __initmutex = new Mutex(); + return *__initmutex; + } + + aclEnv *global_aclenv = nullptr; + aclEnv* aclEnv::get_acl_env(const char* config_path) + { + if (nullptr == global_aclenv) + { + AutoLock lock(getInitMutex()); + if (nullptr == global_aclenv) + global_aclenv = new aclEnv(config_path); + } + return global_aclenv; + } + + + /////////////////////////create acl context//////////////////////// + /** + * @brief: set device and stream + * @param [in] config_path: ajson path + * @param [in] device_id: device id + * @param [in] stream_count: stream count + */ + aclCxt *set_device(const char* config_path, int device_id, int stream_count) + { + aclEnv *acl_env = aclEnv::get_acl_env(config_path); + if (global_aclenv->refcount) { + AutoLock lock(getInitMutex()); + CV_XADD(global_aclenv->refcount, 1); + } + int device_count = acl_env->get_device_count(); + CV_Assert(device_id < device_count); + + aclCxt *acl_context = new aclCxt(device_id); + acl_context->set_current_context(); + acl_context->create_stream(stream_count); + + clog << "set_device() is success" << endl; + return acl_context; + } + + void release_device(aclCxt* context) + { + CV_Assert(context); + delete context; + context = nullptr; + if (global_aclenv->refcount) + { + AutoLock lock(getInitMutex()); + CV_XADD(global_aclenv->refcount, -1); + + if (*(global_aclenv->refcount) == 0) + { + delete global_aclenv; + global_aclenv = nullptr; + } + } + clog << "release_device() is success" << endl; + } + + } /* end of namespace acl */ + +} /* end of namespace cv */ diff --git a/acl/src/acl_mat.cpp b/acl/src/acl_mat.cpp new file mode 100644 index 0000000..92e10cf --- /dev/null +++ b/acl/src/acl_mat.cpp @@ -0,0 +1,216 @@ +#include "precomp.hpp" + + +#define ALIGN 64 + +namespace cv 
+{ + namespace acl + { + /* Memory alignment */ + static inline size_t alignSize(size_t sz, int n = ALIGN); + + void aclMat::upload(const Mat &m, ALIGNMENT config) + { + CV_Assert((config == ALIGNMENT::MEMORY_ALIGN) || (config == ALIGNMENT::MEMORY_UNALIGNED)); + if (config == ALIGNMENT::MEMORY_UNALIGNED) + { + CV_Assert(m.data && (this->step == m.step) && (this->rows == m.rows) && (this->cols == m.cols) && (this->type() == m.type())); + aclrtMemcpy((void *)this->data, (m.step * m.rows), (void *)m.data, (m.step * m.rows), ACL_MEMCPY_HOST_TO_DEVICE); + } + else if (config == ALIGNMENT::MEMORY_ALIGN) + { + CV_Assert(m.data && (this->rows == m.rows) && (this->cols == m.cols) && (this->type() == m.type())); + aclrtMemcpy2d((void *)this->data, this->step, (void *)m.data, m.step, m.cols * m.elemSize(), m.rows, ACL_MEMCPY_HOST_TO_DEVICE); + } + } + + void aclMat::upload(const Mat &m, aclStream stream, ALIGNMENT config) + { + CV_Assert((config == ALIGNMENT::MEMORY_ALIGN) || (config == ALIGNMENT::MEMORY_UNALIGNED)); + if (config == ALIGNMENT::MEMORY_UNALIGNED) + { + CV_Assert(m.data && (this->step == m.step) && (this->rows == m.rows) && (this->cols == m.cols) && (this->type() == m.type())); + aclrtMemcpyAsync((void *)this->data, this->totalSize, (void *)m.data, (m.step * m.rows), ACL_MEMCPY_HOST_TO_DEVICE, stream); + } + else if (config == ALIGNMENT::MEMORY_ALIGN) + { + CV_Assert(m.data && (this->rows == m.rows) && (this->cols == m.cols) && (this->type() == m.type())); + aclrtMemcpy2dAsync((void *)this->data, this->step, (void *)m.data, m.step, m.cols * m.elemSize(), m.rows, ACL_MEMCPY_HOST_TO_DEVICE, stream); + } + AclSafeCall(aclrtSynchronizeStream(stream)); + } + + + void aclMat::download(Mat &m, ALIGNMENT config) const + { + CV_Assert((config == ALIGNMENT::MEMORY_ALIGN) || (config == ALIGNMENT::MEMORY_UNALIGNED)); + if (config == ALIGNMENT::MEMORY_UNALIGNED) + { + CV_Assert(m.data && (this->step == m.step) && (this->rows == m.rows) && (this->cols == m.cols) && 
(this->type() == m.type())); + aclrtMemcpy((void *)m.data, (m.step * m.rows), (void *)(this->data), (m.step * m.rows), ACL_MEMCPY_DEVICE_TO_HOST); + } + else if (config == ALIGNMENT::MEMORY_ALIGN) + { + CV_Assert(m.data && (this->rows == m.rows) && (this->cols == m.cols) && (this->type() == m.type())); + aclrtMemcpy2d((void *)m.data, m.step, (void *)(this->data), this->step, this->cols * this->elemSize(), this->rows, ACL_MEMCPY_DEVICE_TO_HOST); + } + return; + } + + void aclMat::download(Mat &m, aclStream stream, ALIGNMENT config) const + { + CV_Assert((config == ALIGNMENT::MEMORY_ALIGN) || (config == ALIGNMENT::MEMORY_UNALIGNED)); + if (config == ALIGNMENT::MEMORY_UNALIGNED) + { + CV_Assert(m.data && (this->step == m.step) && (this->rows == m.rows) && (this->cols == m.cols) && (this->type() == m.type())); + aclrtMemcpyAsync((void *)m.data, (m.step * m.rows), (void *)(this->data), this->totalSize, ACL_MEMCPY_DEVICE_TO_HOST, stream); + } + else if (config == ALIGNMENT::MEMORY_ALIGN) + { + CV_Assert(m.data && (this->rows == m.rows) && (this->cols == m.cols) && (this->type() == m.type())); + aclrtMemcpy2dAsync((void *)m.data, m.step, (void *)(this->data), this->step, this->cols * this->elemSize(), this->rows, ACL_MEMCPY_DEVICE_TO_HOST, stream); + } + AclSafeCall(aclrtSynchronizeStream(stream)); + return; + } + + void aclMat::create(int _rows, int _cols, int _type, ALIGNMENT config, MemMallocPolicy policy) + { + createEx(_rows, _cols, _type, config, policy); + } + + void aclMat::create(Size size, int type, ALIGNMENT config, MemMallocPolicy policy) + { + createEx(size, type, config, policy); + } + + inline size_t alignSize(size_t sz, int n) + { + return (((sz) + n - 1) / n ) * n; + } + + /* core logic */ + void aclMat::createEx(int _rows, int _cols, int _type, ALIGNMENT config, MemMallocPolicy policy) + { + /* TO ENSURE */ + //_type &= CV_MAT_TYPE_MASK; + _type &= TYPE_MASK; + if (rows == _rows && cols == _cols && type() == _type && data) + return; + + if (data) + 
release(); + + CV_DbgAssert(_rows >= 0 && _cols >= 0); + + if (_rows > 0 && _cols > 0) + { + /* TO ENSURE */ + //flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL; + flags = Mat::MAGIC_VAL + _type; + rows = _rows; + cols = _cols; + wholerows = _rows; + wholecols = _cols; + size_t esz = elemSize(); + void *dev_ptr; + if (config == ALIGNMENT::MEMORY_ALIGN) + { + if (channels() == 3) + step = alignSize(cols * esz, ALIGN * channels()); + else + step = alignSize(cols * esz); + } + else + step = cols * esz; + totalSize = step * rows; + + AclSafeCall(aclrtMalloc(&dev_ptr, totalSize, type_transition(policy))); + + data = dev_ptr; + datastart = static_cast(data); + dataend = static_cast(data) + totalSize; + refcount = static_cast(fastMalloc(sizeof(*refcount))); + *refcount = 0; + CV_XADD(refcount, 1); + flags |= Mat::CONTINUOUS_FLAG; + } + } + + void aclMat::createEx(Size size, int type, ALIGNMENT config, MemMallocPolicy policy) + { + createEx(size.height, size.width, type, config, policy); + } + + void aclMat::release() + { + CV_XADD(refcount, -1); + if (data && (*refcount == 0)) + { + aclrtFree(data); + } + data = nullptr; + datastart = nullptr; + dataend = nullptr; + } + + aclMat &aclMat::operator+=(const aclMat &m) + { + CV_Assert(this->rows == m.rows && this->cols == m.cols && this->type() == m.type()); + TwoInAndOneOut(*this, m, *this, "Add"); + return *this; + } + + aclMat &aclMat::operator-=(const aclMat &m) + { + CV_Assert(this->rows == m.rows && this->cols == m.cols && this->type() == m.type()); + TwoInAndOneOut(*this, m, *this, "Sub"); + return *this; + } + + aclMat &aclMat::operator/=(const aclMat &m) + { + CV_Assert(this->rows == m.rows && this->cols == m.cols && this->type() == m.type()); + TwoInAndOneOut(*this, m, *this, "Div"); + return *this; + } + + aclMat &aclMat::operator*=(const aclMat &m) + { + CV_Assert(this->cols == m.rows && this->type() == m.type()); + vector input_Mat; + vector output_Mat; + vector inputBuffers_; + vector outputBuffers_; + aclMat 
newMat{this->rows, m.cols, this->type(), this->acl_context}; + + input_Mat.emplace_back(*this); + input_Mat.emplace_back(m); + output_Mat.emplace_back(newMat); + + inputBuffers_.emplace_back(aclCreateDataBuffer(this->data, this->totalSize)); + inputBuffers_.emplace_back(aclCreateDataBuffer(m.data, m.totalSize)); + inputBuffers_.emplace_back(aclCreateDataBuffer(nullptr, 0)); + outputBuffers_.emplace_back(aclCreateDataBuffer(newMat.data, newMat.totalSize)); + + OperatorDesc opDesc = CreateOpDesc("MatMul", input_Mat, output_Mat, ACL_FORMAT_NHWC, TWO_DIMS); + opDesc.AddInputTensorDesc(ACL_DT_UNDEFINED, 0, nullptr, ACL_FORMAT_UNDEFINED); + opDesc.AddTensorAttr("transpose_x1", OP_BOOL, false); + opDesc.AddTensorAttr("transpose_x2", OP_BOOL, false); + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, this->acl_context); + + newMat.data = aclGetDataBufferAddr(outputBuffers_[0]); + *this = newMat; + + for (size_t i = 0; i < inputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); + for (size_t i = 0; i < outputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); + + return *this; + } + + } /* end of namespace acl */ + +} /* end of namespace cv */ diff --git a/acl/src/gemm.cpp b/acl/src/gemm.cpp new file mode 100644 index 0000000..3251e7a --- /dev/null +++ b/acl/src/gemm.cpp @@ -0,0 +1,90 @@ +#include "precomp.hpp" + +namespace cv +{ + namespace acl + { + /** + * @brief: matrix multiplication + * + */ + void MatMul(const aclMat& src1, const aclMat& src2, aclMat& dest) + { + CV_Assert(src1.cols == src2.rows && src1.type() == src2.type()); + vector input_Mat; + vector output_Mat; + vector inputBuffers_; + vector outputBuffers_; + + input_Mat.emplace_back(src1); + input_Mat.emplace_back(src2); + output_Mat.emplace_back(dest); + + inputBuffers_.emplace_back(aclCreateDataBuffer(src1.data, src1.totalSize)); + inputBuffers_.emplace_back(aclCreateDataBuffer(src2.data, src2.totalSize)); + 
inputBuffers_.emplace_back(aclCreateDataBuffer(nullptr, 0)); + outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); + + OperatorDesc opDesc = CreateOpDesc("MatMul", input_Mat, output_Mat, ACL_FORMAT_NHWC, TWO_DIMS); + opDesc.AddInputTensorDesc(ACL_DT_UNDEFINED, 0, nullptr, ACL_FORMAT_UNDEFINED); + opDesc.AddTensorAttr("transpose_x1", OP_BOOL, false); + opDesc.AddTensorAttr("transpose_x2", OP_BOOL, false); + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); + + dest.data = aclGetDataBufferAddr(outputBuffers_[0]); + + for (size_t i = 0; i < inputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); + for (size_t i = 0; i < outputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); + } + + /** + * @brief convolution + * @param [in] src: characteristic matrix + * @param [in] kernel: convolution kernel + * @param [in] dest: destination matrix + * @param [in] stridesList: strides, The N and C dimensions must be set to 1 + * @param [in] padSList: pads, vector(top, bottom, left, right) + */ + void Convolution(const aclMat& src, const aclMat& kernel, aclMat& dest, const vector& stridesList, const vector& padsList) + { + vector inputBuffers_; + vector outputBuffers_; + vector dilationsList{1, 1, 1, 1}; + string opType = "Conv2D"; + int dest_rows = (src.rows + padsList[0] + padsList[1] - (1 * (kernel.rows - 1) + 1)) / stridesList[2] + 1; + int dest_cols = (src.cols + padsList[2] + padsList[3] - (1 * (kernel.cols - 1) + 1)) / stridesList[3] + 1; + aclMat acl_dest{dest_rows, dest_cols, src.type(), src.acl_context}; + + vector shape{1, 1, src.rows, src.cols}; + vector shape1{1, 1, kernel.rows, kernel.cols}; + vector shape2{1, 1, acl_dest.rows, acl_dest.cols}; + + aclDataType dataType = type_transition(src.depth()); + aclFormat format = ACL_FORMAT_NCHW; + OperatorDesc opDesc(opType); + opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); + 
opDesc.AddInputTensorDesc(dataType, shape1.size(), shape1.data(), format); + opDesc.AddOutputTensorDesc(dataType, shape2.size(), shape2.data(), format); + + auto opAttr = opDesc.opAttr; + aclopSetAttrListInt(opAttr, "strides", stridesList.size(), stridesList.data()); + aclopSetAttrListInt(opAttr, "pads", padsList.size(), padsList.data()); + aclopSetAttrListInt(opAttr, "dilations", dilationsList.size(), dilationsList.data()); + + inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); + inputBuffers_.emplace_back(aclCreateDataBuffer(kernel.data, kernel.totalSize)); + outputBuffers_.emplace_back(aclCreateDataBuffer(acl_dest.data, acl_dest.totalSize)); + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context); + acl_dest.data = aclGetDataBufferAddr(outputBuffers_[0]); + dest = acl_dest; + + for (size_t i = 0; i < inputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); + for (size_t i = 0; i < outputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); + } + } /* end of namespace acl */ + +} /* end of namespace cv */ \ No newline at end of file diff --git a/acl/src/mathfuncs.cpp b/acl/src/mathfuncs.cpp new file mode 100644 index 0000000..4452a5c --- /dev/null +++ b/acl/src/mathfuncs.cpp @@ -0,0 +1,220 @@ +#include "precomp.hpp" + +namespace cv +{ + namespace acl + { + aclMat abs(const aclMat& a) + { + aclMat dest(a.rows, a.cols, a.type(), a.acl_context); + OneInAndOneOut(a, dest, "Abs"); + return a; + } + + static void *power_data(double power, aclDataType type, size_t powersize) + { + void *dev_ptr; + + switch(type) + { + case ACL_UINT8: + { + aclrtMalloc(&dev_ptr, powersize, ACL_MEM_MALLOC_NORMAL_ONLY); + uchar power_8u = uchar(power); + aclrtMemcpy(dev_ptr, powersize, static_cast(&power_8u), powersize, ACL_MEMCPY_HOST_TO_DEVICE); + return dev_ptr; + } + case ACL_INT8: + { + aclrtMalloc(&dev_ptr, powersize, ACL_MEM_MALLOC_NORMAL_ONLY); + char power_8s = char(power); + 
aclrtMemcpy(dev_ptr, powersize, static_cast(&power_8s), powersize, ACL_MEMCPY_HOST_TO_DEVICE); + return dev_ptr; + } + case ACL_FLOAT16: + { + aclrtMalloc(&dev_ptr, powersize, ACL_MEM_MALLOC_NORMAL_ONLY); + float16_t power_16f = float16_t(power); + aclrtMemcpy(dev_ptr, powersize, static_cast(&power_16f), powersize, ACL_MEMCPY_HOST_TO_DEVICE); + return dev_ptr; + } + case ACL_INT32: + { + aclrtMalloc(&dev_ptr, powersize, ACL_MEM_MALLOC_NORMAL_ONLY); + int power_32s = int(power); + aclrtMemcpy(dev_ptr, powersize, static_cast(&power_32s), powersize, ACL_MEMCPY_HOST_TO_DEVICE); + return dev_ptr; + } + case ACL_FLOAT: + { + aclrtMalloc(&dev_ptr, powersize, ACL_MEM_MALLOC_NORMAL_ONLY); + float32_t power_32f = float32_t(power); + aclrtMemcpy(dev_ptr, powersize, static_cast(&power_32f), powersize, ACL_MEMCPY_HOST_TO_DEVICE); + return dev_ptr; + } + case ACL_DOUBLE: + { + aclrtMalloc(&dev_ptr, powersize, ACL_MEM_MALLOC_NORMAL_ONLY); + double power_64f = double(power); + aclrtMemcpy(dev_ptr, powersize, static_cast(&power_64f), powersize, ACL_MEMCPY_HOST_TO_DEVICE); + return dev_ptr; + } + default: + return nullptr; + } + + } + + void pow(const aclMat& src, double power, aclMat& dest) + { + vector input_Mat; + vector output_Mat; + vector inputBuffers_; + vector outputBuffers_; + + aclDataType dataType = type_transition(src.depth()); + + input_Mat.emplace_back(src); + output_Mat.emplace_back(dest); + + OperatorDesc opDesc = CreateOpDesc("Pow", input_Mat, output_Mat); + vector shape2{1}; + opDesc.AddInputTensorDesc(dataType, shape2.size(), shape2.data(), ACL_FORMAT_NHWC); + + size_t size = aclGetTensorDescSize(opDesc.inputDesc[1]); + inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); + inputBuffers_.emplace_back(aclCreateDataBuffer(power_data(power, dataType, size), size)); + outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); + + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); + + for (size_t i = 0; i < 
inputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); + for (size_t i = 0; i < outputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); + } + + void add(const aclMat& src, const aclMat& other_src, aclMat& dest) + { + bool is_correct; + + is_correct = (src.rows == other_src.rows); + is_correct &= (src.rows == dest.rows); + is_correct &= (src.cols == other_src.cols); + is_correct &= (src.cols == dest.cols); + is_correct &= (src.type() == other_src.type()); + is_correct &= (src.type() == dest.type()); + CV_Assert(is_correct); + + TwoInAndOneOut(src, other_src, dest, "Add"); + } + + void divide(const aclMat& src, const aclMat& other_src, aclMat& dest) + { + bool is_correct; + + is_correct = (src.rows == other_src.rows); + is_correct &= (src.rows == dest.rows); + is_correct &= (src.cols == other_src.cols); + is_correct &= (src.cols == dest.cols); + is_correct &= (src.type() == other_src.type()); + is_correct &= (src.type() == dest.type()); + CV_Assert(is_correct); + + TwoInAndOneOut(src, other_src, dest, "Div"); + } + + void exp(const aclMat& src, aclMat& dest) + { + CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type()); + + vector input_Mat; + vector output_Mat; + + vector inputBuffers_; + vector outputBuffers_; + + input_Mat.emplace_back(src); + output_Mat.emplace_back(dest); + + inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); + outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); + + OperatorDesc opDesc = CreateOpDesc("Exp", input_Mat, output_Mat); + opDesc.AddTensorAttr("base", OP_FLOAT, -1.0); + opDesc.AddTensorAttr("scale", OP_FLOAT, 1.0); + opDesc.AddTensorAttr("shift", OP_FLOAT, 0.0); + + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); + + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0])); + } + + void log(const aclMat &src, aclMat &dest) + { + 
CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type()); + + vector input_Mat; + vector output_Mat; + + vector inputBuffers_; + vector outputBuffers_; + + input_Mat.emplace_back(src); + output_Mat.emplace_back(dest); + + inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); + outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); + + OperatorDesc opDesc = CreateOpDesc("Log", input_Mat, output_Mat); + opDesc.AddTensorAttr("base", OP_FLOAT, -1.0); + opDesc.AddTensorAttr("scale", OP_FLOAT, 1.0); + opDesc.AddTensorAttr("shift", OP_FLOAT, 0.0); + + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); + + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0])); + } + + void max(const aclMat &src, const aclMat &other_src, aclMat &dest) + { + bool is_correct; + + is_correct = (src.rows == other_src.rows); + is_correct &= (src.rows == dest.rows); + is_correct &= (src.cols == other_src.cols); + is_correct &= (src.cols == dest.cols); + is_correct &= (src.type() == other_src.type()); + is_correct &= (src.type() == dest.type()); + CV_Assert(is_correct); + + TwoInAndOneOut(src, other_src, dest, "Maximum"); + } + + void min(const aclMat &src, const aclMat &other_src, aclMat &dest) + { + bool is_correct; + + is_correct = (src.rows == other_src.rows); + is_correct &= (src.rows == dest.rows); + is_correct &= (src.cols == other_src.cols); + is_correct &= (src.cols == dest.cols); + is_correct &= (src.type() == other_src.type()); + is_correct &= (src.type() == dest.type()); + CV_Assert(is_correct); + + TwoInAndOneOut(src, other_src, dest, "Minimum"); + } + + void sqrt(const aclMat &src, aclMat &dest) + { + CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type()); + + OneInAndOneOut(src, dest, "Sqrt"); + } + + } /* end of namespace acl */ + +} /* end of namespace cv */ \ No newline at end of file diff --git 
a/acl/src/matrices.cpp b/acl/src/matrices.cpp new file mode 100644 index 0000000..e087533 --- /dev/null +++ b/acl/src/matrices.cpp @@ -0,0 +1,438 @@ +#include "precomp.hpp" + +namespace cv +{ + namespace acl + { +/* + //disable + void lookUpTable(const aclMat& src, const aclMat& lut, aclMat& dest) + { + bool is_correct; + is_correct = ((src.depth() == CV_8U) || (src.depth() == CV_8S)); + is_correct &= ((lut.depth() == CV_8U) || (lut.depth() == CV_8S)); + is_correct &= (lut.totalSize == 256); + CV_Assert(is_correct); + + vector input_Mat; + vector output_Mat; + vector inputBuffers_; + vector outputBuffers_; + + uchar keyValue[256]; + for (int i = 0; i < 256; ++i) + keyValue[i] = i; + aclMat key(1, 256, src.type(), keyValue, src.acl_context); + + input_Mat.emplace_back(src); + input_Mat.emplace_back(key); + input_Mat.emplace_back(lut); + + inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); + inputBuffers_.emplace_back(aclCreateDataBuffer(key.data, key.totalSize)); + inputBuffers_.emplace_back(aclCreateDataBuffer(lut.data, lut.totalSize)); + + aclDataType dataType = type_transition(input_Mat[0].depth()); + aclFormat format = ACL_FORMAT_NHWC; + + OperatorDesc opDesc("LookupTableImport"); + vector shape1{src.rows, src.cols * src.channels()}; + vector shape2{lut.rows, lut.cols * lut.channels()}; + vector shape3{dest.rows, dest.cols * dest.channels()}; + opDesc.AddInputTensorDesc(dataType, shape1.size(), shape1.data(), format); + opDesc.AddInputTensorDesc(dataType, shape2.size(), shape2.data(), format); + opDesc.AddInputTensorDesc(dataType, shape3.size(), shape3.data(), format); + + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); + + dest.data = aclGetDataBufferAddr(inputBuffers_[0]); + + for (size_t i = 0; i < inputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); + for (size_t i = 0; i < outputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); + } +*/ + +/* + void 
merge(const vector& mv, aclMat& dest) + { + vector inputBuffers_; + vector outputBuffers_; + + OperatorDesc opDesc("ConcatD"); + aclDataType dataType = type_transition(mv[0].depth()); + + for (size_t i = 0; i < mv.size(); ++i) + { + int cols = mv[i].step/mv[i].elemSize(); + vector inputShape{1, mv[i].rows, cols, mv[i].channels()}; + opDesc.AddInputTensorDesc(dataType, inputShape.size(), inputShape.data(), ACL_FORMAT_ND); + } + int cols = dest.step/dest.elemSize(); + vector outputShape{1, dest.rows, cols, dest.channels()}; + opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_ND); + + for (size_t i = 0; i < opDesc.inputDesc.size(); ++i) + { + inputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize)); + } + outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); + + aclopSetAttrInt(opDesc.opAttr, "N", mv.size()); + aclopSetAttrInt(opDesc.opAttr, "concat_dim", 3); + + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); + + for (size_t i = 0; i < inputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); + for (size_t i = 0; i < outputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); + } +*/ + + + void merge(const vector& mv, aclMat& dest) + { + vector inputBuffers_; + vector outputBuffers_; + + OperatorDesc opDesc("Concat"); + aclDataType dataType = type_transition(mv[0].depth()); + + vector inputShape{}; + opDesc.AddInputTensorDesc(ACL_INT32, inputShape.size(), inputShape.data(), ACL_FORMAT_ND); + + for (size_t i = 0; i < mv.size(); ++i) + { + int cols = mv[i].step/mv[i].elemSize(); + vector inputShape{1, mv[i].rows, cols, mv[i].channels()}; + opDesc.AddInputTensorDesc(dataType, inputShape.size(), inputShape.data(), ACL_FORMAT_NHWC); + } + + int cols = dest.step/dest.elemSize(); + vector outputShape{1, dest.rows, cols, dest.channels()}; + opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), 
ACL_FORMAT_NHWC); + + ino64_t N = mv.size(); + aclopSetAttrInt(opDesc.opAttr, "N", N); + + aclSetTensorDescName(opDesc.inputDesc[0], "concat_dim"); + aclSetTensorDescName(opDesc.inputDesc[1], "x0"); + aclSetTensorDescName(opDesc.inputDesc[2], "x1"); + aclSetTensorDescName(opDesc.inputDesc[3], "x2"); + aclSetTensorDescName(opDesc.outputDesc[0], "y"); + + void *dev; + int64_t concat_dim = 3; + size_t size = aclGetTensorDescSize(opDesc.inputDesc[0]); + aclrtMalloc(&dev, size, ACL_MEM_MALLOC_HUGE_FIRST); + aclrtMemcpy(dev, size, &concat_dim, size, ACL_MEMCPY_HOST_TO_DEVICE); + inputBuffers_.emplace_back(aclCreateDataBuffer(dev, size)); + + for (size_t i = 0; i < mv.size(); ++i) + inputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize)); + + outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); + + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); + + for (size_t i = 0; i < inputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); + for (size_t i = 0; i < outputBuffers_.size(); i++) + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); + + aclrtFree(dev); + } + + + +/** + * @brief : Dynamic shape reasoning, compiler problems + * + */ + + void transpose(const aclMat& src, aclMat& dest) + { + vector inputBuffers_; + vector outputBuffers_; + vector inputBuffers_host; + + OperatorDesc opDesc("Transpose"); + aclDataType dataType = type_transition(src.depth()); + + vector inputShape1{1, src.rows, src.cols, src.channels()}; + opDesc.AddInputTensorDesc(dataType, inputShape1.size(), inputShape1.data(), ACL_FORMAT_ND); + + vector inputShape2{4}; + opDesc.AddInputTensorDesc(ACL_INT32, inputShape2.size(), inputShape2.data(), ACL_FORMAT_ND); + + vector outputShape{-1, -1, -1, -1}; + opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_ND); + + inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); + + void *dev; + void *perm; + + size_t 
size = aclGetTensorDescSize(opDesc.inputDesc[1]); + /* perm is built on the host and mirrored into dev: the host copy feeds aclopInferShape, the device copy feeds execution. */ + aclrtMalloc(&dev, size, ACL_MEM_MALLOC_HUGE_FIRST); + aclrtMallocHost(&perm, aclGetTensorDescSize(opDesc.inputDesc.data()[1])); + /* {0, 2, 1, 3} swaps axes 1 and 2 of the {1, rows, cols, channels} input shape. */ + ((int *)perm)[0] = 0; + ((int *)perm)[1] = 2; + ((int *)perm)[2] = 1; + ((int *)perm)[3] = 3; + aclrtMemcpy(dev, size, perm, size, ACL_MEMCPY_HOST_TO_DEVICE); + inputBuffers_.emplace_back(aclCreateDataBuffer(dev, size)); + + AclSafeCall(aclopCompile(opDesc.opType.c_str(), + opDesc.inputDesc.size(), + opDesc.inputDesc.data(), + opDesc.outputDesc.size(), + opDesc.outputDesc.data(), + opDesc.opAttr, + ACL_ENGINE_SYS, + ACL_COMPILE_SYS, + nullptr)); + + /* Host-side copy of the source data, used only as input to shape inference. */ + void *host_data; + size_t host_size = src.totalSize; + aclrtMallocHost(&host_data, host_size); + aclrtMemcpy(host_data, host_size, src.data, host_size, ACL_MEMCPY_DEVICE_TO_HOST); + inputBuffers_host.emplace_back(aclCreateDataBuffer(host_data, host_size)); + inputBuffers_host.emplace_back(aclCreateDataBuffer(perm, size)); + + AclSafeCall(aclopInferShape("Transpose", opDesc.inputDesc.size(), opDesc.inputDesc.data(), \ + inputBuffers_host.data(), opDesc.outputDesc.size(), opDesc.outputDesc.data(), opDesc.opAttr)); + outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); + + AclSafeCall(aclopExecuteV2(opDesc.opType.c_str(), + inputBuffers_.size(), + opDesc.inputDesc.data(), + inputBuffers_.data(), + outputBuffers_.size(), + opDesc.outputDesc.data(), + outputBuffers_.data(), + opDesc.opAttr, + src.acl_context->get_stream(0))); + + AclSafeCall(aclrtSynchronizeStream(src.acl_context->get_stream(0))); + + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[1])); + AclSafeCall(aclDestroyDataBuffer(inputBuffers_host[0])); + AclSafeCall(aclDestroyDataBuffer(inputBuffers_host[1])); + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0])); + /* The stream has been synchronized above, so the device-side perm buffer is no longer in use; release it (it was previously leaked on every call). */ + aclrtFree(dev); + aclrtFreeHost(perm); + aclrtFreeHost(host_data); + } + + + +/* + void transpose(const aclMat& src, aclMat& dest) + { + vector inputBuffers_; +
vector outputBuffers_; + + OperatorDesc opDesc("TransposeD"); + aclDataType dataType = type_transition(src.depth()); + + vector inputShape1{1, src.rows, src.cols, src.channels()}; + opDesc.AddInputTensorDesc(dataType, inputShape1.size(), inputShape1.data(), ACL_FORMAT_NHWC); + + vector outputShape{1, src.cols, src.rows, src.channels()}; + opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_NHWC); + + vector permlist = {0, 2, 1, 3}; + aclopSetAttrListInt(opDesc.opAttr, "perm", permlist.size(), permlist.data()); + + inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); + outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); + + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context); + + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0])); + } +*/ + + + void split(const aclMat& src, vector& mv) + { + vector inputBuffers_; + vector outputBuffers_; + int split_dim = 3; + int num_split = src.channels(); + + OperatorDesc opDesc("SplitD"); + aclDataType dataType = type_transition(src.depth()); + + int cols = src.step/src.elemSize(); + vector inputShape1{1, src.rows, cols, src.channels()}; + opDesc.AddInputTensorDesc(dataType, inputShape1.size(), inputShape1.data(), ACL_FORMAT_ND); + + for (int i = 0; i < num_split; ++i) + { + int cols = mv[i].step/mv[i].elemSize(); + vector outputShape{1, mv[i].rows, cols, mv[i].channels()}; + opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_ND); + } + + auto opAttr = opDesc.opAttr; + aclopSetAttrInt(opAttr, "split_dim", split_dim); + aclopSetAttrInt(opAttr, "num_split", num_split); + + inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); + + for (int i = 0; i < num_split; ++i) + outputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize)); + + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, 
src.acl_context); + + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); + for (int i = 0; i < num_split; ++i) + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); + } + + +/* + //disable + + void split(const aclMat& src, vector& mv) + { + vector inputBuffers_; + vector inputBuffers_host; + vector outputBuffers_; + int num_split = src.channels(); + + OperatorDesc opDesc("Split"); + aclDataType dataType = type_transition(src.depth()); + + vector inputShape{}; + opDesc.AddInputTensorDesc(ACL_INT32, inputShape.size(), inputShape.data(), ACL_FORMAT_ND); + + int cols = src.step/src.elemSize(); + vector inputShape1{1, src.rows, cols, src.channels()}; + opDesc.AddInputTensorDesc(dataType, inputShape1.size(), inputShape1.data(), ACL_FORMAT_ND); + + for (int i = 0; i < num_split; ++i) + { + vector outputShape{-1, -1, -1, -1}; + opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_ND); + } + + aclSetTensorDescName(opDesc.inputDesc[0], "split_dim"); + aclSetTensorDescName(opDesc.inputDesc[1], "x"); + aclSetTensorDescName(opDesc.outputDesc[0], "y0"); + aclSetTensorDescName(opDesc.outputDesc[1], "y1"); + aclSetTensorDescName(opDesc.outputDesc[2], "y2"); + + aclopSetAttrInt(opDesc.opAttr, "num_split", num_split); + + AclSafeCall(aclopCompile(opDesc.opType.c_str(), + opDesc.inputDesc.size(), + opDesc.inputDesc.data(), + opDesc.outputDesc.size(), + opDesc.outputDesc.data(), + opDesc.opAttr, + ACL_ENGINE_SYS, + ACL_COMPILE_SYS, + nullptr)); + + void *dev; + int split_dim = 3; + size_t size = aclGetTensorDescSize(opDesc.inputDesc[0]); + aclrtMalloc(&dev, size, ACL_MEM_MALLOC_HUGE_FIRST); + aclrtMemcpy(dev, size, &split_dim, size, ACL_MEMCPY_HOST_TO_DEVICE); + + inputBuffers_host.emplace_back(aclCreateDataBuffer(&split_dim, size)); + + void *host_data; + size_t host_size = src.totalSize; + aclrtMallocHost(&host_data, host_size); + aclrtMemcpy(host_data, host_size, src.data, host_size, ACL_MEMCPY_DEVICE_TO_HOST); + 
inputBuffers_host.emplace_back(aclCreateDataBuffer(host_data, host_size)); + + AclSafeCall(aclopInferShape("Split", opDesc.inputDesc.size(), opDesc.inputDesc.data(), \ + inputBuffers_host.data(), opDesc.outputDesc.size(), opDesc.outputDesc.data(), opDesc.opAttr)); + + inputBuffers_.emplace_back(aclCreateDataBuffer(dev, size)); + inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); + + for (int i = 0; i < num_split; ++i) + outputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize)); + + AclSafeCall(aclopExecuteV2(opDesc.opType.c_str(), + inputBuffers_.size(), + opDesc.inputDesc.data(), + inputBuffers_.data(), + outputBuffers_.size(), + opDesc.outputDesc.data(), + outputBuffers_.data(), + opDesc.opAttr, + src.acl_context->get_stream(0))); + + AclSafeCall(aclrtSynchronizeStream(src.acl_context->get_stream(0))); + + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[1])); + AclSafeCall(aclDestroyDataBuffer(inputBuffers_host[0])); + AclSafeCall(aclDestroyDataBuffer(inputBuffers_host[1])); + for (int i = 0; i < num_split; ++i) + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); + } +*/ + + static void flip_(const aclMat& src, aclMat& dest, int axis) + { + vector inputBuffers_; + vector outputBuffers_; + + OperatorDesc opDesc("ReverseV2"); + aclDataType dataType = type_transition(src.depth()); + + vector inputShape1{1, src.rows, src.cols, src.channels()}; + opDesc.AddInputTensorDesc(dataType, inputShape1.size(), inputShape1.data(), ACL_FORMAT_ND); + + vector inputShape2{1}; + opDesc.AddInputTensorDesc(ACL_INT32, inputShape2.size(), inputShape2.data(), ACL_FORMAT_ND); + + vector outputShape{1, dest.rows, dest.cols, dest.channels()}; + opDesc.AddOutputTensorDesc(dataType, outputShape.size(), outputShape.data(), ACL_FORMAT_ND); + + inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); + + void *dev; + size_t size = aclGetTensorDescSize(opDesc.inputDesc[1]); + 
aclrtMalloc(&dev, size, ACL_MEM_MALLOC_HUGE_FIRST); + aclrtMemcpy(dev, size, &axis, size, ACL_MEMCPY_HOST_TO_DEVICE); + inputBuffers_.emplace_back(aclCreateDataBuffer(dev, size)); + + outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); + + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context); + + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[1])); + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0])); + + /* compileAndRunop synchronizes the stream before returning, so the device copy of axis is no longer in use; release it (it was previously leaked on every call). */ + aclrtFree(dev); + } + + void flip(const aclMat& src, aclMat& dest, int filpCode) + { + if (filpCode == 0) { + flip_(src, dest, 1); + } + else if (filpCode > 0) { + flip_(src, dest, 2); + } + else { + flip_(src, dest, 2); + aclMat tmp(dest.rows, dest.cols, dest.type(), dest.acl_context); + aclrtMemcpy(tmp.data, dest.totalSize, dest.data, dest.totalSize, ACL_MEMCPY_DEVICE_TO_DEVICE); + flip_(tmp, dest, 1); + } + } + } /* end of namespace acl */ + +} /* end of namespace cv */ \ No newline at end of file diff --git a/acl/src/operator_desc.cpp b/acl/src/operator_desc.cpp new file mode 100644 index 0000000..3a97bdc --- /dev/null +++ b/acl/src/operator_desc.cpp @@ -0,0 +1,185 @@ + +/** +* @file operator_desc.cpp +* +* Copyright (C) 2020. Huawei Technologies Co., Ltd. All rights reserved. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+*/ + +#include "precomp.hpp" + +using namespace std; + +namespace cv +{ + namespace acl + { + OperatorDesc::OperatorDesc(std::string opType) : opType(std::move(opType)) + { + opAttr = aclopCreateAttr(); + } + + OperatorDesc::~OperatorDesc() + { + for (auto *desc : inputDesc) + { + aclDestroyTensorDesc(desc); + } + + for (auto *desc : outputDesc) + { + aclDestroyTensorDesc(desc); + } + + aclopDestroyAttr(opAttr); + } + + OperatorDesc &OperatorDesc::AddInputTensorDesc(aclDataType dataType, + int numDims, + const int64_t *dims, + aclFormat format) + { + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + CV_Assert(desc); + inputDesc.emplace_back(desc); + return *this; + } + + OperatorDesc &OperatorDesc::AddOutputTensorDesc(aclDataType dataType, + int numDims, + const int64_t *dims, + aclFormat format) + { + aclTensorDesc *desc = aclCreateTensorDesc(dataType, numDims, dims, format); + CV_Assert(desc); + outputDesc.emplace_back(desc); + return *this; + } + + /** + * @brief create operator describe + * + */ + OperatorDesc CreateOpDesc(const string opType, const vector& input_Mat, vector& output_Mat, aclFormat format, Opdims config) + { + CV_Assert(config == TWO_DIMS || config == FOUR_DIMS); + + size_t i; + aclDataType dataType = type_transition(input_Mat[0].depth()); + + OperatorDesc opDesc(opType); + for (i = 0; i < input_Mat.size(); ++i) { + if (config == TWO_DIMS) + { + int cols = input_Mat[i].step / input_Mat[i].elemSize(); + vector shape{input_Mat[i].rows, cols}; + opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); + } + else if(config == FOUR_DIMS) + { + int cols = input_Mat[i].step / input_Mat[i].elemSize(); + vector shape{1, input_Mat[i].rows, cols, input_Mat[i].channels()}; + opDesc.AddInputTensorDesc(dataType, shape.size(), shape.data(), format); + } + } + + for (i = 0; i < output_Mat.size(); ++i) { + if (config == TWO_DIMS) + { + int cols = output_Mat[i].step / output_Mat[i].elemSize(); + vector 
shape{output_Mat[i].rows, cols}; + opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); + } + else if(config == FOUR_DIMS) + { + int cols = output_Mat[i].step / output_Mat[i].elemSize(); + vector shape{1, output_Mat[i].rows, cols, output_Mat[i].channels()}; + opDesc.AddOutputTensorDesc(dataType, shape.size(), shape.data(), format); + } + } + + return opDesc; + } + + /** + * @brief compile and run operator + * + */ + void compileAndRunop(OperatorDesc& opDesc, vector& inputBuffers_, vector& outputBuffers_, aclCxt *acl_context) + { + AclSafeCall(aclopCompile(opDesc.opType.c_str(), + opDesc.inputDesc.size(), + opDesc.inputDesc.data(), + opDesc.outputDesc.size(), + opDesc.outputDesc.data(), + opDesc.opAttr, + ACL_ENGINE_SYS, + ACL_COMPILE_SYS, + nullptr)); + + AclSafeCall(aclopExecuteV2(opDesc.opType.c_str(), + inputBuffers_.size(), + opDesc.inputDesc.data(), + inputBuffers_.data(), + outputBuffers_.size(), + opDesc.outputDesc.data(), + outputBuffers_.data(), + opDesc.opAttr, + acl_context->get_stream(0))); + + + AclSafeCall(aclrtSynchronizeStream(acl_context->get_stream(0))); + + } + + void Runop(vector& input, vector& output, OperatorDesc& opDesc) + { + size_t i; + + vector inputBuffers_; + vector outputBuffers_; + + for (i = 0; i < input.size(); ++i) + inputBuffers_.emplace_back(aclCreateDataBuffer(input[i].data, input[i].totalSize)); + for (i = 0; i < output.size(); ++i) + outputBuffers_.emplace_back(aclCreateDataBuffer(output[i].data, output[i].totalSize)); + + compileAndRunop(opDesc, inputBuffers_, outputBuffers_, output[0].acl_context); + + for (i = 0; i < input.size(); ++i) + AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); + for (i = 0; i < output.size(); ++i) + AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); + } + + void OneInAndOneOut(const aclMat& inputMat, aclMat& outputMat, const string opType) + { + vector input_Mat; + vector output_Mat; + + input_Mat.emplace_back(inputMat); + output_Mat.emplace_back(outputMat); + + 
OperatorDesc opDesc = CreateOpDesc(opType, input_Mat, output_Mat); + Runop(input_Mat, output_Mat, opDesc); + } + + void TwoInAndOneOut(const aclMat& inputMat, const aclMat& inputMatOther, aclMat& outputMat, const string opType) + { + vector input_Mat; + vector output_Mat; + + input_Mat.emplace_back(inputMat); + input_Mat.emplace_back(inputMatOther); + output_Mat.emplace_back(outputMat); + + OperatorDesc opDesc = CreateOpDesc(opType, input_Mat, output_Mat); + Runop(input_Mat, output_Mat, opDesc); + } + + } /* end of namespace acl */ + +} /* end of namespace cv */ diff --git a/acl/src/precomp.hpp b/acl/src/precomp.hpp new file mode 100644 index 0000000..d9d5319 --- /dev/null +++ b/acl/src/precomp.hpp @@ -0,0 +1,70 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef OPENCV_ACL_PRECOMP_HPP__ +#define OPENCV_ACL_PRECOMP_HPP__ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "opencv2/core.hpp" +#include "opencv2/acl/acl.hpp" +#include "opencv2/core/utility.hpp" +#include "opencv2/acl/operator_desc.hpp" +#include "acl/acl.h" +#include "acl/acl_op_compiler.h" + +using namespace std; +using namespace cv; +using namespace cv::acl; + +#endif diff --git a/acl/test/acl.cpp b/acl/test/acl.cpp new file mode 100644 index 0000000..9844712 --- /dev/null +++ b/acl/test/acl.cpp @@ -0,0 +1,305 @@ +#include "test_correctness.hpp" +#include "test_perf.hpp" +#include "test_common.hpp" + + +namespace opencv_test +{ + namespace + { + aclCxt *acl_context_0 = set_device("/home/perfxlab4/OpenCV_ACL/modules/acl/test/acl.json", 1, 2); + ////////////////////////////////////////////////////Correctness_test//////////////////////////////////////////////////////// + + /* range: rows: 1 ~ 64, cols: 1 ~ 64, type: 0 ~ 7 + * 
test function: + * config: MEMORY_ALIGN + * aclMat(int rows, int cols, int type, aclCxt *acl_context, ALIGNMENT config = MEMORY_UNALIGNED, aclrtMemMallocPolicy policy = ACL_MEM_MALLOC_HUGE_FIRST); + * aclMat(Size size, int type, aclCxt *acl_context, ALIGNMENT config = MEMORY_UNALIGNED, aclrtMemMallocPolicy policy = ACL_MEM_MALLOC_HUGE_FIRST); + * aclMat(const aclMat &m); + * + */ + TEST(ACLMAT_CONSTRUCTOR, MEMORY_ALIGN) + { + AclMat_Test test; + test.Test_constructor_ALIGN(acl_context_0); + } + + /* range: rows: 1 ~ 64, cols: 1 ~ 64, type: 0 ~ 7 + * test function: + * config: MEMORY_UNALIGNED + * aclMat(int rows, int cols, int type, aclCxt *acl_context, ALIGNMENT config = MEMORY_UNALIGNED, aclrtMemMallocPolicy policy = ACL_MEM_MALLOC_HUGE_FIRST); + * aclMat(Size size, int type, aclCxt *acl_context, ALIGNMENT config = MEMORY_UNALIGNED, aclrtMemMallocPolicy policy = ACL_MEM_MALLOC_HUGE_FIRST); + * + */ + TEST(ACLMAT_CONSTRUCTOR, MEMORY_UNALIGNED) + { + AclMat_Test test; + test.Test_constructor_UNALIGNED(acl_context_0); + } + + /* range: rows: 1 ~ 64, cols: 1 ~ 64, type: 0 ~ 7 + * test function: + * aclMat(const aclMat &m); + */ + TEST(ACLMAT_CONSTRUCTOR, COPY_CONSTRUCTOR) + { + AclMat_Test test; + test.Test_constructor(acl_context_0); + } + + /* range: rows: 1 ~ 64, cols: 1 ~ 64, type: 0 ~ 7 + * test function: + * aclMat(int rows, int cols, int type, void *data, aclCxt* acl_context, ALIGNMENT config = MEMORY_UNALIGNED, size_t step = Mat::AUTO_STEP); + * aclMat(Size size, int type, void *data, aclCxt* acl_context, ALIGNMENT config = MEMORY_UNALIGNED, size_t step = Mat::AUTO_STEP); + */ + TEST(ACLMAT_CONSTRUCTOR, DATA) + { + AclMat_Test test; + test.Test_constructor_DATA(acl_context_0); + } + + /* range: rows: 1 ~ 64, cols: 1 ~ 64, type: 0 ~ 7 + * test function: + * aclMat(const aclMat &m, const Range &rowRange, const Range &colRange = Range::all()); + * + */ + TEST(ACLMAT_CONSTRUCTOR, RANGE) + { + AclMat_Test test; + test.Test_constructor_RANGE(acl_context_0); + } + + 
/* + * test function: + * aclMat(const aclMat &m, const Rect &roi); + * + */ + TEST(ACLMAT_CONSTRUCTOR, ROI) + { + AclMat_Test test; + test.Test_constructor_ROI(acl_context_0); + } + + /* + * test function: + * aclMat (const Mat &m, aclCxt* acl_context, ALIGNMENT config = MEMORY_UNALIGNED, aclrtMemMallocPolicy policy = ACL_MEM_MALLOC_HUGE_FIRST); + */ + TEST(ACLMAT_CONSTRUCTOR, MAT) + { + AclMat_Test test; + test.Test_constructor_MAT(acl_context_0); + } + + /* range: rows: 1 ~ 64, cols: 1 ~ 64, type: 0 ~ 7 + * test function: + * CV_EXPORTS void upload(const Mat &m, ALIGNMENT config = MEMORY_UNALIGNED); + * CV_EXPORTS void upload(const Mat &m, aclStream stream, ALIGNMENT config = MEMORY_UNALIGNED); + * + */ + TEST(ACLMAT_FUNCTION, DATA_TRANSFER) + { + AclMat_Test test; + test.Test_DATA_TRANSFER(acl_context_0); + } + + /* range: rows: 1 ~ 64, cols: 1 ~ 64, type: 0 ~ 7 + * test function: + * CV_EXPORTS void download(Mat &m, ALIGNMENT config = MEMORY_UNALIGNED) const; + * CV_EXPORTS void download(Mat &m, aclStream stream, ALIGNMENT config = MEMORY_UNALIGNED) const; + * + */ + TEST(ACLMAT_FUNCTION, DATA_TRANSFERASYNC) + { + AclMat_Test test; + test.Test_DATA_TRANSFERASYNC(acl_context_0); + } + + /* + * test function: + * void locateROI(Size &wholeSize, Point &ofs) const; + */ + TEST(ACLMAT_FUNCTION, LOCATEROI) + { + AclMat_Test test; + test.Test_locateROI(acl_context_0); + } + + /* + * test function: + * void swap(aclMat &mat); + * + */ + TEST(ACLMAT_FUNCTION, SWAP) + { + AclMat_Test test; + test.Test_swap(acl_context_0); + } + + /* + * test function: + * operator+=() + * + */ + TEST(ACLMAT_FUNCTION, OPERATOR_ADD) + { + AclMat_Test test; + test.Test_operator_add(acl_context_0); + } + + /* + * test function: + * operator-=() + * + */ + TEST(ACLMAT_FUNCTION, OPERATOR_SUB) + { + AclMat_Test test; + test.Test_operator_sub(acl_context_0); + } + + /* + * test function: + * operator*=() + * + */ + TEST(ACLMAT_FUNCTION, OPERATOR_MUL) + { + AclMat_Test test; + 
test.Test_operator_mul(acl_context_0); + } + + /* + * test function: + * operator/=() + * + */ + TEST(ACLMAT_FUNCTION, OPERATOR_DIV) + { + AclMat_Test test; + test.Test_operator_div(acl_context_0); + } +////////////////////////////////////////////////////Perf_test//////////////////////////////////////////////////////// + + TEST(Operator, add) + { + PERF_TEST test; + test.Test_operator_add_perf(acl_context_0); + } + + TEST(Operator, sub) + { + PERF_TEST test; + test.Test_operator_sub_perf(acl_context_0); + } + + TEST(Operator, div) + { + PERF_TEST test; + test.Test_operator_div_perf(acl_context_0); + } + + TEST(Operator, mul) + { + PERF_TEST test; + test.Test_operator_mul_perf(acl_context_0); + } + + TEST(Mathfuncs, abs) + { + PERF_TEST test; + test.Test_Abs(acl_context_0); + } + + TEST(Mathfunction, pow) + { + PERF_TEST test; + test.Test_Pow(acl_context_0); + } + + TEST(Mathfunction, sqrt) + { + PERF_TEST test; + test.Test_Sqrt(acl_context_0); + } + + TEST(Mathfunction, add) + { + PERF_TEST test; + test.Test_Add(acl_context_0); + } + + TEST(Mathfunction, divide) + { + PERF_TEST test; + test.Test_Divide(acl_context_0); + } + + TEST(Mathfunction, exp) + { + PERF_TEST test; + test.Test_Exp(acl_context_0); + } + + TEST(Mathfunction, log) + { + PERF_TEST test; + test.Test_Log(acl_context_0); + } + + TEST(Mathfunction, max) + { + PERF_TEST test; + test.Test_Max(acl_context_0); + } + + TEST(Mathfunction, min) + { + PERF_TEST test; + test.Test_Min(acl_context_0); + } + + TEST(Gemm, MatMul) + { + PERF_TEST test; + test.Test_MatMul(acl_context_0); + } + + TEST(Matrices, merge) + { + PERF_TEST test; + test.Test_Merge(acl_context_0); + } + + TEST(Gemm, Convolution) + { + PERF_TEST test; + test.Test_Convolution(acl_context_0); + } + + TEST(Matrices, split) + { + PERF_TEST test; + test.Test_Split(acl_context_0); + } +/* + TEST(Matrices, lookuptable) + { + PERF_TEST test; + test.Test_Lookuptable(acl_context_0); + } +*/ + + TEST(Matrices, transpose) + { + PERF_TEST test; + 
test.Test_Transpose(acl_context_0); + } + + TEST(Matrices, flip) + { + PERF_TEST test; + test.Test_Flip(acl_context_0); + release_device(acl_context_0); + } + } +} \ No newline at end of file diff --git a/acl/test/acl.json b/acl/test/acl.json new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/acl/test/acl.json @@ -0,0 +1 @@ +{} diff --git a/acl/test/test_acl.cpp b/acl/test/test_acl.cpp new file mode 100644 index 0000000..70128bd --- /dev/null +++ b/acl/test/test_acl.cpp @@ -0,0 +1,196 @@ +#include "test_common.hpp" +#include "test_perf.hpp" + +void PERF_TEST::Test_operator_add_perf(aclCxt *acl_context) +{ + int val; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + vector type{CV_8UC1, CV_32FC1, CV_32SC1, CV_64FC1}; + for (size_t i = 0; i < type.size(); ++i) + { + test.PrintLog("Perf test : Function: operator+=()", type[i]); + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + Mat mat_src(val, val, type[i]); + Mat mat_dest(val, val, type[i]); + Mat mat_dest1(val, val, type[i]); + + test.SetDataRange(mat_src, 1); + test.SetDataRange(mat_dest, 1); + + aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + mat_dest += mat_src; + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + aclmat_dest += aclmat_src; + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } + } +} + +void PERF_TEST::Test_operator_sub_perf(aclCxt 
*acl_context) +{ + int val; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + vector type{CV_8UC1, CV_32FC1, CV_32SC1, CV_64FC1}; + for (size_t i = 0; i < type.size(); ++i) + { + test.PrintLog("Perf test : Function: operator-=()", type[i]); + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + Mat mat_src(val, val, type[i]); + Mat mat_dest(val, val, type[i]); + Mat mat_dest1(val, val, type[i]); + + test.SetDataRange(mat_src, 1); + test.SetDataRange(mat_dest, 1); + + aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + mat_dest -= mat_src; + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + aclmat_dest -= aclmat_src; + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } + } + +} + +void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context) +{ + int val; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + vector type{CV_8UC1, CV_32FC1, CV_32SC1, CV_64FC1}; + for (size_t i = 0; i < type.size(); ++i) + { + test.PrintLog("Perf test : Function: operator/=()", type[i]); + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + Mat mat_src(val, val, type[i]); + Mat mat_dest(val, val, type[i]); + Mat mat_dest1(val, val, type[i]); + + test.SetDataRange(mat_src, 2); + test.SetDataRange(mat_dest, 1); + + aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type[i], 
mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + mat_dest /= mat_src; + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + aclmat_dest /= aclmat_src; + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } + } + +} + +void PERF_TEST::Test_operator_mul_perf(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + type = CV_32FC1; + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + Mat mat_src(val, val, type); + Mat mat_dest(val, val, type); + Mat mat_dest1(val, val, type); + + test.SetDataRange(mat_src, 1); + test.SetDataRange(mat_dest, 1); + + aclMat aclmat_src(val, val, type, mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + mat_dest *= mat_src; + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + aclmat_dest *= aclmat_src; + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} \ No newline at end of file diff --git 
a/acl/test/test_common.cpp b/acl/test/test_common.cpp new file mode 100644 index 0000000..e4d498e --- /dev/null +++ b/acl/test/test_common.cpp @@ -0,0 +1,238 @@ +#include "test_common.hpp" + +Common_Test::Common_Test() { + srand((unsigned)time(NULL)); +} + +Common_Test::~Common_Test() { + +} + +bool Common_Test::Test_Diff(const aclMat& aclmat, const Mat& mat, ALIGNMENT config) { + bool is_correct; + + if (config == ALIGNMENT::MEMORY_UNALIGNED) + { + is_correct = (aclmat.rows == mat.rows); + is_correct &= (aclmat.cols == mat.cols); + is_correct &= (aclmat.channels() == mat.channels()); + is_correct &= (aclmat.type() == mat.type()); + is_correct &= (aclmat.step == mat.step); + is_correct &= (aclmat.elemSize() == mat.elemSize()); + is_correct &= (aclmat.totalSize == mat.total() * mat.elemSize()); + is_correct &= ((aclmat.dataend - aclmat.datastart) == (mat.dataend - mat.datastart)); + + Mat mat_dest(mat.rows, mat.cols, mat.type()); + aclmat.download(mat_dest); + is_correct &= Test_Diff(mat, mat_dest); + } + else + { + is_correct = (aclmat.rows == mat.rows); + is_correct &= (aclmat.cols == mat.cols); + is_correct &= (aclmat.channels() == mat.channels()); + is_correct &= (aclmat.type() == mat.type()); + is_correct &= (aclmat.elemSize() == mat.elemSize()); + + Mat mat_dest(mat.rows, mat.cols, mat.type()); + aclmat.download(mat_dest, MEMORY_ALIGN); + is_correct &= Test_Diff(mat, mat_dest); + } + + return is_correct; +} + +bool Common_Test::Test_Diff(const aclMat& aclmat, const aclMat& aclmat_other) { + bool is_correct; + + is_correct = (aclmat.flags == aclmat_other.flags); + is_correct &= (aclmat.rows == aclmat_other.rows); + is_correct &= (aclmat.cols == aclmat_other.cols); + is_correct &= (aclmat.type() == aclmat_other.type()); + is_correct &= (aclmat.step == aclmat_other.step); + is_correct &= (aclmat.data == aclmat_other.data); + is_correct &= (aclmat.refcount == aclmat_other.refcount); + is_correct &= (aclmat.datastart == aclmat_other.datastart); + is_correct &= 
(aclmat.dataend == aclmat_other.dataend); + is_correct &= (aclmat.offset == aclmat_other.offset); + is_correct &= (aclmat.wholerows == aclmat_other.wholerows); + is_correct &= (aclmat.wholecols == aclmat_other.wholecols); + is_correct &= (aclmat.acl_context == aclmat_other.acl_context); + is_correct &= (aclmat.totalSize == aclmat_other.totalSize); + + return is_correct; +} + +/* Compare two host matrices: header fields first, then every element. Integer depths must match exactly; CV_32F / CV_64F must agree within +/-0.00001. Depths not listed in the switch are only checked on header fields. */ +bool Common_Test::Test_Diff(const Mat &mat, const Mat &mat_other) +{ + bool is_correct; + + is_correct = (mat.rows == mat_other.rows); + is_correct &= (mat.cols == mat_other.cols); + is_correct &= (mat.type() == mat_other.type()); + is_correct &= (mat.channels() == mat_other.channels()); + is_correct &= (mat.step == mat_other.step); + is_correct &= (mat.elemSize() == mat_other.elemSize()); + is_correct &= (mat.total() == mat_other.total()); + + /* Each loop stops at the first mismatching pixel, and is skipped entirely when the headers already differ. */ + switch (mat.depth()) + { + case CV_8U: + for (int i = 0; (is_correct == true) && (i < mat.rows * mat.cols * mat.channels()); i += mat.channels()) + { + for (int j = 0; j < mat.channels(); ++j) + is_correct &= ((mat.data)[i+j] == (mat_other.data)[i+j]); + } + return is_correct; + case CV_16U: + for (int i = 0; (is_correct == true) && (i < mat.rows * mat.cols * mat.channels()); i += mat.channels()) + { + for (int j = 0; j < mat.channels(); ++j) + is_correct &= (((unsigned short *)mat.data)[i+j] == ((unsigned short *)mat_other.data)[i+j]); + } + return is_correct; + case CV_32S: + for (int i = 0; (is_correct == true) && (i < mat.rows * mat.cols * mat.channels()); i += mat.channels()) + { + for (int j = 0; j < mat.channels(); ++j) + is_correct &= (((int *)(mat.data))[i+j] == (((int *)mat_other.data))[i+j]); + } + return is_correct; + case CV_32F: + for (int i = 0; (is_correct == true) && (i < mat.rows * mat.cols * mat.channels()); i += mat.channels()) + { + /* Both bounds must hold; joining them with || accepted any finite difference. */ + for (int j = 0; j < mat.channels(); ++j) + is_correct &= ((((float *)(mat.data))[i+j] - (((float *)mat_other.data))[i+j] >= -0.00001) && \ + (((float *)(mat.data))[i+j] - (((float *)mat_other.data))[i+j] <=
0.00001)); + } + return is_correct; + case CV_64F: + for (int i = 0; (is_correct == true) && (i < mat.rows * mat.cols * mat.channels()); i += mat.channels()) + { + /* Same two-sided tolerance check as the CV_32F case. */ + for (int j = 0; j < mat.channels(); ++j) + is_correct &= ((((double *)(mat.data))[i+j] - (((double *)mat_other.data))[i+j] >= -0.00001) && \ + (((double *)(mat.data))[i+j] - (((double *)mat_other.data))[i+j] <= 0.00001)); + } + return is_correct; + } + return is_correct; +} + +void Common_Test::MatShow(cv::Mat &m, string str) +{ + cout << str.c_str() << endl; + cout << m; + cout << endl + << endl + << endl; +} + +void Common_Test::StatShow(cv::Mat &mat_src, aclMat &aclmat_dst) +{ + cout << "//////////////////////////////// MatStat ////////////////////////////////" << endl; + cout << "type: " << mat_src.type() << endl; + cout << "elemSize: " << mat_src.elemSize() << endl; + cout << "channels: " << mat_src.channels() << endl; + cout << "step: " << mat_src.step << endl; + cout << "totalSize: " << mat_src.rows * mat_src.cols * mat_src.elemSize() << endl; + cout << "totalSize: " << mat_src.total() * mat_src.elemSize() << endl; + cout << "dataend - datastart: " << mat_src.dataend - mat_src.datastart << endl; + + cout << "//////////////////////////////// aclMatStat ////////////////////////////////" << endl; + cout << "type: " << aclmat_dst.type() << endl; + cout << "elemSize: " << aclmat_dst.elemSize() << endl; + cout << "channels: " << aclmat_dst.channels() << endl; + cout << "step: " << aclmat_dst.step << endl; + cout << "totalSize: " << aclmat_dst.rows * aclmat_dst.step << endl; + cout << "totalSize: " << aclmat_dst.totalSize << endl; + cout << "dataend - datastart: " << aclmat_dst.dataend - aclmat_dst.datastart << endl; + cout << "wholerows: " << aclmat_dst.wholerows << endl; + cout << "wholecols: " << aclmat_dst.wholecols << endl; + cout << "offset : " << aclmat_dst.offset << endl; +} + +void Common_Test::PrintLog(const string& funcname, int type) +{ + switch (type) + { + case CV_8UC1: + cout << funcname
<< "\t" + << "Type: CV_8UC1" << endl; + break; + case CV_8UC3: + cout << funcname << "\t" + << "Type: CV_8UC3" << endl; + break; + case CV_32FC1: + cout << funcname << "\t" + << "Type: CV_32FC1" << endl; + break; + case CV_32FC3: + cout << funcname << "\t" + << "Type: CV_32FC3" << endl; + break; + case CV_32SC1: + cout << funcname << "\t" + << "Type: CV_32SC1" << endl; + break; + case CV_32SC3: + cout << funcname << "\t" + << "Type: CV_32SC3" << endl; + break; + case CV_64FC1: + cout << funcname << "\t" + << "Type: CV_64FC1" << endl; + break; + default: + break; + } +} + +/* srand((unsigned)time(NULL)) in constructor */ +size_t Common_Test::RandDom_(int config) { + return static_cast(rand() % config); +} + +bool Common_Test::SetDataRange(Mat &src, int dataRange) +{ + switch (src.depth()) + { + case CV_8U: + for (int i = 0; i < src.rows * src.cols * src.channels(); i += src.channels()) + { + for (int j = 0; j < src.channels(); ++j) + (src.data)[i+j] = RandDom_(dataRange); + } + return true; + case CV_16U: + for (int i = 0; i < src.rows * src.cols * src.channels(); i += src.channels()) + { + for (int j = 0; j < src.channels(); ++j) + ((unsigned short *)src.data)[i+j] = RandDom_(dataRange); + } + return true; + case CV_32S: + for (int i = 0; i < src.rows * src.cols * src.channels(); i += src.channels()) + { + for (int j = 0; j < src.channels(); ++j) + ((int *)src.data)[i+j] = RandDom_(dataRange); + } + return true; + case CV_32F: + for (int i = 0; i < src.rows * src.cols * src.channels(); i += src.channels()) + { + for (int j = 0; j < src.channels(); ++j) + ((float *)src.data)[i+j] = RandDom_(dataRange) / 1.0; + } + return true; + case CV_64F: + for (int i = 0; i < src.rows * src.cols * src.channels(); i += src.channels()) + { + for (int j = 0; j < src.channels(); ++j) + ((double *)src.data)[i+j] = RandDom_(dataRange) / 1.0; + } + return true; + default: + return false; + } +} diff --git a/acl/test/test_common.hpp b/acl/test/test_common.hpp new file mode 100644 index 
0000000..809783b --- /dev/null +++ b/acl/test/test_common.hpp @@ -0,0 +1,27 @@ +#ifndef __OPENCV_TEST_COMMON_HPP__ +#define __OPENCV_TEST_COMMON_HPP__ + +#include "test_precomp.hpp" + +typedef enum TestDatatype { + INT = 1, + FLOAT +} TestDatatype; + +class CV_EXPORTS Common_Test { + public: + Common_Test(); + ~Common_Test(); + CV_EXPORTS bool Test_Diff(const aclMat& aclmat, const Mat& mat, ALIGNMENT config = ALIGNMENT::MEMORY_UNALIGNED); + CV_EXPORTS bool Test_Diff(const aclMat& aclmat, const aclMat& aclmat_other); + CV_EXPORTS bool Test_Diff(const Mat& mat, const Mat& mat_other); + CV_EXPORTS void MatShow(Mat &m, string str); + CV_EXPORTS void StatShow(Mat &mat_src, aclMat &aclmat_dst); + CV_EXPORTS void PrintLog(const string& funcname, int type); + + CV_EXPORTS size_t RandDom_(int config = 0xff); + CV_EXPORTS bool SetDataRange(Mat &src, int dataRange = 0xff); +}; + + +#endif \ No newline at end of file diff --git a/acl/test/test_correctness.cpp b/acl/test/test_correctness.cpp new file mode 100644 index 0000000..92eb619 --- /dev/null +++ b/acl/test/test_correctness.cpp @@ -0,0 +1,691 @@ +/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. 
+// +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. 
+// +//M*/ + + + +#include "test_common.hpp" +#include "test_correctness.hpp" + +AclMat_Test::AclMat_Test() { + +} + +AclMat_Test::~AclMat_Test() { + +} + +/* thread function */ +void thread_handler(void) { + aclCxt *acl_context_0 = set_device("/home/perfxlab4/OpenCV_ACL/modules/acl/test/acl.json", 0, 1); + release_device(acl_context_0); +} + +void AclMat_Test::Test_set_device() { + /* Current thread */ + aclCxt *acl_context_0 = set_device("/home/perfxlab4/OpenCV_ACL/modules/acl/test/acl.json", 0, 1); + + /* Different scope */ + { + aclCxt *acl_context_1 = set_device("/home/perfxlab4/OpenCV_ACL/modules/acl/test/acl.json", 2, 3); + release_device(acl_context_1); + } + + release_device(acl_context_0); + /* Different thread */ + thread t(thread_handler); + t.join(); +} + +void AclMat_Test::Test_constructor_UNALIGNED(aclCxt *acl_context) { + Common_Test test; + int rows, cols, type; + bool ret; + const int rowsMax = 128; + const int colsMax = 128; + const int typeMax = 7; + + for (type = 0; type < typeMax; type++) { + for (rows = 1; rows < rowsMax; rows++) { + for (cols = 1; cols < colsMax; cols++) { + Mat mat_src(rows, cols, type); + aclMat aclmat_src(rows, cols, type, acl_context); + test.SetDataRange(mat_src, 32); + aclmat_src.upload(mat_src); + ret = test.Test_Diff(aclmat_src, mat_src); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_constructor_UNALIGNED: -> aclMat(rows, cols, type, acl_context, config, policy) <- is success" << endl; + + for (type = 0; type < typeMax; type++) { + for (rows = 1; rows < rowsMax; rows++) { + for (cols = 1; cols < colsMax; cols++) { + Mat mat_src(cv::Size(cols, rows), type); + test.SetDataRange(mat_src, 32); + aclMat aclmat_src(cv::Size(cols, rows), type, acl_context); + aclmat_src.upload(mat_src); + ret = test.Test_Diff(aclmat_src, mat_src); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_constructor_UNALIGNED: -> aclMat(size, type, acl_context, config, policy) <- is success" << endl; +} + +void 
AclMat_Test::Test_constructor_ALIGN(aclCxt *acl_context) { + Common_Test test; + int rows, cols, type; + bool ret; + const int rowsMax = 128; + const int colsMax = 128; + const int typeMax = 7; + + for (type = 0; type < typeMax; type++) { + for (rows = 1; rows < rowsMax; rows++) { + for (cols = 1; cols < colsMax; cols++) { + Mat mat_src(rows, cols, type); + test.SetDataRange(mat_src, 32); + aclMat aclmat_src(rows, cols, type, acl_context, MEMORY_ALIGN); + aclmat_src.upload(mat_src, MEMORY_ALIGN); + ret = test.Test_Diff(aclmat_src, mat_src, MEMORY_ALIGN); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_constructor_ALIGN: -> aclMat(rows, cols, type, acl_context, config, policy) <- is success" << endl; + + for (type = 0; type < typeMax; type++) { + for (rows = 1; rows < rowsMax; rows++) { + for (cols = 1; cols < colsMax; cols++) { + Mat mat_src(cv::Size(cols, rows), type); + test.SetDataRange(mat_src, 32); + aclMat aclmat_src(cv::Size(cols, rows), type, acl_context, MEMORY_ALIGN); + aclmat_src.upload(mat_src, MEMORY_ALIGN); + ret = test.Test_Diff(aclmat_src, mat_src, MEMORY_ALIGN); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_constructor_ALIGN: -> aclMat(size, type, acl_context, config, policy) <- is success" << endl; + + +} + +void AclMat_Test::Test_constructor(aclCxt *acl_context_0) { + Common_Test test; + int rows, cols, type; + bool ret; + const int rowsMax = 128; + const int colsMax = 128; + const int typeMax = 7; + + for (type = 0; type < typeMax; type++) { + for (rows = 1; rows < rowsMax; rows++) { + for (cols = 1; cols < colsMax; cols++) { + aclMat aclmat_src(rows, cols, type, acl_context_0); + aclMat aclmat_dest(aclmat_src); + ret = test.Test_Diff(aclmat_src, aclmat_dest); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_constructor: -> aclMat(aclmat_src) <- is success" << endl; + + for (type = 0; type < typeMax; type++) { + for (rows = 1; rows < rowsMax; rows++) { + for (cols = 1; cols < colsMax; cols++) { + aclMat aclmat_src(cv::Size(cols, rows), type, 
acl_context_0, MEMORY_ALIGN); + aclMat aclmat_dest(aclmat_src); + ret = test.Test_Diff(aclmat_src, aclmat_dest); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_constructor: -> aclMat(const aclMat& other) <- is success" << endl; +} + +void AclMat_Test::Test_constructor_DATA(aclCxt *acl_context_0) { + Common_Test test; + int rows, cols, type; + bool ret; + const int rowsMax = 128; + const int colsMax = 128; + const int typeMax = 7; + + for (type = 0; type < typeMax; type++) { + for (rows = 1; rows < rowsMax; rows++) { + for (cols = 1; cols < colsMax; cols++) { + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + test.SetDataRange(mat_src); + + aclMat aclmat_src(rows, cols, type, mat_src.data, acl_context_0); + aclmat_src.download(mat_dest); + ret = test.Test_Diff(mat_src, mat_dest); + ASSERT_TRUE(ret); + } + } + } + cerr << "Test_constructor_DATA: -> aclMat(rows, cols, type, data, acl_context)) <- is success" << endl; + + for (type = 0; type < typeMax; type++) { + for (rows = 1; rows < rowsMax; rows++) { + for (cols = 1; cols < colsMax; cols++) { + Mat mat_src(cv::Size(cols, rows), type); + Mat mat_dest(cv::Size(cols, rows), type); + test.SetDataRange(mat_src); + + aclMat aclmat_src(cv::Size(cols, rows), type, mat_src.data, acl_context_0); + aclmat_src.download(mat_dest); + ret = test.Test_Diff(mat_src, mat_dest); + ASSERT_TRUE(ret); + } + } + } + + cerr << "Test_constructor_DATA: -> aclMat(size, type, data, acl_context)) <- is success" << endl; +} + +void AclMat_Test::Test_constructor_RANGE(aclCxt *acl_context_0) { + Common_Test test; + int type; + bool ret; + int rangerows, rangecols; + int rows = 64, cols = 64; + const int rangerowsMax = 64; + const int rangecolsMax = 64; + const int typeMax = 7; + + for (type = 0; type < typeMax; type++) { + for (rangerows = 4; rangerows < rangerowsMax; rangerows++) { + for (rangecols = 4; rangecols < rangecolsMax; rangecols++) { + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + 
test.SetDataRange(mat_src); + test.SetDataRange(mat_dest); + + Mat mat_rangesrc(mat_src, cv::Range(2, rangerows), cv::Range(2, rangecols)); + Mat mat_rangedest(mat_dest, cv::Range(2, rangerows), cv::Range(2, rangecols)); + aclMat aclmat_src(rows, cols, type, mat_src.data, acl_context_0); + aclMat aclmat_range(aclmat_src, cv::Range(2, rangerows), cv::Range(2, rangecols)); + aclmat_range.download(mat_rangedest); + ret = test.Test_Diff(mat_rangesrc, mat_rangedest); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_constructor_RANGE: -> aclMat(aclmat_src, rowragne, colrange)) <- is success" << endl; + +} + +void AclMat_Test::Test_constructor_ROI(aclCxt *acl_context_0) { + Common_Test test; + { + int rows = 6, cols = 8; + int type = CV_8UC1; + cv::Rect roi(2, 2, 1, 1); + bool ret; + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + + test.SetDataRange(mat_src); + test.SetDataRange(mat_dest); + + Mat mat_roi1(mat_src, roi); + Mat mat_roi(mat_dest, roi); + + aclMat aclmat_src(rows, cols, type, mat_src.data, acl_context_0); + aclMat aclmat_roi(aclmat_src, roi); + aclmat_roi.download(mat_roi); + ret = test.Test_Diff(mat_roi1, mat_roi); + ASSERT_TRUE(ret); + } + + { + int rows = 12, cols = 61; + int type = CV_16UC3; + cv::Rect roi(8, 8, 2, 2); + bool ret; + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + + test.SetDataRange(mat_src); + test.SetDataRange(mat_dest); + + Mat mat_roi1(mat_src, roi); + Mat mat_roi(mat_dest, roi); + + aclMat aclmat_src(rows, cols, type, mat_src.data, acl_context_0); + aclMat aclmat_roi(aclmat_src, roi); + aclmat_roi.download(mat_roi); + ret = test.Test_Diff(mat_roi1, mat_roi); + ASSERT_TRUE(ret); + } + + { + int rows = 16, cols = 80; + int type = CV_32FC3; + cv::Rect roi(8, 4, 1, 3); + bool ret; + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + + test.SetDataRange(mat_src); + test.SetDataRange(mat_dest); + + Mat mat_roi1(mat_src, roi); + Mat mat_roi(mat_dest, roi); + + aclMat aclmat_src(rows, 
cols, type, mat_src.data, acl_context_0); + aclMat aclmat_roi(aclmat_src, roi); + aclmat_roi.download(mat_roi); + ret = test.Test_Diff(mat_roi1, mat_roi); + ASSERT_TRUE(ret); + } + + clog << "Test_constructor_ROI: -> aclMat(aclmat_src, roi)) <- is success" << endl; +} + +void AclMat_Test::Test_constructor_MAT(aclCxt *acl_context_0) { + Common_Test test; + int rows, cols, type; + bool ret; + const int rowsMax = 1048; + const int colsMax = 1048; + const int typeMax = 7; + + for (type = 0; type < typeMax; type++) { + for (rows = 1000; rows < rowsMax; rows++) { + for (cols = 1000; cols < colsMax; cols++) { + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + test.SetDataRange(mat_src); + + aclMat aclmat_src(mat_src, acl_context_0); + aclmat_src.download(mat_dest); + ret = test.Test_Diff(mat_src, mat_dest); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_constructor_MAT: -> aclMat(mat_src, acl_context_0)) <- is success" << endl; + +} + +void AclMat_Test::Test_DATA_TRANSFER(aclCxt *acl_context_0) { + Common_Test test; + int rows, cols, type; + bool ret; + const int rowsMax = 1048; + const int colsMax = 1048; + const int typeMax = 7; + + for (type = 0; type < typeMax; type++) + { + for (rows = 1000; rows < rowsMax; rows++) + { + for (cols = 1000; cols < colsMax; cols++) + { + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + + test.SetDataRange(mat_src); + test.SetDataRange(mat_dest); + + aclMat aclmat_src(rows, cols, type, acl_context_0); + aclmat_src.upload(mat_src); + aclmat_src.download(mat_dest); + ret = test.Test_Diff(mat_src, mat_dest); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_DATA_TRANSFER_UNALIGNED: -> upload(), download() <- is success" << endl; + + for (type = 0; type < typeMax; type++) + { + for (rows = 1000; rows < rowsMax; rows++) + { + for (cols = 1000; cols < colsMax; cols++) + { + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + + test.SetDataRange(mat_src); + test.SetDataRange(mat_dest); + + 
aclMat aclmat_src(rows, cols, type, acl_context_0, MEMORY_ALIGN); + aclmat_src.upload(mat_src, MEMORY_ALIGN); + aclmat_src.download(mat_dest, MEMORY_ALIGN); + ret = test.Test_Diff(mat_src, mat_dest); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_DATA_TRANSFER_ALIGN: -> upload(), download() <- is success" << endl; +} + +void AclMat_Test::Test_DATA_TRANSFERASYNC(aclCxt *acl_context_0) { + Common_Test test; + int rows, cols, type; + bool ret; + const int rowsMax = 1048; + const int colsMax = 1048; + const int typeMax = 7; + + for (type = 0; type < typeMax; type++) + { + for (rows = 1000; rows < rowsMax; rows++) + { + for (cols = 1000; cols < colsMax; cols++) + { + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + + test.SetDataRange(mat_src); + test.SetDataRange(mat_dest); + + aclMat aclmat_src(rows, cols, type, acl_context_0); + aclmat_src.upload(mat_src, aclmat_src.acl_context->get_stream(0)); + aclmat_src.download(mat_dest, aclmat_src.acl_context->get_stream(0)); + ret = test.Test_Diff(mat_src, mat_dest); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_DATA_TRANSFERASYNC_UNALIGNED: -> upload(), download() <- is success" << endl; + + for (type = 0; type < typeMax; type++) + { + for (rows = 1000; rows < rowsMax; rows++) + { + for (cols = 1000; cols < colsMax; cols++) + { + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + + test.SetDataRange(mat_src); + test.SetDataRange(mat_dest); + + aclMat aclmat_src(rows, cols, type, acl_context_0, MEMORY_ALIGN); + aclmat_src.upload(mat_src, aclmat_src.acl_context->get_stream(0), MEMORY_ALIGN); + aclmat_src.download(mat_dest, aclmat_src.acl_context->get_stream(0), MEMORY_ALIGN); + ret = test.Test_Diff(mat_src, mat_dest); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_DATA_TRANSFERASYNC_ALIGN: -> upload(), download() <- is success" << endl; + +} + +static inline void dataSwap(int& data1, int& data2) { + Common_Test test; + int temp; + if (data1 < data2) { + temp = data1; + data1 = data2; + 
data2 = temp; + } +} + +void AclMat_Test::Test_locateROI(aclCxt *acl_context_0) { + Common_Test test; + int rows = 256, cols = 256; + int type = CV_8UC1; + int rangex, rangey; + int rangex1, rangey1; + cv::Size size, size1; + cv::Point ofs, ofs1; + + for (int x = 0; x < rows * cols; ++x) + { + rangex = (rangex = test.RandDom_()) > 0 ? rangex : 1; + rangey = (rangey = test.RandDom_()) > 0 ? rangey : 1; + rangex1 = (rangex1 = test.RandDom_()) > 0 ? rangex1 : 1; + rangey1 = (rangey1 = test.RandDom_()) > 0 ? rangey1 : 1; + + dataSwap(rangex, rangex1); + dataSwap(rangey, rangey1); + + Mat mat_src(rows, cols, type); + Mat mat_range(mat_src, cv::Range(rangex1, rangex+1), cv::Range(rangey1, rangey+1)); + mat_range.locateROI(size, ofs); + + aclMat aclmat_src(rows, cols, type, acl_context_0); + aclMat aclmat_range(aclmat_src, cv::Range(rangex1, rangex+1), cv::Range(rangey1, rangey+1)); + aclmat_range.locateROI(size1, ofs1); + + ASSERT_EQ(size.height, size1.height); + ASSERT_EQ(size.width, size1.width); + ASSERT_EQ(ofs.x, ofs1.x); + ASSERT_EQ(ofs.y, ofs1.y); + } + clog << "Test_loacteROI: -> locateROI() <- is success" << endl; + +} + +void AclMat_Test::Test_swap(aclCxt *acl_context_0) { + Common_Test test; + int rows, cols, type; + bool ret; + const int rowsMax = 1048; + const int colsMax = 1048; + const int typeMax = 7; + + for (type = 0; type < typeMax; type++) + { + for (rows = 1024; rows < rowsMax; rows++) + { + for (cols = 1024; cols < colsMax; cols++) + { + Mat mat_src(rows, cols, type); + Mat mat_dest(rows, cols, type); + + test.SetDataRange(mat_src); + test.SetDataRange(mat_dest); + + Mat mat_dest1(rows, cols, type); + Mat mat_dest2(rows, cols, type); + + aclMat aclmat_src(rows, cols, type, mat_src.data, acl_context_0); + aclMat aclmat_src1(rows, cols, type, mat_dest.data, acl_context_0); + aclmat_src.swap(aclmat_src1); + + aclmat_src.download(mat_dest1); + aclmat_src1.download(mat_dest2); + + ret = test.Test_Diff(mat_dest1, mat_dest); + ASSERT_TRUE(ret); + + ret = 
test.Test_Diff(mat_dest2, mat_src); + ASSERT_TRUE(ret); + } + } + } + clog << "Test_Swap: -> swap() <- is success" << endl; +} + +void AclMat_Test::Test_operator_add(aclCxt *acl_context) { + Common_Test test; + int rows, cols; + bool ret; + const int rowsMax = 1048; + const int colsMax = 1048; + + vector type{CV_8UC1, CV_8UC3, CV_32FC1, CV_32FC3, CV_32SC1, CV_32SC3}; + for (size_t i = 0; i < type.size(); ++i) + { + test.PrintLog("Correctness test: Functoin: operator+=()", type[i]); + for (rows = 1024; rows < rowsMax; rows++) + { + for (cols = 1024; cols < colsMax; cols++) + { + Mat mat_src(rows, cols, type[i]); + Mat mat_dest(rows, cols, type[i]); + Mat mat_dest1(rows, cols, type[i]); + + test.SetDataRange(mat_src, 32); + test.SetDataRange(mat_dest, 32); + + aclMat aclmat_src(rows, cols, type[i], mat_src.data, acl_context, MEMORY_ALIGN); + aclMat aclmat_dest(rows, cols, type[i], mat_dest.data, acl_context, MEMORY_ALIGN); + + mat_dest += mat_src; + + aclmat_dest += aclmat_src; + aclmat_dest.download(mat_dest1, MEMORY_ALIGN); + + ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + } + } + } + +} + +void AclMat_Test::Test_operator_sub(aclCxt *acl_context) { + Common_Test test; + int rows, cols; + bool ret; + const int rowsMax = 1048; + const int colsMax = 1048; + + vector type{CV_8UC1, CV_8UC3, CV_32FC1, CV_32FC3, CV_32SC1, CV_32SC3, CV_64FC1}; + for (size_t i = 0; i < type.size(); ++i) + { + test.PrintLog("Correctness test: Functoin: operator-=()", type[i]); + for (rows = 1024; rows < rowsMax; rows++) + { + for (cols = 1024; cols < colsMax; cols++) + { + Mat mat_src(rows, cols, type[i], Scalar(1, 2, 3)); + Mat mat_dest(rows, cols, type[i], Scalar(4, 6, 8)); + Mat mat_dest1(rows, cols, type[i]); + + aclMat aclmat_src(rows, cols, type[i], mat_src.data, acl_context, MEMORY_ALIGN); + aclMat aclmat_dest(rows, cols, type[i], mat_dest.data, acl_context, MEMORY_ALIGN); + + mat_dest -= mat_src; + aclmat_dest -= aclmat_src; + aclmat_dest.download(mat_dest1, 
MEMORY_ALIGN); + + ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + } + } + } + +} + +void AclMat_Test::Test_operator_div(aclCxt *acl_context) { + Common_Test test; + int rows, cols; + bool ret; + const int rowsMax = 1048; + const int colsMax = 1048; + + vector type{CV_8UC1, CV_8UC3, CV_32FC1, CV_32FC3, CV_32SC1, CV_32SC3, CV_64FC1}; + for (size_t i = 0; i < type.size(); ++i) + { + test.PrintLog("Correctness test: Functoin: operator/=()", type[i]); + for (rows = 1024; rows < rowsMax; rows++) + { + for (cols = 1024; cols < colsMax; cols++) + { + Mat mat_src(rows, cols, type[i], Scalar(1, 2, 4)); + Mat mat_dest(rows, cols, type[i], Scalar(4, 6, 8)); + Mat mat_dest1(rows, cols, type[i]); + + aclMat aclmat_src(rows, cols, type[i], mat_src.data, acl_context, MEMORY_ALIGN); + aclMat aclmat_dest(rows, cols, type[i], mat_dest.data, acl_context, MEMORY_ALIGN); + + mat_dest /= mat_src; + aclmat_dest /= aclmat_src; + aclmat_dest.download(mat_dest1, MEMORY_ALIGN); + + ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + } + } + } +} + +void AclMat_Test::Test_operator_mul(aclCxt *acl_context) { + Common_Test test; + int val; + bool ret; + const int valMax = 1048; + + vector type{CV_32FC1}; + for (size_t i = 0; i < type.size(); ++i) + { + test.PrintLog("Correctness test: Functoin: operator*=()", type[i]); + for (val = 1024; val < valMax; val++) + { + Mat mat_src(val, val, type[i]); + Mat mat_dest(val, val, type[i]); + Mat mat_dest1(val, val, type[i]); + + test.SetDataRange(mat_src, 32); + test.SetDataRange(mat_dest, 32); + + aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context); + + mat_dest *= mat_src; + aclmat_dest *= aclmat_src; + aclmat_dest.download(mat_dest1); + + ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + } + } + +} + + + + diff --git a/acl/test/test_correctness.hpp b/acl/test/test_correctness.hpp new file mode 100644 index 0000000..490cc19 --- 
/dev/null +++ b/acl/test/test_correctness.hpp @@ -0,0 +1,42 @@ +#ifndef __OPENCV_CORRECTNESS_HPP__ +#define __OPENCV_CORRECTNESS_HPP__ + +#include "test_precomp.hpp" +class CV_EXPORTS AclMat_Test { + public: + AclMat_Test(); + ~AclMat_Test(); + /* test set_device() */ + CV_EXPORTS void Test_set_device(); + /* test aclMat(int rows, int cols, int type, aclCxt *acl_context, aclrtMemMallocPolicy policy = ACL_MEM_MALLOC_HUGE_FIRST) */ + CV_EXPORTS void Test_constructor_UNALIGNED(aclCxt *acl_context); + CV_EXPORTS void Test_constructor_ALIGN(aclCxt *acl_context); + + /* test aclMat(const aclMat &m) */ + CV_EXPORTS void Test_constructor(aclCxt *acl_context); + /* test aclMat(int rows, int cols, int type, void *data, aclCxt* acl_context, size_t step = Mat::AUTO_STEP) */ + CV_EXPORTS void Test_constructor_DATA(aclCxt *acl_context); + /* test aclMat(const aclMat &m, const Range &rowRange, const Range &colRange = Range::all()) */ + CV_EXPORTS void Test_constructor_RANGE(aclCxt *acl_context); + /* test aclMat(const aclMat &m, const Rect &roi) */ + CV_EXPORTS void Test_constructor_ROI(aclCxt *acl_context); + /* test aclMat (const Mat &m, aclCxt* acl_context, aclrtMemMallocPolicy policy = ACL_MEM_MALLOC_HUGE_FIRST) */ + CV_EXPORTS void Test_constructor_MAT(aclCxt *acl_context); + /* test upload download*/ + CV_EXPORTS void Test_DATA_TRANSFER(aclCxt *acl_context); + /* test upload_2d download_2d */ + CV_EXPORTS void Test_DATA_TRANSFERASYNC(aclCxt *acl_context); + /* test locateROI adjustROI */ + CV_EXPORTS void Test_locateROI(aclCxt *acl_context); + /* test swap */ + CV_EXPORTS void Test_swap(aclCxt *acl_context); + + CV_EXPORTS void Test_operator_add(aclCxt *acl_context); + CV_EXPORTS void Test_operator_sub(aclCxt *acl_context); + CV_EXPORTS void Test_operator_mul(aclCxt *acl_context); + CV_EXPORTS void Test_operator_div(aclCxt *acl_context); +}; + +void thread_handler(void); + +#endif \ No newline at end of file diff --git a/acl/test/test_gemm.cpp b/acl/test/test_gemm.cpp new 
file mode 100644 index 0000000..fc3fa63 --- /dev/null +++ b/acl/test/test_gemm.cpp @@ -0,0 +1,102 @@ +#include "test_common.hpp" +#include "test_perf.hpp" + +void PERF_TEST::Test_MatMul(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + type = CV_32FC1; + + for (val = 8; val <= valmax; val *= 2) + { + Mat mat_src(val, val, type); + Mat mat_src1(val, val, type); + Mat mat_dest(val, val, type); + Mat mat_dest1(val, val, type); + + test.SetDataRange(mat_src, 32); + test.SetDataRange(mat_src1, 32); + test.SetDataRange(mat_dest, 32); + + aclMat aclmat_src(val, val, type, mat_src.data, acl_context); + aclMat aclmat_src1(val, val, type, mat_src1.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + int n = 100; + + begin = static_cast(getTickCount()); + while (n--) + mat_dest = mat_src * mat_src1; + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + MatMul(aclmat_src1, aclmat_src, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} + +void PERF_TEST::Test_Convolution(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + type = CV_32FC1; + + for (val = 8; val <= valmax; val *= 2) + { + Mat mat_src(val, val, type, Scalar{1, 2}); + Mat mat_kernel(3, 3, type, Scalar(1, 4)); + Mat mat_dest(val, val, type, Scalar{6}); + + aclMat aclmat_src(val, val, type, mat_src.data, acl_context); + aclMat aclmat_kernel(3, 3, type, 
mat_kernel.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + int n = 1; + + begin = static_cast(getTickCount()); + while (n--) + filter2D(mat_src, mat_dest, -1, mat_kernel); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 1; + begin = static_cast(getTickCount()); + vector strides{1, 1, 1, 1}; + vector pads{1, 1, 1, 1}; + while (n--) + Convolution(aclmat_src, aclmat_kernel, aclmat_dest, strides, pads); + end = static_cast(getTickCount()); + Mat mat_dest1(aclmat_dest.rows, aclmat_dest.cols, type); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + cout << mat_dest << endl; + cout << mat_dest1 << endl; + /* + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + */ + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} \ No newline at end of file diff --git a/acl/test/test_main.cpp b/acl/test/test_main.cpp new file mode 100644 index 0000000..93e4d28 --- /dev/null +++ b/acl/test/test_main.cpp @@ -0,0 +1,10 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. 
+#include "test_precomp.hpp" + +#if defined(HAVE_HPX) + #include +#endif + +CV_TEST_MAIN("cv") diff --git a/acl/test/test_mathfuncs.cpp b/acl/test/test_mathfuncs.cpp new file mode 100644 index 0000000..9c76363 --- /dev/null +++ b/acl/test/test_mathfuncs.cpp @@ -0,0 +1,418 @@ +#include "test_common.hpp" +#include "test_perf.hpp" +/* Test_Abs: timing comparison for abs(). For each side length val = 8, 16, ..., 8192 it builds val x val CV_32FC1 matrices, times 100 abs() calls on the Mat and 100 on the aclMat created from the same buffer, downloads the aclMat result into mat_dest1, asserts that Test_Diff(mat_dest, mat_dest1) returns true, and prints both timings and their ratio. */ +void PERF_TEST::Test_Abs(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + type = CV_32FC1; + + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + Mat mat_src(val, val, type, Scalar{-2}); + Mat mat_dest(val, val, type, Scalar{-4}); + Mat mat_dest1(val, val, type, Scalar{-6}); + + aclMat aclmat_src(val, val, type, mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); +/* NOTE(review): static_cast is written here (and in every timing section of this file) without a target type; a template argument seems to be missing from this text - confirm against the repository. */ + begin = static_cast(getTickCount()); + while (n--) + mat_dest = abs(mat_src); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + aclmat_dest = abs(aclmat_src); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} +/* Test_Pow: timing comparison for pow(src, power, dest) on val x val CV_32FC1 matrices, val = 8 .. 8192 doubling, 100 calls per side. The exponent comes from test.RandDom_(6) and both src and dest are passed through test.SetDataRange(mat, 32) first. NOTE(review): the aclMat result is downloaded into mat_dest1 but never compared with mat_dest, so this function only reports timings. */ +void PERF_TEST::Test_Pow(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + type = CV_32FC1; + + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + int power = test.RandDom_(6); + Mat mat_src(val, val, type); + Mat mat_dest(val, val, type); + Mat mat_dest1(val, val, type); + + test.SetDataRange(mat_src, 32); + test.SetDataRange(mat_dest, 32); + + aclMat aclmat_src(val, val, type, mat_src.data, 
acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + pow(mat_src, power, mat_dest); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + pow(aclmat_src, power, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} +/* Test_Sqrt: timing comparison for sqrt(src, dest) on val x val CV_32FC1 matrices, val = 8 .. 8192 doubling, 100 calls per side, inputs passed through test.SetDataRange(mat, 32). NOTE(review): the downloaded aclMat result is never compared with the Mat result. */ +void PERF_TEST::Test_Sqrt(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + type = CV_32FC1; + + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + Mat mat_src(val, val, type); + Mat mat_dest(val, val, type); + Mat mat_dest1(val, val, type); + + test.SetDataRange(mat_src, 32); + test.SetDataRange(mat_dest, 32); + + aclMat aclmat_src(val, val, type, mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + sqrt(mat_src, mat_dest); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + sqrt(aclmat_src, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} +/* Test_Add: timing comparison for add(src1, src2, dest) on val x val CV_32FC1 matrices, val = 8 .. 8192 doubling, 100 calls per side. After timing, the aclMat result is downloaded and Test_Diff(mat_dest, mat_dest1) is asserted to be true. */ +void PERF_TEST::Test_Add(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + + 
type = CV_32FC1; + + for (val = 8; val <= valmax; val *= 2) + { + Common_Test test; + int n = 100; + Mat mat_src1(val, val, type); + Mat mat_src2(val, val, type); + Mat mat_dest(val, val, type); + Mat mat_dest1(val, val, type); + + test.SetDataRange(mat_src1, 32); + test.SetDataRange(mat_src2, 32); + test.SetDataRange(mat_dest, 32); + + aclMat aclmat_src1(val, val, type, mat_src1.data, acl_context); + aclMat aclmat_src2(val, val, type, mat_src2.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + add(mat_src1, mat_src2, mat_dest); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + add(aclmat_src1, aclmat_src2, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} +/* Test_Divide: timing comparison for divide(src1, src2, dest) on val x val CV_32FC1 matrices, val = 8 .. 8192 doubling, 100 calls per side. The divisor mat_src2 is passed through test.SetDataRange(mat_src2, 2) while the other matrices use 32. The downloaded aclMat result is checked with Test_Diff and asserted. */ +void PERF_TEST::Test_Divide(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + + type = CV_32FC1; + + for (val = 8; val <= valmax; val *= 2) + { + Common_Test test; + int n = 100; + Mat mat_src1(val, val, type); + Mat mat_src2(val, val, type); + Mat mat_dest(val, val, type); + Mat mat_dest1(val, val, type); + + test.SetDataRange(mat_src1, 32); + test.SetDataRange(mat_src2, 2); + test.SetDataRange(mat_dest, 32); + + aclMat aclmat_src1(val, val, type, mat_src1.data, acl_context); + aclMat aclmat_src2(val, val, type, mat_src2.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + divide(mat_src1, 
mat_src2, mat_dest); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + divide(aclmat_src1, aclmat_src2, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} +/* Test_Exp: timing comparison for exp(src, dest) on val x val CV_32FC1 matrices, val = 8 .. 8192 doubling, 100 calls per side; src goes through test.SetDataRange(mat_src, 32) and dest through test.SetDataRange(mat_dest, 2). NOTE(review): the downloaded aclMat result is never compared with the Mat result. */ +void PERF_TEST::Test_Exp(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + type = CV_32FC1; + + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + Mat mat_src(val, val, type); + Mat mat_dest(val, val, type); + Mat mat_dest1(val, val, type); + + test.SetDataRange(mat_src, 32); + test.SetDataRange(mat_dest, 2); + + aclMat aclmat_src(val, val, type, mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + exp(mat_src, mat_dest); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + exp(aclmat_src, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} +/* Test_Log: timing comparison for log(src, dest) on val x val CV_32FC1 matrices, val = 8 .. 8192 doubling, 100 calls per side, inputs passed through test.SetDataRange(mat, 32). NOTE(review): the downloaded aclMat result is never compared with the Mat result. */ +void PERF_TEST::Test_Log(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + type = CV_32FC1; + + for (val = 8; val <= valmax; val 
*= 2) + { + int n = 100; + Mat mat_src(val, val, type); + Mat mat_dest(val, val, type); + Mat mat_dest1(val, val, type); + + test.SetDataRange(mat_src, 32); + test.SetDataRange(mat_dest, 32); + + aclMat aclmat_src(val, val, type, mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + log(mat_src, mat_dest); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + log(aclmat_src, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} +/* Test_Max: timing comparison of cv::max against cv::acl::max on val x val CV_32FC2 matrices, val = 8 .. 8192 doubling, 100 calls per side. The downloaded aclMat result is checked with Test_Diff and asserted. */ +void PERF_TEST::Test_Max(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + + type = CV_32FC2; + + for (val = 8; val <= valmax; val *= 2) + { + Common_Test test; + int n = 100; + Mat mat_src1(val, val, type); + Mat mat_src2(val, val, type); + Mat mat_dest(val, val, type); + Mat mat_dest1(val, val, type); + + test.SetDataRange(mat_src1, 32); + test.SetDataRange(mat_src2, 32); + test.SetDataRange(mat_dest, 32); +/* NOTE(review): aclmat_src1 is built from mat_src2.data and aclmat_src2 from mat_src1.data, i.e. crossed relative to their names. The comparison below still lines up only because an element-wise max does not depend on operand order - confirm this crossing is intended. */ + aclMat aclmat_src1(val, val, type, mat_src2.data, acl_context); + aclMat aclmat_src2(val, val, type, mat_src1.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + cv::max(mat_src1, mat_src2, mat_dest); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + cv::acl::max(aclmat_src1, aclmat_src2, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + 
aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} +/* Test_Min: timing comparison of cv::min against cv::acl::min on val x val CV_32FC3 matrices, val = 8 .. 8192 doubling, 100 calls per side. The downloaded aclMat result is checked with Test_Diff and asserted. */ +void PERF_TEST::Test_Min(aclCxt *acl_context) +{ + int val, type; + int valmax = 8192; + double begin, end, time, acltime; + + type = CV_32FC3; + + for (val = 8; val <= valmax; val *= 2) + { + Common_Test test; + int n = 100; + Mat mat_src1(val, val, type); + Mat mat_src2(val, val, type); + Mat mat_dest(val, val, type); + Mat mat_dest1(val, val, type); + + test.SetDataRange(mat_src1, 32); + test.SetDataRange(mat_src2, 32); + test.SetDataRange(mat_dest, 32); +/* NOTE(review): as written, aclmat_src1 wraps mat_src2.data and aclmat_src2 wraps mat_src1.data (crossed relative to their names); harmless for an order-independent element-wise min, but confirm it is intended. */ + aclMat aclmat_src1(val, val, type, mat_src2.data, acl_context); + aclMat aclmat_src2(val, val, type, mat_src1.data, acl_context); + aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + cv::min(mat_src1, mat_src2, mat_dest); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + cv::acl::min(aclmat_src1, aclmat_src2, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } +} \ No newline at end of file diff --git a/acl/test/test_matrices.cpp b/acl/test/test_matrices.cpp new file mode 100644 index 0000000..58a190e --- /dev/null +++ b/acl/test/test_matrices.cpp @@ -0,0 +1,259 @@ +#include "test_common.hpp" +#include "test_perf.hpp" + 
+/* +//disable +void PERF_TEST::Test_Lookuptable(aclCxt *acl_context_0) +{ + int type = CV_8UC1; + Common_Test test; + Mat mat_src(1, 256, type); + Mat mat_dest(1, 256, type); + Mat lookuptable(1, 256, type); + + test.SetDataRange(mat_src, 32); + test.SetDataRange(lookuptable, 32); + + aclMat aclmat_src(1, 256, type, mat_src.data, acl_context_0); + aclMat aclmat_dest(1, 256, type, mat_dest.data, acl_context_0); + aclMat lut(1, 256, type, lookuptable.data, acl_context_0); + // LUT(mat_src, lookuptable, mat_dest); + lookUpTable(aclmat_src, lut, aclmat_dest); + cout << mat_src << endl; + cout << lookuptable << endl; + cout << mat_dest << endl; +} +*/ + +/* Test_Merge: timing comparison for merge(). For each listed source type (only CV_32FC1 here, merged into CV_32FC3) and each side length val = 8 .. 8192 doubling, it builds three single-channel sources, times 100 merge() calls on the Mat inputs and 100 on the aclMat inputs, downloads the aclMat result, asserts Test_Diff(mat_dest, mat_dest1), and prints both timings and their ratio. */ +void PERF_TEST::Test_Merge(aclCxt *acl_context) +{ + int val; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; +/* NOTE(review): vector is written without an element type here and in the functions below, and static_cast without a target type; angle-bracket text seems to be missing from this dump - confirm against the repository. */ + vector srcType{CV_32FC1}; + vector destType{CV_32FC3}; + + for (size_t i = 0; i < srcType.size(); ++i) + { + test.PrintLog("Perf test : Function: merge()", srcType[i]); + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + Mat mat_src1(val, val, srcType[i], Scalar(1)); + Mat mat_src2(val, val, srcType[i], Scalar(2)); + Mat mat_src3(val, val, srcType[i], Scalar(3)); + Mat mat_dest(val, val, destType[i]); + Mat mat_dest1(val, val, destType[i]); + + test.SetDataRange(mat_src1, 32); + test.SetDataRange(mat_src2, 32); + test.SetDataRange(mat_src3, 32); + + aclMat aclmat_src1(val, val, srcType[i], mat_src1.data, acl_context); + aclMat aclmat_src2(val, val, srcType[i], mat_src2.data, acl_context); + aclMat aclmat_src3(val, val, srcType[i], mat_src3.data, acl_context); + aclMat aclmat_dest(val, val, destType[i], mat_dest.data, acl_context); + + vector src; + src.emplace_back(mat_src1); + src.emplace_back(mat_src2); + src.emplace_back(mat_src3); + + vector acl_src; + acl_src.emplace_back(aclmat_src1); + acl_src.emplace_back(aclmat_src2); + acl_src.emplace_back(aclmat_src3); + + begin = static_cast(getTickCount()); + while (n--) + merge(src, mat_dest); + end = 
static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + merge(acl_src, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } + } +} + +/* Test_Transpose: timing comparison for transpose() over the types CV_32FC1 and CV_32SC1 and side lengths val = 8 .. 8192 doubling, 100 calls per side. The downloaded aclMat result is checked with Test_Diff and asserted. */ +void PERF_TEST::Test_Transpose(aclCxt *acl_context) +{ + int val; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + vector type{CV_32FC1, CV_32SC1}; + for (size_t i = 0; i < type.size(); ++i) + { + test.PrintLog("Perf test : Function: transpose()", type[i]); + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + Mat mat_src(val, val, type[i]); + Mat mat_dest(val, val, type[i]); + Mat mat_dest1(val, val, type[i]); + + test.SetDataRange(mat_src, 32); + + aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + transpose(mat_src, mat_dest); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + transpose(aclmat_src, aclmat_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } + } +} +/* Test_Split: timing comparison for split() of a CV_32FC3 source into three CV_32FC1 planes. Each aclMat plane is downloaded and compared with the matching Mat plane via Test_Diff; the combined result is asserted. NOTE(review): valmax is 8 and n is 1 here, so unlike the other perf tests only the 8 x 8 shape is run, with a single call per side - confirm this reduced range is intended. */ +void PERF_TEST::Test_Split(aclCxt *acl_context) +{ + 
int val; + int valmax = 8; + double begin, end, time, acltime; + Common_Test test; + + vector srcType{CV_32FC3}; + vector destType{CV_32FC1}; + + for (size_t i = 0; i < srcType.size(); ++i) + { + test.PrintLog("Perf test : Function: split()", srcType[i]); + for (val = 8; val <= valmax; val *= 2) + { + int n = 1; + Mat mat_src(val, val, srcType[i]); + Mat mat_dest1(val, val, destType[i]); + Mat mat_dest2(val, val, destType[i]); + Mat mat_dest3(val, val, destType[i]); + + test.SetDataRange(mat_src, 32); + + aclMat aclmat_src(val, val, srcType[i], mat_src.data, acl_context); + aclMat aclmat_dest1(val, val, destType[i], mat_dest1.data, acl_context); + aclMat aclmat_dest2(val, val, destType[i], mat_dest2.data, acl_context); + aclMat aclmat_dest3(val, val, destType[i], mat_dest3.data, acl_context); + + vector dest; + dest.emplace_back(mat_dest1); + dest.emplace_back(mat_dest2); + dest.emplace_back(mat_dest3); + + vector acl_dest; + acl_dest.emplace_back(aclmat_dest1); + acl_dest.emplace_back(aclmat_dest2); + acl_dest.emplace_back(aclmat_dest3); + + begin = static_cast(getTickCount()); + while (n--) + split(mat_src, dest); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 1; + begin = static_cast(getTickCount()); + while (n--) + split(aclmat_src, acl_dest); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + (acl_dest.data())[0].download(mat_dest1); + (acl_dest.data())[1].download(mat_dest2); + (acl_dest.data())[2].download(mat_dest3); + + bool ret = test.Test_Diff((dest.data())[0], mat_dest1); + ret &= test.Test_Diff((dest.data())[1], mat_dest2); + ret &= test.Test_Diff((dest.data())[2], mat_dest3); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } + } + +} + +/* Test_Flip: timing comparison for flip(src, dest, 0) over the types CV_8UC1, CV_32FC1, CV_32SC1 and CV_64FC1 and side lengths val = 8 .. 8192 doubling, 100 calls per side. The downloaded aclMat result is checked with Test_Diff and asserted. */ +void PERF_TEST::Test_Flip(aclCxt 
*acl_context) +{ + int val; + int valmax = 8192; + double begin, end, time, acltime; + Common_Test test; + + vector type{CV_8UC1, CV_32FC1, CV_32SC1, CV_64FC1}; + for (size_t i = 0; i < type.size(); ++i) + { + test.PrintLog("Perf test : Function: flip()", type[i]); + for (val = 8; val <= valmax; val *= 2) + { + int n = 100; + Mat mat_src(val, val, type[i]); + Mat mat_dest(val, val, type[i]); + Mat mat_dest1(val, val, type[i]); + + test.SetDataRange(mat_src, 32); + + aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context); + aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context); + + begin = static_cast(getTickCount()); + while (n--) + flip(mat_src, mat_dest, 0); + end = static_cast(getTickCount()); + time = (end - begin) / getTickFrequency(); + + n = 100; + begin = static_cast(getTickCount()); + while (n--) + flip(aclmat_src, aclmat_dest, 0); + end = static_cast(getTickCount()); + acltime = (end - begin) / getTickFrequency(); + + aclmat_dest.download(mat_dest1); + bool ret = test.Test_Diff(mat_dest, mat_dest1); + ASSERT_TRUE(ret); + if (val < 128) + cout << "Shape: " << val << " x " << val << "\t\t"; + else + cout << "Shape: " << val << " x " << val << "\t"; + cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; + } + } +} \ No newline at end of file diff --git a/acl/test/test_perf.hpp b/acl/test/test_perf.hpp new file mode 100644 index 0000000..034a30a --- /dev/null +++ b/acl/test/test_perf.hpp @@ -0,0 +1,34 @@ +#ifndef __OPENCV_TEST_PERF_HPP__ +#define __OPENCV_TEST_PERF_HPP__ + +#include "test_precomp.hpp" +/* PERF_TEST declares the performance-test entry points; every method takes the aclCxt to run against. NOTE(review): Test_Lookuptable is declared below, but its definition in test_matrices.cpp is inside a comment block in this patch, so calling it would presumably fail to link - confirm. */ +class PERF_TEST +{ +public: + CV_EXPORTS void Test_operator_add_perf(aclCxt *acl_context); + CV_EXPORTS void Test_operator_sub_perf(aclCxt *acl_context); + CV_EXPORTS void Test_operator_div_perf(aclCxt *acl_context); + CV_EXPORTS void Test_operator_mul_perf(aclCxt *acl_context); + CV_EXPORTS void Test_Abs(aclCxt *acl_context); + CV_EXPORTS void Test_Pow(aclCxt *acl_context); + CV_EXPORTS void 
Test_Sqrt(aclCxt *acl_context); + CV_EXPORTS void Test_Add(aclCxt *acl_context); + CV_EXPORTS void Test_Divide(aclCxt *acl_context); + CV_EXPORTS void Test_Exp(aclCxt *acl_context); + CV_EXPORTS void Test_Log(aclCxt *acl_context); + CV_EXPORTS void Test_Max(aclCxt *acl_context); + CV_EXPORTS void Test_Min(aclCxt *acl_context); + + CV_EXPORTS void Test_MatMul(aclCxt *acl_context); + CV_EXPORTS void Test_Convolution(aclCxt *acl_context); + + CV_EXPORTS void Test_Lookuptable(aclCxt *acl_context); + CV_EXPORTS void Test_Merge(aclCxt *acl_context); + CV_EXPORTS void Test_Split(aclCxt *acl_context); + CV_EXPORTS void Test_Transpose(aclCxt *acl_context); + CV_EXPORTS void Test_Flip(aclCxt *acl_context); + +}; + +#endif \ No newline at end of file diff --git a/acl/test/test_precomp.hpp b/acl/test/test_precomp.hpp new file mode 100644 index 0000000..947b076 --- /dev/null +++ b/acl/test/test_precomp.hpp @@ -0,0 +1,25 @@ +// This file is part of OpenCV project. +// It is subject to the license terms in the LICENSE file found in the top-level directory +// of this distribution and at http://opencv.org/license.html. +#ifndef __OPENCV_TEST_PRECOMP_HPP__ +#define __OPENCV_TEST_PRECOMP_HPP__ +#include +#include +#include +#include +/* NOTE(review): the four #include lines above carry no header names in this text; they look stripped from this dump - confirm against the repository. */ +#include "opencv2/core.hpp" +#include "opencv2/ts.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/videoio.hpp" +#include "sys/time.h" +#include "opencv2/acl/acl.hpp" +/* NOTE(review): these using-directives sit in a header, so every file that includes it gets all five namespaces opened. */ +using namespace cv; +using namespace cv::acl; +using namespace cvtest; +using namespace testing; +using namespace std; + +#endif diff --git a/run.sh b/run.sh new file mode 100755 index 0000000..e15f287 --- /dev/null +++ b/run.sh @@ -0,0 +1,33 @@ +#! /bin/bash +opencv_directory=./opencv +acl_directory=./acl +# NOTE(review): -o is a logical OR, so the mv below also runs when only one of the two directories exists (for example on a second run, after acl has already been moved); -a looks like what was meant - confirm. +if [ -d $opencv_directory -o -d $acl_directory ] +then + mv $acl_directory $opencv_directory/modules/ +fi + +cd $opencv_directory + +build_directory=$PWD/build +if [ ! -d $build_directory ] +then + mkdir -p build +fi +cd build + +cmake .. 
+make -j +while [ $? != 0 ] +do + make -j +done + +for var in $@ +do + if [ $var == "ACLTEST" ] + then + cd bin + ./opencv_test_acl + fi +done \ No newline at end of file