update stream

This commit is contained in:
luoliang
2022-08-19 19:04:30 +08:00
parent f87cc9911c
commit 72d80d5421
20 changed files with 660 additions and 748 deletions

View File

@@ -1,6 +1,15 @@
#if(NOT HAVE_ACL)
# ocv_module_disable(acl)
# return()
#endif()
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" "-DENABLE_DVPP_INTERFACE")
set(acl_lib "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/")
set(acl_lib "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/stub/") set(acl_lib "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/stub/")
link_directories(${acl_lib}) link_directories(${acl_lib})
set(acl_inc "/usr/local/Ascend/ascend-toolkit/latest/acllib/include/")
set(acl_inc "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/include/") set(acl_inc "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/include/")
ocv_include_directories(${acl_inc}) ocv_include_directories(${acl_inc})

View File

@@ -47,6 +47,7 @@ namespace cv
std::vector<aclStream> _acl_streams; std::vector<aclStream> _acl_streams;
}; };
CV_EXPORTS void wait_stream(aclCxt* context, const int stream_id = 0);
//////////////////////////////// device //////////////////////////////// //////////////////////////////// device ////////////////////////////////
CV_EXPORTS aclCxt *set_device(const char* config_path, int device_id = 0, int stream_count = 1); CV_EXPORTS aclCxt *set_device(const char* config_path, int device_id = 0, int stream_count = 1);
CV_EXPORTS void release_device(aclCxt* context); CV_EXPORTS void release_device(aclCxt* context);

View File

@@ -8,10 +8,10 @@ namespace cv
namespace acl namespace acl
{ {
// matrix multiplication // matrix multiplication
CV_EXPORTS void MatMul(const aclMat& src1, const aclMat& src2, aclMat& dest); CV_EXPORTS void MatMul(const aclMat& src1, const aclMat& src2, aclMat& dest, int stream_id = 0);
// convolution // convolution
CV_EXPORTS void Convolution(const aclMat& src, const aclMat& kernel, aclMat& dest, \ CV_EXPORTS void Convolution(const aclMat& src, const aclMat& kernel, aclMat& dest, \
const vector<int64_t>& stridesList = vector<int64_t> {1, 1, 1, 1}, const vector<int64_t>& padsList = vector<int64_t> {0, 0, 0, 0}); const vector<int64_t>& stridesList = vector<int64_t> {1, 1, 1, 1}, const vector<int64_t>& padsList = vector<int64_t> {0, 0, 0, 0}, int stream_id = 0);
} /* end of namespace acl */ } /* end of namespace acl */

View File

@@ -11,15 +11,15 @@ namespace cv
{ {
namespace acl namespace acl
{ {
CV_EXPORTS aclMat abs(const aclMat &src); CV_EXPORTS aclMat abs(const aclMat &src, int stream_id = 0);
CV_EXPORTS void pow(const aclMat &src, double power, aclMat &dest); CV_EXPORTS void pow(const aclMat &src, double power, aclMat &dest, int stream_id = 0);
CV_EXPORTS void sqrt(const aclMat &src, aclMat &dest); CV_EXPORTS void sqrt(const aclMat &src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void add(const aclMat &src, const aclMat &other_src, aclMat &dest); CV_EXPORTS void add(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void divide(const aclMat &src, const aclMat &other_src, aclMat &dest); CV_EXPORTS void divide(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void exp(const aclMat &src, aclMat &dest); CV_EXPORTS void exp(const aclMat &src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void log(const aclMat &src, aclMat &dest); CV_EXPORTS void log(const aclMat &src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void max(const aclMat &src, const aclMat &other_src, aclMat &dest); CV_EXPORTS void max(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void min(const aclMat &src, const aclMat &other_src, aclMat &dest); CV_EXPORTS void min(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id = 0);
} /* end of namespace acl */ } /* end of namespace acl */
} /* end of namespace cv */ } /* end of namespace cv */

View File

@@ -10,12 +10,12 @@ namespace cv
// Matrix lookup table // Matrix lookup table
//CV_EXPORTS void lookUpTable(const aclMat& src, const aclMat& lut, aclMat& dst); //CV_EXPORTS void lookUpTable(const aclMat& src, const aclMat& lut, aclMat& dst);
// Multiple channel merge // Multiple channel merge
CV_EXPORTS void merge(const vector<aclMat>& mv, aclMat& dst); CV_EXPORTS void merge(const vector<aclMat>& mv, aclMat& dst, int stream_id = 0);
// Split into channels // Split into channels
CV_EXPORTS void split(const aclMat& src, vector<aclMat>& mv); CV_EXPORTS void split(const aclMat& src, vector<aclMat>& mv, int stream_id = 0);
// Matrix transpose // Matrix transpose
CV_EXPORTS void transpose(const aclMat& src, aclMat& dest); CV_EXPORTS void transpose(const aclMat& src, aclMat& dest, int stream_id = 0);
CV_EXPORTS void flip(const aclMat& src, aclMat& dest, int flipCode = 0); CV_EXPORTS void flip(const aclMat& src, aclMat& dest, int flipCode = 0, int stream_id = 0);
} /* end of namespace acl */ } /* end of namespace acl */
} /* end of namespace cv */ } /* end of namespace cv */

View File

@@ -78,13 +78,13 @@ namespace cv
// Create operator description // Create operator description
CV_EXPORTS OperatorDesc CreateOpDesc(const string opType, const vector<aclMat> &input_Mat, vector<aclMat> &output_Mat, aclFormat format = ACL_FORMAT_NHWC, Opdims config = FOUR_DIMS); CV_EXPORTS OperatorDesc CreateOpDesc(const string opType, const vector<aclMat> &input_Mat, vector<aclMat> &output_Mat, aclFormat format = ACL_FORMAT_NHWC, Opdims config = FOUR_DIMS);
// Compile and run the operator // Compile and run the operator
CV_EXPORTS void compileAndRunop(OperatorDesc &opDesc, vector<aclDataBuffer *> &inputBuffers_, vector<aclDataBuffer *> &outputBuffers_, aclCxt *acl_context); CV_EXPORTS void compileAndRunop(OperatorDesc &opDesc, vector<aclDataBuffer *> &inputBuffers_, vector<aclDataBuffer *> &outputBuffers_, aclCxt *acl_context, int stream_id);
// Suitable for one input and one output // Suitable for one input and one output
CV_EXPORTS void OneInAndOneOut(const aclMat &input, aclMat &output, const string opType); CV_EXPORTS void OneInAndOneOut(const aclMat &input, aclMat &output, const string opType, int stream_id = 0);
// Suitable for two inputs and one output // Suitable for two inputs and one output
CV_EXPORTS void TwoInAndOneOut(const aclMat &inputMat, const aclMat &inputMatOther, aclMat &outputMat, const string opType); CV_EXPORTS void TwoInAndOneOut(const aclMat &inputMat, const aclMat &inputMatOther, aclMat &outputMat, const string opType, int stream_id = 0);
// run the operator // run the operator
CV_EXPORTS void Runop(vector<aclMat> &input, vector<aclMat> &output, OperatorDesc &opDesc); CV_EXPORTS void Runop(vector<aclMat> &input, vector<aclMat> &output, OperatorDesc &opDesc, int stream_id);
} /* end of namespace acl */ } /* end of namespace acl */

View File

@@ -69,6 +69,10 @@ namespace cv
return global_aclenv; return global_aclenv;
} }
/**
 * @brief: wait for one stream of an acl context by calling aclrtSynchronizeStream on it
 * @param [in] acl_context: context that owns the stream; it is dereferenced, so it must not be null
 * @param [in] stream_id: index handed to acl_context->get_stream()
 */
void wait_stream(aclCxt * acl_context, const int stream_id)
{
// Route the runtime status through AclSafeCall, as the other ACL runtime calls
// in this module do, so that a failed synchronization is not silently ignored.
AclSafeCall(aclrtSynchronizeStream(acl_context->get_stream(stream_id)));
}
/////////////////////////create acl context//////////////////////// /////////////////////////create acl context////////////////////////
/** /**

View File

@@ -199,7 +199,7 @@ namespace cv
inputBuffers_.emplace_back(aclCreateDataBuffer(nullptr, 0)); inputBuffers_.emplace_back(aclCreateDataBuffer(nullptr, 0));
outputBuffers_.emplace_back(aclCreateDataBuffer(newMat.data, newMat.totalSize)); outputBuffers_.emplace_back(aclCreateDataBuffer(newMat.data, newMat.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, this->acl_context); compileAndRunop(opDesc, inputBuffers_, outputBuffers_, this->acl_context, 0);
*this = newMat; *this = newMat;

View File

@@ -8,7 +8,7 @@ namespace cv
* @brief: matrix multiplication * @brief: matrix multiplication
* *
*/ */
void MatMul(const aclMat& src1, const aclMat& src2, aclMat& dest) void MatMul(const aclMat& src1, const aclMat& src2, aclMat& dest, int stream_id)
{ {
CV_Assert(src1.cols == src2.rows && src1.type() == src2.type()); CV_Assert(src1.cols == src2.rows && src1.type() == src2.type());
vector<aclMat> input_Mat; vector<aclMat> input_Mat;
@@ -29,7 +29,7 @@ namespace cv
opDesc.AddInputTensorDesc(ACL_DT_UNDEFINED, 0, nullptr, ACL_FORMAT_UNDEFINED); opDesc.AddInputTensorDesc(ACL_DT_UNDEFINED, 0, nullptr, ACL_FORMAT_UNDEFINED);
opDesc.AddTensorAttr("transpose_x1", OP_BOOL, false); opDesc.AddTensorAttr("transpose_x1", OP_BOOL, false);
opDesc.AddTensorAttr("transpose_x2", OP_BOOL, false); opDesc.AddTensorAttr("transpose_x2", OP_BOOL, false);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
for (size_t i = 0; i < inputBuffers_.size(); i++) for (size_t i = 0; i < inputBuffers_.size(); i++)
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i]));
@@ -45,7 +45,7 @@ namespace cv
* @param [in] stridesList: strides, The N and C dimensions must be set to 1 * @param [in] stridesList: strides, The N and C dimensions must be set to 1
* @param [in] padsList: pads, vector<int64_t>(top, bottom, left, right) * @param [in] padsList: pads, vector<int64_t>(top, bottom, left, right)
*/ */
void Convolution(const aclMat& src, const aclMat& kernel, aclMat& dest, const vector<int64_t>& stridesList, const vector<int64_t>& padsList) void Convolution(const aclMat& src, const aclMat& kernel, aclMat& dest, const vector<int64_t>& stridesList, const vector<int64_t>& padsList, int stream_id)
{ {
vector<aclDataBuffer *> inputBuffers_; vector<aclDataBuffer *> inputBuffers_;
vector<aclDataBuffer *> outputBuffers_; vector<aclDataBuffer *> outputBuffers_;
@@ -74,7 +74,7 @@ namespace cv
inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize));
inputBuffers_.emplace_back(aclCreateDataBuffer(kernel.data, kernel.totalSize)); inputBuffers_.emplace_back(aclCreateDataBuffer(kernel.data, kernel.totalSize));
outputBuffers_.emplace_back(aclCreateDataBuffer(acl_dest.data, acl_dest.totalSize)); outputBuffers_.emplace_back(aclCreateDataBuffer(acl_dest.data, acl_dest.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context); compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context, stream_id);
acl_dest.data = aclGetDataBufferAddr(outputBuffers_[0]); acl_dest.data = aclGetDataBufferAddr(outputBuffers_[0]);
dest = acl_dest; dest = acl_dest;

View File

@@ -4,10 +4,10 @@ namespace cv
{ {
namespace acl namespace acl
{ {
/**
 * @brief: run the "Abs" operator on a matrix and return the result as a new matrix
 * @param [in] a: source matrix; the result is created with the same rows, cols, type and acl context
 * @param [in] stream_id: forwarded to OneInAndOneOut (present only in the updated signature)
 * @return the newly created destination matrix
 */
aclMat abs(const aclMat& a) aclMat abs(const aclMat& a, int stream_id)
{ {
aclMat dest(a.rows, a.cols, a.type(), a.acl_context); aclMat dest(a.rows, a.cols, a.type(), a.acl_context);
OneInAndOneOut(a, dest, "Abs"); OneInAndOneOut(a, dest, "Abs", stream_id);
return dest; return dest;
} }
@@ -65,7 +65,7 @@ namespace cv
} }
void pow(const aclMat& src, double power, aclMat& dest) void pow(const aclMat& src, double power, aclMat& dest, int stream_id)
{ {
vector<aclMat> input_Mat; vector<aclMat> input_Mat;
vector<aclMat> output_Mat; vector<aclMat> output_Mat;
@@ -82,19 +82,23 @@ namespace cv
opDesc.AddInputTensorDesc(dataType, shape2.size(), shape2.data(), ACL_FORMAT_NHWC); opDesc.AddInputTensorDesc(dataType, shape2.size(), shape2.data(), ACL_FORMAT_NHWC);
size_t size = aclGetTensorDescSize(opDesc.inputDesc[1]); size_t size = aclGetTensorDescSize(opDesc.inputDesc[1]);
void *power_dev = power_data(power, dataType, size);
inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize)); inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize));
inputBuffers_.emplace_back(aclCreateDataBuffer(power_data(power, dataType, size), size)); inputBuffers_.emplace_back(aclCreateDataBuffer(power_dev, size));
outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
aclrtFree(power_dev);
for (size_t i = 0; i < inputBuffers_.size(); i++) for (size_t i = 0; i < inputBuffers_.size(); i++)
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i]));
for (size_t i = 0; i < outputBuffers_.size(); i++) for (size_t i = 0; i < outputBuffers_.size(); i++)
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
} }
void add(const aclMat& src, const aclMat& other_src, aclMat& dest) void add(const aclMat& src, const aclMat& other_src, aclMat& dest, int stream_id)
{ {
bool is_correct; bool is_correct;
@@ -106,10 +110,10 @@ namespace cv
is_correct &= (src.type() == dest.type()); is_correct &= (src.type() == dest.type());
CV_Assert(is_correct); CV_Assert(is_correct);
TwoInAndOneOut(src, other_src, dest, "Add"); TwoInAndOneOut(src, other_src, dest, "Add", stream_id);
} }
void divide(const aclMat& src, const aclMat& other_src, aclMat& dest) void divide(const aclMat& src, const aclMat& other_src, aclMat& dest, int stream_id)
{ {
bool is_correct; bool is_correct;
@@ -121,10 +125,10 @@ namespace cv
is_correct &= (src.type() == dest.type()); is_correct &= (src.type() == dest.type());
CV_Assert(is_correct); CV_Assert(is_correct);
TwoInAndOneOut(src, other_src, dest, "Div"); TwoInAndOneOut(src, other_src, dest, "Div", stream_id);
} }
void exp(const aclMat& src, aclMat& dest) void exp(const aclMat& src, aclMat& dest, int stream_id)
{ {
CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type()); CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type());
@@ -145,13 +149,13 @@ namespace cv
opDesc.AddTensorAttr("scale", OP_FLOAT, 1.0); opDesc.AddTensorAttr("scale", OP_FLOAT, 1.0);
opDesc.AddTensorAttr("shift", OP_FLOAT, 0.0); opDesc.AddTensorAttr("shift", OP_FLOAT, 0.0);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0])); AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0]));
} }
void log(const aclMat &src, aclMat &dest) void log(const aclMat &src, aclMat &dest, int stream_id)
{ {
CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type()); CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type());
@@ -172,13 +176,13 @@ namespace cv
opDesc.AddTensorAttr("scale", OP_FLOAT, 1.0); opDesc.AddTensorAttr("scale", OP_FLOAT, 1.0);
opDesc.AddTensorAttr("shift", OP_FLOAT, 0.0); opDesc.AddTensorAttr("shift", OP_FLOAT, 0.0);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0])); AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0]));
} }
void max(const aclMat &src, const aclMat &other_src, aclMat &dest) void max(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id)
{ {
bool is_correct; bool is_correct;
@@ -190,10 +194,10 @@ namespace cv
is_correct &= (src.type() == dest.type()); is_correct &= (src.type() == dest.type());
CV_Assert(is_correct); CV_Assert(is_correct);
TwoInAndOneOut(src, other_src, dest, "Maximum"); TwoInAndOneOut(src, other_src, dest, "Maximum", stream_id);
} }
void min(const aclMat &src, const aclMat &other_src, aclMat &dest) void min(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id)
{ {
bool is_correct; bool is_correct;
@@ -205,14 +209,14 @@ namespace cv
is_correct &= (src.type() == dest.type()); is_correct &= (src.type() == dest.type());
CV_Assert(is_correct); CV_Assert(is_correct);
TwoInAndOneOut(src, other_src, dest, "Minimum"); TwoInAndOneOut(src, other_src, dest, "Minimum", stream_id);
} }
/**
 * @brief: run the "Sqrt" operator from src into dest
 * @param [in] src: source matrix
 * @param [in, out] dest: destination matrix; must already have the same rows, cols and type as src
 * @param [in] stream_id: forwarded to OneInAndOneOut (present only in the updated signature)
 */
void sqrt(const aclMat &src, aclMat &dest) void sqrt(const aclMat &src, aclMat &dest, int stream_id)
{ {
// dest is not reallocated here, so a mismatching destination is rejected up front
CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type()); CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type());
OneInAndOneOut(src, dest, "Sqrt"); OneInAndOneOut(src, dest, "Sqrt", stream_id);
} }
} /* end of namespace acl */ } /* end of namespace acl */

View File

@@ -108,7 +108,7 @@ namespace cv
return -1; return -1;
} }
void merge(const vector<aclMat>& mv, aclMat& dest) void merge(const vector<aclMat>& mv, aclMat& dest, int stream_id)
{ {
vector<aclDataBuffer *> inputBuffers_; vector<aclDataBuffer *> inputBuffers_;
vector<aclDataBuffer *> outputBuffers_; vector<aclDataBuffer *> outputBuffers_;
@@ -159,7 +159,7 @@ namespace cv
dest = temp; dest = temp;
outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context); compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
for (size_t i = 0; i < inputBuffers_.size(); i++) for (size_t i = 0; i < inputBuffers_.size(); i++)
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i]));
@@ -172,11 +172,12 @@ namespace cv
/** /**
* @brief : Dynamic shape reasoning, compiler problems * @brief : Dynamic shape reasoning
* *
*/ */
void transpose(const aclMat& src, aclMat& dest)
void transpose(const aclMat& src, aclMat& dest, int stream_id)
{ {
vector<aclDataBuffer *> inputBuffers_; vector<aclDataBuffer *> inputBuffers_;
vector<aclDataBuffer *> outputBuffers_; vector<aclDataBuffer *> outputBuffers_;
@@ -238,9 +239,7 @@ namespace cv
opDesc.outputDesc.data(), opDesc.outputDesc.data(),
outputBuffers_.data(), outputBuffers_.data(),
opDesc.opAttr, opDesc.opAttr,
src.acl_context->get_stream(0))); dest.acl_context->get_stream(stream_id)));
AclSafeCall(aclrtSynchronizeStream(src.acl_context->get_stream(0)));
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[1])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[1]));
@@ -252,8 +251,8 @@ namespace cv
} }
/* transposeD */
/* #if 0
void transpose(const aclMat& src, aclMat& dest) void transpose(const aclMat& src, aclMat& dest)
{ {
vector<aclDataBuffer *> inputBuffers_; vector<aclDataBuffer *> inputBuffers_;
@@ -279,7 +278,7 @@ namespace cv
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0])); AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0]));
} }
*/ #endif
static int split_type(int depth) static int split_type(int depth)
{ {
@@ -299,7 +298,7 @@ namespace cv
return -1; return -1;
} }
void split(const aclMat& src, vector<aclMat>& mv) void split(const aclMat& src, vector<aclMat>& mv, int stream_id)
{ {
vector<aclDataBuffer *> inputBuffers_; vector<aclDataBuffer *> inputBuffers_;
vector<aclDataBuffer *> outputBuffers_; vector<aclDataBuffer *> outputBuffers_;
@@ -333,7 +332,7 @@ namespace cv
outputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize)); outputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize));
} }
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context); compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context, stream_id);
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
for (int i = 0; i < num_split; ++i) for (int i = 0; i < num_split; ++i)
@@ -341,7 +340,7 @@ namespace cv
} }
/* #if 0
//disable //disable
void split(const aclMat& src, vector<aclMat>& mv) void split(const aclMat& src, vector<aclMat>& mv)
@@ -427,9 +426,9 @@ namespace cv
for (int i = 0; i < num_split; ++i) for (int i = 0; i < num_split; ++i)
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
} }
*/ #endif
static void flip_(const aclMat& src, aclMat& dest, int axis) static void flip_(const aclMat& src, aclMat& dest, int axis, int stream_id)
{ {
vector<aclDataBuffer *> inputBuffers_; vector<aclDataBuffer *> inputBuffers_;
vector<aclDataBuffer *> outputBuffers_; vector<aclDataBuffer *> outputBuffers_;
@@ -456,26 +455,26 @@ namespace cv
outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize)); outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context); compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[1])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[1]));
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0])); AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0]));
} }
void flip(const aclMat& src, aclMat& dest, int filpCode) void flip(const aclMat& src, aclMat& dest, int filpCode, int stream_id)
{ {
if (filpCode == 0) { if (filpCode == 0) {
flip_(src, dest, 1); flip_(src, dest, 1, stream_id);
} }
else if (filpCode > 0) { else if (filpCode > 0) {
flip_(src, dest, 2); flip_(src, dest, 2, stream_id);
} }
else { else {
flip_(src, dest, 2); flip_(src, dest, 2, stream_id);
aclMat tmp(dest.rows, dest.cols, dest.type(), dest.acl_context); aclMat tmp(dest.rows, dest.cols, dest.type(), dest.acl_context);
aclrtMemcpy(tmp.data, dest.totalSize, dest.data, dest.totalSize, ACL_MEMCPY_DEVICE_TO_DEVICE); aclrtMemcpy(tmp.data, dest.totalSize, dest.data, dest.totalSize, ACL_MEMCPY_DEVICE_TO_DEVICE);
flip_(tmp, dest, 1); flip_(tmp, dest, 1, stream_id);
} }
} }
} /* end of namespace acl */ } /* end of namespace acl */

View File

@@ -108,7 +108,7 @@ namespace cv
* @brief compile and run operator * @brief compile and run operator
* *
*/ */
void compileAndRunop(OperatorDesc& opDesc, vector<aclDataBuffer *>& inputBuffers_, vector<aclDataBuffer *>& outputBuffers_, aclCxt *acl_context) void compileAndRunop(OperatorDesc& opDesc, vector<aclDataBuffer *>& inputBuffers_, vector<aclDataBuffer *>& outputBuffers_, aclCxt *acl_context, int stream_id)
{ {
AclSafeCall(aclopCompile(opDesc.opType.c_str(), AclSafeCall(aclopCompile(opDesc.opType.c_str(),
opDesc.inputDesc.size(), opDesc.inputDesc.size(),
@@ -128,14 +128,10 @@ namespace cv
opDesc.outputDesc.data(), opDesc.outputDesc.data(),
outputBuffers_.data(), outputBuffers_.data(),
opDesc.opAttr, opDesc.opAttr,
acl_context->get_stream(0))); acl_context->get_stream(stream_id)));
AclSafeCall(aclrtSynchronizeStream(acl_context->get_stream(0)));
} }
void Runop(vector<aclMat>& input, vector<aclMat>& output, OperatorDesc& opDesc) void Runop(vector<aclMat>& input, vector<aclMat>& output, OperatorDesc& opDesc, int stream_id)
{ {
size_t i; size_t i;
@@ -147,7 +143,7 @@ namespace cv
for (i = 0; i < output.size(); ++i) for (i = 0; i < output.size(); ++i)
outputBuffers_.emplace_back(aclCreateDataBuffer(output[i].data, output[i].totalSize)); outputBuffers_.emplace_back(aclCreateDataBuffer(output[i].data, output[i].totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, output[0].acl_context); compileAndRunop(opDesc, inputBuffers_, outputBuffers_, output[0].acl_context, stream_id);
for (i = 0; i < input.size(); ++i) for (i = 0; i < input.size(); ++i)
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i])); AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i]));
@@ -155,7 +151,7 @@ namespace cv
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i])); AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
} }
void OneInAndOneOut(const aclMat& inputMat, aclMat& outputMat, const string opType) void OneInAndOneOut(const aclMat& inputMat, aclMat& outputMat, const string opType, int stream_id)
{ {
vector<aclMat> input_Mat; vector<aclMat> input_Mat;
vector<aclMat> output_Mat; vector<aclMat> output_Mat;
@@ -164,10 +160,10 @@ namespace cv
output_Mat.emplace_back(outputMat); output_Mat.emplace_back(outputMat);
OperatorDesc opDesc = CreateOpDesc(opType, input_Mat, output_Mat); OperatorDesc opDesc = CreateOpDesc(opType, input_Mat, output_Mat);
Runop(input_Mat, output_Mat, opDesc); Runop(input_Mat, output_Mat, opDesc, stream_id);
} }
void TwoInAndOneOut(const aclMat& inputMat, const aclMat& inputMatOther, aclMat& outputMat, const string opType) void TwoInAndOneOut(const aclMat& inputMat, const aclMat& inputMatOther, aclMat& outputMat, const string opType, int stream_id)
{ {
vector<aclMat> input_Mat; vector<aclMat> input_Mat;
vector<aclMat> output_Mat; vector<aclMat> output_Mat;
@@ -177,7 +173,7 @@ namespace cv
output_Mat.emplace_back(outputMat); output_Mat.emplace_back(outputMat);
OperatorDesc opDesc = CreateOpDesc(opType, input_Mat, output_Mat); OperatorDesc opDesc = CreateOpDesc(opType, input_Mat, output_Mat);
Runop(input_Mat, output_Mat, opDesc); Runop(input_Mat, output_Mat, opDesc, stream_id);
} }
} /* end of namespace acl */ } /* end of namespace acl */

View File

@@ -7,10 +7,9 @@ namespace opencv_test
{ {
namespace namespace
{ {
aclCxt *acl_context_0 = set_device("../../modules/acl/test/acl.json", 0, 2); aclCxt *acl_context_0 = set_device("../../modules/acl/test/acl.json", 0, 3);
////////////////////////////////////////////////////Correctness_test//////////////////////////////////////////////////////// ////////////////////////////////////////////////////Correctness_test////////////////////////////////////////////////////////
#if 0
/* range: rows: 1 ~ 64, cols: 1 ~ 64, type: 0 ~ 7 /* range: rows: 1 ~ 64, cols: 1 ~ 64, type: 0 ~ 7
* test function: * test function:
* config: MEMORY_ALIGN * config: MEMORY_ALIGN
@@ -179,6 +178,7 @@ namespace opencv_test
AclMat_Test test; AclMat_Test test;
test.Test_operator_div(acl_context_0); test.Test_operator_div(acl_context_0);
} }
#endif
////////////////////////////////////////////////////Perf_test//////////////////////////////////////////////////////// ////////////////////////////////////////////////////Perf_test////////////////////////////////////////////////////////
TEST(Operator, add) TEST(Operator, add)
@@ -216,6 +216,7 @@ namespace opencv_test
PERF_TEST test; PERF_TEST test;
test.Test_Pow(acl_context_0); test.Test_Pow(acl_context_0);
} }
TEST(Mathfunction, sqrt) TEST(Mathfunction, sqrt)
{ {
PERF_TEST test; PERF_TEST test;
@@ -302,28 +303,6 @@ namespace opencv_test
release_device(acl_context_0); release_device(acl_context_0);
} }
#ifdef DEBUG
TEST(Test, other)
{
aclCxt *acl_context_0 = set_device("../../modules/acl/test/acl.json", 0, 2);
PERF_TEST test;
test.Test_other(acl_context_0);
release_device(acl_context_0);
}
TEST(Test, other1) }
{
aclCxt *acl_context_0 = set_device("../../modules/acl/test/acl.json", 0, 2);
PERF_TEST test;
test.Test_other1(acl_context_0);
release_device(acl_context_0);
}
TEST(Test, other2)
{
PERF_TEST test;
test.Test_other2();
}
#endif
}
} }

View File

@@ -1 +1,2 @@
{} {
}

View File

@@ -1,19 +1,12 @@
#include "test_common.hpp" #include "test_common.hpp"
#include "test_perf.hpp" #include "test_perf.hpp"
#define CHECK(cmd) do { \
aclError e = cmd; \
if( e != ACL_ERROR_NONE) { \
printf("Failed: ACL error %s:%d '%d'\n", \
__FILE__,__LINE__,e); \
exit(0); \
} \
} while(0)
void PERF_TEST::Test_operator_add_perf(aclCxt *acl_context) void PERF_TEST::Test_operator_add_perf(aclCxt *acl_context)
{ {
int val; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
@@ -23,7 +16,7 @@ void PERF_TEST::Test_operator_add_perf(aclCxt *acl_context)
test.PrintLog("Perf test : Function: operator+=()", type[i]); test.PrintLog("Perf test : Function: operator+=()", type[i]);
for (val = 8; val <= valmax; val *= 2) for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; n = cycle_index;
Mat mat_src(val, val, type[i]); Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]); Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]); Mat mat_dest1(val, val, type[i]);
@@ -38,14 +31,17 @@ void PERF_TEST::Test_operator_add_perf(aclCxt *acl_context)
while (n--) while (n--)
mat_dest += mat_src; mat_dest += mat_src;
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
aclmat_dest += aclmat_src;
wait_stream(acl_context);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
aclmat_dest += aclmat_src; aclmat_dest += aclmat_src;
wait_stream(acl_context);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency(); acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1); bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -61,18 +57,19 @@ void PERF_TEST::Test_operator_add_perf(aclCxt *acl_context)
void PERF_TEST::Test_operator_sub_perf(aclCxt *acl_context) void PERF_TEST::Test_operator_sub_perf(aclCxt *acl_context)
{ {
int val; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
vector<int> type{CV_32FC1, CV_32SC1, CV_64FC1}; vector<int> type{CV_8UC1, CV_32FC1, CV_32SC1};
for (size_t i = 0; i < type.size(); ++i) for (size_t i = 0; i < type.size(); ++i)
{ {
test.PrintLog("Perf test : Function: operator-=()", type[i]); test.PrintLog("Perf test : Function: operator-=()", type[i]);
for (val = 8; val <= valmax; val *= 2) for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; n = cycle_index;
Mat mat_src(val, val, type[i]); Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]); Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]); Mat mat_dest1(val, val, type[i]);
@@ -87,18 +84,21 @@ void PERF_TEST::Test_operator_sub_perf(aclCxt *acl_context)
while (n--) while (n--)
mat_dest -= mat_src; mat_dest -= mat_src;
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
aclmat_dest -= aclmat_src;
wait_stream(acl_context);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
aclmat_dest -= aclmat_src; aclmat_dest -= aclmat_src;
wait_stream(acl_context);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency(); acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1); //bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret); //ASSERT_TRUE(ret);
if (val < 128) if (val < 128)
cout << "Shape: " << val << " x " << val << "\t\t"; cout << "Shape: " << val << " x " << val << "\t\t";
else else
@@ -111,18 +111,19 @@ void PERF_TEST::Test_operator_sub_perf(aclCxt *acl_context)
void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context) void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context)
{ {
int val; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
vector<int> type{CV_8UC1, CV_32FC1, CV_32SC1, CV_64FC1}; vector<int> type{CV_32FC1};
for (size_t i = 0; i < type.size(); ++i) for (size_t i = 0; i < type.size(); ++i)
{ {
test.PrintLog("Perf test : Function: operator/=()", type[i]); test.PrintLog("Perf test : Function: operator/=()", type[i]);
for (val = 8; val <= valmax; val *= 2) for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; n = cycle_index;
Mat mat_src(val, val, type[i], Scalar(1, 2, 4)); Mat mat_src(val, val, type[i], Scalar(1, 2, 4));
Mat mat_dest(val, val, type[i], Scalar(2, 4, 8)); Mat mat_dest(val, val, type[i], Scalar(2, 4, 8));
Mat mat_dest1(val, val, type[i]); Mat mat_dest1(val, val, type[i]);
@@ -134,18 +135,21 @@ void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context)
while (n--) while (n--)
mat_dest /= mat_src; mat_dest /= mat_src;
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
aclmat_dest /= aclmat_src;
wait_stream(acl_context);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
aclmat_dest /= aclmat_src; aclmat_dest /= aclmat_src;
wait_stream(acl_context);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency(); acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1); //bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret); //ASSERT_TRUE(ret);
if (val < 128) if (val < 128)
cout << "Shape: " << val << " x " << val << "\t\t"; cout << "Shape: " << val << " x " << val << "\t\t";
else else
@@ -158,296 +162,53 @@ void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context)
// Perf test for the in-place operator*= : cv::Mat versus aclMat on square
// CV_32FC1 matrices whose side doubles from 8 up to valmax (4096).
//
// NOTE(review): this text is a two-column diff rendering. On most lines the
// pre-change statement is followed by the post-change statement, so the lines
// are not compilable as they stand.
//
// Post-change flow, as far as the right-hand column shows:
//  - CPU: mat_dest *= mat_src runs cycle_index times; the elapsed time is
//    divided by cycle_index.
//  - ACL: one untimed aclmat_dest *= aclmat_src followed by wait_stream(),
//    then (cycle_index - 1) timed calls ended by another wait_stream(); the
//    elapsed time is divided by (cycle_index - 1).
//  - The ACL result is downloaded into mat_dest1, compared with the CPU
//    result through Test_Diff/ASSERT_TRUE, and both timings plus their ratio
//    are printed per shape.
void PERF_TEST::Test_operator_mul_perf(aclCxt *acl_context) void PERF_TEST::Test_operator_mul_perf(aclCxt *acl_context)
{ {
int val, type; int val, n;
int valmax = 4096; int valmax = 4096;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1; for (size_t i = 0; i < type.size(); ++i)
for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; for (val = 8; val <= valmax; val *= 2)
Mat mat_src(val, val, type); {
Mat mat_dest(val, val, type); n = cycle_index;
Mat mat_dest1(val, val, type); Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src, 1); test.SetDataRange(mat_src, 1);
test.SetDataRange(mat_dest, 1); test.SetDataRange(mat_dest, 1);
aclMat aclmat_src(val, val, type, mat_src.data, acl_context); aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
mat_dest *= mat_src; mat_dest *= mat_src;
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
begin = static_cast<double>(getTickCount());
while (n--)
aclmat_dest *= aclmat_src; aclmat_dest *= aclmat_src;
end = static_cast<double>(getTickCount()); wait_stream(acl_context);
acltime = (end - begin) / getTickFrequency(); begin = static_cast<double>(getTickCount());
while (n--)
aclmat_dest *= aclmat_src;
wait_stream(acl_context);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1); bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret); ASSERT_TRUE(ret);
if (val < 128) if (val < 128)
cout << "Shape: " << val << " x " << val << "\t\t"; cout << "Shape: " << val << " x " << val << "\t\t";
else else
cout << "Shape: " << val << " x " << val << "\t"; cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
} }
} }
/**
 * Smoke test of the ACL single-operator path: runs "ConcatD" (N = 2,
 * concat_dim = 0) on two 2x4 float tensors using stream 0 of acl_context and
 * prints the resulting 4x4 tensor.
 *
 * Corrections compared with the earlier version:
 *  - the output staging vector is now actually filled (the loop used to append
 *    to data1, so a zero-byte copy was issued for the output buffer);
 *  - the read-back copies only the 4x4 result; it used to copy 512 floats
 *    (2048 bytes) out of a 1024-byte device allocation;
 *  - every aclrtMemcpy result is passed through CHECK;
 *  - tensor descriptors, data buffers, device memory and the attribute object
 *    are released before returning.
 *
 * @param acl_context context whose stream 0 executes the operator.
 */
void PERF_TEST::Test_other(aclCxt *acl_context)
{
    std::vector<aclDataBuffer *> input_buffers_;
    std::vector<aclDataBuffer *> output_buffers_;
    std::vector<aclTensorDesc *> input_descs_;
    std::vector<aclTensorDesc *> output_descs_;
    string op_type_ = "ConcatD";

    auto *attr_ = aclopCreateAttr();
    aclopSetAttrInt(attr_, "N", 2);
    aclopSetAttrInt(attr_, "concat_dim", 0);

    // ---- input 0: 2x4 floats holding 0..7 ----
    vector<int64_t> dims0 = {2, 4};
    auto size0 = 2 * 4 * 4;
    auto *desc0 = aclCreateTensorDesc(ACL_FLOAT, dims0.size(), dims0.data(), ACL_FORMAT_NCHW);
    void *ptr0 = nullptr;
    vector<float> data0;
    for (auto i = 0; i < 8; ++i)
    {
        data0.emplace_back(i);
    }
    CHECK(aclrtMalloc(&ptr0, size0, ACL_MEM_MALLOC_HUGE_FIRST));
    CHECK(aclrtMemcpy(ptr0, data0.size() * 4, data0.data(), data0.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE));
    auto *buffer0 = aclCreateDataBuffer(ptr0, size0);
    input_descs_.push_back(desc0);
    input_buffers_.push_back(buffer0);
    cout << "input0 done" << endl;

    // ---- input 1: 2x4 floats holding 0..7 (device allocation is 1024 bytes) ----
    vector<int64_t> dims1 = {2, 4};
    auto *desc1 = aclCreateTensorDesc(ACL_FLOAT, dims1.size(), dims1.data(), ACL_FORMAT_NCHW);
    input_descs_.push_back(desc1);
    void *ptr1 = nullptr;
    CHECK(aclrtMalloc(&ptr1, 1024, ACL_MEM_MALLOC_HUGE_FIRST));
    std::vector<float> data1;
    for (auto i = 0; i < 8; ++i)
    {
        data1.emplace_back(i);
    }
    CHECK(aclrtMemcpy(ptr1, data1.size() * 4, data1.data(), data1.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE));
    auto *buffer1 = aclCreateDataBuffer(ptr1, 2 * 4 * 4);
    input_buffers_.push_back(buffer1);
    cout << "input1 done" << endl;

    // ---- output: 4x4 floats; the whole 1024-byte allocation is pre-filled ----
    vector<int64_t> dims2 = {4, 4};
    auto *desc2 = aclCreateTensorDesc(ACL_FLOAT, dims2.size(), dims2.data(), ACL_FORMAT_NCHW);
    output_descs_.push_back(desc2);
    void *ptr2 = nullptr;
    CHECK(aclrtMalloc(&ptr2, 1024, ACL_MEM_MALLOC_HUGE_FIRST));
    std::vector<float> data2;
    for (auto i = 0; i < 256; ++i)
    {
        data2.emplace_back(i); // 256 floats == 1024 bytes, exactly the allocation
    }
    CHECK(aclrtMemcpy(ptr2, data2.size() * 4, data2.data(), data2.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE));
    auto *buffer2 = aclCreateDataBuffer(ptr2, 4 * 4 * 4);
    output_buffers_.push_back(buffer2);
    cout << "output0 done" << endl;

    aclError ret = aclopCompileAndExecute(
        op_type_.c_str(), input_descs_.size(), input_descs_.data(),
        input_buffers_.data(), output_descs_.size(), output_descs_.data(),
        output_buffers_.data(), attr_, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL,
        acl_context->get_stream(0));
    std::cout << "aclopCompileAndExecutr:" << ret << std::endl;
    CHECK(aclrtSynchronizeStream(acl_context->get_stream(0)));
    std::cout << "aclrtSynchronizeStream ok" << std::endl;

    // Read back only the 4x4 result; slots are pre-filled with their index.
    vector<float> res;
    for (auto i = 0; i < 4 * 4; ++i)
    {
        res.emplace_back(i);
    }
    CHECK(aclrtMemcpy(res.data(), res.size() * 4, ptr2, res.size() * 4, ACL_MEMCPY_DEVICE_TO_HOST));
    for (auto item : res)
    {
        cout << item << " ";
    }
    cout << endl;

    // Release everything created above. Destroying a data buffer does not
    // free the device memory it wraps, so that memory is freed separately.
    for (auto *buffer : input_buffers_)
    {
        CHECK(aclDestroyDataBuffer(buffer));
    }
    for (auto *buffer : output_buffers_)
    {
        CHECK(aclDestroyDataBuffer(buffer));
    }
    for (auto *desc : input_descs_)
    {
        aclDestroyTensorDesc(desc);
    }
    for (auto *desc : output_descs_)
    {
        aclDestroyTensorDesc(desc);
    }
    CHECK(aclrtFree(ptr0));
    CHECK(aclrtFree(ptr1));
    CHECK(aclrtFree(ptr2));
    aclopDestroyAttr(attr_);
}
/**
 * Smoke test of the ACL single-operator path using aclMat storage: runs
 * "ConcatD" (N = 2, concat_dim = 0) on two 2x4 CV_32FC1 aclMat inputs using
 * stream 0 of acl_context and prints the 4x4 result held by acl_dest.
 *
 * Corrections compared with the earlier version:
 *  - the read-back copies only the 4x4 result; it used to copy 512 floats out
 *    of a 4x4 float matrix;
 *  - tensor descriptors, data buffers and the attribute object are released
 *    before returning (the device memory itself stays owned by the aclMat
 *    objects).
 *
 * NOTE(review): the read-back assumes acl_dest stores its 16 floats
 * contiguously starting at acl_dest.data - confirm against aclMat's layout.
 *
 * @param acl_context context used for the aclMat allocations and whose
 *                    stream 0 executes the operator.
 */
void PERF_TEST::Test_other1(aclCxt *acl_context)
{
    std::vector<aclDataBuffer *> input_buffers_;
    std::vector<aclDataBuffer *> output_buffers_;
    std::vector<aclTensorDesc *> input_descs_;
    std::vector<aclTensorDesc *> output_descs_;
    string op_type_ = "ConcatD";

    auto *attr_ = aclopCreateAttr();
    aclopSetAttrInt(attr_, "N", 2);
    aclopSetAttrInt(attr_, "concat_dim", 0);

    Common_Test test;

    // ---- input 0: 2x4 float matrix uploaded through aclMat ----
    Mat src(2, 4, CV_32FC1);
    test.SetDataRange(src, 8);
    aclMat acl_src(2, 4, CV_32FC1, src.data, acl_context);
    vector<int64_t> dims0 = {2, 4};
    auto size0 = 2 * 4 * 4;
    auto *desc0 = aclCreateTensorDesc(ACL_FLOAT, dims0.size(), dims0.data(), ACL_FORMAT_NHWC);
    auto *buffer0 = aclCreateDataBuffer(acl_src.data, size0);
    input_descs_.push_back(desc0);
    input_buffers_.push_back(buffer0);
    std::cout << "input0 done" << endl;

    // ---- input 1: second 2x4 float matrix ----
    Mat src1(2, 4, CV_32FC1);
    test.SetDataRange(src1, 8);
    aclMat acl_src1(2, 4, CV_32FC1, src1.data, acl_context);
    vector<int64_t> dims1 = {2, 4};
    auto size1 = 2 * 4 * 4;
    auto *desc1 = aclCreateTensorDesc(ACL_FLOAT, dims1.size(), dims1.data(), ACL_FORMAT_NHWC);
    auto *buffer1 = aclCreateDataBuffer(acl_src1.data, size1);
    input_descs_.push_back(desc1);
    input_buffers_.push_back(buffer1);
    std::cout << "input1 done" << endl;

    // ---- output: 4x4 float matrix ----
    aclMat acl_dest(4, 4, CV_32FC1, acl_context);
    vector<int64_t> dims2 = {4, 4};
    auto size3 = 4 * 4 * 4;
    auto *desc2 = aclCreateTensorDesc(ACL_FLOAT, dims2.size(), dims2.data(), ACL_FORMAT_NHWC);
    auto *buffer2 = aclCreateDataBuffer(acl_dest.data, size3);
    output_descs_.push_back(desc2);
    output_buffers_.push_back(buffer2);
    std::cout << "output0 done" << endl;

    aclError ret = aclopCompileAndExecute(
        op_type_.c_str(), input_descs_.size(), input_descs_.data(),
        input_buffers_.data(), output_descs_.size(), output_descs_.data(),
        output_buffers_.data(), attr_, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL,
        acl_context->get_stream(0));
    std::cout << "aclopCompileAndExecutr:" << ret << std::endl;
    CHECK(aclrtSynchronizeStream(acl_context->get_stream(0)));
    std::cout << "aclrtSynchronizeStream ok" << std::endl;

    // Read back only the 4x4 result; slots are pre-filled with their index.
    vector<float> res;
    for (auto i = 0; i < 4 * 4; ++i)
    {
        res.emplace_back(i);
    }
    CHECK(aclrtMemcpy(res.data(), res.size() * 4, acl_dest.data, res.size() * 4, ACL_MEMCPY_DEVICE_TO_HOST));
    for (auto item : res)
    {
        std::cout << item << " ";
    }
    std::cout << endl;

    // Release the ACL wrapper objects; this does not free the aclMat memory.
    for (auto *buffer : input_buffers_)
    {
        CHECK(aclDestroyDataBuffer(buffer));
    }
    for (auto *buffer : output_buffers_)
    {
        CHECK(aclDestroyDataBuffer(buffer));
    }
    for (auto *desc : input_descs_)
    {
        aclDestroyTensorDesc(desc);
    }
    for (auto *desc : output_descs_)
    {
        aclDestroyTensorDesc(desc);
    }
    aclopDestroyAttr(attr_);
}
void PERF_TEST::Test_other2()
{
CHECK(aclInit(nullptr));
std::cout << "aclInit ok" << std::endl;
CHECK(aclrtSetDevice(0));
std::cout << "aclrtSetDevice 0 ok" << std::endl;
std::vector<aclDataBuffer *> input_buffers_;
std::vector<aclDataBuffer *> output_buffers_;
std::vector<aclTensorDesc *> input_descs_;
std::vector<aclTensorDesc *> output_descs_;
string op_type_ = "ConcatD";
auto *attr_ = aclopCreateAttr();
vector<int64_t> a = {0};
aclopSetAttrInt(attr_, "N", 2);
aclopSetAttrInt(attr_, "concat_dim", 0);
vector<int64_t> dims0 = {2, 4};
auto size0 = 2 * 4 * 4;
auto *desc0 = aclCreateTensorDesc(ACL_FLOAT, dims0.size(), dims0.data(), ACL_FORMAT_NCHW);
void *ptr0;
vector<float> data0;
for (auto i = 0; i < 8; ++i)
{
data0.emplace_back(i);
}
CHECK(aclrtMalloc(&ptr0, 2 * 4 * 4, ACL_MEM_MALLOC_HUGE_FIRST));
// std::cout << "ptr:" << ptr0 << " ptr+256:" << ptr0+256;
aclrtMemcpy(ptr0, data0.size() * 4, data0.data(), data0.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE);
auto *buffer0 = aclCreateDataBuffer(ptr0, size0);
input_descs_.push_back(desc0);
input_buffers_.push_back(buffer0);
cout << "input0 done" << endl;
vector<int64_t> dims1 = {2, 4};
auto *desc1 = aclCreateTensorDesc(ACL_FLOAT, dims1.size(), dims1.data(), ACL_FORMAT_NCHW);
input_descs_.push_back(desc1);
void *ptr1;
CHECK(aclrtMalloc(&ptr1, 1024, ACL_MEM_MALLOC_HUGE_FIRST));
std::vector<float> data1;
for (auto i = 0; i < 8; ++i)
{
data1.emplace_back(i);
}
aclrtMemcpy(ptr1, data1.size() * 4, data1.data(), data1.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE);
auto *buffer1 = aclCreateDataBuffer(ptr1, 2 * 4 * 4);
input_buffers_.push_back(buffer1);
cout << "input1 done" << endl;
vector<int64_t> dims2 = {4, 4};
auto *desc2 = aclCreateTensorDesc(ACL_FLOAT, dims2.size(), dims2.data(), ACL_FORMAT_NCHW);
output_descs_.push_back(desc2);
void *ptr2;
CHECK(aclrtMalloc(&ptr2, 1024, ACL_MEM_MALLOC_HUGE_FIRST));
std::vector<float> data2;
for (auto i = 0; i < 256; ++i)
{
data1.emplace_back(i);
}
aclrtMemcpy(ptr2, data2.size() * 4, data2.data(), data2.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE);
auto *buffer2 = aclCreateDataBuffer(ptr2, 4 * 4 * 4);
output_buffers_.push_back(buffer2);
cout << "output0 done" << endl;
aclrtStream stream = nullptr;
aclrtCreateStream(&stream);
cout << 2 << endl;
aclError ret = aclopCompileAndExecute(
op_type_.c_str(), input_descs_.size(), input_descs_.data(),
input_buffers_.data(), output_descs_.size(), output_descs_.data(),
output_buffers_.data(), attr_, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL,
stream);
cout << 3 << endl;
std::cout << "aclopCompileAndExecutr:" << ret << std::endl;
CHECK(aclrtSynchronizeStream(stream));
std::cout << "aclrtSynchronizeStream ok" << std::endl;
vector<float> res;
for (auto i = 0; i < 256 + 256; ++i)
{
res.emplace_back(i);
}
CHECK(aclrtMemcpy(res.data(), res.size() * 4, ptr2, res.size() * 4, ACL_MEMCPY_DEVICE_TO_HOST));
for (auto item : res)
{
cout << item << " ";
}
cout << endl;
}

View File

@@ -159,30 +159,98 @@ void Common_Test::PrintLog(const string& funcname, int type)
cout << funcname << "\t" cout << funcname << "\t"
<< "Type: CV_8UC1" << endl; << "Type: CV_8UC1" << endl;
break; break;
case CV_8UC2:
cout << funcname << "\t"
<< "Type: CV_8UC2" << endl;
break;
case CV_8UC3: case CV_8UC3:
cout << funcname << "\t" cout << funcname << "\t"
<< "Type: CV_8UC3" << endl; << "Type: CV_8UC3" << endl;
break; break;
case CV_8UC4:
cout << funcname << "\t"
<< "Type: CV_8UC4" << endl;
break;
case CV_8SC1:
cout << funcname << "\t"
<< "Type: CV_8SC1" << endl;
break;
case CV_8SC2:
cout << funcname << "\t"
<< "Type: CV_8SC2" << endl;
break;
case CV_8SC3:
cout << funcname << "\t"
<< "Type: CV_8SC3" << endl;
break;
case CV_8SC4:
cout << funcname << "\t"
<< "Type: CV_8SC4" << endl;
break;
case CV_16FC1:
cout << funcname << "\t"
<< "Type: CV_16FC1" << endl;
break;
case CV_16FC2:
cout << funcname << "\t"
<< "Type: CV_16FC2" << endl;
break;
case CV_16FC3:
cout << funcname << "\t"
<< "Type: CV_16FC3" << endl;
break;
case CV_16FC4:
cout << funcname << "\t"
<< "Type: CV_16FC4" << endl;
break;
case CV_32FC1: case CV_32FC1:
cout << funcname << "\t" cout << funcname << "\t"
<< "Type: CV_32FC1" << endl; << "Type: CV_32FC1" << endl;
break; break;
case CV_32FC2:
cout << funcname << "\t"
<< "Type: CV_32FC2" << endl;
break;
case CV_32FC3: case CV_32FC3:
cout << funcname << "\t" cout << funcname << "\t"
<< "Type: CV_32FC3" << endl; << "Type: CV_32FC3" << endl;
break; break;
case CV_32FC4:
cout << funcname << "\t"
<< "Type: CV_32FC4" << endl;
break;
case CV_32SC1: case CV_32SC1:
cout << funcname << "\t" cout << funcname << "\t"
<< "Type: CV_32SC1" << endl; << "Type: CV_32SC1" << endl;
break; break;
case CV_32SC2:
cout << funcname << "\t"
<< "Type: CV_32SC2" << endl;
break;
case CV_32SC3: case CV_32SC3:
cout << funcname << "\t" cout << funcname << "\t"
<< "Type: CV_32SC3" << endl; << "Type: CV_32SC3" << endl;
break; break;
case CV_32SC4:
cout << funcname << "\t"
<< "Type: CV_32SC4" << endl;
break;
case CV_64FC1: case CV_64FC1:
cout << funcname << "\t" cout << funcname << "\t"
<< "Type: CV_64FC1" << endl; << "Type: CV_64FC1" << endl;
break; break;
case CV_64FC2:
cout << funcname << "\t"
<< "Type: CV_64FC2" << endl;
break;
case CV_64FC3:
cout << funcname << "\t"
<< "Type: CV_64FC3" << endl;
break;
case CV_64FC4:
cout << funcname << "\t"
<< "Type: CV_64FC4" << endl;
break;
default: default:
break; break;
} }

View File

@@ -576,6 +576,7 @@ void AclMat_Test::Test_operator_add(aclCxt *acl_context) {
mat_dest += mat_src; mat_dest += mat_src;
aclmat_dest += aclmat_src; aclmat_dest += aclmat_src;
wait_stream(acl_context);
aclmat_dest.download(mat_dest1, MEMORY_ALIGN); aclmat_dest.download(mat_dest1, MEMORY_ALIGN);
ret = test.Test_Diff(mat_dest, mat_dest1); ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -609,7 +610,9 @@ void AclMat_Test::Test_operator_sub(aclCxt *acl_context) {
aclMat aclmat_dest(rows, cols, type[i], mat_dest.data, acl_context, MEMORY_ALIGN); aclMat aclmat_dest(rows, cols, type[i], mat_dest.data, acl_context, MEMORY_ALIGN);
mat_dest -= mat_src; mat_dest -= mat_src;
aclmat_dest -= aclmat_src; aclmat_dest -= aclmat_src;
wait_stream(acl_context);
aclmat_dest.download(mat_dest1, MEMORY_ALIGN); aclmat_dest.download(mat_dest1, MEMORY_ALIGN);
ret = test.Test_Diff(mat_dest, mat_dest1); ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -643,7 +646,9 @@ void AclMat_Test::Test_operator_div(aclCxt *acl_context) {
aclMat aclmat_dest(rows, cols, type[i], mat_dest.data, acl_context, MEMORY_ALIGN); aclMat aclmat_dest(rows, cols, type[i], mat_dest.data, acl_context, MEMORY_ALIGN);
mat_dest /= mat_src; mat_dest /= mat_src;
aclmat_dest /= aclmat_src; aclmat_dest /= aclmat_src;
wait_stream(acl_context);
aclmat_dest.download(mat_dest1, MEMORY_ALIGN); aclmat_dest.download(mat_dest1, MEMORY_ALIGN);
ret = test.Test_Diff(mat_dest, mat_dest1); ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -676,7 +681,9 @@ void AclMat_Test::Test_operator_mul(aclCxt *acl_context) {
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context); aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
mat_dest *= mat_src; mat_dest *= mat_src;
aclmat_dest *= aclmat_src; aclmat_dest *= aclmat_src;
wait_stream(acl_context);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
ret = test.Test_Diff(mat_dest, mat_dest1); ret = test.Test_Diff(mat_dest, mat_dest1);

View File

@@ -3,98 +3,111 @@
// Perf test comparing cv::Mat multiplication (mat_src * mat_src1) with the
// ACL MatMul() on square CV_32FC1 matrices whose side doubles from 8 up to
// valmax (4096).
//
// NOTE(review): this text is a two-column diff rendering. On most lines the
// pre-change statement is followed by the post-change statement, so the lines
// are not compilable as they stand.
//
// Post-change flow, as far as the right-hand column shows:
//  - CPU: cycle_index multiplications, elapsed time divided by cycle_index.
//  - ACL: one untimed MatMul with stream id 0 followed by
//    wait_stream(acl_context, 0); then (cycle_index - 1) timed MatMul calls
//    with stream id 1 ended by wait_stream(acl_context, 1); elapsed time is
//    divided by (cycle_index - 1).
//  - The ACL result is downloaded and compared with the CPU result through
//    Test_Diff/ASSERT_TRUE, then timings and their ratio are printed.
//
// NOTE(review): the CPU side computes mat_src * mat_src1 while the ACL side
// passes (aclmat_src1, aclmat_src) - confirm that this operand order is what
// MatMul expects for an equivalent product.
void PERF_TEST::Test_MatMul(aclCxt *acl_context) void PERF_TEST::Test_MatMul(aclCxt *acl_context)
{ {
int val, type; int val, n;
int valmax = 4096; int valmax = 4096;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1; for (size_t i = 0; i < type.size(); ++i)
for (val = 8; val <= valmax; val *= 2)
{ {
Mat mat_src(val, val, type); for (val = 8; val <= valmax; val *= 2)
Mat mat_src1(val, val, type); {
Mat mat_dest(val, val, type); Mat mat_src(val, val, type[i]);
Mat mat_dest1(val, val, type); Mat mat_src1(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src, 32); test.SetDataRange(mat_src, 32);
test.SetDataRange(mat_src1, 32); test.SetDataRange(mat_src1, 32);
test.SetDataRange(mat_dest, 32); test.SetDataRange(mat_dest, 32);
aclMat aclmat_src(val, val, type, mat_src.data, acl_context); aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_src1(val, val, type, mat_src1.data, acl_context); aclMat aclmat_src1(val, val, type[i], mat_src1.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
int n = 100;
begin = static_cast<double>(getTickCount()); n = cycle_index;
while (n--) begin = static_cast<double>(getTickCount());
mat_dest = mat_src * mat_src1; while (n--)
end = static_cast<double>(getTickCount()); mat_dest = mat_src * mat_src1;
time = (end - begin) / getTickFrequency(); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
begin = static_cast<double>(getTickCount()); MatMul(aclmat_src1, aclmat_src, aclmat_dest, 0);
while (n--) wait_stream(acl_context, 0);
MatMul(aclmat_src1, aclmat_src, aclmat_dest); begin = static_cast<double>(getTickCount());
end = static_cast<double>(getTickCount()); while (n--)
acltime = (end - begin) / getTickFrequency(); MatMul(aclmat_src1, aclmat_src, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1); bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret); ASSERT_TRUE(ret);
if (val < 128) if (val < 128)
cout << "Shape: " << val << " x " << val << "\t\t"; cout << "Shape: " << val << " x " << val << "\t\t";
else else
cout << "Shape: " << val << " x " << val << "\t"; cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
} }
} }
// Perf test comparing cv::filter2D with the ACL Convolution() using a 3x3
// CV_32FC1 kernel on square CV_32FC1 inputs whose side doubles from 8 up to
// valmax (4096). Both strides and pads are {1, 1, 1, 1}.
//
// NOTE(review): this text is a two-column diff rendering. On most lines the
// pre-change statement is followed by the post-change statement, so the lines
// are not compilable as they stand.
//
// Post-change flow, as far as the right-hand column shows:
//  - CPU: cycle_index filter2D calls, elapsed time divided by cycle_index.
//  - ACL: one untimed Convolution with stream id 0 followed by
//    wait_stream(acl_context, 0); then (cycle_index - 1) timed calls with
//    stream id 1 ended by wait_stream(acl_context, 1); elapsed time is
//    divided by (cycle_index - 1).
//  - The ACL result is downloaded into mat_dest1, but the Test_Diff
//    comparison is commented out, so only the timings are reported.
void PERF_TEST::Test_Convolution(aclCxt *acl_context) void PERF_TEST::Test_Convolution(aclCxt *acl_context)
{ {
int val, type; int val, n;
int valmax = 4096; int valmax = 4096;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1; for (size_t i = 0; i < type.size(); ++i)
for (val = 8; val <= valmax; val *= 2)
{ {
Mat mat_src(val, val, type, Scalar{1, 2}); for (val = 8; val <= valmax; val *= 2)
Mat mat_kernel(3, 3, type, Scalar(1, 4)); {
Mat mat_dest(val, val, type, Scalar{6}); Mat mat_src(val, val, type[i], Scalar{1, 2});
Mat mat_kernel(3, 3, type[i], Scalar(1, 4));
Mat mat_dest(val, val, type[i], Scalar{6});
aclMat aclmat_src(val, val, type, mat_src.data, acl_context); aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_kernel(3, 3, type, mat_kernel.data, acl_context); aclMat aclmat_kernel(3, 3, type[i], mat_kernel.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
int n = 100;
begin = static_cast<double>(getTickCount()); n = cycle_index;
while (n--) begin = static_cast<double>(getTickCount());
filter2D(mat_src, mat_dest, -1, mat_kernel); while (n--)
end = static_cast<double>(getTickCount()); filter2D(mat_src, mat_dest, -1, mat_kernel);
time = (end - begin) / getTickFrequency(); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; vector<int64_t> strides{1, 1, 1, 1};
begin = static_cast<double>(getTickCount()); vector<int64_t> pads{1, 1, 1, 1};
vector<int64_t> strides{1, 1, 1, 1}; n = (cycle_index - 1);
vector<int64_t> pads{1, 1, 1, 1}; Convolution(aclmat_src, aclmat_kernel, aclmat_dest, strides, pads, 0);
while (n--) wait_stream(acl_context, 0);
Convolution(aclmat_src, aclmat_kernel, aclmat_dest, strides, pads); begin = static_cast<double>(getTickCount());
end = static_cast<double>(getTickCount()); while (n--)
Mat mat_dest1(aclmat_dest.rows, aclmat_dest.cols, type); Convolution(aclmat_src, aclmat_kernel, aclmat_dest, strides, pads, 1);
acltime = (end - begin) / getTickFrequency(); wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
Mat mat_dest1(aclmat_dest.rows, aclmat_dest.cols, type[i]);
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
/* /*
bool ret = test.Test_Diff(mat_dest, mat_dest1); bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret); ASSERT_TRUE(ret);
*/ */
if (val < 128) if (val < 128)
cout << "Shape: " << val << " x " << val << "\t\t"; cout << "Shape: " << val << " x " << val << "\t\t";
else else
cout << "Shape: " << val << " x " << val << "\t"; cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
} }
} }

View File

@@ -3,18 +3,19 @@
void PERF_TEST::Test_Abs(aclCxt *acl_context) void PERF_TEST::Test_Abs(aclCxt *acl_context)
{ {
int val; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
vector<int> type{CV_32FC1, CV_32SC1}; vector<int> type{CV_32FC1};
for (size_t i = 0; i < type.size(); ++i) for (size_t i = 0; i < type.size(); ++i)
{ {
test.PrintLog("Perf test : Function: Abs()", type[i]); test.PrintLog("Perf test : Function: Abs()", type[i]);
for (val = 8; val <= valmax; val *= 2) for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; n = cycle_index;
Mat mat_src(val, val, type[i], Scalar{-2}); Mat mat_src(val, val, type[i], Scalar{-2});
Mat mat_dest(val, val, type[i], Scalar{-4}); Mat mat_dest(val, val, type[i], Scalar{-4});
Mat mat_dest1(val, val, type[i], Scalar{-6}); Mat mat_dest1(val, val, type[i], Scalar{-6});
@@ -26,14 +27,17 @@ void PERF_TEST::Test_Abs(aclCxt *acl_context)
while (n--) while (n--)
mat_dest = abs(mat_src); mat_dest = abs(mat_src);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
aclmat_dest = abs(aclmat_src, 0);
wait_stream(acl_context, 0);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
aclmat_dest = abs(aclmat_src); aclmat_dest = abs(aclmat_src, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency(); acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1); bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -49,8 +53,9 @@ void PERF_TEST::Test_Abs(aclCxt *acl_context)
void PERF_TEST::Test_Pow(aclCxt *acl_context) void PERF_TEST::Test_Pow(aclCxt *acl_context)
{ {
int val; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
@@ -60,7 +65,7 @@ void PERF_TEST::Test_Pow(aclCxt *acl_context)
test.PrintLog("Perf test : Function: Pow()", type[i]); test.PrintLog("Perf test : Function: Pow()", type[i]);
for (val = 8; val <= valmax; val *= 2) for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; n = cycle_index;
int power = test.RandDom_(6); int power = test.RandDom_(6);
Mat mat_src(val, val, type[i]); Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]); Mat mat_dest(val, val, type[i]);
@@ -75,14 +80,17 @@ void PERF_TEST::Test_Pow(aclCxt *acl_context)
while (n--) while (n--)
pow(mat_src, power, mat_dest); pow(mat_src, power, mat_dest);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
pow(aclmat_src, power, aclmat_dest, 0);
wait_stream(acl_context, 0);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
pow(aclmat_src, power, aclmat_dest); pow(aclmat_src, power, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency(); acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
if (val < 128) if (val < 128)
@@ -96,328 +104,375 @@ void PERF_TEST::Test_Pow(aclCxt *acl_context)
// Perf test comparing cv::sqrt on cv::Mat with the aclMat sqrt() overload on
// square CV_32FC1 matrices whose side doubles from 8 up to valmax (8192).
//
// NOTE(review): this text is a two-column diff rendering. On most lines the
// pre-change statement is followed by the post-change statement, so the lines
// are not compilable as they stand.
//
// Post-change flow, as far as the right-hand column shows:
//  - CPU: cycle_index sqrt calls, elapsed time divided by cycle_index.
//  - ACL: one untimed sqrt call without an explicit stream id followed by
//    wait_stream(acl_context); then (cycle_index - 1) timed calls with stream
//    id 1 ended by wait_stream(acl_context, 1); elapsed time is divided by
//    (cycle_index - 1).
//  - The ACL result is downloaded into mat_dest1 but is not compared with the
//    CPU result; only the timings and their ratio are printed.
void PERF_TEST::Test_Sqrt(aclCxt *acl_context) void PERF_TEST::Test_Sqrt(aclCxt *acl_context)
{ {
int val, type; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1; for (size_t i = 0; i < type.size(); ++i)
for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; for (val = 8; val <= valmax; val *= 2)
Mat mat_src(val, val, type); {
Mat mat_dest(val, val, type); n = cycle_index;
Mat mat_dest1(val, val, type); Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src, 32); test.SetDataRange(mat_src, 32);
test.SetDataRange(mat_dest, 32); test.SetDataRange(mat_dest, 32);
aclMat aclmat_src(val, val, type, mat_src.data, acl_context); aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
sqrt(mat_src, mat_dest); sqrt(mat_src, mat_dest);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
begin = static_cast<double>(getTickCount());
while (n--)
sqrt(aclmat_src, aclmat_dest); sqrt(aclmat_src, aclmat_dest);
end = static_cast<double>(getTickCount()); wait_stream(acl_context);
acltime = (end - begin) / getTickFrequency(); begin = static_cast<double>(getTickCount());
while (n--)
sqrt(aclmat_src, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
if (val < 128) if (val < 128)
cout << "Shape: " << val << " x " << val << "\t\t"; cout << "Shape: " << val << " x " << val << "\t\t";
else else
cout << "Shape: " << val << " x " << val << "\t"; cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
} }
} }
// Perf test comparing cv::add on cv::Mat with the aclMat add() overload on
// square CV_32FC1 matrices whose side doubles from 8 up to valmax (8192).
//
// NOTE(review): this text is a two-column diff rendering. On most lines the
// pre-change statement is followed by the post-change statement, so the lines
// are not compilable as they stand.
//
// Post-change flow, as far as the right-hand column shows:
//  - Common_Test is constructed once before the loops instead of once per
//    shape.
//  - CPU: cycle_index add calls, elapsed time divided by cycle_index.
//  - ACL: one untimed add call without an explicit stream id followed by
//    wait_stream(acl_context); then (cycle_index - 1) timed calls with stream
//    id 1 ended by wait_stream(acl_context, 1); elapsed time is divided by
//    (cycle_index - 1).
//  - The ACL result is downloaded and compared with the CPU result through
//    Test_Diff/ASSERT_TRUE, then timings and their ratio are printed.
void PERF_TEST::Test_Add(aclCxt *acl_context) void PERF_TEST::Test_Add(aclCxt *acl_context)
{ {
int val, type; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test;
type = CV_32FC1; vector<int> type{CV_32FC1};
for (val = 8; val <= valmax; val *= 2) for (size_t i = 0; i < type.size(); ++i)
{ {
Common_Test test; for (val = 8; val <= valmax; val *= 2)
int n = 100; {
Mat mat_src1(val, val, type); n = cycle_index;
Mat mat_src2(val, val, type); Mat mat_src1(val, val, type[i]);
Mat mat_dest(val, val, type); Mat mat_src2(val, val, type[i]);
Mat mat_dest1(val, val, type); Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src1, 32); test.SetDataRange(mat_src1, 32);
test.SetDataRange(mat_src2, 32); test.SetDataRange(mat_src2, 32);
test.SetDataRange(mat_dest, 32); test.SetDataRange(mat_dest, 32);
aclMat aclmat_src1(val, val, type, mat_src1.data, acl_context); aclMat aclmat_src1(val, val, type[i], mat_src1.data, acl_context);
aclMat aclmat_src2(val, val, type, mat_src2.data, acl_context); aclMat aclmat_src2(val, val, type[i], mat_src2.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context); aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
add(mat_src1, mat_src2, mat_dest); add(mat_src1, mat_src2, mat_dest);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
begin = static_cast<double>(getTickCount());
while (n--)
add(aclmat_src1, aclmat_src2, aclmat_dest); add(aclmat_src1, aclmat_src2, aclmat_dest);
end = static_cast<double>(getTickCount()); wait_stream(acl_context);
acltime = (end - begin) / getTickFrequency(); begin = static_cast<double>(getTickCount());
while (n--)
add(aclmat_src1, aclmat_src2, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1); bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret); ASSERT_TRUE(ret);
if (val < 128) if (val < 128)
cout << "Shape: " << val << " x " << val << "\t\t"; cout << "Shape: " << val << " x " << val << "\t\t";
else else
cout << "Shape: " << val << " x " << val << "\t"; cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl; cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
} }
} }
// Benchmarks element-wise divide() on the CPU (cv::Mat) against the ACL
// implementation (aclMat) for square CV_32FC1 matrices of side 8, 16, ..., 8192.
//
// For every size the CPU call is timed over `cycle_index` iterations. The ACL
// call is issued once un-timed on the default stream as a warm-up, then
// `cycle_index - 1` calls are queued on stream 1 and timed up to the return of
// wait_stream() for that stream. Both results are reported as the average
// seconds per call, together with the CPU/ACL ratio.
//
// @param acl_context  ACL context used to create the device matrices and to
//                     wait on the streams.
void PERF_TEST::Test_Divide(aclCxt *acl_context)
{
    int val, n;
    int valmax = 8192;
    int cycle_index = 100;
    double begin, end, time, acltime;
    Common_Test test;

    vector<int> type{CV_32FC1};

    for (size_t i = 0; i < type.size(); ++i)
    {
        for (val = 8; val <= valmax; val *= 2)
        {
            n = cycle_index;
            Mat mat_src1(val, val, type[i]);
            Mat mat_src2(val, val, type[i]);
            Mat mat_dest(val, val, type[i]);
            Mat mat_dest1(val, val, type[i]);

            test.SetDataRange(mat_src1, 32);
            test.SetDataRange(mat_src2, 4);
            test.SetDataRange(mat_dest, 32);

            aclMat aclmat_src1(val, val, type[i], mat_src1.data, acl_context);
            aclMat aclmat_src2(val, val, type[i], mat_src2.data, acl_context);
            aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);

            // CPU reference: average over cycle_index calls.
            begin = static_cast<double>(getTickCount());
            while (n--)
                divide(mat_src1, mat_src2, mat_dest);
            end = static_cast<double>(getTickCount());
            time = (end - begin) / getTickFrequency() / cycle_index;

            // Un-timed warm-up call on the default stream; the remaining
            // cycle_index - 1 calls are the ones that get measured.
            n = (cycle_index - 1);
            divide(aclmat_src1, aclmat_src2, aclmat_dest);
            wait_stream(acl_context);

            // Timed section: queue the calls on stream 1 and stop the clock
            // only after wait_stream() returns for that stream.
            begin = static_cast<double>(getTickCount());
            while (n--)
                divide(aclmat_src1, aclmat_src2, aclmat_dest, 1);
            wait_stream(acl_context, 1);
            end = static_cast<double>(getTickCount());
            acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);

            aclmat_dest.download(mat_dest1);
            // NOTE(review): the CPU/ACL comparison is disabled for divide();
            // the reason is not visible here - confirm before re-enabling.
            // bool ret = test.Test_Diff(mat_dest, mat_dest1);
            // ASSERT_TRUE(ret);

            // Extra tab for short shapes keeps the columns aligned.
            if (val < 128)
                cout << "Shape: " << val << " x " << val << "\t\t";
            else
                cout << "Shape: " << val << " x " << val << "\t";
            cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
        }
    }
}
// Benchmarks exp() on the CPU (cv::Mat) against the ACL implementation
// (aclMat) for square CV_32FC1 matrices of side 8, 16, ..., 8192.
//
// For every size the CPU call is timed over `cycle_index` iterations. The ACL
// call is issued once un-timed on the default stream as a warm-up, then
// `cycle_index - 1` calls are queued on stream 1 and timed up to the return of
// wait_stream() for that stream. Only timings are printed; the ACL output is
// downloaded but not compared with the CPU output.
//
// @param acl_context  ACL context used to create the device matrices and to
//                     wait on the streams.
void PERF_TEST::Test_Exp(aclCxt *acl_context)
{
    int val, n;
    int valmax = 8192;
    int cycle_index = 100;
    double begin, end, time, acltime;
    Common_Test test;

    vector<int> type{CV_32FC1};

    for (size_t i = 0; i < type.size(); ++i)
    {
        for (val = 8; val <= valmax; val *= 2)
        {
            n = cycle_index;
            Mat mat_src(val, val, type[i]);
            Mat mat_dest(val, val, type[i]);
            Mat mat_dest1(val, val, type[i]);

            test.SetDataRange(mat_src, 32);
            test.SetDataRange(mat_dest, 2);

            aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
            aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);

            // CPU reference: average over cycle_index calls.
            begin = static_cast<double>(getTickCount());
            while (n--)
                exp(mat_src, mat_dest);
            end = static_cast<double>(getTickCount());
            time = (end - begin) / getTickFrequency() / cycle_index;

            // Un-timed warm-up call on the default stream; the remaining
            // cycle_index - 1 calls are the ones that get measured.
            n = (cycle_index - 1);
            exp(aclmat_src, aclmat_dest);
            wait_stream(acl_context);

            // Timed section: queue the calls on stream 1 and stop the clock
            // only after wait_stream() returns for that stream.
            begin = static_cast<double>(getTickCount());
            while (n--)
                exp(aclmat_src, aclmat_dest, 1);
            wait_stream(acl_context, 1);
            end = static_cast<double>(getTickCount());
            acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);

            aclmat_dest.download(mat_dest1);

            // Extra tab for short shapes keeps the columns aligned.
            if (val < 128)
                cout << "Shape: " << val << " x " << val << "\t\t";
            else
                cout << "Shape: " << val << " x " << val << "\t";
            cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
        }
    }
}
// Benchmarks log() on the CPU (cv::Mat) against the ACL implementation
// (aclMat) for square CV_32FC1 matrices of side 8, 16, ..., 8192.
//
// For every size the CPU call is timed over `cycle_index` iterations. The ACL
// call is issued once un-timed on the default stream as a warm-up, then
// `cycle_index - 1` calls are queued on stream 1 and timed up to the return of
// wait_stream() for that stream. Only timings are printed; the ACL output is
// downloaded but not compared with the CPU output.
//
// @param acl_context  ACL context used to create the device matrices and to
//                     wait on the streams.
void PERF_TEST::Test_Log(aclCxt *acl_context)
{
    int val, n;
    int valmax = 8192;
    int cycle_index = 100;
    double begin, end, time, acltime;
    Common_Test test;

    vector<int> type{CV_32FC1};

    for (size_t i = 0; i < type.size(); ++i)
    {
        for (val = 8; val <= valmax; val *= 2)
        {
            n = cycle_index;
            Mat mat_src(val, val, type[i]);
            Mat mat_dest(val, val, type[i]);
            Mat mat_dest1(val, val, type[i]);

            test.SetDataRange(mat_src, 32);
            test.SetDataRange(mat_dest, 32);

            aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
            aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);

            // CPU reference: average over cycle_index calls.
            begin = static_cast<double>(getTickCount());
            while (n--)
                log(mat_src, mat_dest);
            end = static_cast<double>(getTickCount());
            time = (end - begin) / getTickFrequency() / cycle_index;

            // Un-timed warm-up call. It is issued on the default stream so
            // that the default-stream wait_stream() below waits on the stream
            // the warm-up was queued on, matching the other perf tests.
            n = (cycle_index - 1);
            log(aclmat_src, aclmat_dest);
            wait_stream(acl_context);

            // Timed section: queue the calls on stream 1 and stop the clock
            // only after wait_stream() returns for that stream.
            begin = static_cast<double>(getTickCount());
            while (n--)
                log(aclmat_src, aclmat_dest, 1);
            wait_stream(acl_context, 1);
            end = static_cast<double>(getTickCount());
            acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);

            aclmat_dest.download(mat_dest1);

            // Extra tab for short shapes keeps the columns aligned.
            if (val < 128)
                cout << "Shape: " << val << " x " << val << "\t\t";
            else
                cout << "Shape: " << val << " x " << val << "\t";
            cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
        }
    }
}
// Benchmarks element-wise max() on the CPU (cv::max) against the ACL
// implementation (cv::acl::max) for square CV_32FC2 matrices of side
// 8, 16, ..., 8192, and checks that both produce the same result.
//
// For every size the CPU call is timed over `cycle_index` iterations. The ACL
// call is issued once un-timed on the default stream as a warm-up, then
// `cycle_index - 1` calls are queued on stream 1 and timed up to the return of
// wait_stream() for that stream. Both results are reported as the average
// seconds per call, together with the CPU/ACL ratio.
//
// @param acl_context  ACL context used to create the device matrices and to
//                     wait on the streams.
void PERF_TEST::Test_Max(aclCxt *acl_context)
{
    int val, n;
    int valmax = 8192;
    int cycle_index = 100;
    double begin, end, time, acltime;
    Common_Test test;

    vector<int> type{CV_32FC2};

    for (size_t i = 0; i < type.size(); ++i)
    {
        for (val = 8; val <= valmax; val *= 2)
        {
            n = cycle_index;
            Mat mat_src1(val, val, type[i]);
            Mat mat_src2(val, val, type[i]);
            Mat mat_dest(val, val, type[i]);
            Mat mat_dest1(val, val, type[i]);

            test.SetDataRange(mat_src1, 32);
            test.SetDataRange(mat_src2, 32);
            test.SetDataRange(mat_dest, 32);

            // The ACL operands are built from the opposite host buffers
            // (src1 <- mat_src2, src2 <- mat_src1), so the ACL call sees the
            // two inputs in swapped order relative to the CPU call.
            aclMat aclmat_src1(val, val, type[i], mat_src2.data, acl_context);
            aclMat aclmat_src2(val, val, type[i], mat_src1.data, acl_context);
            aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);

            // CPU reference: average over cycle_index calls.
            begin = static_cast<double>(getTickCount());
            while (n--)
                cv::max(mat_src1, mat_src2, mat_dest);
            end = static_cast<double>(getTickCount());
            time = (end - begin) / getTickFrequency() / cycle_index;

            // Un-timed warm-up call on the default stream; the remaining
            // cycle_index - 1 calls are the ones that get measured.
            n = (cycle_index - 1);
            cv::acl::max(aclmat_src1, aclmat_src2, aclmat_dest);
            wait_stream(acl_context);

            // Timed section: queue the calls on stream 1 and stop the clock
            // only after wait_stream() returns for that stream.
            begin = static_cast<double>(getTickCount());
            while (n--)
                cv::acl::max(aclmat_src1, aclmat_src2, aclmat_dest, 1);
            wait_stream(acl_context, 1);
            end = static_cast<double>(getTickCount());
            acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);

            // Compare the device result with the CPU reference.
            aclmat_dest.download(mat_dest1);
            bool ret = test.Test_Diff(mat_dest, mat_dest1);
            ASSERT_TRUE(ret);

            // Extra tab for short shapes keeps the columns aligned.
            if (val < 128)
                cout << "Shape: " << val << " x " << val << "\t\t";
            else
                cout << "Shape: " << val << " x " << val << "\t";
            cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
        }
    }
}
// Benchmarks element-wise min() on the CPU (cv::min) against the ACL
// implementation (cv::acl::min) for square CV_32FC3 matrices of side
// 8, 16, ..., 8192, and checks that both produce the same result.
//
// For every size the CPU call is timed over `cycle_index` iterations. The ACL
// call is issued once un-timed on the default stream as a warm-up, then
// `cycle_index - 1` calls are queued on stream 1 and timed up to the return of
// wait_stream() for that stream. Both results are reported as the average
// seconds per call, together with the CPU/ACL ratio.
//
// @param acl_context  ACL context used to create the device matrices and to
//                     wait on the streams.
void PERF_TEST::Test_Min(aclCxt *acl_context)
{
    int val, n;
    int valmax = 8192;
    int cycle_index = 100;
    double begin, end, time, acltime;
    Common_Test test;

    vector<int> type{CV_32FC3};

    for (size_t i = 0; i < type.size(); ++i)
    {
        for (val = 8; val <= valmax; val *= 2)
        {
            // Reuse the function-level counter rather than declaring a new
            // `int n` here, which would shadow it.
            n = cycle_index;
            Mat mat_src1(val, val, type[i]);
            Mat mat_src2(val, val, type[i]);
            Mat mat_dest(val, val, type[i]);
            Mat mat_dest1(val, val, type[i]);

            test.SetDataRange(mat_src1, 32);
            test.SetDataRange(mat_src2, 32);
            test.SetDataRange(mat_dest, 32);

            // The ACL operands are built from the opposite host buffers
            // (src1 <- mat_src2, src2 <- mat_src1), so the ACL call sees the
            // two inputs in swapped order relative to the CPU call.
            aclMat aclmat_src1(val, val, type[i], mat_src2.data, acl_context);
            aclMat aclmat_src2(val, val, type[i], mat_src1.data, acl_context);
            aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);

            // CPU reference: average over cycle_index calls.
            begin = static_cast<double>(getTickCount());
            while (n--)
                cv::min(mat_src1, mat_src2, mat_dest);
            end = static_cast<double>(getTickCount());
            time = (end - begin) / getTickFrequency() / cycle_index;

            // Un-timed warm-up call on the default stream; the remaining
            // cycle_index - 1 calls are the ones that get measured.
            n = (cycle_index - 1);
            cv::acl::min(aclmat_src1, aclmat_src2, aclmat_dest);
            wait_stream(acl_context);

            // Timed section: queue the calls on stream 1 and stop the clock
            // only after wait_stream() returns for that stream.
            begin = static_cast<double>(getTickCount());
            while (n--)
                cv::acl::min(aclmat_src1, aclmat_src2, aclmat_dest, 1);
            wait_stream(acl_context, 1);
            end = static_cast<double>(getTickCount());
            acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);

            // Compare the device result with the CPU reference.
            aclmat_dest.download(mat_dest1);
            bool ret = test.Test_Diff(mat_dest, mat_dest1);
            ASSERT_TRUE(ret);

            // Extra tab for short shapes keeps the columns aligned.
            if (val < 128)
                cout << "Shape: " << val << " x " << val << "\t\t";
            else
                cout << "Shape: " << val << " x " << val << "\t";
            cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
        }
    }
}

View File

@@ -28,8 +28,9 @@ void PERF_TEST::Test_Lookuptable(aclCxt *acl_context_0)
void PERF_TEST::Test_Merge(aclCxt *acl_context) void PERF_TEST::Test_Merge(aclCxt *acl_context)
{ {
int val; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
@@ -41,7 +42,7 @@ void PERF_TEST::Test_Merge(aclCxt *acl_context)
test.PrintLog("Perf test : Function: merge()", srcType[i]); test.PrintLog("Perf test : Function: merge()", srcType[i]);
for (val = 8; val <= valmax; val *= 2) for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; n = cycle_index;
Mat mat_src1(val, val, srcType[i], Scalar(1)); Mat mat_src1(val, val, srcType[i], Scalar(1));
Mat mat_src2(val, val, srcType[i], Scalar(2)); Mat mat_src2(val, val, srcType[i], Scalar(2));
Mat mat_src3(val, val, srcType[i], Scalar(3)); Mat mat_src3(val, val, srcType[i], Scalar(3));
@@ -71,14 +72,17 @@ void PERF_TEST::Test_Merge(aclCxt *acl_context)
while (n--) while (n--)
merge(src, mat_dest); merge(src, mat_dest);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
merge(acl_src, aclmat_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
merge(acl_src, aclmat_dest); merge(acl_src, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency(); acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1); bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret); ASSERT_TRUE(ret);
@@ -94,18 +98,19 @@ void PERF_TEST::Test_Merge(aclCxt *acl_context)
void PERF_TEST::Test_Transpose(aclCxt *acl_context) void PERF_TEST::Test_Transpose(aclCxt *acl_context)
{ {
int val; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
vector<int> type{CV_32FC1, CV_32SC1}; vector<int> type{CV_32FC1};
for (size_t i = 0; i < type.size(); ++i) for (size_t i = 0; i < type.size(); ++i)
{ {
test.PrintLog("Perf test : Function: transpose()", type[i]); test.PrintLog("Perf test : Function: transpose()", type[i]);
for (val = 8; val <= valmax; val *= 2) for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; n = cycle_index;
Mat mat_src(val, val, type[i]); Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]); Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]); Mat mat_dest1(val, val, type[i]);
@@ -119,14 +124,17 @@ void PERF_TEST::Test_Transpose(aclCxt *acl_context)
while (n--) while (n--)
transpose(mat_src, mat_dest); transpose(mat_src, mat_dest);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
transpose(aclmat_src, aclmat_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
transpose(aclmat_src, aclmat_dest); transpose(aclmat_src, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency(); acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1); bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -142,8 +150,9 @@ void PERF_TEST::Test_Transpose(aclCxt *acl_context)
void PERF_TEST::Test_Split(aclCxt *acl_context) void PERF_TEST::Test_Split(aclCxt *acl_context)
{ {
int val; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
@@ -155,7 +164,7 @@ void PERF_TEST::Test_Split(aclCxt *acl_context)
test.PrintLog("Perf test : Function: split()", srcType[i]); test.PrintLog("Perf test : Function: split()", srcType[i]);
for (val = 8; val <= valmax; val *= 2) for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; n = cycle_index;
Mat mat_src(val, val, srcType[i]); Mat mat_src(val, val, srcType[i]);
Mat mat_dest1(val, val, destType[i]); Mat mat_dest1(val, val, destType[i]);
Mat mat_dest2(val, val, destType[i]); Mat mat_dest2(val, val, destType[i]);
@@ -182,14 +191,17 @@ void PERF_TEST::Test_Split(aclCxt *acl_context)
while (n--) while (n--)
split(mat_src, dest); split(mat_src, dest);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
split(aclmat_src, acl_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
split(aclmat_src, acl_dest); split(aclmat_src, acl_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency(); acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
(acl_dest.data())[0].download(mat_dest1); (acl_dest.data())[0].download(mat_dest1);
(acl_dest.data())[1].download(mat_dest2); (acl_dest.data())[1].download(mat_dest2);
@@ -210,11 +222,11 @@ void PERF_TEST::Test_Split(aclCxt *acl_context)
} }
void PERF_TEST::Test_Flip(aclCxt *acl_context) void PERF_TEST::Test_Flip(aclCxt *acl_context)
{ {
int val; int val, n;
int valmax = 8192; int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime; double begin, end, time, acltime;
Common_Test test; Common_Test test;
@@ -224,7 +236,7 @@ void PERF_TEST::Test_Flip(aclCxt *acl_context)
test.PrintLog("Perf test : Function: flip()", type[i]); test.PrintLog("Perf test : Function: flip()", type[i]);
for (val = 8; val <= valmax; val *= 2) for (val = 8; val <= valmax; val *= 2)
{ {
int n = 100; n = cycle_index;
Mat mat_src(val, val, type[i]); Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]); Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]); Mat mat_dest1(val, val, type[i]);
@@ -238,15 +250,18 @@ void PERF_TEST::Test_Flip(aclCxt *acl_context)
while (n--) while (n--)
flip(mat_src, mat_dest, 0); flip(mat_src, mat_dest, 0);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency(); time = (end - begin) / getTickFrequency() / cycle_index;
n = 100; n = (cycle_index - 1);
flip(aclmat_src, aclmat_dest, 0);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount()); begin = static_cast<double>(getTickCount());
while (n--) while (n--)
flip(aclmat_src, aclmat_dest, 0); flip(aclmat_src, aclmat_dest, 0, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount()); end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency(); acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1); aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1); bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret); ASSERT_TRUE(ret);