update stream

This commit is contained in:
luoliang
2022-08-19 19:04:30 +08:00
parent f87cc9911c
commit 72d80d5421
20 changed files with 660 additions and 748 deletions

View File

@@ -1,6 +1,15 @@
#if(NOT HAVE_ACL)
# ocv_module_disable(acl)
# return()
#endif()
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" "-DENABLE_DVPP_INTERFACE")
set(acl_lib "/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64/stub/")
set(acl_lib "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/lib64/stub/")
link_directories(${acl_lib})
set(acl_inc "/usr/local/Ascend/ascend-toolkit/latest/acllib/include/")
set(acl_inc "/usr/local/Ascend/ascend-toolkit/latest/fwkacllib/include/")
ocv_include_directories(${acl_inc})

View File

@@ -47,6 +47,7 @@ namespace cv
std::vector<aclStream> _acl_streams;
};
CV_EXPORTS void wait_stream(aclCxt* context, const int stream_id = 0);
//////////////////////////////// device ////////////////////////////////
CV_EXPORTS aclCxt *set_device(const char* config_path, int device_id = 0, int stream_count = 1);
CV_EXPORTS void release_device(aclCxt* context);

View File

@@ -8,10 +8,10 @@ namespace cv
namespace acl
{
// matrix multiplication
CV_EXPORTS void MatMul(const aclMat& src1, const aclMat& src2, aclMat& dest);
CV_EXPORTS void MatMul(const aclMat& src1, const aclMat& src2, aclMat& dest, int stream_id = 0);
// convolution
CV_EXPORTS void Convolution(const aclMat& src, const aclMat& kernel, aclMat& dest, \
const vector<int64_t>& stridesList = vector<int64_t> {1, 1, 1, 1}, const vector<int64_t>& padsList = vector<int64_t> {0, 0, 0, 0});
const vector<int64_t>& stridesList = vector<int64_t> {1, 1, 1, 1}, const vector<int64_t>& padsList = vector<int64_t> {0, 0, 0, 0}, int stream_id = 0);
} /* end of namespace acl */

View File

@@ -11,15 +11,15 @@ namespace cv
{
namespace acl
{
CV_EXPORTS aclMat abs(const aclMat &src);
CV_EXPORTS void pow(const aclMat &src, double power, aclMat &dest);
CV_EXPORTS void sqrt(const aclMat &src, aclMat &dest);
CV_EXPORTS void add(const aclMat &src, const aclMat &other_src, aclMat &dest);
CV_EXPORTS void divide(const aclMat &src, const aclMat &other_src, aclMat &dest);
CV_EXPORTS void exp(const aclMat &src, aclMat &dest);
CV_EXPORTS void log(const aclMat &src, aclMat &dest);
CV_EXPORTS void max(const aclMat &src, const aclMat &other_src, aclMat &dest);
CV_EXPORTS void min(const aclMat &src, const aclMat &other_src, aclMat &dest);
CV_EXPORTS aclMat abs(const aclMat &src, int stream_id = 0);
CV_EXPORTS void pow(const aclMat &src, double power, aclMat &dest, int stream_id = 0);
CV_EXPORTS void sqrt(const aclMat &src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void add(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void divide(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void exp(const aclMat &src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void log(const aclMat &src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void max(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id = 0);
CV_EXPORTS void min(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id = 0);
} /* end of namespace acl */
} /* end of namespace cv */

View File

@@ -10,12 +10,12 @@ namespace cv
// Matrix lookup table
//CV_EXPORTS void lookUpTable(const aclMat& src, const aclMat& lut, aclMat& dst);
// Multiple channel merge
CV_EXPORTS void merge(const vector<aclMat>& mv, aclMat& dst);
CV_EXPORTS void merge(const vector<aclMat>& mv, aclMat& dst, int stream_id = 0);
// Split into channels
CV_EXPORTS void split(const aclMat& src, vector<aclMat>& mv);
CV_EXPORTS void split(const aclMat& src, vector<aclMat>& mv, int stream_id = 0);
// Matrix transpose
CV_EXPORTS void transpose(const aclMat& src, aclMat& dest);
CV_EXPORTS void flip(const aclMat& src, aclMat& dest, int flipCode = 0);
CV_EXPORTS void transpose(const aclMat& src, aclMat& dest, int stream_id = 0);
CV_EXPORTS void flip(const aclMat& src, aclMat& dest, int flipCode = 0, int stream_id = 0);
} /* end of namespace acl */
} /* end of namespace cv */

View File

@@ -78,13 +78,13 @@ namespace cv
// Create operator description
CV_EXPORTS OperatorDesc CreateOpDesc(const string opType, const vector<aclMat> &input_Mat, vector<aclMat> &output_Mat, aclFormat format = ACL_FORMAT_NHWC, Opdims config = FOUR_DIMS);
// Compile and run the operator
CV_EXPORTS void compileAndRunop(OperatorDesc &opDesc, vector<aclDataBuffer *> &inputBuffers_, vector<aclDataBuffer *> &outputBuffers_, aclCxt *acl_context);
CV_EXPORTS void compileAndRunop(OperatorDesc &opDesc, vector<aclDataBuffer *> &inputBuffers_, vector<aclDataBuffer *> &outputBuffers_, aclCxt *acl_context, int stream_id);
// Suitable for one input and one output
CV_EXPORTS void OneInAndOneOut(const aclMat &input, aclMat &output, const string opType);
CV_EXPORTS void OneInAndOneOut(const aclMat &input, aclMat &output, const string opType, int stream_id = 0);
// Suitable for tow input and one output
CV_EXPORTS void TwoInAndOneOut(const aclMat &inputMat, const aclMat &inputMatOther, aclMat &outputMat, const string opType);
CV_EXPORTS void TwoInAndOneOut(const aclMat &inputMat, const aclMat &inputMatOther, aclMat &outputMat, const string opType, int stream_id = 0);
// run the operator
CV_EXPORTS void Runop(vector<aclMat> &input, vector<aclMat> &output, OperatorDesc &opDesc);
CV_EXPORTS void Runop(vector<aclMat> &input, vector<aclMat> &output, OperatorDesc &opDesc, int stream_id);
} /* end of namespace acl */

View File

@@ -69,6 +69,10 @@ namespace cv
return global_aclenv;
}
void wait_stream(aclCxt * acl_context, const int stream_id)
{
aclrtSynchronizeStream(acl_context->get_stream(stream_id));
}
/////////////////////////create acl context////////////////////////
/**

View File

@@ -199,7 +199,7 @@ namespace cv
inputBuffers_.emplace_back(aclCreateDataBuffer(nullptr, 0));
outputBuffers_.emplace_back(aclCreateDataBuffer(newMat.data, newMat.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, this->acl_context);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, this->acl_context, 0);
*this = newMat;

View File

@@ -8,7 +8,7 @@ namespace cv
* @brief: matrix multiplication
*
*/
void MatMul(const aclMat& src1, const aclMat& src2, aclMat& dest)
void MatMul(const aclMat& src1, const aclMat& src2, aclMat& dest, int stream_id)
{
CV_Assert(src1.cols == src2.rows && src1.type() == src2.type());
vector<aclMat> input_Mat;
@@ -29,7 +29,7 @@ namespace cv
opDesc.AddInputTensorDesc(ACL_DT_UNDEFINED, 0, nullptr, ACL_FORMAT_UNDEFINED);
opDesc.AddTensorAttr("transpose_x1", OP_BOOL, false);
opDesc.AddTensorAttr("transpose_x2", OP_BOOL, false);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
for (size_t i = 0; i < inputBuffers_.size(); i++)
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i]));
@@ -45,7 +45,7 @@ namespace cv
* @param [in] stridesList: strides, The N and C dimensions must be set to 1
* @param [in] padSList: pads, vector<int64_t>(top, bottom, left, right)
*/
void Convolution(const aclMat& src, const aclMat& kernel, aclMat& dest, const vector<int64_t>& stridesList, const vector<int64_t>& padsList)
void Convolution(const aclMat& src, const aclMat& kernel, aclMat& dest, const vector<int64_t>& stridesList, const vector<int64_t>& padsList, int stream_id)
{
vector<aclDataBuffer *> inputBuffers_;
vector<aclDataBuffer *> outputBuffers_;
@@ -74,7 +74,7 @@ namespace cv
inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize));
inputBuffers_.emplace_back(aclCreateDataBuffer(kernel.data, kernel.totalSize));
outputBuffers_.emplace_back(aclCreateDataBuffer(acl_dest.data, acl_dest.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context, stream_id);
acl_dest.data = aclGetDataBufferAddr(outputBuffers_[0]);
dest = acl_dest;

View File

@@ -4,10 +4,10 @@ namespace cv
{
namespace acl
{
aclMat abs(const aclMat& a)
aclMat abs(const aclMat& a, int stream_id)
{
aclMat dest(a.rows, a.cols, a.type(), a.acl_context);
OneInAndOneOut(a, dest, "Abs");
OneInAndOneOut(a, dest, "Abs", stream_id);
return dest;
}
@@ -65,7 +65,7 @@ namespace cv
}
void pow(const aclMat& src, double power, aclMat& dest)
void pow(const aclMat& src, double power, aclMat& dest, int stream_id)
{
vector<aclMat> input_Mat;
vector<aclMat> output_Mat;
@@ -82,19 +82,23 @@ namespace cv
opDesc.AddInputTensorDesc(dataType, shape2.size(), shape2.data(), ACL_FORMAT_NHWC);
size_t size = aclGetTensorDescSize(opDesc.inputDesc[1]);
void *power_dev = power_data(power, dataType, size);
inputBuffers_.emplace_back(aclCreateDataBuffer(src.data, src.totalSize));
inputBuffers_.emplace_back(aclCreateDataBuffer(power_data(power, dataType, size), size));
inputBuffers_.emplace_back(aclCreateDataBuffer(power_dev, size));
outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
aclrtFree(power_dev);
for (size_t i = 0; i < inputBuffers_.size(); i++)
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i]));
for (size_t i = 0; i < outputBuffers_.size(); i++)
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
}
void add(const aclMat& src, const aclMat& other_src, aclMat& dest)
void add(const aclMat& src, const aclMat& other_src, aclMat& dest, int stream_id)
{
bool is_correct;
@@ -106,10 +110,10 @@ namespace cv
is_correct &= (src.type() == dest.type());
CV_Assert(is_correct);
TwoInAndOneOut(src, other_src, dest, "Add");
TwoInAndOneOut(src, other_src, dest, "Add", stream_id);
}
void divide(const aclMat& src, const aclMat& other_src, aclMat& dest)
void divide(const aclMat& src, const aclMat& other_src, aclMat& dest, int stream_id)
{
bool is_correct;
@@ -121,10 +125,10 @@ namespace cv
is_correct &= (src.type() == dest.type());
CV_Assert(is_correct);
TwoInAndOneOut(src, other_src, dest, "Div");
TwoInAndOneOut(src, other_src, dest, "Div", stream_id);
}
void exp(const aclMat& src, aclMat& dest)
void exp(const aclMat& src, aclMat& dest, int stream_id)
{
CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type());
@@ -145,13 +149,13 @@ namespace cv
opDesc.AddTensorAttr("scale", OP_FLOAT, 1.0);
opDesc.AddTensorAttr("shift", OP_FLOAT, 0.0);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0]));
}
void log(const aclMat &src, aclMat &dest)
void log(const aclMat &src, aclMat &dest, int stream_id)
{
CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type());
@@ -172,13 +176,13 @@ namespace cv
opDesc.AddTensorAttr("scale", OP_FLOAT, 1.0);
opDesc.AddTensorAttr("shift", OP_FLOAT, 0.0);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0]));
}
void max(const aclMat &src, const aclMat &other_src, aclMat &dest)
void max(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id)
{
bool is_correct;
@@ -190,10 +194,10 @@ namespace cv
is_correct &= (src.type() == dest.type());
CV_Assert(is_correct);
TwoInAndOneOut(src, other_src, dest, "Maximum");
TwoInAndOneOut(src, other_src, dest, "Maximum", stream_id);
}
void min(const aclMat &src, const aclMat &other_src, aclMat &dest)
void min(const aclMat &src, const aclMat &other_src, aclMat &dest, int stream_id)
{
bool is_correct;
@@ -205,14 +209,14 @@ namespace cv
is_correct &= (src.type() == dest.type());
CV_Assert(is_correct);
TwoInAndOneOut(src, other_src, dest, "Minimum");
TwoInAndOneOut(src, other_src, dest, "Minimum", stream_id);
}
void sqrt(const aclMat &src, aclMat &dest)
void sqrt(const aclMat &src, aclMat &dest, int stream_id)
{
CV_Assert(src.rows == dest.rows && src.cols == dest.cols && src.type() == dest.type());
OneInAndOneOut(src, dest, "Sqrt");
OneInAndOneOut(src, dest, "Sqrt", stream_id);
}
} /* end of namespace acl */

View File

@@ -108,7 +108,7 @@ namespace cv
return -1;
}
void merge(const vector<aclMat>& mv, aclMat& dest)
void merge(const vector<aclMat>& mv, aclMat& dest, int stream_id)
{
vector<aclDataBuffer *> inputBuffers_;
vector<aclDataBuffer *> outputBuffers_;
@@ -159,7 +159,7 @@ namespace cv
dest = temp;
outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
for (size_t i = 0; i < inputBuffers_.size(); i++)
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i]));
@@ -172,11 +172,12 @@ namespace cv
/**
* @brief : Dynamic shape reasoning, compiler problems
* @brief : Dynamic shape reasoning
*
*/
void transpose(const aclMat& src, aclMat& dest)
void transpose(const aclMat& src, aclMat& dest, int stream_id)
{
vector<aclDataBuffer *> inputBuffers_;
vector<aclDataBuffer *> outputBuffers_;
@@ -238,9 +239,7 @@ namespace cv
opDesc.outputDesc.data(),
outputBuffers_.data(),
opDesc.opAttr,
src.acl_context->get_stream(0)));
AclSafeCall(aclrtSynchronizeStream(src.acl_context->get_stream(0)));
dest.acl_context->get_stream(stream_id)));
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[1]));
@@ -252,8 +251,8 @@ namespace cv
}
/*
/* transposeD */
#if 0
void transpose(const aclMat& src, aclMat& dest)
{
vector<aclDataBuffer *> inputBuffers_;
@@ -279,7 +278,7 @@ namespace cv
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0]));
}
*/
#endif
static int split_type(int depth)
{
@@ -299,7 +298,7 @@ namespace cv
return -1;
}
void split(const aclMat& src, vector<aclMat>& mv)
void split(const aclMat& src, vector<aclMat>& mv, int stream_id)
{
vector<aclDataBuffer *> inputBuffers_;
vector<aclDataBuffer *> outputBuffers_;
@@ -333,7 +332,7 @@ namespace cv
outputBuffers_.emplace_back(aclCreateDataBuffer(mv[i].data, mv[i].totalSize));
}
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context, stream_id);
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
for (int i = 0; i < num_split; ++i)
@@ -341,7 +340,7 @@ namespace cv
}
/*
#if 0
//disable
void split(const aclMat& src, vector<aclMat>& mv)
@@ -427,9 +426,9 @@ namespace cv
for (int i = 0; i < num_split; ++i)
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
}
*/
#endif
static void flip_(const aclMat& src, aclMat& dest, int axis)
static void flip_(const aclMat& src, aclMat& dest, int axis, int stream_id)
{
vector<aclDataBuffer *> inputBuffers_;
vector<aclDataBuffer *> outputBuffers_;
@@ -456,26 +455,26 @@ namespace cv
outputBuffers_.emplace_back(aclCreateDataBuffer(dest.data, dest.totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, src.acl_context);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, dest.acl_context, stream_id);
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[0]));
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[1]));
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[0]));
}
void flip(const aclMat& src, aclMat& dest, int filpCode)
void flip(const aclMat& src, aclMat& dest, int filpCode, int stream_id)
{
if (filpCode == 0) {
flip_(src, dest, 1);
flip_(src, dest, 1, stream_id);
}
else if (filpCode > 0) {
flip_(src, dest, 2);
flip_(src, dest, 2, stream_id);
}
else {
flip_(src, dest, 2);
flip_(src, dest, 2, stream_id);
aclMat tmp(dest.rows, dest.cols, dest.type(), dest.acl_context);
aclrtMemcpy(tmp.data, dest.totalSize, dest.data, dest.totalSize, ACL_MEMCPY_DEVICE_TO_DEVICE);
flip_(tmp, dest, 1);
flip_(tmp, dest, 1, stream_id);
}
}
} /* end of namespace acl */

View File

@@ -108,7 +108,7 @@ namespace cv
* @brief compile and run operator
*
*/
void compileAndRunop(OperatorDesc& opDesc, vector<aclDataBuffer *>& inputBuffers_, vector<aclDataBuffer *>& outputBuffers_, aclCxt *acl_context)
void compileAndRunop(OperatorDesc& opDesc, vector<aclDataBuffer *>& inputBuffers_, vector<aclDataBuffer *>& outputBuffers_, aclCxt *acl_context, int stream_id)
{
AclSafeCall(aclopCompile(opDesc.opType.c_str(),
opDesc.inputDesc.size(),
@@ -128,14 +128,10 @@ namespace cv
opDesc.outputDesc.data(),
outputBuffers_.data(),
opDesc.opAttr,
acl_context->get_stream(0)));
AclSafeCall(aclrtSynchronizeStream(acl_context->get_stream(0)));
acl_context->get_stream(stream_id)));
}
void Runop(vector<aclMat>& input, vector<aclMat>& output, OperatorDesc& opDesc)
void Runop(vector<aclMat>& input, vector<aclMat>& output, OperatorDesc& opDesc, int stream_id)
{
size_t i;
@@ -147,7 +143,7 @@ namespace cv
for (i = 0; i < output.size(); ++i)
outputBuffers_.emplace_back(aclCreateDataBuffer(output[i].data, output[i].totalSize));
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, output[0].acl_context);
compileAndRunop(opDesc, inputBuffers_, outputBuffers_, output[0].acl_context, stream_id);
for (i = 0; i < input.size(); ++i)
AclSafeCall(aclDestroyDataBuffer(inputBuffers_[i]));
@@ -155,7 +151,7 @@ namespace cv
AclSafeCall(aclDestroyDataBuffer(outputBuffers_[i]));
}
void OneInAndOneOut(const aclMat& inputMat, aclMat& outputMat, const string opType)
void OneInAndOneOut(const aclMat& inputMat, aclMat& outputMat, const string opType, int stream_id)
{
vector<aclMat> input_Mat;
vector<aclMat> output_Mat;
@@ -164,10 +160,10 @@ namespace cv
output_Mat.emplace_back(outputMat);
OperatorDesc opDesc = CreateOpDesc(opType, input_Mat, output_Mat);
Runop(input_Mat, output_Mat, opDesc);
Runop(input_Mat, output_Mat, opDesc, stream_id);
}
void TwoInAndOneOut(const aclMat& inputMat, const aclMat& inputMatOther, aclMat& outputMat, const string opType)
void TwoInAndOneOut(const aclMat& inputMat, const aclMat& inputMatOther, aclMat& outputMat, const string opType, int stream_id)
{
vector<aclMat> input_Mat;
vector<aclMat> output_Mat;
@@ -177,7 +173,7 @@ namespace cv
output_Mat.emplace_back(outputMat);
OperatorDesc opDesc = CreateOpDesc(opType, input_Mat, output_Mat);
Runop(input_Mat, output_Mat, opDesc);
Runop(input_Mat, output_Mat, opDesc, stream_id);
}
} /* end of namespace acl */

View File

@@ -7,10 +7,9 @@ namespace opencv_test
{
namespace
{
aclCxt *acl_context_0 = set_device("../../modules/acl/test/acl.json", 0, 2);
aclCxt *acl_context_0 = set_device("../../modules/acl/test/acl.json", 0, 3);
////////////////////////////////////////////////////Correctness_test////////////////////////////////////////////////////////
#if 0
/* range: rows: 1 ~ 64, cols: 1 ~ 64, type: 0 ~ 7
* test function:
* config: MEMORY_ALIGN
@@ -179,6 +178,7 @@ namespace opencv_test
AclMat_Test test;
test.Test_operator_div(acl_context_0);
}
#endif
////////////////////////////////////////////////////Perf_test////////////////////////////////////////////////////////
TEST(Operator, add)
@@ -216,6 +216,7 @@ namespace opencv_test
PERF_TEST test;
test.Test_Pow(acl_context_0);
}
TEST(Mathfunction, sqrt)
{
PERF_TEST test;
@@ -302,28 +303,6 @@ namespace opencv_test
release_device(acl_context_0);
}
#ifdef DEBUG
TEST(Test, other)
{
aclCxt *acl_context_0 = set_device("../../modules/acl/test/acl.json", 0, 2);
PERF_TEST test;
test.Test_other(acl_context_0);
release_device(acl_context_0);
}
TEST(Test, other1)
{
aclCxt *acl_context_0 = set_device("../../modules/acl/test/acl.json", 0, 2);
PERF_TEST test;
test.Test_other1(acl_context_0);
release_device(acl_context_0);
}
TEST(Test, other2)
{
PERF_TEST test;
test.Test_other2();
}
#endif
}
}

View File

@@ -1 +1,2 @@
{}
{
}

View File

@@ -1,19 +1,12 @@
#include "test_common.hpp"
#include "test_perf.hpp"
#define CHECK(cmd) do { \
aclError e = cmd; \
if( e != ACL_ERROR_NONE) { \
printf("Failed: ACL error %s:%d '%d'\n", \
__FILE__,__LINE__,e); \
exit(0); \
} \
} while(0)
void PERF_TEST::Test_operator_add_perf(aclCxt *acl_context)
{
int val;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
@@ -23,7 +16,7 @@ void PERF_TEST::Test_operator_add_perf(aclCxt *acl_context)
test.PrintLog("Perf test : Function: operator+=()", type[i]);
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
n = cycle_index;
Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
@@ -38,14 +31,17 @@ void PERF_TEST::Test_operator_add_perf(aclCxt *acl_context)
while (n--)
mat_dest += mat_src;
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
aclmat_dest += aclmat_src;
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
aclmat_dest += aclmat_src;
wait_stream(acl_context);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -61,18 +57,19 @@ void PERF_TEST::Test_operator_add_perf(aclCxt *acl_context)
void PERF_TEST::Test_operator_sub_perf(aclCxt *acl_context)
{
int val;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1, CV_32SC1, CV_64FC1};
vector<int> type{CV_8UC1, CV_32FC1, CV_32SC1};
for (size_t i = 0; i < type.size(); ++i)
{
test.PrintLog("Perf test : Function: operator-=()", type[i]);
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
n = cycle_index;
Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
@@ -87,18 +84,21 @@ void PERF_TEST::Test_operator_sub_perf(aclCxt *acl_context)
while (n--)
mat_dest -= mat_src;
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
aclmat_dest -= aclmat_src;
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
aclmat_dest -= aclmat_src;
wait_stream(acl_context);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret);
//bool ret = test.Test_Diff(mat_dest, mat_dest1);
//ASSERT_TRUE(ret);
if (val < 128)
cout << "Shape: " << val << " x " << val << "\t\t";
else
@@ -111,18 +111,19 @@ void PERF_TEST::Test_operator_sub_perf(aclCxt *acl_context)
void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context)
{
int val;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_8UC1, CV_32FC1, CV_32SC1, CV_64FC1};
vector<int> type{CV_32FC1};
for (size_t i = 0; i < type.size(); ++i)
{
test.PrintLog("Perf test : Function: operator/=()", type[i]);
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
n = cycle_index;
Mat mat_src(val, val, type[i], Scalar(1, 2, 4));
Mat mat_dest(val, val, type[i], Scalar(2, 4, 8));
Mat mat_dest1(val, val, type[i]);
@@ -134,18 +135,21 @@ void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context)
while (n--)
mat_dest /= mat_src;
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
aclmat_dest /= aclmat_src;
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
aclmat_dest /= aclmat_src;
wait_stream(acl_context);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret);
//bool ret = test.Test_Diff(mat_dest, mat_dest1);
//ASSERT_TRUE(ret);
if (val < 128)
cout << "Shape: " << val << " x " << val << "\t\t";
else
@@ -158,37 +162,43 @@ void PERF_TEST::Test_operator_div_perf(aclCxt *acl_context)
void PERF_TEST::Test_operator_mul_perf(aclCxt *acl_context)
{
int val, type;
int val, n;
int valmax = 4096;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1;
for (size_t i = 0; i < type.size(); ++i)
{
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
Mat mat_src(val, val, type);
Mat mat_dest(val, val, type);
Mat mat_dest1(val, val, type);
n = cycle_index;
Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src, 1);
test.SetDataRange(mat_dest, 1);
aclMat aclmat_src(val, val, type, mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context);
aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
mat_dest *= mat_src;
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
aclmat_dest *= aclmat_src;
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
aclmat_dest *= aclmat_src;
wait_stream(acl_context);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -199,255 +209,6 @@ void PERF_TEST::Test_operator_mul_perf(aclCxt *acl_context)
cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
}
}
void PERF_TEST::Test_other(aclCxt *acl_context)
{
std::vector<aclDataBuffer *> input_buffers_;
std::vector<aclDataBuffer *> output_buffers_;
std::vector<aclTensorDesc *> input_descs_;
std::vector<aclTensorDesc *> output_descs_;
string op_type_ = "ConcatD";
auto *attr_ = aclopCreateAttr();
vector<int64_t> a = {0};
aclopSetAttrInt(attr_, "N", 2);
aclopSetAttrInt(attr_, "concat_dim", 0);
vector<int64_t> dims0 = {2, 4};
auto size0 = 2 * 4 * 4;
auto *desc0 = aclCreateTensorDesc(ACL_FLOAT, dims0.size(), dims0.data(), ACL_FORMAT_NCHW);
void *ptr0;
vector<float> data0;
for (auto i = 0; i < 8; ++i)
{
data0.emplace_back(i);
}
CHECK(aclrtMalloc(&ptr0, 2 * 4 * 4, ACL_MEM_MALLOC_HUGE_FIRST));
aclrtMemcpy(ptr0, data0.size() * 4, data0.data(), data0.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE);
auto *buffer0 = aclCreateDataBuffer(ptr0, size0);
input_descs_.push_back(desc0);
input_buffers_.push_back(buffer0);
cout << "input0 done" << endl;
vector<int64_t> dims1 = {2, 4};
auto *desc1 = aclCreateTensorDesc(ACL_FLOAT, dims1.size(), dims1.data(), ACL_FORMAT_NCHW);
input_descs_.push_back(desc1);
void *ptr1;
CHECK(aclrtMalloc(&ptr1, 1024, ACL_MEM_MALLOC_HUGE_FIRST));
std::vector<float> data1;
for (auto i = 0; i < 8; ++i)
{
data1.emplace_back(i);
}
aclrtMemcpy(ptr1, data1.size() * 4, data1.data(), data1.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE);
auto *buffer1 = aclCreateDataBuffer(ptr1, 2 * 4 * 4);
input_buffers_.push_back(buffer1);
cout << "input1 done" << endl;
vector<int64_t> dims2 = {4, 4};
auto *desc2 = aclCreateTensorDesc(ACL_FLOAT, dims2.size(), dims2.data(), ACL_FORMAT_NCHW);
output_descs_.push_back(desc2);
void *ptr2;
CHECK(aclrtMalloc(&ptr2, 1024, ACL_MEM_MALLOC_HUGE_FIRST));
std::vector<float> data2;
for (auto i = 0; i < 256; ++i)
{
data1.emplace_back(i);
}
aclrtMemcpy(ptr2, data2.size() * 4, data2.data(), data2.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE);
auto *buffer2 = aclCreateDataBuffer(ptr2, 4 * 4 * 4);
output_buffers_.push_back(buffer2);
cout << "output0 done" << endl;
aclError ret = aclopCompileAndExecute(
op_type_.c_str(), input_descs_.size(), input_descs_.data(),
input_buffers_.data(), output_descs_.size(), output_descs_.data(),
output_buffers_.data(), attr_, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL,
acl_context->get_stream(0));
std::cout << "aclopCompileAndExecutr:" << ret << std::endl;
CHECK(aclrtSynchronizeStream(acl_context->get_stream(0)));
std::cout << "aclrtSynchronizeStream ok" << std::endl;
vector<float> res;
for (auto i = 0; i < 256 + 256; ++i)
{
res.emplace_back(i);
}
CHECK(aclrtMemcpy(res.data(), res.size() * 4, ptr2, res.size() * 4, ACL_MEMCPY_DEVICE_TO_HOST));
for (auto item : res)
{
cout << item << " ";
}
cout << endl;
}
void PERF_TEST::Test_other1(aclCxt *acl_context)
{
std::vector<aclDataBuffer *> input_buffers_;
std::vector<aclDataBuffer *> output_buffers_;
std::vector<aclTensorDesc *> input_descs_;
std::vector<aclTensorDesc *> output_descs_;
string op_type_ = "ConcatD";
auto *attr_ = aclopCreateAttr();
vector<int64_t> a = {0};
aclopSetAttrInt(attr_, "N", 2);
aclopSetAttrInt(attr_, "concat_dim", 0);
Common_Test test;
Mat src(2, 4, CV_32FC1);
test.SetDataRange(src, 8);
aclMat acl_src(2, 4, CV_32FC1, src.data, acl_context);
vector<int64_t> dims0 = {2, 4};
auto size0 = 2 * 4 * 4;
auto *desc0 = aclCreateTensorDesc(ACL_FLOAT, dims0.size(), dims0.data(), ACL_FORMAT_NHWC);
auto *buffer0 = aclCreateDataBuffer(acl_src.data, size0);
input_descs_.push_back(desc0);
input_buffers_.push_back(buffer0);
std::cout << "input0 done" << endl;
Mat src1(2, 4, CV_32FC1);
test.SetDataRange(src1, 8);
aclMat acl_src1(2, 4, CV_32FC1, src1.data, acl_context);
vector<int64_t> dims1 = {2, 4};
auto size1 = 2 * 4 * 4;
auto *desc1 = aclCreateTensorDesc(ACL_FLOAT, dims1.size(), dims1.data(), ACL_FORMAT_NHWC);
auto *buffer1 = aclCreateDataBuffer(acl_src1.data, size1);
input_descs_.push_back(desc1);
input_buffers_.push_back(buffer1);
std::cout << "input1 done" << endl;
aclMat acl_dest(4, 4, CV_32FC1, acl_context);
vector<int64_t> dims2 = {4, 4};
auto size3 = 4 * 4 * 4;
auto *desc2 = aclCreateTensorDesc(ACL_FLOAT, dims2.size(), dims2.data(), ACL_FORMAT_NHWC);
auto *buffer2 = aclCreateDataBuffer(acl_dest.data, size3);
output_descs_.push_back(desc2);
output_buffers_.push_back(buffer2);
std::cout << "output0 done" << endl;
aclError ret = aclopCompileAndExecute(
op_type_.c_str(), input_descs_.size(), input_descs_.data(),
input_buffers_.data(), output_descs_.size(), output_descs_.data(),
output_buffers_.data(), attr_, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL,
acl_context->get_stream(0));
std::cout << "aclopCompileAndExecutr:" << ret << std::endl;
CHECK(aclrtSynchronizeStream(acl_context->get_stream(0)));
std::cout << "aclrtSynchronizeStream ok" << std::endl;
vector<float> res;
for (auto i = 0; i < 256 + 256; ++i)
{
res.emplace_back(i);
}
CHECK(aclrtMemcpy(res.data(), res.size() * 4, acl_dest.data, res.size() * 4, ACL_MEMCPY_DEVICE_TO_HOST));
for (auto item : res)
{
std::cout << item << " ";
}
std::cout << endl;
}
void PERF_TEST::Test_other2()
{
CHECK(aclInit(nullptr));
std::cout << "aclInit ok" << std::endl;
CHECK(aclrtSetDevice(0));
std::cout << "aclrtSetDevice 0 ok" << std::endl;
std::vector<aclDataBuffer *> input_buffers_;
std::vector<aclDataBuffer *> output_buffers_;
std::vector<aclTensorDesc *> input_descs_;
std::vector<aclTensorDesc *> output_descs_;
string op_type_ = "ConcatD";
auto *attr_ = aclopCreateAttr();
vector<int64_t> a = {0};
aclopSetAttrInt(attr_, "N", 2);
aclopSetAttrInt(attr_, "concat_dim", 0);
vector<int64_t> dims0 = {2, 4};
auto size0 = 2 * 4 * 4;
auto *desc0 = aclCreateTensorDesc(ACL_FLOAT, dims0.size(), dims0.data(), ACL_FORMAT_NCHW);
void *ptr0;
vector<float> data0;
for (auto i = 0; i < 8; ++i)
{
data0.emplace_back(i);
}
CHECK(aclrtMalloc(&ptr0, 2 * 4 * 4, ACL_MEM_MALLOC_HUGE_FIRST));
// std::cout << "ptr:" << ptr0 << " ptr+256:" << ptr0+256;
aclrtMemcpy(ptr0, data0.size() * 4, data0.data(), data0.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE);
auto *buffer0 = aclCreateDataBuffer(ptr0, size0);
input_descs_.push_back(desc0);
input_buffers_.push_back(buffer0);
cout << "input0 done" << endl;
vector<int64_t> dims1 = {2, 4};
auto *desc1 = aclCreateTensorDesc(ACL_FLOAT, dims1.size(), dims1.data(), ACL_FORMAT_NCHW);
input_descs_.push_back(desc1);
void *ptr1;
CHECK(aclrtMalloc(&ptr1, 1024, ACL_MEM_MALLOC_HUGE_FIRST));
std::vector<float> data1;
for (auto i = 0; i < 8; ++i)
{
data1.emplace_back(i);
}
aclrtMemcpy(ptr1, data1.size() * 4, data1.data(), data1.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE);
auto *buffer1 = aclCreateDataBuffer(ptr1, 2 * 4 * 4);
input_buffers_.push_back(buffer1);
cout << "input1 done" << endl;
vector<int64_t> dims2 = {4, 4};
auto *desc2 = aclCreateTensorDesc(ACL_FLOAT, dims2.size(), dims2.data(), ACL_FORMAT_NCHW);
output_descs_.push_back(desc2);
void *ptr2;
CHECK(aclrtMalloc(&ptr2, 1024, ACL_MEM_MALLOC_HUGE_FIRST));
std::vector<float> data2;
for (auto i = 0; i < 256; ++i)
{
data1.emplace_back(i);
}
aclrtMemcpy(ptr2, data2.size() * 4, data2.data(), data2.size() * 4, ACL_MEMCPY_HOST_TO_DEVICE);
auto *buffer2 = aclCreateDataBuffer(ptr2, 4 * 4 * 4);
output_buffers_.push_back(buffer2);
cout << "output0 done" << endl;
aclrtStream stream = nullptr;
aclrtCreateStream(&stream);
cout << 2 << endl;
aclError ret = aclopCompileAndExecute(
op_type_.c_str(), input_descs_.size(), input_descs_.data(),
input_buffers_.data(), output_descs_.size(), output_descs_.data(),
output_buffers_.data(), attr_, ACL_ENGINE_SYS, ACL_COMPILE_SYS, NULL,
stream);
cout << 3 << endl;
std::cout << "aclopCompileAndExecutr:" << ret << std::endl;
CHECK(aclrtSynchronizeStream(stream));
std::cout << "aclrtSynchronizeStream ok" << std::endl;
vector<float> res;
for (auto i = 0; i < 256 + 256; ++i)
{
res.emplace_back(i);
}
CHECK(aclrtMemcpy(res.data(), res.size() * 4, ptr2, res.size() * 4, ACL_MEMCPY_DEVICE_TO_HOST));
for (auto item : res)
{
cout << item << " ";
}
cout << endl;
}

View File

@@ -159,30 +159,98 @@ void Common_Test::PrintLog(const string& funcname, int type)
cout << funcname << "\t"
<< "Type: CV_8UC1" << endl;
break;
case CV_8UC2:
cout << funcname << "\t"
<< "Type: CV_8UC2" << endl;
break;
case CV_8UC3:
cout << funcname << "\t"
<< "Type: CV_8UC3" << endl;
break;
case CV_8UC4:
cout << funcname << "\t"
<< "Type: CV_8UC4" << endl;
break;
case CV_8SC1:
cout << funcname << "\t"
<< "Type: CV_8SC1" << endl;
break;
case CV_8SC2:
cout << funcname << "\t"
<< "Type: CV_8SC2" << endl;
break;
case CV_8SC3:
cout << funcname << "\t"
<< "Type: CV_8SC3" << endl;
break;
case CV_8SC4:
cout << funcname << "\t"
<< "Type: CV_8SC4" << endl;
break;
case CV_16FC1:
cout << funcname << "\t"
<< "Type: CV_16FC1" << endl;
break;
case CV_16FC2:
cout << funcname << "\t"
<< "Type: CV_16FC2" << endl;
break;
case CV_16FC3:
cout << funcname << "\t"
<< "Type: CV_16FC3" << endl;
break;
case CV_16FC4:
cout << funcname << "\t"
<< "Type: CV_16FC4" << endl;
break;
case CV_32FC1:
cout << funcname << "\t"
<< "Type: CV_32FC1" << endl;
break;
case CV_32FC2:
cout << funcname << "\t"
<< "Type: CV_32FC2" << endl;
break;
case CV_32FC3:
cout << funcname << "\t"
<< "Type: CV_32FC3" << endl;
break;
case CV_32FC4:
cout << funcname << "\t"
<< "Type: CV_32FC4" << endl;
break;
case CV_32SC1:
cout << funcname << "\t"
<< "Type: CV_32SC1" << endl;
break;
case CV_32SC2:
cout << funcname << "\t"
<< "Type: CV_32SC2" << endl;
break;
case CV_32SC3:
cout << funcname << "\t"
<< "Type: CV_32SC3" << endl;
break;
case CV_32SC4:
cout << funcname << "\t"
<< "Type: CV_32SC4" << endl;
break;
case CV_64FC1:
cout << funcname << "\t"
<< "Type: CV_64FC1" << endl;
break;
case CV_64FC2:
cout << funcname << "\t"
<< "Type: CV_64FC2" << endl;
break;
case CV_64FC3:
cout << funcname << "\t"
<< "Type: CV_64FC3" << endl;
break;
case CV_64FC4:
cout << funcname << "\t"
<< "Type: CV_64FC4" << endl;
break;
default:
break;
}

View File

@@ -576,6 +576,7 @@ void AclMat_Test::Test_operator_add(aclCxt *acl_context) {
mat_dest += mat_src;
aclmat_dest += aclmat_src;
wait_stream(acl_context);
aclmat_dest.download(mat_dest1, MEMORY_ALIGN);
ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -609,7 +610,9 @@ void AclMat_Test::Test_operator_sub(aclCxt *acl_context) {
aclMat aclmat_dest(rows, cols, type[i], mat_dest.data, acl_context, MEMORY_ALIGN);
mat_dest -= mat_src;
aclmat_dest -= aclmat_src;
wait_stream(acl_context);
aclmat_dest.download(mat_dest1, MEMORY_ALIGN);
ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -643,7 +646,9 @@ void AclMat_Test::Test_operator_div(aclCxt *acl_context) {
aclMat aclmat_dest(rows, cols, type[i], mat_dest.data, acl_context, MEMORY_ALIGN);
mat_dest /= mat_src;
aclmat_dest /= aclmat_src;
wait_stream(acl_context);
aclmat_dest.download(mat_dest1, MEMORY_ALIGN);
ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -676,7 +681,9 @@ void AclMat_Test::Test_operator_mul(aclCxt *acl_context) {
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
mat_dest *= mat_src;
aclmat_dest *= aclmat_src;
wait_stream(acl_context);
aclmat_dest.download(mat_dest1);
ret = test.Test_Diff(mat_dest, mat_dest1);

View File

@@ -3,41 +3,46 @@
void PERF_TEST::Test_MatMul(aclCxt *acl_context)
{
int val, type;
int val, n;
int valmax = 4096;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1;
for (size_t i = 0; i < type.size(); ++i)
{
for (val = 8; val <= valmax; val *= 2)
{
Mat mat_src(val, val, type);
Mat mat_src1(val, val, type);
Mat mat_dest(val, val, type);
Mat mat_dest1(val, val, type);
Mat mat_src(val, val, type[i]);
Mat mat_src1(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src, 32);
test.SetDataRange(mat_src1, 32);
test.SetDataRange(mat_dest, 32);
aclMat aclmat_src(val, val, type, mat_src.data, acl_context);
aclMat aclmat_src1(val, val, type, mat_src1.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context);
int n = 100;
aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_src1(val, val, type[i], mat_src1.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
n = cycle_index;
begin = static_cast<double>(getTickCount());
while (n--)
mat_dest = mat_src * mat_src1;
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
MatMul(aclmat_src1, aclmat_src, aclmat_dest, 0);
wait_stream(acl_context, 0);
begin = static_cast<double>(getTickCount());
while (n--)
MatMul(aclmat_src1, aclmat_src, aclmat_dest);
MatMul(aclmat_src1, aclmat_src, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -48,43 +53,50 @@ void PERF_TEST::Test_MatMul(aclCxt *acl_context)
cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
}
}
void PERF_TEST::Test_Convolution(aclCxt *acl_context)
{
int val, type;
int val, n;
int valmax = 4096;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1;
for (size_t i = 0; i < type.size(); ++i)
{
for (val = 8; val <= valmax; val *= 2)
{
Mat mat_src(val, val, type, Scalar{1, 2});
Mat mat_kernel(3, 3, type, Scalar(1, 4));
Mat mat_dest(val, val, type, Scalar{6});
Mat mat_src(val, val, type[i], Scalar{1, 2});
Mat mat_kernel(3, 3, type[i], Scalar(1, 4));
Mat mat_dest(val, val, type[i], Scalar{6});
aclMat aclmat_src(val, val, type, mat_src.data, acl_context);
aclMat aclmat_kernel(3, 3, type, mat_kernel.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context);
int n = 100;
aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_kernel(3, 3, type[i], mat_kernel.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
n = cycle_index;
begin = static_cast<double>(getTickCount());
while (n--)
filter2D(mat_src, mat_dest, -1, mat_kernel);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
begin = static_cast<double>(getTickCount());
vector<int64_t> strides{1, 1, 1, 1};
vector<int64_t> pads{1, 1, 1, 1};
n = (cycle_index - 1);
Convolution(aclmat_src, aclmat_kernel, aclmat_dest, strides, pads, 0);
wait_stream(acl_context, 0);
begin = static_cast<double>(getTickCount());
while (n--)
Convolution(aclmat_src, aclmat_kernel, aclmat_dest, strides, pads);
Convolution(aclmat_src, aclmat_kernel, aclmat_dest, strides, pads, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
Mat mat_dest1(aclmat_dest.rows, aclmat_dest.cols, type);
acltime = (end - begin) / getTickFrequency();
Mat mat_dest1(aclmat_dest.rows, aclmat_dest.cols, type[i]);
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
/*
@@ -97,4 +109,5 @@ void PERF_TEST::Test_Convolution(aclCxt *acl_context)
cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
}
}

View File

@@ -3,18 +3,19 @@
void PERF_TEST::Test_Abs(aclCxt *acl_context)
{
int val;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1, CV_32SC1};
vector<int> type{CV_32FC1};
for (size_t i = 0; i < type.size(); ++i)
{
test.PrintLog("Perf test : Function: Abs()", type[i]);
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
n = cycle_index;
Mat mat_src(val, val, type[i], Scalar{-2});
Mat mat_dest(val, val, type[i], Scalar{-4});
Mat mat_dest1(val, val, type[i], Scalar{-6});
@@ -26,14 +27,17 @@ void PERF_TEST::Test_Abs(aclCxt *acl_context)
while (n--)
mat_dest = abs(mat_src);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
aclmat_dest = abs(aclmat_src, 0);
wait_stream(acl_context, 0);
begin = static_cast<double>(getTickCount());
while (n--)
aclmat_dest = abs(aclmat_src);
aclmat_dest = abs(aclmat_src, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -49,8 +53,9 @@ void PERF_TEST::Test_Abs(aclCxt *acl_context)
void PERF_TEST::Test_Pow(aclCxt *acl_context)
{
int val;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
@@ -60,7 +65,7 @@ void PERF_TEST::Test_Pow(aclCxt *acl_context)
test.PrintLog("Perf test : Function: Pow()", type[i]);
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
n = cycle_index;
int power = test.RandDom_(6);
Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
@@ -75,14 +80,17 @@ void PERF_TEST::Test_Pow(aclCxt *acl_context)
while (n--)
pow(mat_src, power, mat_dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
pow(aclmat_src, power, aclmat_dest, 0);
wait_stream(acl_context, 0);
begin = static_cast<double>(getTickCount());
while (n--)
pow(aclmat_src, power, aclmat_dest);
pow(aclmat_src, power, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
if (val < 128)
@@ -96,38 +104,43 @@ void PERF_TEST::Test_Pow(aclCxt *acl_context)
void PERF_TEST::Test_Sqrt(aclCxt *acl_context)
{
int val, type;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1;
for (size_t i = 0; i < type.size(); ++i)
{
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
Mat mat_src(val, val, type);
Mat mat_dest(val, val, type);
Mat mat_dest1(val, val, type);
n = cycle_index;
Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src, 32);
test.SetDataRange(mat_dest, 32);
aclMat aclmat_src(val, val, type, mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context);
aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
sqrt(mat_src, mat_dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
sqrt(aclmat_src, aclmat_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
sqrt(aclmat_src, aclmat_dest);
sqrt(aclmat_src, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
if (val < 128)
@@ -136,45 +149,52 @@ void PERF_TEST::Test_Sqrt(aclCxt *acl_context)
cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
}
}
void PERF_TEST::Test_Add(aclCxt *acl_context)
{
int val, type;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1;
for (size_t i = 0; i < type.size(); ++i)
{
for (val = 8; val <= valmax; val *= 2)
{
Common_Test test;
int n = 100;
Mat mat_src1(val, val, type);
Mat mat_src2(val, val, type);
Mat mat_dest(val, val, type);
Mat mat_dest1(val, val, type);
n = cycle_index;
Mat mat_src1(val, val, type[i]);
Mat mat_src2(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src1, 32);
test.SetDataRange(mat_src2, 32);
test.SetDataRange(mat_dest, 32);
aclMat aclmat_src1(val, val, type, mat_src1.data, acl_context);
aclMat aclmat_src2(val, val, type, mat_src2.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context);
aclMat aclmat_src1(val, val, type[i], mat_src1.data, acl_context);
aclMat aclmat_src2(val, val, type[i], mat_src2.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
add(mat_src1, mat_src2, mat_dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
add(aclmat_src1, aclmat_src2, aclmat_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
add(aclmat_src1, aclmat_src2, aclmat_dest);
add(aclmat_src1, aclmat_src2, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -185,91 +205,104 @@ void PERF_TEST::Test_Add(aclCxt *acl_context)
cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
}
}
void PERF_TEST::Test_Divide(aclCxt *acl_context)
{
int val, type;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1;
for (size_t i = 0; i < type.size(); ++i)
{
for (val = 8; val <= valmax; val *= 2)
{
Common_Test test;
int n = 100;
Mat mat_src1(val, val, type);
Mat mat_src2(val, val, type);
Mat mat_dest(val, val, type);
Mat mat_dest1(val, val, type);
n = cycle_index;
Mat mat_src1(val, val, type[i]);
Mat mat_src2(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src1, 32);
test.SetDataRange(mat_src2, 4);
test.SetDataRange(mat_dest, 32);
aclMat aclmat_src1(val, val, type, mat_src1.data, acl_context);
aclMat aclmat_src2(val, val, type, mat_src2.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context);
aclMat aclmat_src1(val, val, type[i], mat_src1.data, acl_context);
aclMat aclmat_src2(val, val, type[i], mat_src2.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
divide(mat_src1, mat_src2, mat_dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
divide(aclmat_src1, aclmat_src2, aclmat_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
divide(aclmat_src1, aclmat_src2, aclmat_dest);
divide(aclmat_src1, aclmat_src2, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret);
// bool ret = test.Test_Diff(mat_dest, mat_dest1);
// ASSERT_TRUE(ret);
if (val < 128)
cout << "Shape: " << val << " x " << val << "\t\t";
else
cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
}
}
void PERF_TEST::Test_Exp(aclCxt *acl_context)
{
int val, type;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1;
for (size_t i = 0; i < type.size(); ++i)
{
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
Mat mat_src(val, val, type);
Mat mat_dest(val, val, type);
Mat mat_dest1(val, val, type);
n = cycle_index;
Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src, 32);
test.SetDataRange(mat_dest, 2);
aclMat aclmat_src(val, val, type, mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context);
aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
exp(mat_src, mat_dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
exp(aclmat_src, aclmat_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
exp(aclmat_src, aclmat_dest);
exp(aclmat_src, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
if (val < 128)
@@ -278,42 +311,49 @@ void PERF_TEST::Test_Exp(aclCxt *acl_context)
cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
}
}
void PERF_TEST::Test_Log(aclCxt *acl_context)
{
int val, type;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1};
type = CV_32FC1;
for (size_t i = 0; i < type.size(); ++i)
{
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
Mat mat_src(val, val, type);
Mat mat_dest(val, val, type);
Mat mat_dest1(val, val, type);
n = cycle_index;
Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src, 32);
test.SetDataRange(mat_dest, 32);
aclMat aclmat_src(val, val, type, mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context);
aclMat aclmat_src(val, val, type[i], mat_src.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
log(mat_src, mat_dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
log(aclmat_src, aclmat_dest, 1);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
log(aclmat_src, aclmat_dest);
log(aclmat_src, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
if (val < 128)
@@ -322,45 +362,52 @@ void PERF_TEST::Test_Log(aclCxt *acl_context)
cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
}
}
void PERF_TEST::Test_Max(aclCxt *acl_context)
{
int val, type;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC2};
type = CV_32FC2;
for (size_t i = 0; i < type.size(); ++i)
{
for (val = 8; val <= valmax; val *= 2)
{
Common_Test test;
int n = 100;
Mat mat_src1(val, val, type);
Mat mat_src2(val, val, type);
Mat mat_dest(val, val, type);
Mat mat_dest1(val, val, type);
n = cycle_index;
Mat mat_src1(val, val, type[i]);
Mat mat_src2(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src1, 32);
test.SetDataRange(mat_src2, 32);
test.SetDataRange(mat_dest, 32);
aclMat aclmat_src1(val, val, type, mat_src2.data, acl_context);
aclMat aclmat_src2(val, val, type, mat_src1.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context);
aclMat aclmat_src1(val, val, type[i], mat_src2.data, acl_context);
aclMat aclmat_src2(val, val, type[i], mat_src1.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
cv::max(mat_src1, mat_src2, mat_dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
cv::acl::max(aclmat_src1, aclmat_src2, aclmat_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
cv::acl::max(aclmat_src1, aclmat_src2, aclmat_dest);
cv::acl::max(aclmat_src1, aclmat_src2, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -371,45 +418,51 @@ void PERF_TEST::Test_Max(aclCxt *acl_context)
cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
}
}
void PERF_TEST::Test_Min(aclCxt *acl_context)
{
int val, type;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC3};
type = CV_32FC3;
for (size_t i = 0; i < type.size(); ++i)
{
for (val = 8; val <= valmax; val *= 2)
{
Common_Test test;
int n = 100;
Mat mat_src1(val, val, type);
Mat mat_src2(val, val, type);
Mat mat_dest(val, val, type);
Mat mat_dest1(val, val, type);
int n = cycle_index;
Mat mat_src1(val, val, type[i]);
Mat mat_src2(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
test.SetDataRange(mat_src1, 32);
test.SetDataRange(mat_src2, 32);
test.SetDataRange(mat_dest, 32);
aclMat aclmat_src1(val, val, type, mat_src2.data, acl_context);
aclMat aclmat_src2(val, val, type, mat_src1.data, acl_context);
aclMat aclmat_dest(val, val, type, mat_dest.data, acl_context);
aclMat aclmat_src1(val, val, type[i], mat_src2.data, acl_context);
aclMat aclmat_src2(val, val, type[i], mat_src1.data, acl_context);
aclMat aclmat_dest(val, val, type[i], mat_dest.data, acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
cv::min(mat_src1, mat_src2, mat_dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
cv::acl::min(aclmat_src1, aclmat_src2, aclmat_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
cv::acl::min(aclmat_src1, aclmat_src2, aclmat_dest);
cv::acl::min(aclmat_src1, aclmat_src2, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -420,4 +473,6 @@ void PERF_TEST::Test_Min(aclCxt *acl_context)
cout << "Shape: " << val << " x " << val << "\t";
cout << "CpuTimes: " << time << "\tAclTimes: " << acltime << "\tRate: " << time / acltime << endl;
}
}
}

View File

@@ -28,8 +28,9 @@ void PERF_TEST::Test_Lookuptable(aclCxt *acl_context_0)
void PERF_TEST::Test_Merge(aclCxt *acl_context)
{
int val;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
@@ -41,7 +42,7 @@ void PERF_TEST::Test_Merge(aclCxt *acl_context)
test.PrintLog("Perf test : Function: merge()", srcType[i]);
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
n = cycle_index;
Mat mat_src1(val, val, srcType[i], Scalar(1));
Mat mat_src2(val, val, srcType[i], Scalar(2));
Mat mat_src3(val, val, srcType[i], Scalar(3));
@@ -71,14 +72,17 @@ void PERF_TEST::Test_Merge(aclCxt *acl_context)
while (n--)
merge(src, mat_dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
merge(acl_src, aclmat_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
merge(acl_src, aclmat_dest);
merge(acl_src, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
ASSERT_TRUE(ret);
@@ -94,18 +98,19 @@ void PERF_TEST::Test_Merge(aclCxt *acl_context)
void PERF_TEST::Test_Transpose(aclCxt *acl_context)
{
int val;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
vector<int> type{CV_32FC1, CV_32SC1};
vector<int> type{CV_32FC1};
for (size_t i = 0; i < type.size(); ++i)
{
test.PrintLog("Perf test : Function: transpose()", type[i]);
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
n = cycle_index;
Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
@@ -119,14 +124,17 @@ void PERF_TEST::Test_Transpose(aclCxt *acl_context)
while (n--)
transpose(mat_src, mat_dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
transpose(aclmat_src, aclmat_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
transpose(aclmat_src, aclmat_dest);
transpose(aclmat_src, aclmat_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);
@@ -142,8 +150,9 @@ void PERF_TEST::Test_Transpose(aclCxt *acl_context)
void PERF_TEST::Test_Split(aclCxt *acl_context)
{
int val;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
@@ -155,7 +164,7 @@ void PERF_TEST::Test_Split(aclCxt *acl_context)
test.PrintLog("Perf test : Function: split()", srcType[i]);
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
n = cycle_index;
Mat mat_src(val, val, srcType[i]);
Mat mat_dest1(val, val, destType[i]);
Mat mat_dest2(val, val, destType[i]);
@@ -182,14 +191,17 @@ void PERF_TEST::Test_Split(aclCxt *acl_context)
while (n--)
split(mat_src, dest);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
split(aclmat_src, acl_dest);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
split(aclmat_src, acl_dest);
split(aclmat_src, acl_dest, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
(acl_dest.data())[0].download(mat_dest1);
(acl_dest.data())[1].download(mat_dest2);
@@ -210,11 +222,11 @@ void PERF_TEST::Test_Split(aclCxt *acl_context)
}
void PERF_TEST::Test_Flip(aclCxt *acl_context)
{
int val;
int val, n;
int valmax = 8192;
int cycle_index = 100;
double begin, end, time, acltime;
Common_Test test;
@@ -224,7 +236,7 @@ void PERF_TEST::Test_Flip(aclCxt *acl_context)
test.PrintLog("Perf test : Function: flip()", type[i]);
for (val = 8; val <= valmax; val *= 2)
{
int n = 100;
n = cycle_index;
Mat mat_src(val, val, type[i]);
Mat mat_dest(val, val, type[i]);
Mat mat_dest1(val, val, type[i]);
@@ -238,14 +250,17 @@ void PERF_TEST::Test_Flip(aclCxt *acl_context)
while (n--)
flip(mat_src, mat_dest, 0);
end = static_cast<double>(getTickCount());
time = (end - begin) / getTickFrequency();
time = (end - begin) / getTickFrequency() / cycle_index;
n = 100;
n = (cycle_index - 1);
flip(aclmat_src, aclmat_dest, 0);
wait_stream(acl_context);
begin = static_cast<double>(getTickCount());
while (n--)
flip(aclmat_src, aclmat_dest, 0);
flip(aclmat_src, aclmat_dest, 0, 1);
wait_stream(acl_context, 1);
end = static_cast<double>(getTickCount());
acltime = (end - begin) / getTickFrequency();
acltime = (end - begin) / getTickFrequency() / (cycle_index - 1);
aclmat_dest.download(mat_dest1);
bool ret = test.Test_Diff(mat_dest, mat_dest1);