[Benchmark] avoid Result mem copy for cpp benchmark (#1203)

* avoid mem copy for cpp benchmark

* set CMAKE_BUILD_TYPE to Release
This commit is contained in:
WJJ1995
2023-02-06 10:19:44 +08:00
committed by GitHub
parent 870551f3f5
commit cfc7af2d45
3 changed files with 115 additions and 58 deletions

View File

@@ -37,7 +37,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake)
# Set C++11 as the standard for the whole project on non-MSVC toolchains and
# add the project's default compiler flags.
if(NOT MSVC)
  set(CMAKE_CXX_STANDARD 11)
  # Project defaults: silence format-string warnings (-Wno-format), strip debug
  # info (-g0) and optimize aggressively (-O3). The existing value of
  # CMAKE_CXX_FLAGS is kept (and placed last) so that flags supplied by the
  # user or a toolchain file are not discarded and can override the defaults.
  set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3 ${CMAKE_CXX_FLAGS}")
  add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1)
endif()
@@ -153,6 +153,8 @@ get_osx_architecture()
##################################### Building: FastDeploy C++ SDK #######################################
add_definitions(-DFASTDEPLOY_LIB)
# Default to a Release build when no build type was selected.
# CMAKE_BUILD_TYPE is a CMake variable: passing -DCMAKE_BUILD_TYPE=Release via
# add_definitions() only defines a C/C++ preprocessor macro of that name and
# does not change the build type. Multi-config generators ignore
# CMAKE_BUILD_TYPE, so it is only defaulted for single-config generators.
get_property(fd_is_multi_config GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
if(NOT fd_is_multi_config AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type (defaults to Release)" FORCE)
endif()
# configure files before glob sources.
configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h)
configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc)
@@ -466,7 +468,7 @@ if(ANDROID)
list(APPEND DEPEND_LIBS ${log-lib})
if(WITH_LITE_STATIC)
# need omp for static Paddle Lite lib
set(WITH_OPENMP ON CACHE BOOL "Force WITH_OPENMP=ON while WITH_LITE_STATIC=ON" FORCE)
set(WITH_OPENMP ON CACHE BOOL "Force WITH_OPENMP=ON while WITH_LITE_STATIC=ON" FORCE)
message(STATUS "Force WITH_OPENMP=${WITH_OPENMP} while WITH_LITE_STATIC=ON")
endif()
if(WITH_OPENMP)
@@ -482,13 +484,13 @@ if(ANDROID AND WITH_JAVA)
endif()
if(ANDROID AND WITH_STATIC_LIB)
# Here, we use a dummy target (fastdelpoy_dummy)
# Here, we use a dummy target (fastdelpoy_dummy)
# to form a build dependency tree for fastdeploy_static lib.
add_library(fastdelpoy_dummy STATIC ${ALL_DEPLOY_SRCS})
# Still add ${DEPEND_LIBS} for cmake to form link_libraries
# property tree for a static library.
# Still add ${DEPEND_LIBS} for cmake to form link_libraries
# property tree for a static library.
target_link_libraries(fastdelpoy_dummy ${DEPEND_LIBS})
# Build fastdelpoy_dummy when the third-party
# Build fastdelpoy_dummy when the third-party
# libraries (opencv, paddle lite, flycv) are ready.
add_dependencies(fastdelpoy_dummy ${LIBRARY_NAME})
# Add WITH_STATIC_LIB compile definitions, see lite_backend.cc.
@@ -541,9 +543,9 @@ if(WIN32)
RUNTIME DESTINATION lib
)
elseif(ANDROID)
if(WITH_STATIC_LIB)
if(WITH_STATIC_LIB)
install(
FILES
FILES
${CMAKE_CURRENT_BINARY_DIR}/libfastdeploy_static.a
DESTINATION lib/${ANDROID_ABI}
)
@@ -553,11 +555,11 @@ elseif(ANDROID)
LIBRARY DESTINATION lib/${ANDROID_ABI}
)
endif()
# Install omp into fastdeploy lib dir if WITH_OPENMP=ON
# Install omp into fastdeploy lib dir if WITH_OPENMP=ON
# and WITH_LITE_STATIC=OFF.
if(WITH_OPENMP AND (NOT WITH_LITE_STATIC) AND OpenMP_CXX_FOUND AND ENABLE_OPENMP_SHARED)
install(
FILES
FILES
${OpenMP_CXX_LIBRARIES}
DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/${ANDROID_ABI}
)
@@ -594,7 +596,7 @@ else()
# The headers and libs of opencv must be installed.
if(ENABLE_VISION)
if(WITH_OPENCV_STATIC AND WITH_STATIC_LIB)
# Only need to install headers while building
# Only need to install headers while building
# FastDeploy static lib. (TODO:qiuyanjun)
install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/opencv/sdk/native/jni/include
@@ -621,14 +623,14 @@ else()
)
endif()
endif()
# fast_tokenizer's static lib is not available yet;
# fast_tokenizer's static lib is not available yet;
# support may be added later (TODO:qiuyanjun)
if(ENABLE_TEXT)
install(
DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/fast_tokenizer
DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs/install
)
endif()
endif()
# Some libs may not need to be installed in static mode.
if(ENABLE_LITE_BACKEND)
if(WITH_LITE_STATIC)

View File

@@ -16,11 +16,16 @@
namespace fastdeploy {
namespace vision {
void ClassifyResult::Clear() {
// Drops all stored scores and label ids and releases their storage by
// swapping each member with a temporary empty vector of the same type.
void ClassifyResult::Free() {
  decltype(scores)().swap(scores);
  decltype(label_ids)().swap(label_ids);
}
// Removes every score and label id. Unlike Free(), the vectors are cleared
// in place rather than swapped out, so clear() leaves their capacity intact.
void ClassifyResult::Clear() {
  scores.clear();
  label_ids.clear();
}
std::string ClassifyResult::Str() {
std::string out;
out = "ClassifyResult(\nlabel_ids: ";
@@ -47,11 +52,16 @@ void Mask::Reserve(int size) { data.reserve(size); }
// Resizes the mask data buffer so that it holds exactly `size` elements.
void Mask::Resize(int size) {
  data.resize(size);
}
void Mask::Clear() {
// Empties the mask and releases the storage held by `shape` and `data` by
// swapping each with a temporary empty vector of the same type.
void Mask::Free() {
  decltype(shape)().swap(shape);
  decltype(data)().swap(data);
}
// Removes all mask data and shape entries. The vectors are cleared in place
// (not swapped out as in Free()), so clear() leaves their capacity intact.
void Mask::Clear() {
  shape.clear();
  data.clear();
}
std::string Mask::Str() {
std::string out = "Mask(";
size_t ndim = shape.size();
@@ -94,7 +104,7 @@ DetectionResult& DetectionResult::operator=(DetectionResult&& other) {
return *this;
}
void DetectionResult::Clear() {
void DetectionResult::Free() {
std::vector<std::array<float, 4>>().swap(boxes);
std::vector<float>().swap(scores);
std::vector<int32_t>().swap(label_ids);
@@ -102,18 +112,30 @@ void DetectionResult::Clear() {
contain_masks = false;
}
// Resets the detection result: marks it as holding no masks and clears the
// masks, label ids, scores and boxes in place (clear() keeps the capacity).
void DetectionResult::Clear() {
  contain_masks = false;
  masks.clear();
  label_ids.clear();
  scores.clear();
  boxes.clear();
}
// Reserves capacity for `size` entries in the boxes, scores, label_ids and
// masks vectors.
void DetectionResult::Reserve(int size) {
boxes.reserve(size);
scores.reserve(size);
label_ids.reserve(size);
// NOTE(review): masks is reserved unconditionally here and then again under
// the contain_masks check below, which makes the guarded call redundant as
// written -- confirm which of the two is intended.
masks.reserve(size);
if (contain_masks) {
masks.reserve(size);
}
}
// Resizes the boxes, scores, label_ids and masks vectors to `size` entries.
void DetectionResult::Resize(int size) {
boxes.resize(size);
scores.resize(size);
label_ids.resize(size);
// NOTE(review): masks is resized unconditionally here and then again under
// the contain_masks check below, which makes the guarded call redundant as
// written -- confirm which of the two is intended.
masks.resize(size);
if (contain_masks) {
masks.resize(size);
}
}
std::string DetectionResult::Str() {
@@ -139,12 +161,18 @@ std::string DetectionResult::Str() {
return out;
}
void KeyPointDetectionResult::Clear() {
// Resets num_joints to -1 and releases the storage of `scores` and
// `keypoints` by swapping each with a temporary empty vector.
void KeyPointDetectionResult::Free() {
  num_joints = -1;
  decltype(scores)().swap(scores);
  decltype(keypoints)().swap(keypoints);
}
// Resets num_joints to -1 and clears `scores` and `keypoints` in place;
// clear() leaves the vectors' capacity intact, unlike Free().
void KeyPointDetectionResult::Clear() {
  num_joints = -1;
  scores.clear();
  keypoints.clear();
}
// Reserves capacity for `size` keypoints.
// NOTE(review): only `keypoints` is reserved; `scores` is left untouched --
// confirm that this is intended.
void KeyPointDetectionResult::Reserve(int size) {
  keypoints.reserve(size);
}
// Resizes `keypoints` to hold exactly `size` entries.
// NOTE(review): `scores` is not resized here, while Str() reads scores[i]
// for every keypoint index -- confirm callers size `scores` separately.
void KeyPointDetectionResult::Resize(int size) {
  keypoints.resize(size);
}
@@ -155,8 +183,8 @@ std::string KeyPointDetectionResult::Str() {
out = "KeyPointDetectionResult: [x, y, conf]\n";
for (size_t i = 0; i < keypoints.size(); ++i) {
out = out + std::to_string(keypoints[i][0]) + "," +
std::to_string(keypoints[i][1]) + ", " +
std::to_string(scores[i]) + "\n";
std::to_string(keypoints[i][1]) + ", " + std::to_string(scores[i]) +
"\n";
}
out += "num_joints:" + std::to_string(num_joints) + "\n";
return out;
@@ -170,22 +198,22 @@ void OCRResult::Clear() {
cls_labels.clear();
}
void MOTResult::Clear(){
// Clears the class ids, scores, track ids and boxes in place.
void MOTResult::Clear() {
  class_ids.clear();
  scores.clear();
  ids.clear();
  boxes.clear();
}
std::string MOTResult::Str(){
std::string MOTResult::Str() {
std::string out;
out = "MOTResult:\nall boxes counts: "+std::to_string(boxes.size())+"\n";
out = "MOTResult:\nall boxes counts: " + std::to_string(boxes.size()) + "\n";
out += "[xmin\tymin\txmax\tymax\tid\tscore]\n";
for (size_t i = 0; i < boxes.size(); ++i) {
out = out + "["+ std::to_string(boxes[i][0]) + "\t" +
out = out + "[" + std::to_string(boxes[i][0]) + "\t" +
std::to_string(boxes[i][1]) + "\t" + std::to_string(boxes[i][2]) +
"\t" + std::to_string(boxes[i][3]) + "\t" +
std::to_string(ids[i]) + "\t" + std::to_string(scores[i]) + "]\n";
"\t" + std::to_string(boxes[i][3]) + "\t" + std::to_string(ids[i]) +
"\t" + std::to_string(scores[i]) + "]\n";
}
return out;
}
@@ -197,13 +225,20 @@ FaceDetectionResult::FaceDetectionResult(const FaceDetectionResult& res) {
landmarks_per_face = res.landmarks_per_face;
}
void FaceDetectionResult::Clear() {
// Resets landmarks_per_face to 0 and releases the storage of `landmarks`,
// `scores` and `boxes` by swapping each with a temporary empty vector.
void FaceDetectionResult::Free() {
  landmarks_per_face = 0;
  decltype(landmarks)().swap(landmarks);
  decltype(scores)().swap(scores);
  decltype(boxes)().swap(boxes);
}
// Resets landmarks_per_face to 0 and clears `landmarks`, `scores` and
// `boxes` in place; clear() leaves the vectors' capacity intact.
void FaceDetectionResult::Clear() {
  landmarks_per_face = 0;
  landmarks.clear();
  scores.clear();
  boxes.clear();
}
void FaceDetectionResult::Reserve(int size) {
boxes.reserve(size);
scores.reserve(size);
@@ -257,23 +292,22 @@ std::string FaceDetectionResult::Str() {
return out;
}
void FaceAlignmentResult::Clear() {
// Releases the storage of `landmarks` by swapping it with a temporary empty
// vector of the same type.
void FaceAlignmentResult::Free() {
  decltype(landmarks)().swap(landmarks);
}
void FaceAlignmentResult::Reserve(int size) {
landmarks.resize(size);
}
// Clears `landmarks` in place; clear() leaves the vector's capacity intact.
void FaceAlignmentResult::Clear() {
  landmarks.clear();
}
void FaceAlignmentResult::Resize(int size) {
landmarks.resize(size);
}
// NOTE(review): despite its name this calls resize(), so it changes the
// number of landmarks rather than only the capacity -- confirm whether
// callers rely on this before switching to reserve().
void FaceAlignmentResult::Reserve(int size) {
  landmarks.resize(size);
}
// Resizes `landmarks` to hold exactly `size` entries.
void FaceAlignmentResult::Resize(int size) {
  landmarks.resize(size);
}
std::string FaceAlignmentResult::Str() {
std::string out;
out = "FaceAlignmentResult: [x, y]\n";
out = out + "There are " +std::to_string(landmarks.size()) + " landmarks, the top 10 are listed as below:\n";
out = out + "There are " + std::to_string(landmarks.size()) +
" landmarks, the top 10 are listed as below:\n";
int landmarks_size = landmarks.size();
size_t result_length = std::min(10, landmarks_size);
for (size_t i = 0; i < result_length; ++i) {
@@ -355,7 +389,9 @@ FaceRecognitionResult::FaceRecognitionResult(const FaceRecognitionResult& res) {
embedding.assign(res.embedding.begin(), res.embedding.end());
}
void FaceRecognitionResult::Clear() { std::vector<float>().swap(embedding); }
// Releases the storage of `embedding` by swapping it with a temporary empty
// vector of the same type.
void FaceRecognitionResult::Free() {
  decltype(embedding)().swap(embedding);
}
// Clears `embedding` in place; clear() leaves the vector's capacity intact.
void FaceRecognitionResult::Clear() {
  embedding.clear();
}
// Reserves capacity for `size` values in `embedding`.
void FaceRecognitionResult::Reserve(int size) {
  embedding.reserve(size);
}
@@ -536,28 +572,23 @@ std::string OCRResult::Str() {
return no_result;
}
void HeadPoseResult::Clear() {
std::vector<float>().swap(euler_angles);
}
// Releases the storage of `euler_angles` by swapping it with a temporary
// empty vector of the same type.
void HeadPoseResult::Free() {
  decltype(euler_angles)().swap(euler_angles);
}
void HeadPoseResult::Reserve(int size) {
euler_angles.resize(size);
}
// Clears `euler_angles` in place; clear() leaves the vector's capacity intact.
void HeadPoseResult::Clear() {
  euler_angles.clear();
}
void HeadPoseResult::Resize(int size) {
euler_angles.resize(size);
}
// NOTE(review): despite its name this calls resize(), so it changes the
// number of stored angles rather than only the capacity -- confirm whether
// callers rely on this before switching to reserve().
void HeadPoseResult::Reserve(int size) {
  euler_angles.resize(size);
}
// Resizes `euler_angles` to hold exactly `size` entries.
void HeadPoseResult::Resize(int size) {
  euler_angles.resize(size);
}
// Formats the head pose as a human-readable string: a header line followed by
// one line each for yaw, pitch and roll, read from euler_angles[0], [1] and
// [2] respectively. When fewer than three angles are stored, the header is
// followed by a short notice instead of the angle lines.
std::string HeadPoseResult::Str() {
  std::string out = "HeadPoseResult: [yaw, pitch, roll]\n";
  if (euler_angles.size() < 3) {
    // Indexing [0..2] on a shorter vector would be undefined behavior.
    out += "euler_angles holds " + std::to_string(euler_angles.size()) +
           " value(s), expected 3\n";
    return out;
  }
  out += "yaw: " + std::to_string(euler_angles[0]) + "\n";
  out += "pitch: " + std::to_string(euler_angles[1]) + "\n";
  out += "roll: " + std::to_string(euler_angles[2]) + "\n";
  return out;
}
} // namespace vision
} // namespace fastdeploy

View File

@@ -51,9 +51,12 @@ struct FASTDEPLOY_DECL ClassifyResult : public BaseResult {
std::vector<float> scores;
ResultType type = ResultType::CLASSIFY;
/// Clear result
/// Clear ClassifyResult
void Clear();
/// Clear ClassifyResult and free the memory
void Free();
/// Copy constructor
ClassifyResult(const ClassifyResult& other) = default;
/// Move assignment
@@ -72,9 +75,12 @@ struct FASTDEPLOY_DECL Mask : public BaseResult {
std::vector<int64_t> shape; // (H,W) ...
ResultType type = ResultType::MASK;
/// clear mask
/// clear Mask result
void Clear();
/// Clear Mask result and free the memory
void Free();
/// Return a mutable pointer of the mask data buffer
void* Data() { return data.data(); }
@@ -117,9 +123,12 @@ struct FASTDEPLOY_DECL DetectionResult : public BaseResult {
/// Move assignment
DetectionResult& operator=(DetectionResult&& other);
/// Clear detection result
/// Clear DetectionResult
void Clear();
/// Clear DetectionResult and free the memory
void Free();
void Reserve(int size);
void Resize(int size);
@@ -140,9 +149,12 @@ struct FASTDEPLOY_DECL KeyPointDetectionResult : public BaseResult {
int num_joints = -1;
ResultType type = ResultType::KEYPOINT_DETECTION;
/// Clear detection result
/// Clear KeyPointDetectionResult
void Clear();
/// Clear KeyPointDetectionResult and free the memory
void Free();
void Reserve(int size);
void Resize(int size);
@@ -215,9 +227,12 @@ struct FASTDEPLOY_DECL FaceDetectionResult : public BaseResult {
FaceDetectionResult() { landmarks_per_face = 0; }
FaceDetectionResult(const FaceDetectionResult& res);
/// Clear detection result
/// Clear FaceDetectionResult
void Clear();
/// Clear FaceDetectionResult and free the memory
void Free();
void Reserve(int size);
void Resize(int size);
@@ -233,9 +248,12 @@ struct FASTDEPLOY_DECL FaceAlignmentResult : public BaseResult {
std::vector<std::array<float, 2>> landmarks;
ResultType type = ResultType::FACE_ALIGNMENT;
/// Clear facealignment result
/// Clear FaceAlignmentResult
void Clear();
/// Clear FaceAlignmentResult and free the memory
void Free();
void Reserve(int size);
void Resize(int size);
@@ -292,9 +310,12 @@ struct FASTDEPLOY_DECL FaceRecognitionResult : public BaseResult {
FaceRecognitionResult() {}
FaceRecognitionResult(const FaceRecognitionResult& res);
/// Clear detection result
/// Clear FaceRecognitionResult
void Clear();
/// Clear FaceRecognitionResult and free the memory
void Free();
void Reserve(int size);
void Resize(int size);
@@ -347,9 +368,12 @@ struct FASTDEPLOY_DECL HeadPoseResult : public BaseResult {
std::vector<float> euler_angles;
ResultType type = ResultType::HEADPOSE;
/// Clear headpose result
/// Clear HeadPoseResult
void Clear();
/// Clear HeadPoseResult and free the memory
void Free();
void Reserve(int size);
void Resize(int size);