From cfc7af2d457f928a3b971ea6e00e23abdc287bb3 Mon Sep 17 00:00:00 2001 From: WJJ1995 Date: Mon, 6 Feb 2023 10:19:44 +0800 Subject: [PATCH] [Benchmark] avoid Result mem copy for cpp benchmark (#1203) * avoid mem copy for cpp benchmark * set CMAKE_BUILD_TYPE to Release --- CMakeLists.txt | 28 ++++---- fastdeploy/vision/common/result.cc | 105 +++++++++++++++++++---------- fastdeploy/vision/common/result.h | 40 ++++++++--- 3 files changed, 115 insertions(+), 58 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1335c2865..44a71c2fb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,7 +37,7 @@ include(${PROJECT_SOURCE_DIR}/cmake/utils.cmake) # Set C++11 as standard for the whole project if(NOT MSVC) set(CMAKE_CXX_STANDARD 11) - set(CMAKE_CXX_FLAGS "-Wno-format") + set(CMAKE_CXX_FLAGS "-Wno-format -g0 -O3") add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1) endif(NOT MSVC) @@ -153,6 +153,8 @@ get_osx_architecture() ##################################### Building: FastDeploy C++ SDK ####################################### add_definitions(-DFASTDEPLOY_LIB) +# set CMAKE_BUILD_TYPE to Release +add_definitions(-DCMAKE_BUILD_TYPE=Release) # configure files before glob sources. configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/core/config.h) configure_file(${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc.in ${PROJECT_SOURCE_DIR}/${CSRCS_DIR_NAME}/fastdeploy/pybind/main.cc) @@ -466,7 +468,7 @@ if(ANDROID) list(APPEND DEPEND_LIBS ${log-lib}) if(WITH_LITE_STATIC) # need omp for static Paddle Lite lib - set(WITH_OPENMP ON CACHE BOOL "Force WITH_OPENMP=ON while WITH_LITE_STATIC=ON" FORCE) + set(WITH_OPENMP ON CACHE BOOL "Force WITH_OPENMP=ON while WITH_LITE_STATIC=ON" FORCE) message(STATUS "Force WITH_OPENMP=${WITH_OPENMP} while WITH_LITE_STATIC=ON") endif() if(WITH_OPENMP) @@ -482,13 +484,13 @@ if(ANDROID AND WITH_JAVA) endif() if(ANDROID AND WITH_STATIC_LIB) - # Here, we use a dummy target (fastdelpoy_dummy) + # Here, we use a dummy target (fastdelpoy_dummy) # to form a build dependency tree for fastdeploy_static lib. add_library(fastdelpoy_dummy STATIC ${ALL_DEPLOY_SRCS}) - # Still add ${DEPEND_LIBS} for cmake to form link_libraries - # property tree for a static library. + # Still add ${DEPEND_LIBS} for cmake to form link_libraries + # property tree for a static library. target_link_libraries(fastdelpoy_dummy ${DEPEND_LIBS}) - # Build fastdelpoy_dummy when the third-party + # Build fastdelpoy_dummy when the third-party # libraries (opencv, paddle lite, flycv) are ready. add_dependencies(fastdelpoy_dummy ${LIBRARY_NAME}) # Add WITH_STATIC_LIB compile definitions, see lite_backend.cc. @@ -541,9 +543,9 @@ if(WIN32) RUNTIME DESTINATION lib ) elseif(ANDROID) - if(WITH_STATIC_LIB) + if(WITH_STATIC_LIB) install( - FILES + FILES ${CMAKE_CURRENT_BINARY_DIR}/libfastdeploy_static.a DESTINATION lib/${ANDROID_ABI} ) @@ -553,11 +555,11 @@ elseif(ANDROID) LIBRARY DESTINATION lib/${ANDROID_ABI} ) endif() - # Install omp into fastdeploy lib dir if WITH_OPENMP=ON + # Install omp into fastdeploy lib dir if WITH_OPENMP=ON # and WITH_LITE_STATIC=OFF. if(WITH_OPENMP AND (NOT WITH_LITE_STATIC) AND OpenMP_CXX_FOUND AND ENABLE_OPENMP_SHARED) install( - FILES + FILES ${OpenMP_CXX_LIBRARIES} DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/${ANDROID_ABI} ) @@ -594,7 +596,7 @@ else() # The headers and libs of opencv must be install. if(ENABLE_VISION) if(WITH_OPENCV_STATIC AND WITH_STATIC_LIB) - # Only need to install headers while building + # Only need to install headers while building # FastDeploy static lib. (TODO:qiuyanjun) install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/opencv/sdk/native/jni/include @@ -621,14 +623,14 @@ else() ) endif() endif() - # fast_tokenizer's static lib is not avaliable now! + # fast_tokenizer's static lib is not avaliable now! # may support some days later(TODO:qiuyanjun) if(ENABLE_TEXT) install( DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/third_libs/install/fast_tokenizer DESTINATION ${CMAKE_INSTALL_PREFIX}/third_libs/install ) - endif() + endif() # some libs may not to install while in static mode if(ENABLE_LITE_BACKEND) if(WITH_LITE_STATIC) diff --git a/fastdeploy/vision/common/result.cc b/fastdeploy/vision/common/result.cc index 446a39699..d48d9ddc4 100755 --- a/fastdeploy/vision/common/result.cc +++ b/fastdeploy/vision/common/result.cc @@ -16,11 +16,16 @@ namespace fastdeploy { namespace vision { -void ClassifyResult::Clear() { +void ClassifyResult::Free() { std::vector().swap(label_ids); std::vector().swap(scores); } +void ClassifyResult::Clear() { + label_ids.clear(); + scores.clear(); +} + std::string ClassifyResult::Str() { std::string out; out = "ClassifyResult(\nlabel_ids: "; @@ -47,11 +52,16 @@ void Mask::Reserve(int size) { data.reserve(size); } void Mask::Resize(int size) { data.resize(size); } -void Mask::Clear() { +void Mask::Free() { std::vector().swap(data); std::vector().swap(shape); } +void Mask::Clear() { + data.clear(); + shape.clear(); +} + std::string Mask::Str() { std::string out = "Mask("; size_t ndim = shape.size(); @@ -94,7 +104,7 @@ DetectionResult& DetectionResult::operator=(DetectionResult&& other) { return *this; } -void DetectionResult::Clear() { +void DetectionResult::Free() { std::vector>().swap(boxes); std::vector().swap(scores); std::vector().swap(label_ids); @@ -102,18 +112,30 @@ void DetectionResult::Clear() { contain_masks = false; } +void DetectionResult::Clear() { + boxes.clear(); + scores.clear(); + label_ids.clear(); + masks.clear(); + contain_masks = false; +} + void DetectionResult::Reserve(int size) { boxes.reserve(size); scores.reserve(size); label_ids.reserve(size); - masks.reserve(size); + if (contain_masks) { + masks.reserve(size); + } } void DetectionResult::Resize(int size) { boxes.resize(size); scores.resize(size); label_ids.resize(size); - masks.resize(size); + if (contain_masks) { + masks.resize(size); + } } std::string DetectionResult::Str() { @@ -139,12 +161,18 @@ std::string DetectionResult::Str() { return out; } -void KeyPointDetectionResult::Clear() { +void KeyPointDetectionResult::Free() { std::vector>().swap(keypoints); std::vector().swap(scores); num_joints = -1; } +void KeyPointDetectionResult::Clear() { + keypoints.clear(); + scores.clear(); + num_joints = -1; +} + void KeyPointDetectionResult::Reserve(int size) { keypoints.reserve(size); } void KeyPointDetectionResult::Resize(int size) { keypoints.resize(size); } @@ -155,8 +183,8 @@ std::string KeyPointDetectionResult::Str() { out = "KeyPointDetectionResult: [x, y, conf]\n"; for (size_t i = 0; i < keypoints.size(); ++i) { out = out + std::to_string(keypoints[i][0]) + "," + - std::to_string(keypoints[i][1]) + ", " + - std::to_string(scores[i]) + "\n"; + std::to_string(keypoints[i][1]) + ", " + std::to_string(scores[i]) + + "\n"; } out += "num_joints:" + std::to_string(num_joints) + "\n"; return out; @@ -170,22 +198,22 @@ void OCRResult::Clear() { cls_labels.clear(); } -void MOTResult::Clear(){ +void MOTResult::Clear() { boxes.clear(); ids.clear(); scores.clear(); class_ids.clear(); } -std::string MOTResult::Str(){ +std::string MOTResult::Str() { std::string out; - out = "MOTResult:\nall boxes counts: "+std::to_string(boxes.size())+"\n"; + out = "MOTResult:\nall boxes counts: " + std::to_string(boxes.size()) + "\n"; out += "[xmin\tymin\txmax\tymax\tid\tscore]\n"; for (size_t i = 0; i < boxes.size(); ++i) { - out = out + "["+ std::to_string(boxes[i][0]) + "\t" + + out = out + "[" + std::to_string(boxes[i][0]) + "\t" + std::to_string(boxes[i][1]) + "\t" + std::to_string(boxes[i][2]) + - "\t" + std::to_string(boxes[i][3]) + "\t" + - std::to_string(ids[i]) + "\t" + std::to_string(scores[i]) + "]\n"; + "\t" + std::to_string(boxes[i][3]) + "\t" + std::to_string(ids[i]) + + "\t" + std::to_string(scores[i]) + "]\n"; } return out; } @@ -197,13 +225,20 @@ FaceDetectionResult::FaceDetectionResult(const FaceDetectionResult& res) { landmarks_per_face = res.landmarks_per_face; } -void FaceDetectionResult::Clear() { +void FaceDetectionResult::Free() { std::vector>().swap(boxes); std::vector().swap(scores); std::vector>().swap(landmarks); landmarks_per_face = 0; } +void FaceDetectionResult::Clear() { + boxes.clear(); + scores.clear(); + landmarks.clear(); + landmarks_per_face = 0; +} + void FaceDetectionResult::Reserve(int size) { boxes.reserve(size); scores.reserve(size); @@ -257,23 +292,22 @@ std::string FaceDetectionResult::Str() { return out; } -void FaceAlignmentResult::Clear() { +void FaceAlignmentResult::Free() { std::vector>().swap(landmarks); } -void FaceAlignmentResult::Reserve(int size) { - landmarks.resize(size); -} +void FaceAlignmentResult::Clear() { landmarks.clear(); } -void FaceAlignmentResult::Resize(int size) { - landmarks.resize(size); -} +void FaceAlignmentResult::Reserve(int size) { landmarks.resize(size); } + +void FaceAlignmentResult::Resize(int size) { landmarks.resize(size); } std::string FaceAlignmentResult::Str() { std::string out; out = "FaceAlignmentResult: [x, y]\n"; - out = out + "There are " +std::to_string(landmarks.size()) + " landmarks, the top 10 are listed as below:\n"; + out = out + "There are " + std::to_string(landmarks.size()) + + " landmarks, the top 10 are listed as below:\n"; int landmarks_size = landmarks.size(); size_t result_length = std::min(10, landmarks_size); for (size_t i = 0; i < result_length; ++i) { @@ -355,7 +389,9 @@ FaceRecognitionResult::FaceRecognitionResult(const FaceRecognitionResult& res) { embedding.assign(res.embedding.begin(), res.embedding.end()); } -void FaceRecognitionResult::Clear() { std::vector().swap(embedding); } +void FaceRecognitionResult::Free() { std::vector().swap(embedding); } + +void FaceRecognitionResult::Clear() { embedding.clear(); } void FaceRecognitionResult::Reserve(int size) { embedding.reserve(size); } @@ -536,28 +572,23 @@ std::string OCRResult::Str() { return no_result; } -void HeadPoseResult::Clear() { - std::vector().swap(euler_angles); -} +void HeadPoseResult::Free() { std::vector().swap(euler_angles); } -void HeadPoseResult::Reserve(int size) { - euler_angles.resize(size); -} +void HeadPoseResult::Clear() { euler_angles.clear(); } -void HeadPoseResult::Resize(int size) { - euler_angles.resize(size); -} +void HeadPoseResult::Reserve(int size) { euler_angles.resize(size); } + +void HeadPoseResult::Resize(int size) { euler_angles.resize(size); } std::string HeadPoseResult::Str() { std::string out; out = "HeadPoseResult: [yaw, pitch, roll]\n"; - out = out + "yaw: " + std::to_string(euler_angles[0]) + "\n" + - "pitch: " + std::to_string(euler_angles[1]) + "\n" + - "roll: " + std::to_string(euler_angles[2]) + "\n"; + out = out + "yaw: " + std::to_string(euler_angles[0]) + "\n" + "pitch: " + + std::to_string(euler_angles[1]) + "\n" + "roll: " + + std::to_string(euler_angles[2]) + "\n"; return out; } - } // namespace vision } // namespace fastdeploy diff --git a/fastdeploy/vision/common/result.h b/fastdeploy/vision/common/result.h index c68f6d4cf..7c4efde23 100755 --- a/fastdeploy/vision/common/result.h +++ b/fastdeploy/vision/common/result.h @@ -51,9 +51,12 @@ struct FASTDEPLOY_DECL ClassifyResult : public BaseResult { std::vector scores; ResultType type = ResultType::CLASSIFY; - /// Clear result + /// Clear ClassifyResult void Clear(); + /// Clear ClassifyResult and free the memory + void Free(); + /// Copy constructor ClassifyResult(const ClassifyResult& other) = default; /// Move assignment @@ -72,9 +75,12 @@ struct FASTDEPLOY_DECL Mask : public BaseResult { std::vector shape; // (H,W) ... ResultType type = ResultType::MASK; - /// clear mask + /// clear Mask result void Clear(); + /// Clear Mask result and free the memory + void Free(); + /// Return a mutable pointer of the mask data buffer void* Data() { return data.data(); } @@ -117,9 +123,12 @@ struct FASTDEPLOY_DECL DetectionResult : public BaseResult { /// Move assignment DetectionResult& operator=(DetectionResult&& other); - /// Clear detection result + /// Clear DetectionResult void Clear(); + /// Clear DetectionResult and free the memory + void Free(); + void Reserve(int size); void Resize(int size); @@ -140,9 +149,12 @@ struct FASTDEPLOY_DECL KeyPointDetectionResult : public BaseResult { int num_joints = -1; ResultType type = ResultType::KEYPOINT_DETECTION; - /// Clear detection result + /// Clear KeyPointDetectionResult void Clear(); + /// Clear KeyPointDetectionResult and free the memory + void Free(); + void Reserve(int size); void Resize(int size); @@ -215,9 +227,12 @@ struct FASTDEPLOY_DECL FaceDetectionResult : public BaseResult { FaceDetectionResult() { landmarks_per_face = 0; } FaceDetectionResult(const FaceDetectionResult& res); - /// Clear detection result + /// Clear FaceDetectionResult void Clear(); + /// Clear FaceDetectionResult and free the memory + void Free(); + void Reserve(int size); void Resize(int size); @@ -233,9 +248,12 @@ struct FASTDEPLOY_DECL FaceAlignmentResult : public BaseResult { std::vector> landmarks; ResultType type = ResultType::FACE_ALIGNMENT; - /// Clear facealignment result + /// Clear FaceAlignmentResult void Clear(); + /// Clear FaceAlignmentResult and free the memory + void Free(); + void Reserve(int size); void Resize(int size); @@ -292,9 +310,12 @@ struct FASTDEPLOY_DECL FaceRecognitionResult : public BaseResult { FaceRecognitionResult() {} FaceRecognitionResult(const FaceRecognitionResult& res); - /// Clear detection result + /// Clear FaceRecognitionResult void Clear(); + /// Clear FaceRecognitionResult and free the memory + void Free(); + void Reserve(int size); void Resize(int size); @@ -347,9 +368,12 @@ struct FASTDEPLOY_DECL HeadPoseResult : public BaseResult { std::vector euler_angles; ResultType type = ResultType::HEADPOSE; - /// Clear headpose result + /// Clear HeadPoseResult void Clear(); + /// Clear HeadPoseResult and free the memory + void Free(); + void Reserve(int size); void Resize(int size);