mirror of
https://github.com/PaddlePaddle/FastDeploy.git
synced 2025-10-21 07:40:37 +08:00
[Other] Remove useless macros (#1095)
* Remove useless macros * trigger ci * fix check error * rename INTEGRATE_PADDLE2ONNX to ENABLE_PADDLE2ONNX
This commit is contained in:
@@ -12,8 +12,6 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#ifdef ENABLE_VISION_VISUALIZE
|
||||
|
||||
#include "fastdeploy/vision/visualize/segmentation_arm.h"
|
||||
#ifdef __ARM_NEON
|
||||
#include <arm_neon.h>
|
||||
@@ -24,8 +22,9 @@ namespace vision {
|
||||
|
||||
static constexpr int _OMP_THREADS = 2;
|
||||
|
||||
static inline void QuantizeBlendingWeight8(
|
||||
float weight, uint8_t* old_multi_factor, uint8_t* new_multi_factor) {
|
||||
static inline void QuantizeBlendingWeight8(float weight,
|
||||
uint8_t* old_multi_factor,
|
||||
uint8_t* new_multi_factor) {
|
||||
// Quantize the weight to boost blending performance.
|
||||
// if 0.0 < w <= 1/8, w ~ 1/8=1/(2^3) shift right 3 mul 1, 7
|
||||
// if 1/8 < w <= 2/8, w ~ 2/8=2/(2^3) shift right 3 mul 2, 6
|
||||
@@ -39,34 +38,34 @@ static inline void QuantizeBlendingWeight8(
|
||||
*old_multi_factor = (8 - weight_quantize);
|
||||
}
|
||||
|
||||
cv::Mat VisSegmentationNEON(
|
||||
const cv::Mat& im, const SegmentationResult& result,
|
||||
float weight, bool quantize_weight) {
|
||||
#ifndef __ARM_NEON
|
||||
FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!")
|
||||
cv::Mat VisSegmentationNEON(const cv::Mat& im, const SegmentationResult& result,
|
||||
float weight, bool quantize_weight) {
|
||||
#ifndef __ARM_NEON
|
||||
FDASSERT(false, "FastDeploy was not compiled with Arm NEON support!")
|
||||
#else
|
||||
int64_t height = result.shape[0];
|
||||
int64_t width = result.shape[1];
|
||||
auto vis_img = cv::Mat(height, width, CV_8UC3);
|
||||
|
||||
|
||||
int32_t size = static_cast<int32_t>(height * width);
|
||||
uint8_t *vis_ptr = static_cast<uint8_t*>(vis_img.data);
|
||||
const uint8_t *label_ptr = static_cast<const uint8_t*>(result.label_map.data());
|
||||
const uint8_t *im_ptr = static_cast<const uint8_t*>(im.data);
|
||||
uint8_t* vis_ptr = static_cast<uint8_t*>(vis_img.data);
|
||||
const uint8_t* label_ptr =
|
||||
static_cast<const uint8_t*>(result.label_map.data());
|
||||
const uint8_t* im_ptr = static_cast<const uint8_t*>(im.data);
|
||||
|
||||
if (!quantize_weight) {
|
||||
uint8x16_t zerox16 = vdupq_n_u8(0);
|
||||
#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
|
||||
#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
|
||||
for (int i = 0; i < size - 15; i += 16) {
|
||||
uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3); // 48 bytes
|
||||
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
|
||||
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
|
||||
uint8x16_t ibx16 = bgrx16x3.val[0];
|
||||
uint8x16_t igx16 = bgrx16x3.val[1];
|
||||
uint8x16_t irx16 = bgrx16x3.val[2];
|
||||
// e.g 0b00000001 << 7 -> 0b10000000 128;
|
||||
uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7);
|
||||
uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4);
|
||||
uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3);
|
||||
uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7);
|
||||
uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4);
|
||||
uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3);
|
||||
uint8x16x3_t vbgrx16x3;
|
||||
// Keep the pixels of input im if mask = 0
|
||||
uint8x16_t cezx16 = vceqq_u8(labelx16, zerox16);
|
||||
@@ -77,35 +76,34 @@ cv::Mat VisSegmentationNEON(
|
||||
}
|
||||
for (int i = size - 15; i < size; i++) {
|
||||
uint8_t label = label_ptr[i];
|
||||
vis_ptr[i * 3 + 0] = (label << 7);
|
||||
vis_ptr[i * 3 + 1] = (label << 4);
|
||||
vis_ptr[i * 3 + 2] = (label << 3);
|
||||
vis_ptr[i * 3 + 0] = (label << 7);
|
||||
vis_ptr[i * 3 + 1] = (label << 4);
|
||||
vis_ptr[i * 3 + 2] = (label << 3);
|
||||
}
|
||||
// Blend the colors using OpenCV
|
||||
cv::addWeighted(im, 1.0 - weight, vis_img, weight, 0, vis_img);
|
||||
return vis_img;
|
||||
}
|
||||
|
||||
|
||||
// Quantize the weight to boost blending performance.
|
||||
// After that, we can directly use shift instructions
|
||||
// to blend the colors from input im and mask. Please
|
||||
// to blend the colors from input im and mask. Please
|
||||
// check QuantizeBlendingWeight8 for more details.
|
||||
uint8_t old_multi_factor, new_multi_factor;
|
||||
QuantizeBlendingWeight8(weight, &old_multi_factor,
|
||||
&new_multi_factor);
|
||||
QuantizeBlendingWeight8(weight, &old_multi_factor, &new_multi_factor);
|
||||
if (new_multi_factor == 0) {
|
||||
return im; // Only keep origin image.
|
||||
}
|
||||
|
||||
return im; // Only keep origin image.
|
||||
}
|
||||
|
||||
if (new_multi_factor == 8) {
|
||||
// Only keep mask, no need to blend with the original image.
|
||||
#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
|
||||
// Only keep mask, no need to blend with the original image.
|
||||
#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
|
||||
for (int i = 0; i < size - 15; i += 16) {
|
||||
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
|
||||
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
|
||||
// e.g 0b00000001 << 7 -> 0b10000000 128;
|
||||
uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7);
|
||||
uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4);
|
||||
uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3);
|
||||
uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7);
|
||||
uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4);
|
||||
uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3);
|
||||
uint8x16x3_t vbgr16x3;
|
||||
vbgr16x3.val[0] = mbx16;
|
||||
vbgr16x3.val[1] = mgx16;
|
||||
@@ -114,36 +112,36 @@ cv::Mat VisSegmentationNEON(
|
||||
}
|
||||
for (int i = size - 15; i < size; i++) {
|
||||
uint8_t label = label_ptr[i];
|
||||
vis_ptr[i * 3 + 0] = (label << 7);
|
||||
vis_ptr[i * 3 + 1] = (label << 4);
|
||||
vis_ptr[i * 3 + 2] = (label << 3);
|
||||
}
|
||||
return vis_img;
|
||||
vis_ptr[i * 3 + 0] = (label << 7);
|
||||
vis_ptr[i * 3 + 1] = (label << 4);
|
||||
vis_ptr[i * 3 + 2] = (label << 3);
|
||||
}
|
||||
return vis_img;
|
||||
}
|
||||
|
||||
|
||||
uint8x16_t zerox16 = vdupq_n_u8(0);
|
||||
uint8x16_t old_fx16 = vdupq_n_u8(old_multi_factor);
|
||||
uint8x16_t new_fx16 = vdupq_n_u8(new_multi_factor);
|
||||
// Blend the two colors together with quantize 'weight'.
|
||||
#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
|
||||
// Blend the two colors together with quantize 'weight'.
|
||||
#pragma omp parallel for proc_bind(close) num_threads(_OMP_THREADS)
|
||||
for (int i = 0; i < size - 15; i += 16) {
|
||||
uint8x16x3_t bgrx16x3 = vld3q_u8(im_ptr + i * 3); // 48 bytes
|
||||
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
|
||||
uint8x16_t labelx16 = vld1q_u8(label_ptr + i); // 16 bytes
|
||||
uint8x16_t ibx16 = bgrx16x3.val[0];
|
||||
uint8x16_t igx16 = bgrx16x3.val[1];
|
||||
uint8x16_t irx16 = bgrx16x3.val[2];
|
||||
// e.g 0b00000001 << 7 -> 0b10000000 128;
|
||||
uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7);
|
||||
uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4);
|
||||
uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3);
|
||||
uint8x16_t mbx16 = vshlq_n_u8(labelx16, 7);
|
||||
uint8x16_t mgx16 = vshlq_n_u8(labelx16, 4);
|
||||
uint8x16_t mrx16 = vshlq_n_u8(labelx16, 3);
|
||||
// Moving 7 bits to the right tends to result in zero,
|
||||
// so we choose to shift 3 bits to get an approximation
|
||||
// so we choose to shift 3 bits to get an approximation
|
||||
uint8x16_t ibx16_mshr = vmulq_u8(vshrq_n_u8(ibx16, 3), old_fx16);
|
||||
uint8x16_t igx16_mshr = vmulq_u8(vshrq_n_u8(igx16, 3), old_fx16);
|
||||
uint8x16_t igx16_mshr = vmulq_u8(vshrq_n_u8(igx16, 3), old_fx16);
|
||||
uint8x16_t irx16_mshr = vmulq_u8(vshrq_n_u8(irx16, 3), old_fx16);
|
||||
uint8x16_t mbx16_mshr = vmulq_u8(vshrq_n_u8(mbx16, 3), new_fx16);
|
||||
uint8x16_t mgx16_mshr = vmulq_u8(vshrq_n_u8(mgx16, 3), new_fx16);
|
||||
uint8x16_t mrx16_mshr = vmulq_u8(vshrq_n_u8(mrx16, 3), new_fx16);
|
||||
uint8x16_t mrx16_mshr = vmulq_u8(vshrq_n_u8(mrx16, 3), new_fx16);
|
||||
uint8x16_t qbx16 = vqaddq_u8(ibx16_mshr, mbx16_mshr);
|
||||
uint8x16_t qgx16 = vqaddq_u8(igx16_mshr, mgx16_mshr);
|
||||
uint8x16_t qrx16 = vqaddq_u8(irx16_mshr, mrx16_mshr);
|
||||
@@ -152,10 +150,10 @@ cv::Mat VisSegmentationNEON(
|
||||
uint8x16_t abx16 = vandq_u8(cezx16, ibx16);
|
||||
uint8x16_t agx16 = vandq_u8(cezx16, igx16);
|
||||
uint8x16_t arx16 = vandq_u8(cezx16, irx16);
|
||||
uint8x16x3_t vbgr16x3;
|
||||
// Reset qx values to 0 if label is 0, then, keep mask values
|
||||
// if label is not 0
|
||||
uint8x16_t ncezx16 = vmvnq_u8(cezx16);
|
||||
uint8x16x3_t vbgr16x3;
|
||||
// Reset qx values to 0 if label is 0, then, keep mask values
|
||||
// if label is not 0
|
||||
uint8x16_t ncezx16 = vmvnq_u8(cezx16);
|
||||
vbgr16x3.val[0] = vorrq_u8(abx16, vandq_u8(ncezx16, qbx16));
|
||||
vbgr16x3.val[1] = vorrq_u8(agx16, vandq_u8(ncezx16, qgx16));
|
||||
vbgr16x3.val[2] = vorrq_u8(arx16, vandq_u8(ncezx16, qrx16));
|
||||
@@ -164,18 +162,16 @@ cv::Mat VisSegmentationNEON(
|
||||
}
|
||||
for (int i = size - 15; i < size; i++) {
|
||||
uint8_t label = label_ptr[i];
|
||||
vis_ptr[i * 3 + 0] = (im_ptr[i * 3 + 0] >> 3) * old_multi_factor
|
||||
+ ((label << 7) >> 3) * new_multi_factor;
|
||||
vis_ptr[i * 3 + 1] = (im_ptr[i * 3 + 1] >> 3) * old_multi_factor
|
||||
+ ((label << 4) >> 3) * new_multi_factor;
|
||||
vis_ptr[i * 3 + 2] = (im_ptr[i * 3 + 2] >> 3) * old_multi_factor
|
||||
+ ((label << 3) >> 3) * new_multi_factor;
|
||||
}
|
||||
vis_ptr[i * 3 + 0] = (im_ptr[i * 3 + 0] >> 3) * old_multi_factor +
|
||||
((label << 7) >> 3) * new_multi_factor;
|
||||
vis_ptr[i * 3 + 1] = (im_ptr[i * 3 + 1] >> 3) * old_multi_factor +
|
||||
((label << 4) >> 3) * new_multi_factor;
|
||||
vis_ptr[i * 3 + 2] = (im_ptr[i * 3 + 2] >> 3) * old_multi_factor +
|
||||
((label << 3) >> 3) * new_multi_factor;
|
||||
}
|
||||
return vis_img;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace vision
|
||||
} // namespace fastdeploy
|
||||
|
||||
#endif
|
||||
} // namespace fastdeploy
|
Reference in New Issue
Block a user