// // Copyright (C) 2023 nihui // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include #include #include #include #if __ARM_NEON #include #endif // __ARM_NEON #if __SSE2__ #include #endif #include "mono_font_data.h" namespace cv { static void get_text_drawing_size(const char* text, int fontpixelsize, int* w, int* h) { *w = 0; *h = 0; const int n = strlen(text); int line_w = 0; for (int i = 0; i < n; i++) { char ch = text[i]; if (ch == '\n') { // newline *w = std::max(*w, line_w); *h += fontpixelsize * 2; line_w = 0; } if (isprint(ch) != 0) { line_w += fontpixelsize; } } *w = std::max(*w, line_w); *h += fontpixelsize * 2; } static void resize_bilinear_font(const unsigned char* font_bitmap, unsigned char* resized_font_bitmap, int fontpixelsize) { const int INTER_RESIZE_COEF_BITS = 11; const int INTER_RESIZE_COEF_SCALE = 1 << INTER_RESIZE_COEF_BITS; const int srcw = 20; const int srch = 40; const int w = fontpixelsize; const int h = fontpixelsize * 2; double scale = (double)srcw / w; int* buf = new int[w + h + w + h]; int* xofs = buf; //new int[w]; int* yofs = buf + w; //new int[h]; short* ialpha = (short*)(buf + w + h); //new short[w * 2]; short* ibeta = (short*)(buf + w + h + w); //new short[h * 2]; float fx; float fy; int sx; int sy; #define SATURATE_CAST_SHORT(X) (short)::std::min(::std::max((int)(X + (X >= 0.f ? 0.5f : -0.5f)), SHRT_MIN), SHRT_MAX); for (int dx = 0; dx < w; dx++) { fx = (float)((dx + 0.5) * scale - 0.5); sx = static_cast(floor(fx)); fx -= sx; xofs[dx] = sx; float a0 = (1.f - fx) * INTER_RESIZE_COEF_SCALE; float a1 = fx * INTER_RESIZE_COEF_SCALE; ialpha[dx * 2] = SATURATE_CAST_SHORT(a0); ialpha[dx * 2 + 1] = SATURATE_CAST_SHORT(a1); } for (int dy = 0; dy < h; dy++) { fy = (float)((dy + 0.5) * scale - 0.5); sy = static_cast(floor(fy)); fy -= sy; yofs[dy] = sy; float b0 = (1.f - fy) * INTER_RESIZE_COEF_SCALE; float b1 = fy * INTER_RESIZE_COEF_SCALE; ibeta[dy * 2] = SATURATE_CAST_SHORT(b0); ibeta[dy * 2 + 1] = SATURATE_CAST_SHORT(b1); } #undef SATURATE_CAST_SHORT // loop body std::vector rowsbuf0(w); std::vector rowsbuf1(w); short* rows0 = (short*)rowsbuf0.data(); short* rows1 = (short*)rowsbuf1.data(); { short* rows1p = rows1; for (int dx = 0; dx < w; dx++) { rows1p[dx] = 0; } } int prev_sy1 = -2; for (int dy = 0; dy < h; dy++) { sy = yofs[dy]; if (sy == prev_sy1) { // reuse all rows } else if (sy == prev_sy1 + 1) { // hresize one row short* rows0_old = rows0; rows0 = rows1; rows1 = rows0_old; const unsigned char* S1 = font_bitmap + 10 * (sy + 1); if (sy >= srch - 1) { short* rows1p = rows1; for (int dx = 0; dx < w; dx++) { rows1p[dx] = 0; } } else { const short* ialphap = ialpha; short* rows1p = rows1; for (int dx = 0; dx < w; dx++) { sx = xofs[dx]; short a0 = ialphap[0]; short a1 = ialphap[1]; unsigned char S1p0; unsigned char S1p1; if (sx < 0) { S1p0 = 0; S1p1 = S1[0] & 0x0f; } else if (sx >= srcw - 1) { S1p0 = (S1[9] & 0xf0) >> 4; S1p1 = 0; } else { S1p0 = sx % 2 == 0 ? S1[sx / 2] & 0x0f : (S1[sx / 2] & 0xf0) >> 4; S1p1 = sx % 2 == 0 ? (S1[sx / 2] & 0xf0) >> 4 : S1[sx / 2 + 1] & 0x0f; } rows1p[dx] = (S1p0 * a0 + S1p1 * a1) * 17 >> 4; ialphap += 2; } } } else { // hresize two rows const unsigned char* S0 = font_bitmap + 10 * (sy); const unsigned char* S1 = font_bitmap + 10 * (sy + 1); if (sy >= srch - 1) { const short* ialphap = ialpha; short* rows0p = rows0; short* rows1p = rows1; for (int dx = 0; dx < w; dx++) { sx = xofs[dx]; short a0 = ialphap[0]; short a1 = ialphap[1]; unsigned char S0p0; unsigned char S0p1; if (sx < 0) { S0p0 = 0; S0p1 = S0[0] & 0x0f; } else if (sx >= srcw - 1) { S0p0 = (S0[9] & 0xf0) >> 4; S0p1 = 0; } else { S0p0 = sx % 2 == 0 ? S0[sx / 2] & 0x0f : (S0[sx / 2] & 0xf0) >> 4; S0p1 = sx % 2 == 0 ? (S0[sx / 2] & 0xf0) >> 4 : S0[sx / 2 + 1] & 0x0f; } rows0p[dx] = (S0p0 * a0 + S0p1 * a1) * 17 >> 4; rows1p[dx] = 0; ialphap += 2; } } else { const short* ialphap = ialpha; short* rows0p = rows0; short* rows1p = rows1; for (int dx = 0; dx < w; dx++) { sx = xofs[dx]; short a0 = ialphap[0]; short a1 = ialphap[1]; unsigned char S0p0; unsigned char S0p1; unsigned char S1p0; unsigned char S1p1; if (sx < 0) { S0p0 = 0; S0p1 = S0[0] & 0x0f; S1p0 = 0; S1p1 = S1[0] & 0x0f; } else if (sx >= srcw - 1) { S0p0 = (S0[9] & 0xf0) >> 4; S0p1 = 0; S1p0 = (S1[9] & 0xf0) >> 4; S1p1 = 0; } else { S0p0 = sx % 2 == 0 ? S0[sx / 2] & 0x0f : (S0[sx / 2] & 0xf0) >> 4; S0p1 = sx % 2 == 0 ? (S0[sx / 2] & 0xf0) >> 4 : S0[sx / 2 + 1] & 0x0f; S1p0 = sx % 2 == 0 ? S1[sx / 2] & 0x0f : (S1[sx / 2] & 0xf0) >> 4; S1p1 = sx % 2 == 0 ? (S1[sx / 2] & 0xf0) >> 4 : S1[sx / 2 + 1] & 0x0f; } rows0p[dx] = (S0p0 * a0 + S0p1 * a1) * 17 >> 4; rows1p[dx] = (S1p0 * a0 + S1p1 * a1) * 17 >> 4; ialphap += 2; } } } prev_sy1 = sy; if (dy + 1 < h && yofs[dy + 1] == sy) { // vresize for two rows short b0 = ibeta[0]; short b1 = ibeta[1]; short b2 = ibeta[2]; short b3 = ibeta[3]; short* rows0p = rows0; short* rows1p = rows1; unsigned char* Dp0 = resized_font_bitmap + w * (dy); unsigned char* Dp1 = resized_font_bitmap + w * (dy + 1); int dx = 0; #if __ARM_NEON int16x8_t _b0 = vdupq_n_s16(b0); int16x8_t _b1 = vdupq_n_s16(b1); int16x8_t _b2 = vdupq_n_s16(b2); int16x8_t _b3 = vdupq_n_s16(b3); for (; dx + 15 < w; dx += 16) { int16x8_t _r00 = vld1q_s16(rows0p); int16x8_t _r01 = vld1q_s16(rows0p + 8); int16x8_t _r10 = vld1q_s16(rows1p); int16x8_t _r11 = vld1q_s16(rows1p + 8); int16x8_t _acc00 = vaddq_s16(vqdmulhq_s16(_r00, _b0), vqdmulhq_s16(_r10, _b1)); int16x8_t _acc01 = vaddq_s16(vqdmulhq_s16(_r01, _b0), vqdmulhq_s16(_r11, _b1)); int16x8_t _acc10 = vaddq_s16(vqdmulhq_s16(_r00, _b2), vqdmulhq_s16(_r10, _b3)); int16x8_t _acc11 = vaddq_s16(vqdmulhq_s16(_r01, _b2), vqdmulhq_s16(_r11, _b3)); uint8x16_t _Dp0 = vcombine_u8(vqrshrun_n_s16(_acc00, 3), vqrshrun_n_s16(_acc01, 3)); uint8x16_t _Dp1 = vcombine_u8(vqrshrun_n_s16(_acc10, 3), vqrshrun_n_s16(_acc11, 3)); vst1q_u8(Dp0, _Dp0); vst1q_u8(Dp1, _Dp1); Dp0 += 16; Dp1 += 16; rows0p += 16; rows1p += 16; } for (; dx + 7 < w; dx += 8) { int16x8_t _r0 = vld1q_s16(rows0p); int16x8_t _r1 = vld1q_s16(rows1p); int16x8_t _acc0 = vaddq_s16(vqdmulhq_s16(_r0, _b0), vqdmulhq_s16(_r1, _b1)); int16x8_t _acc1 = vaddq_s16(vqdmulhq_s16(_r0, _b2), vqdmulhq_s16(_r1, _b3)); uint8x8_t _Dp0 = vqrshrun_n_s16(_acc0, 3); uint8x8_t _Dp1 = vqrshrun_n_s16(_acc1, 3); vst1_u8(Dp0, _Dp0); vst1_u8(Dp1, _Dp1); Dp0 += 8; Dp1 += 8; rows0p += 8; rows1p += 8; } #endif // __ARM_NEON #if __SSE2__ __m128i _b0 = _mm_set1_epi16(b0); __m128i _b1 = _mm_set1_epi16(b1); __m128i _b2 = _mm_set1_epi16(b2); __m128i _b3 = _mm_set1_epi16(b3); __m128i _v2 = _mm_set1_epi16(2); for (; dx + 15 < w; dx += 16) { __m128i _r00 = _mm_loadu_si128((const __m128i*)rows0p); __m128i _r01 = _mm_loadu_si128((const __m128i*)(rows0p + 8)); __m128i _r10 = _mm_loadu_si128((const __m128i*)rows1p); __m128i _r11 = _mm_loadu_si128((const __m128i*)(rows1p + 8)); __m128i _acc00 = _mm_add_epi16(_mm_mulhi_epi16(_r00, _b0), _mm_mulhi_epi16(_r10, _b1)); __m128i _acc01 = _mm_add_epi16(_mm_mulhi_epi16(_r01, _b0), _mm_mulhi_epi16(_r11, _b1)); __m128i _acc10 = _mm_add_epi16(_mm_mulhi_epi16(_r00, _b2), _mm_mulhi_epi16(_r10, _b3)); __m128i _acc11 = _mm_add_epi16(_mm_mulhi_epi16(_r01, _b2), _mm_mulhi_epi16(_r11, _b3)); _acc00 = _mm_srai_epi16(_mm_add_epi16(_acc00, _v2), 2); _acc01 = _mm_srai_epi16(_mm_add_epi16(_acc01, _v2), 2); _acc10 = _mm_srai_epi16(_mm_add_epi16(_acc10, _v2), 2); _acc11 = _mm_srai_epi16(_mm_add_epi16(_acc11, _v2), 2); __m128i _Dp0 = _mm_packus_epi16(_acc00, _acc01); __m128i _Dp1 = _mm_packus_epi16(_acc10, _acc11); _mm_storeu_si128((__m128i*)Dp0, _Dp0); _mm_storeu_si128((__m128i*)Dp1, _Dp1); Dp0 += 16; Dp1 += 16; rows0p += 16; rows1p += 16; } for (; dx + 7 < w; dx += 8) { __m128i _r0 = _mm_loadu_si128((const __m128i*)rows0p); __m128i _r1 = _mm_loadu_si128((const __m128i*)rows1p); __m128i _acc0 = _mm_add_epi16(_mm_mulhi_epi16(_r0, _b0), _mm_mulhi_epi16(_r1, _b1)); __m128i _acc1 = _mm_add_epi16(_mm_mulhi_epi16(_r0, _b2), _mm_mulhi_epi16(_r1, _b3)); _acc0 = _mm_srai_epi16(_mm_add_epi16(_acc0, _v2), 2); _acc1 = _mm_srai_epi16(_mm_add_epi16(_acc1, _v2), 2); __m128i _Dp0 = _mm_packus_epi16(_acc0, _acc0); __m128i _Dp1 = _mm_packus_epi16(_acc1, _acc1); _mm_storel_epi64((__m128i*)Dp0, _Dp0); _mm_storel_epi64((__m128i*)Dp1, _Dp1); Dp0 += 8; Dp1 += 8; rows0p += 8; rows1p += 8; } #endif // __SSE2__ for (; dx < w; dx++) { short s0 = *rows0p++; short s1 = *rows1p++; *Dp0++ = (unsigned char)(((short)((b0 * s0) >> 16) + (short)((b1 * s1) >> 16) + 2) >> 2); *Dp1++ = (unsigned char)(((short)((b2 * s0) >> 16) + (short)((b3 * s1) >> 16) + 2) >> 2); } ibeta += 4; dy += 1; } else { // vresize short b0 = ibeta[0]; short b1 = ibeta[1]; short* rows0p = rows0; short* rows1p = rows1; unsigned char* Dp = resized_font_bitmap + w * (dy); int dx = 0; #if __ARM_NEON int16x8_t _b0 = vdupq_n_s16(b0); int16x8_t _b1 = vdupq_n_s16(b1); for (; dx + 15 < w; dx += 16) { int16x8_t _r00 = vld1q_s16(rows0p); int16x8_t _r01 = vld1q_s16(rows0p + 8); int16x8_t _r10 = vld1q_s16(rows1p); int16x8_t _r11 = vld1q_s16(rows1p + 8); int16x8_t _acc0 = vaddq_s16(vqdmulhq_s16(_r00, _b0), vqdmulhq_s16(_r10, _b1)); int16x8_t _acc1 = vaddq_s16(vqdmulhq_s16(_r01, _b0), vqdmulhq_s16(_r11, _b1)); uint8x16_t _Dp = vcombine_u8(vqrshrun_n_s16(_acc0, 3), vqrshrun_n_s16(_acc1, 3)); vst1q_u8(Dp, _Dp); Dp += 16; rows0p += 16; rows1p += 16; } for (; dx + 7 < w; dx += 8) { int16x8_t _r0 = vld1q_s16(rows0p); int16x8_t _r1 = vld1q_s16(rows1p); int16x8_t _acc = vaddq_s16(vqdmulhq_s16(_r0, _b0), vqdmulhq_s16(_r1, _b1)); uint8x8_t _Dp = vqrshrun_n_s16(_acc, 3); vst1_u8(Dp, _Dp); Dp += 8; rows0p += 8; rows1p += 8; } #endif // __ARM_NEON #if __SSE2__ __m128i _b0 = _mm_set1_epi16(b0); __m128i _b1 = _mm_set1_epi16(b1); __m128i _v2 = _mm_set1_epi16(2); for (; dx + 15 < w; dx += 16) { __m128i _r00 = _mm_loadu_si128((const __m128i*)rows0p); __m128i _r01 = _mm_loadu_si128((const __m128i*)(rows0p + 8)); __m128i _r10 = _mm_loadu_si128((const __m128i*)rows1p); __m128i _r11 = _mm_loadu_si128((const __m128i*)(rows1p + 8)); __m128i _acc0 = _mm_add_epi16(_mm_mulhi_epi16(_r00, _b0), _mm_mulhi_epi16(_r10, _b1)); __m128i _acc1 = _mm_add_epi16(_mm_mulhi_epi16(_r01, _b0), _mm_mulhi_epi16(_r11, _b1)); _acc0 = _mm_srai_epi16(_mm_add_epi16(_acc0, _v2), 2); _acc1 = _mm_srai_epi16(_mm_add_epi16(_acc1, _v2), 2); __m128i _Dp = _mm_packus_epi16(_acc0, _acc1); _mm_storeu_si128((__m128i*)Dp, _Dp); Dp += 16; rows0p += 16; rows1p += 16; } for (; dx + 7 < w; dx += 8) { __m128i _r0 = _mm_loadu_si128((const __m128i*)rows0p); __m128i _r1 = _mm_loadu_si128((const __m128i*)rows1p); __m128i _acc = _mm_add_epi16(_mm_mulhi_epi16(_r0, _b0), _mm_mulhi_epi16(_r1, _b1)); _acc = _mm_srai_epi16(_mm_add_epi16(_acc, _v2), 2); __m128i _Dp = _mm_packus_epi16(_acc, _acc); _mm_storel_epi64((__m128i*)Dp, _Dp); Dp += 8; rows0p += 8; rows1p += 8; } #endif // __SSE2__ for (; dx < w; dx++) { short s0 = *rows0p++; short s1 = *rows1p++; *Dp++ = (unsigned char)(((short)((b0 * s0) >> 16) + (short)((b1 * s1) >> 16) + 2) >> 2); } ibeta += 2; } } delete[] buf; } static void draw_text_c1(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color) { const unsigned char* pen_color = (const unsigned char*)&color; unsigned char* resized_font_bitmap = new unsigned char[fontpixelsize * fontpixelsize * 2]; const int n = strlen(text); int cursor_x = x; int cursor_y = y; for (int i = 0; i < n; i++) { char ch = text[i]; if (ch == '\n') { // newline cursor_x = x; cursor_y += fontpixelsize * 2; continue; } if (ch == ' ') { cursor_x += fontpixelsize; continue; } if (isprint(ch) != 0) { const unsigned char* font_bitmap = mono_font_data[ch - '!']; // draw resized character resize_bilinear_font(font_bitmap, resized_font_bitmap, fontpixelsize); const int ystart = std::max(cursor_y, 0); const int yend = std::min(cursor_y + fontpixelsize * 2, h); const int xstart = std::max(cursor_x, 0); const int xend = std::min(cursor_x + fontpixelsize, w); for (int j = ystart; j < yend; j++) { const unsigned char* palpha = resized_font_bitmap + (j - cursor_y) * fontpixelsize + xstart - cursor_x; unsigned char* p = pixels + stride * j + xstart; for (int k = xstart; k < xend; k++) { unsigned char alpha = *palpha++; p[0] = (p[0] * (255 - alpha) + pen_color[0] * alpha) / 255; p += 1; } } cursor_x += fontpixelsize; } } delete[] resized_font_bitmap; } static void draw_text_c3(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color) { const unsigned char* pen_color = (const unsigned char*)&color; unsigned char* resized_font_bitmap = new unsigned char[fontpixelsize * fontpixelsize * 2]; const int n = strlen(text); int cursor_x = x; int cursor_y = y; for (int i = 0; i < n; i++) { char ch = text[i]; if (ch == '\n') { // newline cursor_x = x; cursor_y += fontpixelsize * 2; continue; } if (ch == ' ') { cursor_x += fontpixelsize; continue; } if (isprint(ch) != 0) { const unsigned char* font_bitmap = mono_font_data[ch - '!']; // draw resized character resize_bilinear_font(font_bitmap, resized_font_bitmap, fontpixelsize); const int ystart = std::max(cursor_y, 0); const int yend = std::min(cursor_y + fontpixelsize * 2, h); const int xstart = std::max(cursor_x, 0); const int xend = std::min(cursor_x + fontpixelsize, w); for (int j = ystart; j < yend; j++) { const unsigned char* palpha = resized_font_bitmap + (j - cursor_y) * fontpixelsize + xstart - cursor_x; unsigned char* p = pixels + stride * j + xstart * 3; for (int k = xstart; k < xend; k++) { unsigned char alpha = *palpha++; p[0] = (p[0] * (255 - alpha) + pen_color[0] * alpha) / 255; p[1] = (p[1] * (255 - alpha) + pen_color[1] * alpha) / 255; p[2] = (p[2] * (255 - alpha) + pen_color[2] * alpha) / 255; p += 3; } } cursor_x += fontpixelsize; } } delete[] resized_font_bitmap; } static void draw_text_c4(unsigned char* pixels, int w, int h, int stride, const char* text, int x, int y, int fontpixelsize, unsigned int color) { const unsigned char* pen_color = (const unsigned char*)&color; unsigned char* resized_font_bitmap = new unsigned char[fontpixelsize * fontpixelsize * 2]; const int n = strlen(text); int cursor_x = x; int cursor_y = y; for (int i = 0; i < n; i++) { char ch = text[i]; if (ch == '\n') { // newline cursor_x = x; cursor_y += fontpixelsize * 2; continue; } if (ch == ' ') { cursor_x += fontpixelsize; continue; } if (isprint(ch) != 0) { const unsigned char* font_bitmap = mono_font_data[ch - '!']; // draw resized character resize_bilinear_font(font_bitmap, resized_font_bitmap, fontpixelsize); const int ystart = std::max(cursor_y, 0); const int yend = std::min(cursor_y + fontpixelsize * 2, h); const int xstart = std::max(cursor_x, 0); const int xend = std::min(cursor_x + fontpixelsize, w); for (int j = ystart; j < yend; j++) { const unsigned char* palpha = resized_font_bitmap + (j - cursor_y) * fontpixelsize + xstart - cursor_x; unsigned char* p = pixels + stride * j + xstart * 4; for (int k = xstart; k < xend; k++) { unsigned char alpha = *palpha++; p[0] = (p[0] * (255 - alpha) + pen_color[0] * alpha) / 255; p[1] = (p[1] * (255 - alpha) + pen_color[1] * alpha) / 255; p[2] = (p[2] * (255 - alpha) + pen_color[2] * alpha) / 255; p[3] = (p[3] * (255 - alpha) + pen_color[3] * alpha) / 255; p += 4; } } cursor_x += fontpixelsize; } } delete[] resized_font_bitmap; } static void draw_text_c1(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color) { return draw_text_c1(pixels, w, h, w, text, x, y, fontpixelsize, color); } static void draw_text_c3(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color) { return draw_text_c3(pixels, w, h, w * 3, text, x, y, fontpixelsize, color); } static void draw_text_c4(unsigned char* pixels, int w, int h, const char* text, int x, int y, int fontpixelsize, unsigned int color) { return draw_text_c4(pixels, w, h, w * 4, text, x, y, fontpixelsize, color); } }