[Other] Refactor js submodule (#415)

* Refactor js submodule

* Remove change-log

* Update ocr module

* Update ocr-detection module

* Update ocr-detection module

* Remove change-log
This commit is contained in:
chenqianhe
2022-10-23 14:05:13 +08:00
committed by GitHub
parent 30971cf3fd
commit f2619b0546
273 changed files with 14697 additions and 5088 deletions

View File

@@ -0,0 +1,212 @@
import clipper from 'js-clipper';
import { divide, enableBoundaryChecking, plus } from 'number-precision';
import CV from '@paddlejs-mediapipe/opencv/library/opencv_ocr';
import {clip} from './util';
import * as d3Polygon from 'd3-polygon';
import { BOX, POINT, POINTS } from "./type";
/**
 * Turns the raw output of a text-detection model into text boxes.
 *
 * The constructor thresholds the per-pixel prediction map into a binary
 * segmentation map. `outputBox()` then extracts contours from that map with
 * OpenCV, scores every candidate against the prediction map and enlarges the
 * boxes that survive.
 */
export default class DBPostprocess {
    /** Pixel threshold used to binarize the prediction map. */
    private readonly thresh: number;
    /** Minimum mean prediction inside a box for the box to be kept. */
    private readonly box_thresh: number;
    /** Upper bound on the number of contours examined by `outputBox()`. */
    private readonly max_candidates: number;
    /** Expansion factor applied by `unclip`. */
    private readonly unclip_ratio: number;
    /** Minimum short side of a candidate rectangle, in map pixels. */
    private readonly min_size: number;
    /** Prediction map, stored row by row with `width` values per row. */
    private readonly pred: number[];
    /** Binarized copy of `pred`: 255 where `pred > thresh`, otherwise 0. */
    private readonly segmentation: number[];
    private readonly width: number;
    private readonly height: number;

    /**
     * @param result flat prediction map produced by the detection model
     * @param shape `[width, height]` of the prediction map
     * @param thresh binarization threshold; a falsy value falls back to 0.3
     * @param box_thresh minimum box score; a falsy value falls back to 0.6
     * @param unclip_ratio box expansion factor; a falsy value falls back to 1.5
     */
    constructor(result: number[], shape: number[], thresh = 0.3, box_thresh = 0.6, unclip_ratio = 1.5) {
        enableBoundaryChecking(false);
        this.thresh = thresh || 0.3;
        this.box_thresh = box_thresh || 0.6;
        this.max_candidates = 1000;
        this.unclip_ratio = unclip_ratio || 1.5;
        this.min_size = 3;
        this.width = shape[0];
        this.height = shape[1];
        this.pred = result;
        this.segmentation = Array.from(this.pred, (item: number) => (item > this.thresh ? 255 : 0));
    }

    /**
     * Extracts text boxes from the segmentation map.
     *
     * @returns `boxes` (four corner points each, clipped to the map size) and
     * the matching `scores`
     */
    public outputBox() {
        // A Mat takes (rows, cols): rows is the height because the map is stored row by row.
        const src = CV.matFromArray(this.height, this.width, CV.CV_8UC1, this.segmentation);
        const contours = new CV.MatVector();
        const hierarchy = new CV.Mat();
        const boxes: BOX = [];
        const scores: number[] = [];
        try {
            // Find the contours of the binarized map.
            CV.findContours(src, contours, hierarchy, CV.RETR_LIST, CV.CHAIN_APPROX_SIMPLE);
            const num_contours = Math.min(contours.size(), this.max_candidates);
            for (let i = 0; i < num_contours; i++) {
                // get() hands out a new Mat handle that has to be released by the caller.
                const contour = contours.get(i);
                try {
                    const candidate = this.box_from_contour(contour);
                    if (candidate) {
                        boxes.push(candidate.box);
                        scores.push(candidate.score);
                    }
                }
                finally {
                    contour.delete();
                }
            }
        }
        finally {
            src.delete();
            contours.delete();
            hierarchy.delete();
        }
        return { boxes, scores };
    }

    /**
     * Converts one contour into a scored, enlarged box.
     *
     * @returns the box and its score, or `null` when the contour is too
     * small, scores below `box_thresh`, or cannot be enlarged
     */
    private box_from_contour(contour: any): { box: POINTS; score: number } | null {
        const { points, side } = this.get_mini_boxes(contour);
        if (side < this.min_size) {
            return null;
        }
        const score = this.box_score_fast(this.pred, points);
        // Written as a negated >= so that a NaN score is rejected as well.
        if (!(score >= this.box_thresh)) {
            return null;
        }
        const expanded = this.unclip(points);
        if (expanded.length === 0) {
            return null;
        }
        const expandedMat = CV.matFromArray(expanded.length / 2, 1, CV.CV_32SC2, expanded);
        let enlarged: { points: POINTS; side: number };
        try {
            enlarged = this.get_mini_boxes(expandedMat);
        }
        finally {
            expandedMat.delete();
        }
        if (enlarged.side < this.min_size + 2) {
            return null;
        }
        const box: POINTS = enlarged.points.map((item): POINT => [
            clip(Math.round(item[0]), 0, this.width),
            clip(Math.round(item[1]), 0, this.height)
        ]);
        return { box, score };
    }

    /**
     * Computes the minimum-area rectangle around a contour.
     *
     * @returns the four corners and the length of the rectangle's short side.
     * The two corners with the smallest x come first and last, the other two
     * in between; within each pair the corner with the smaller y precedes
     * the one with the larger y in the cycle (index 0 before 3, 1 before 2).
     */
    private get_mini_boxes(contour: any) {
        // Minimum-area bounding rectangle of the contour.
        const bounding_box = CV.minAreaRect(contour);
        const points: POINTS = [];
        const mat = new CV.Mat();
        try {
            // Read the four corner coordinates of the rectangle.
            CV.boxPoints(bounding_box, mat);
            for (let i = 0; i < mat.data32F.length; i += 2) {
                points.push([mat.data32F[i], mat.data32F[i + 1]]);
            }
        }
        finally {
            mat.delete();
        }
        points.sort((a: POINT, b: POINT) => a[0] - b[0]);
        const [leftA, leftB, rightA, rightB] = points;
        const [firstLeft, lastLeft] = leftB[1] > leftA[1] ? [leftA, leftB] : [leftB, leftA];
        const [firstRight, lastRight] = rightB[1] > rightA[1] ? [rightA, rightB] : [rightB, rightA];
        const box: POINTS = [firstLeft, firstRight, lastRight, lastLeft];
        const side = Math.min(bounding_box.size.height, bounding_box.size.width);
        return { points: box, side };
    }

    /**
     * Scores a box as the mean of the prediction map over the box polygon.
     *
     * @param bitmap prediction map, `this.width` values per row
     * @param _box corner points of the box (not modified)
     */
    private box_score_fast(bitmap: number[], _box: POINTS) {
        const h = this.height;
        const w = this.width;
        const xs = _box.map((item: POINT) => item[0]);
        const ys = _box.map((item: POINT) => item[1]);
        // clip() limits a value to [min, max]: larger values become max, smaller ones min.
        const xmin = clip(Math.floor(Math.min(...xs)), 0, w - 1);
        const xmax = clip(Math.ceil(Math.max(...xs)), 0, w - 1);
        const ymin = clip(Math.floor(Math.min(...ys)), 0, h - 1);
        const ymax = clip(Math.ceil(Math.max(...ys)), 0, h - 1);
        // eslint-disable-next-line new-cap
        const mask = new CV.Mat.zeros(ymax - ymin + 1, xmax - xmin + 1, CV.CV_8UC1);
        // Polygon coordinates relative to the window. Int32Array matches the
        // CV_32SC2 Mat; an 8-bit array would wrap every coordinate above 255.
        const point_data = new Int32Array(_box.length * 2);
        _box.forEach((item: POINT, i: number) => {
            point_data[2 * i] = Math.max(item[0] - xmin, 0);
            point_data[2 * i + 1] = Math.max(item[1] - ymin, 0);
        });
        const points = CV.matFromArray(_box.length, 1, CV.CV_32SC2, point_data);
        const pts = new CV.MatVector();
        try {
            pts.push_back(points);
            // Fill the polygon into the mask.
            CV.fillPoly(mask, pts, new CV.Scalar(255), 1);
            // Copy the window out of the map; every row starts at `w * row`.
            const sliceArr: number[] = [];
            for (let row = ymin; row <= ymax; row++) {
                const rowStart = w * row;
                for (let col = xmin; col <= xmax; col++) {
                    sliceArr.push(bitmap[rowStart + col]);
                }
            }
            return this.mean(sliceArr, mask.data);
        }
        finally {
            mask.delete();
            points.delete();
            pts.delete();
        }
    }

    /**
     * Enlarges a box by offsetting its outline outwards by
     * `area * unclip_ratio / perimeter`.
     *
     * @returns the enlarged outline as a flat `[x0, y0, x1, y1, ...]` list,
     * empty when the offsetting produced no polygon
     */
    private unclip(box: POINTS): number[] {
        const polygon = box as [number, number][];
        const area = Math.abs(d3Polygon.polygonArea(polygon));
        const length = d3Polygon.polygonLength(polygon);
        const distance = area * this.unclip_ratio / length;
        const path = box.map((item: POINT) => ({ X: item[0], Y: item[1] }));
        const offset = new clipper.ClipperOffset();
        offset.AddPath(path, clipper.JoinType.jtRound, clipper.EndType.etClosedPolygon);
        const expanded: { X: number; Y: number; }[][] = [];
        offset.Execute(expanded, distance);
        const flat: number[] = [];
        if (expanded[0]) {
            expanded[0].forEach(item => {
                flat.push(item.X, item.Y);
            });
        }
        return flat;
    }

    /**
     * Mean of the `data` entries whose `mask` entry is non-zero.
     *
     * @returns 0 when the mask selects nothing, so that an empty mask can
     * never pass the score threshold
     */
    private mean(data: number[], mask: ArrayLike<number>) {
        let sum = 0;
        let count = 0;
        for (let i = 0; i < data.length; i++) {
            if (mask[i]) {
                sum = plus(sum, data[i]);
                count++;
            }
        }
        return count === 0 ? 0 : divide(sum, count);
    }
}

View File

@@ -0,0 +1 @@
// Shorthand ambient module declaration: lets `import x from './some-file.txt'`
// type-check. Because the declaration has no body, everything imported from a
// matching module is typed `any`.
declare module '*.txt'

View File

@@ -0,0 +1,326 @@
/**
* @file ocr_rec model
*/
import { Runner, env } from '@paddlejs/paddlejs-core';
import '@paddlejs/paddlejs-backend-webgl';
import { BOX, POINTS, POINT } from "./type"
import DBProcess from './dbPostprocess';
import RecProcess from './recPostprocess';
import cv from '@paddlejs-mediapipe/opencv/library/opencv_ocr';
import { flatten, int, clip } from './util';
/** Options controlling whether and how `recognize()` draws the detected text boxes. */
export interface DrawBoxOptions {
// Canvas the source image and the boxes are drawn onto; nothing is drawn when it is omitted.
canvas?: HTMLCanvasElement;
// Stroke/fill settings forwarded to the box-drawing routine.
style?: CanvasStyleOptions;
}
/** Canvas 2D styling applied to every drawn text box. */
export interface CanvasStyleOptions {
// Outline colour; the drawing code falls back to '#000' when unset.
strokeStyle?: string;
// Outline width; the drawing code falls back to 1 when unset.
lineWidth?: number;
// Fill colour; the drawing code falls back to 'transparent' when unset.
fillStyle?: string;
}
/** Tunable parameters of the text-detection step. */
export interface DetPostConfig {
// Side length of the square canvas the image is fitted into before detection;
// a falsy value makes detect() use the module default instead.
shape: number;
// Passed to the detection post-processor as its `thresh` argument.
thresh: number;
// Passed to the detection post-processor as its `box_thresh` argument.
box_thresh: number;
// Passed to the detection post-processor as its `unclip_ratio` argument.
unclip_ratio: number;
}
// Detection parameters used by detect()/recognize() when the caller passes none.
const defaultPostConfig: DetPostConfig = {shape: 960, thresh: 0.3, box_thresh: 0.6, unclip_ratio:1.5};
// Fallback detection input size; init() overwrites it with the detection
// model's feed width when the runner reports a feed shape.
let DEFAULTDETSHAPE = 960;
// Recognition input size; init() overwrites both values with the recognition
// model's feed shape when the runner reports one.
let RECWIDTH = 320;
let RECHEIGHT = 48;
// Scratch canvases shared by the whole module: canvas_det holds the detection
// input and later each cropped text line, canvas_rec holds one slice of a long line.
const canvas_det = document.createElement('canvas') as HTMLCanvasElement;
const canvas_rec = document.createElement('canvas') as HTMLCanvasElement;
// Model runners; both stay null until init() has been called.
let detectRunner = null as Runner;
let recRunner = null as Runner;
// NOTE(review): these calls touch document.body at import time, so the module
// presumably has to be loaded after <body> exists — confirm.
initCanvas(canvas_det);
initCanvas(canvas_rec);
/**
 * Styles a scratch canvas so that it is invisible (fixed to the bottom of the
 * viewport, behind the page, fully transparent) and attaches it to the body.
 *
 * @param canvas canvas element to hide and append
 */
function initCanvas(canvas: HTMLCanvasElement) {
    Object.assign(canvas.style, {
        position: 'fixed',
        bottom: '0',
        zIndex: '-1',
        opacity: '0'
    });
    document.body.appendChild(canvas);
}
/**
 * Creates and initializes the detection and recognition runners.
 *
 * Both models are loaded in parallel. Once loaded, the module-level input
 * sizes are updated from the feed shapes the runners report.
 *
 * @param detCustomModel URL of a custom detection model; empty selects the built-in one
 * @param recCustomModel URL of a custom recognition model; empty selects the built-in one
 */
export async function init(detCustomModel = '', recCustomModel = '') {
    const builtinDetModel = 'https://js-models.bj.bcebos.com/PaddleOCR/PP-OCRv3/ch_PP-OCRv3_det_infer_js_960/model.json';
    const builtinRecModel = 'https://js-models.bj.bcebos.com/PaddleOCR/PP-OCRv3/ch_PP-OCRv3_rec_infer_js/model.json';
    env.set('webgl_pack_output', true);

    detectRunner = new Runner({
        modelPath: detCustomModel || builtinDetModel,
        fill: '#fff',
        mean: [0.485, 0.456, 0.406],
        std: [0.229, 0.224, 0.225],
        bgr: true,
        webglFeedProcess: true
    });
    const pending = [detectRunner.init()];

    recRunner = new Runner({
        modelPath: recCustomModel || builtinRecModel,
        fill: '#000',
        mean: [0.5, 0.5, 0.5],
        std: [0.5, 0.5, 0.5],
        bgr: true,
        webglFeedProcess: true
    });
    pending.push(recRunner.init());

    await Promise.all(pending);

    // Adopt the input sizes of the models that were actually loaded.
    if (detectRunner.feedShape) {
        DEFAULTDETSHAPE = detectRunner.feedShape.fw;
    }
    if (recRunner.feedShape) {
        RECWIDTH = recRunner.feedShape.fw;
        RECHEIGHT = recRunner.feedShape.fh;
    }
}
/**
 * Runs text detection on an image.
 *
 * The image is letterboxed onto the square detection canvas, fed to the
 * detection runner, and the resulting boxes are mapped back to the
 * coordinates of the original image and enlarged slightly.
 *
 * @param image source image
 * @param Config detection parameters
 * @returns detected boxes, four `[x, y]` corners each, in image coordinates
 */
async function detect(image: HTMLImageElement, Config:DetPostConfig = defaultPostConfig) {
    // Target size of the (square) detection input.
    const DETSHAPE = Config.shape || DEFAULTDETSHAPE;
    const { thresh, box_thresh, unclip_ratio } = Config;
    const targetWidth = DETSHAPE;
    const targetHeight = DETSHAPE;
    canvas_det.width = targetWidth;
    canvas_det.height = targetHeight;

    // Use the canvas to scale the uploaded image to the target size.
    const ctx = canvas_det.getContext('2d');
    ctx!.fillStyle = '#fff';
    ctx!.fillRect(0, 0, targetHeight, targetWidth);

    // When the target is relatively wider than the image, the image is scaled
    // to the target height; otherwise it is scaled to the target width.
    const fitToHeight = targetWidth / targetHeight * image.naturalHeight / image.naturalWidth >= 1;
    // Scaled size of the image and its offset inside the canvas.
    const sw = fitToHeight ? Math.round(targetHeight * image.naturalWidth / image.naturalHeight) : targetWidth;
    const sh = fitToHeight ? targetHeight : Math.round(targetWidth * image.naturalHeight / image.naturalWidth);
    const x = fitToHeight ? Math.floor((targetWidth - sw) / 2) : 0;
    const y = fitToHeight ? 0 : Math.floor((targetHeight - sh) / 2);
    ctx!.drawImage(image, x, y, sw, sh);

    const outsDict = await detectRunner.predict(canvas_det);
    // Extract the box coordinates from the model output.
    const result = new DBProcess(outsDict, [DETSHAPE, DETSHAPE], thresh, box_thresh, unclip_ratio).outputBox();
    // Work on a copy while converting to original-image coordinates.
    const points = JSON.parse(JSON.stringify(result.boxes));
    // Number of pixels each corner is pushed outwards.
    const adjust = 8;
    if (points) {
        const longest = Math.max(image.naturalWidth, image.naturalHeight);
        for (const item of points as [number, number][][]) {
            item.forEach((point, index) => {
                // Enlarge the box a little to make the text easier to recognize:
                // corners 0 and 3 move left, 1 and 2 right; 0 and 1 move up, 2 and 3 down.
                const dx = index === 1 || index === 2 ? adjust : -adjust;
                const dy = index === 0 || index === 1 ? -adjust : adjust;
                point[0] = clip(Math.round(point[0] - x) * longest / DETSHAPE + dx, 0, image.naturalWidth);
                point[1] = clip(Math.round(point[1] - y) * longest / DETSHAPE + dy, 0, image.naturalHeight);
            });
        }
    }
    return points;
}
/**
 * Draws the image and the outline of every detected box onto a canvas.
 *
 * @param points boxes to draw, four `[x, y]` corners each
 * @param image source image; the canvas is resized to its natural size
 * @param canvas canvas to draw on
 * @param style optional stroke/fill settings
 */
function drawBox(
    points: [[number, number], [number, number], [number, number], [number, number]][],
    image: HTMLImageElement,
    canvas: HTMLCanvasElement,
    style?: CanvasStyleOptions
) {
    canvas.width = image.naturalWidth;
    canvas.height = image.naturalHeight;
    const ctx = canvas.getContext('2d');
    ctx!.drawImage(image, 0, 0, canvas.width, canvas.height);
    if (!points) {
        return;
    }
    for (const corners of points as [number, number][][]) {
        // Start a new path for this box.
        ctx!.beginPath();
        // Line colour (black by default), line width (1 by default) and
        // fill colour (transparent by default).
        ctx!.strokeStyle = style?.strokeStyle || '#000';
        ctx!.lineWidth = style?.lineWidth || 1;
        ctx!.fillStyle = style?.fillStyle || 'transparent';
        // Trace the outline from the first corner through the other three.
        ctx!.moveTo(corners[0][0], corners[0][1]);
        for (const k of [1, 2, 3]) {
            ctx!.lineTo(corners[k][0], corners[k][1]);
        }
        // Fill the shape, then close and stroke its outline.
        ctx!.fill();
        ctx!.closePath();
        ctx!.stroke();
        ctx!.restore();
    }
}
/**
 * Detects and recognizes the text in an image.
 *
 * @param image source image
 * @param options drawing options; when a canvas is given, the detected boxes are drawn onto it
 * @param detConfig tunable detection parameters
 * @returns the recognized text of every box and the box coordinates, in matching order
 */
export async function recognize(
    image: HTMLImageElement,
    options?: DrawBoxOptions,
    detConfig:DetPostConfig = defaultPostConfig
) {
    // Corner points of the detected text boxes.
    const point = await detect(image, detConfig);
    // Draw the boxes when a canvas was supplied.
    if (options?.canvas) {
        drawBox(point, image, options.canvas, options.style);
    }
    const boxes = sorted_boxes(point);
    const text_list: string[] = [];
    // ratio defaults to 3, an empirical value; adjust it to the use case.
    const ratio = 3;
    for (const current of boxes) {
        const tmp_box = JSON.parse(JSON.stringify(current));
        // Leaves the cropped text line on canvas_det.
        get_rotate_crop_image(image, tmp_box);
        const width_num = Math.ceil(canvas_det.width / RECWIDTH / ratio);
        let line_text = '';
        /**
         * Long text (line wider than ratio * RECWIDTH): cut the line into
         * slices of ratio * RECWIDTH and concatenate the result of every slice.
         * Short text (line narrower than ratio * RECWIDTH): predict directly.
         */
        if (width_num > 1) {
            // Recognize slice by slice and join the partial results.
            for (let part = 0; part < width_num; part++) {
                resize_norm_img_splice(canvas_det, canvas_det.width, canvas_det.height, part, ratio);
                const sliceOutput = await recRunner.predict(canvas_rec);
                const sliceText = new RecProcess(sliceOutput).outputResult();
                line_text = line_text.concat(sliceText.text);
            }
        }
        else {
            // No slicing: recognize the whole line at once.
            const lineOutput = await recRunner.predict(canvas_det);
            const lineResult = new RecProcess(lineOutput).outputResult();
            line_text = line_text.concat(lineResult.text);
        }
        text_list.push(line_text);
    }
    return { text: text_list, points: point };
}
/**
 * Orders boxes top to bottom and, for neighbours on roughly the same line
 * (first-corner y values less than 10 apart), left to right.
 *
 * The input array is sorted in place and is also the return value.
 *
 * @param box boxes to order
 * @returns the same array, reordered
 */
function sorted_boxes(box: BOX) {
    const ordered = box.sort((upper: POINTS, lower: POINTS) => upper[0][1] - lower[0][1]);
    const lastPair = ordered.length - 1;
    // One pass over adjacent pairs: swap when the next box sits on the same
    // line but starts further to the left.
    for (let k = 0; k < lastPair; k++) {
        const here = ordered[k][0];
        const next = ordered[k + 1][0];
        const sameLine = Math.abs(next[1] - here[1]) < 10;
        if (sameLine && next[0] < here[0]) {
            [ordered[k], ordered[k + 1]] = [ordered[k + 1], ordered[k]];
        }
    }
    return ordered;
}
/**
 * Cuts one text box out of the image, straightens it with a perspective
 * transform, rotates it by 90 degrees when it is much taller than wide,
 * scales it to the recognition height and shows the result on `canvas_det`.
 *
 * Every OpenCV Mat created here is released before returning, including when
 * an OpenCV call throws.
 *
 * @param img image or canvas to crop from
 * @param points the four corners of the text box
 */
function get_rotate_crop_image(img: HTMLCanvasElement | HTMLImageElement, points: POINTS) {
    const img_crop_width = int(Math.max(
        linalg_norm(points[0], points[1]),
        linalg_norm(points[2], points[3])
    ));
    const img_crop_height = int(Math.max(
        linalg_norm(points[0], points[3]),
        linalg_norm(points[1], points[2])
    ));
    const pts_std = [
        [0, 0],
        [img_crop_width, 0],
        [img_crop_width, img_crop_height],
        [0, img_crop_height]
    ];
    // Each Mat is registered here so the finally block can free all of them.
    const owned: { delete(): void }[] = [];
    const own = <T extends { delete(): void }>(mat: T): T => {
        owned.push(mat);
        return mat;
    };
    try {
        const srcTri = own(cv.matFromArray(4, 1, cv.CV_32FC2, flatten(points)));
        const dstTri = own(cv.matFromArray(4, 1, cv.CV_32FC2, flatten(pts_std)));
        // Matrix mapping the box corners onto an upright rectangle.
        const perspective = own(cv.getPerspectiveTransform(srcTri, dstTri));
        const src = own(cv.imread(img));
        const dst = own(new cv.Mat());
        const dsize = new cv.Size(img_crop_width, img_crop_height);
        // Perspective transform.
        cv.warpPerspective(src, dst, perspective, dsize, cv.INTER_CUBIC, cv.BORDER_REPLICATE, new cv.Scalar());

        let upright = dst;
        // Rotate crops that are much taller than wide.
        if (dst.matSize[0] / dst.matSize[1] >= 1.5) {
            const dst_rot = own(new cv.Mat());
            const dsize_rot = new cv.Size(dst.rows, dst.cols);
            const center = new cv.Point(dst.cols / 2, dst.cols / 2);
            const rotation = own(cv.getRotationMatrix2D(center, 90, 1));
            cv.warpAffine(dst, dst_rot, rotation, dsize_rot, cv.INTER_CUBIC, cv.BORDER_REPLICATE, new cv.Scalar());
            upright = dst_rot;
        }

        // Scale to the recognition height, keeping the aspect ratio.
        const dst_resize = own(new cv.Mat());
        const scale = RECHEIGHT / upright.matSize[0];
        cv.resize(upright, dst_resize, new cv.Size(0, 0), scale, scale, cv.INTER_AREA);

        canvas_det.width = dst_resize.matSize[1];
        canvas_det.height = dst_resize.matSize[0];
        cv.imshow(canvas_det, dst_resize);
    }
    finally {
        owned.forEach(mat => mat.delete());
    }
}
/**
 * Euclidean distance between two points.
 *
 * @param x first point
 * @param y second point
 */
function linalg_norm(x: POINT, y: POINT) {
    const deltaFirst = x[0] - y[0];
    const deltaSecond = x[1] - y[1];
    const squaredSum = Math.pow(deltaFirst, 2) + Math.pow(deltaSecond, 2);
    return Math.sqrt(squaredSum);
}
/**
 * Copies one horizontal slice of a long text line onto `canvas_rec`.
 *
 * Slices are `ratio * RECWIDTH` pixels wide, and slice `index` starts at
 * `index * ratio * RECWIDTH`. The canvas is sized to the full slice width so
 * that nothing between two consecutive slices is dropped.
 *
 * @param image canvas or image holding the whole text line
 * @param origin_width width the line is drawn at
 * @param origin_height height the line is drawn at
 * @param index zero-based slice number
 * @param ratio slice width as a multiple of `RECWIDTH`
 */
function resize_norm_img_splice(
    image: HTMLImageElement | HTMLCanvasElement,
    origin_width: number,
    origin_height: number,
    index = 0,
    ratio = 3,
) {
    const slice_width = RECWIDTH * ratio;
    // The canvas must be as wide as the step between slices; a canvas of only
    // RECWIDTH pixels would show just the first 1/ratio of every slice.
    canvas_rec.width = slice_width;
    canvas_rec.height = RECHEIGHT;
    const ctx = canvas_rec.getContext('2d');
    ctx!.fillStyle = '#fff';
    ctx!.clearRect(0, 0, canvas_rec.width, canvas_rec.height);
    // Shift the line to the left so that the requested slice starts at x = 0.
    ctx!.drawImage(image, -index * slice_width, 0, origin_width, origin_height);
}

View File

@@ -0,0 +1,65 @@
import character from './ppocr_keys_v1.txt';
/**
 * Decodes the output of the text-recognition model into a string.
 *
 * The constructor picks, for every time step, the most probable class;
 * `outputResult()` then drops ignored tokens and repeated classes and maps
 * the remaining class indices to characters of the dictionary file.
 */
export default class recPostprocess {
    /** Dictionary characters, one per line of the dictionary file. */
    private ocr_character: string[];
    /** Most probable class index of every time step. */
    private preds_idx: number[];
    /** Probability of the chosen class of every time step. */
    private preds_prob: number[];

    /**
     * @param preds flat model output, 6625 class scores per time step
     * (shape `[1, ?, 6625]`)
     */
    constructor(preds: number[]) {
        // Accept both LF and CRLF line endings in the dictionary file.
        this.ocr_character = character.toString().split(/\r?\n/);
        const preds_idx: number[] = [];
        const preds_prob: number[] = [];
        const pred_len = 6625;
        for (let start = 0; start < preds.length; start += pred_len) {
            // The last class of each step is left out of the search.
            // NOTE(review): presumably because the dictionary has no entry for it — confirm.
            const end = Math.min(start + pred_len - 1, preds.length);
            // Single-pass argmax; on ties the first maximum wins.
            let best = start;
            for (let j = start + 1; j < end; j++) {
                if (preds[j] > preds[best]) {
                    best = j;
                }
            }
            preds_prob.push(preds[best]);
            preds_idx.push(best - start);
        }
        this.preds_idx = preds_idx;
        this.preds_prob = preds_prob;
    }

    /**
     * Maps class indices to text.
     *
     * @param text_index class index per time step
     * @param text_prob probability per time step; when missing, every kept step counts as 1
     * @param is_remove_duplicate skip a step whose class equals that of the previous step
     * @returns the decoded text and the mean probability of the kept steps
     * (0 when nothing was kept)
     */
    private decode(text_index: number[], text_prob: number[], is_remove_duplicate = false) {
        const ignored_tokens = this.get_ignored_tokens();
        const char_list: string[] = [];
        const conf_list: number[] = [];
        for (let idx = 0; idx < text_index.length; idx++) {
            // Compare by value: the `in` operator would test array indices instead.
            if (ignored_tokens.includes(text_index[idx])) {
                continue;
            }
            if (is_remove_duplicate && idx > 0 && text_index[idx - 1] === text_index[idx]) {
                continue;
            }
            // Class 0 is not a dictionary entry, so class k maps to dictionary line k - 1.
            char_list.push(this.ocr_character[text_index[idx] - 1]);
            conf_list.push(text_prob ? text_prob[idx] : 1);
        }
        let text = '';
        let mean = 0;
        if (char_list.length) {
            text = char_list.join('');
            const sum = conf_list.reduce((total, item) => total + item, 0);
            mean = sum / conf_list.length;
        }
        return { text, mean };
    }

    /** Class indices that never produce a character. */
    private get_ignored_tokens() {
        return [0];
    }

    /**
     * Decodes the predictions passed to the constructor.
     *
     * @returns the recognized text and its mean probability
     */
    public outputResult() {
        return this.decode(this.preds_idx, this.preds_prob, true);
    }
}

View File

@@ -0,0 +1,3 @@
// A 2-D point stored as an [x, y] pair.
export type POINT = [number, number];
// An ordered list of points, e.g. the corners of one text box.
export type POINTS = POINT[];
// A list of boxes, each given by its points.
export type BOX = POINTS[];

View File

@@ -0,0 +1,11 @@
/**
 * Flattens a (possibly nested) array of numbers into a flat list of numbers.
 *
 * @param arr array of numbers or of arrays of numbers
 * @returns every number of `arr` in order; an empty array for empty input
 */
export function flatten(arr: number[] | number[][]) {
    // Flattening the array itself, rather than its string form, keeps empty
    // input empty: ''.split(',') yields [''] and would turn into [0].
    return (arr as unknown[]).flat(Infinity).map(item => Number(item));
}
/**
 * Drops the fractional part of a number, rounding towards zero.
 *
 * @param num value to truncate
 */
export function int(num: number) {
    return Math.trunc(num);
}
/**
 * Limits a value to the range [min, max].
 *
 * @param data value to limit
 * @param min lower bound, checked first
 * @param max upper bound
 * @returns `min` when `data` is below it, `max` when `data` is above it, otherwise `data`
 */
export function clip(data: number, min: number, max: number) {
    if (data < min) {
        return min;
    }
    if (data > max) {
        return max;
    }
    return data;
}