[Other] Refactor js submodule (#415)

* Refactor js submodule

* Remove change-log

* Update ocr module

* Update ocr-detection module

* Update ocr-detection module

* Remove change-log
This commit is contained in:
chenqianhe
2022-10-23 14:05:13 +08:00
committed by GitHub
parent 30971cf3fd
commit f2619b0546
273 changed files with 14697 additions and 5088 deletions

View File

@@ -0,0 +1,10 @@
{
"presets": [
[
"@babel/preset-env",
{
"modules": false
}
]
]
}

View File

@@ -0,0 +1,3 @@
// commitlint configuration: commit messages are checked against the conventional-commits preset.
module.exports = {
extends: ['@commitlint/config-conventional'], // use the preset configuration https://github.com/conventional-changelog/commitlint/blob/master/@commitlint/config-conventional/index.js
}

View File

@@ -0,0 +1 @@
node_modules

View File

@@ -0,0 +1,27 @@
// ESLint configuration for the TypeScript sources and the jest tests.
module.exports = {
parser: '@typescript-eslint/parser', // parse files with the TypeScript parser
extends: [
'eslint:recommended', // ESLint recommended rules
'plugin:@typescript-eslint/recommended', // TypeScript recommended rules
'plugin:jest/recommended',
],
plugins: [
'@typescript-eslint',
'jest',
],
env: {
browser: true,
node: true,
es6: true,
},
parserOptions: {
// tsconfig used by the parser for type-aware linting
project: 'tsconfig.eslint.json',
ecmaVersion: 2019,
sourceType: 'module',
ecmaFeatures: {
experimentalObjectRestSpread: true
}
},
// no project-specific rule overrides
rules: {}
}

View File

@@ -0,0 +1,30 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*
node_modules
.DS_Store
dist
etc
lib
dist-ssr
coverage
*.local
/cypress/videos/
/cypress/screenshots/
# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?

View File

@@ -0,0 +1 @@
auto-install-peers=true

View File

@@ -0,0 +1,15 @@
{
"$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json",
"mainEntryPointFilePath": "./lib/index.d.ts",
"bundledPackages": [],
"docModel": {
"enabled": true
},
"apiReport": {
"enabled": true
},
"dtsRollup": {
"enabled": true,
"untrimmedFilePath": "./lib/index.d.ts"
}
}

View File

@@ -0,0 +1,17 @@
import path from "path";
import chalk from "chalk";
/**
* Paths used by the build scripts, all derived from the directory that
* contains this file (`__dirname`) by stepping one level up.
*/
export const paths = {
// parent directory of this file's directory (keeps the trailing separator from '../')
root: path.join(__dirname, '../'),
// bundle entry point
input: path.join(__dirname, '../src/index.ts'),
// build output directory
lib: path.join(__dirname, '../lib'),
}
/**
* Console helpers for the build scripts. Both helpers write through
* `console.log`; they differ only in the chalk colour applied to the text.
*/
export const log = {
// progress message, printed in green
progress: (text: string) => {
console.log(chalk.green(text))
},
// error message, printed in red (still via console.log, i.e. stdout)
error: (text: string) => {
console.log(chalk.red(text))
},
}

View File

@@ -0,0 +1,160 @@
import path from 'path'
import fse from 'fs-extra'
import { series } from "gulp"
import { paths, log } from "./build_package/util"
import rollupConfig from './rollup.config'
import { rollup } from 'rollup'
import {
Extractor,
ExtractorConfig,
ExtractorResult,
} from '@microsoft/api-extractor'
/**
* 'conventional-changelog' does not ship type declarations,
* so the import below is excluded from type checking.
*/
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore
import conventionalChangelog from 'conventional-changelog'
/**
* Shape of the gulp tasks defined in this file: each task receives a
* completion callback `cb` and calls it when its work is done.
*/
interface TaskFunc {
// eslint-disable-next-line @typescript-eslint/ban-types
(cb: Function): void
}
// Keywords tracked by the `changelog` task: a generated changelog line that is not a
// heading is kept only if it contains at least one of these substrings.
const CHANGE_TRACE = ['paddlejs-models/ocr', 'paddle-js-models/ocr', 'paddlejs-models', 'paddle-js-models', 'all']
/**
* Delete the build output directory (`paths.lib`).
* @param cb gulp completion callback, called after the directory has been removed
* @returns {Promise<void>}
*/
const clearLibFile: TaskFunc = async (cb) => {
// synchronous removal; `cb` is only reached once it has returned
fse.removeSync(paths.lib)
log.progress('Deleted lib file')
cb()
}
/**
 * Bundle the library with rollup.
 *
 * The bundle is created once from the `input`, `external` and `plugins`
 * entries of `rollupConfig` and then written once per configured output.
 * `output` may be a single object or an array; both are handled.
 *
 * @param cb gulp completion callback, called after every output was written
 * @throws Error when `rollupConfig` defines no output at all
 */
const buildByRollup: TaskFunc = async (cb) => {
  const inputOptions = {
    input: rollupConfig.input,
    external: rollupConfig.external,
    plugins: rollupConfig.plugins,
  }
  // Normalise `output` (object | array | undefined) to an array so a
  // single-output configuration is written instead of being skipped silently.
  const configuredOutput = rollupConfig.output
  const outOptions = Array.isArray(configuredOutput)
    ? configuredOutput
    : configuredOutput ? [configuredOutput] : []
  if (outOptions.length === 0) {
    throw new Error('Rollup config does not define any output')
  }
  const bundle = await rollup(inputOptions)
  try {
    // Each output configuration has to be written separately
    for (const outOption of outOptions) {
      await bundle.write(outOption)
    }
  } finally {
    // Let plugins release their resources, even when a write failed
    await bundle.close()
  }
  cb()
  log.progress('Rollup built successfully')
}
/**
 * Roll the generated declaration files up with API Extractor.
 *
 * Steps:
 *  1. load `api-extractor.json` that sits next to this file;
 *  2. make sure the entry-point declaration file exists and that an `etc`
 *     folder is present (created when missing);
 *  3. run API Extractor;
 *  4. on success remove every `.d.ts` file directly inside `paths.lib` whose
 *     name does not contain "index", then remove the `temp` folder.
 *
 * A missing entry point or a failed extractor run is logged and then raised
 * as an error, so the surrounding gulp series fails instead of carrying on
 * and reporting a successful build.
 *
 * @param cb gulp completion callback, called only on success
 * @throws Error when the entry-point declaration file is missing or the
 *         extractor run did not succeed
 */
const apiExtractorGenerate: TaskFunc = async (cb) => {
  const apiExtractorJsonPath: string = path.join(__dirname, './api-extractor.json')
  // Load and parse api-extractor.json
  const extractorConfig: ExtractorConfig = await ExtractorConfig.loadFileAndPrepare(apiExtractorJsonPath)
  // Check for the entry-point declaration file. The original author notes that this
  // asynchronous access has to happen first, otherwise the file is not found later on.
  const isdtxExist: boolean = await fse.pathExists(extractorConfig.mainEntryPointFilePath)
  // Check for the `etc` folder, which API Extractor needs to exist
  const isEtcExist: boolean = await fse.pathExists('./etc')
  if (!isdtxExist) {
    const message = 'API Extractor not find index.d.ts'
    log.error(message)
    throw new Error(message)
  }
  if (!isEtcExist) {
    fse.mkdirSync('etc');
    log.progress('Create folder etc for API Extractor')
  }
  // Run API Extractor
  const extractorResult: ExtractorResult = await Extractor.invoke(extractorConfig, {
    localBuild: true,
    // Show verbose messages in the output
    showVerboseMessages: true,
  })
  if (!extractorResult.succeeded) {
    const message = `API Extractor completed with ${extractorResult.errorCount} errors`
      + ` and ${extractorResult.warningCount} warnings`
    log.error(message)
    throw new Error(message)
  }
  // Remove the declaration files that are no longer needed
  const libFiles: string[] = await fse.readdir(paths.lib)
  for (const file of libFiles) {
    if (file.endsWith('.d.ts') && !file.includes('index')) {
      await fse.remove(path.join(paths.lib, file))
    }
  }
  log.progress('API Extractor completed successfully')
  // API Extractor leaves a `temp` folder behind; removeSync is a no-op when it is absent
  fse.removeSync('temp')
  cb()
}
/**
* Final task of the build series: prints the end marker and signals completion.
* @param cb gulp completion callback
*/
const complete: TaskFunc = (cb) => {
log.progress('---- end ----')
cb()
}
/**
 * Generate CHANGELOG.md in `paths.root`.
 *
 * Runs conventional-changelog with the angular preset over the whole history
 * (`releaseCount: 0`), rewrites `/commits/` links to `/commit/`, and keeps
 * only heading lines and lines that mention one of the `CHANGE_TRACE`
 * keywords.
 *
 * The task waits for the changelog stream to finish and for the file to be
 * written before it signals completion; a stream or write error rejects the
 * task instead of leaving it pending.
 *
 * @param cb gulp completion callback, called after the file has been written
 */
export const changelog: TaskFunc = async (cb) => {
  const isTracked = (line: string) => CHANGE_TRACE.some((keyWord) => line.includes(keyWord))
  const changelogPath: string = path.join(paths.root, 'CHANGELOG.md')
  // Equivalent of: conventional-changelog -p angular -i CHANGELOG.md -w -r 0
  const changelogPipe = conventionalChangelog({
    preset: 'angular',
    releaseCount: 0,
  })
  changelogPipe.setEncoding('utf8')
  // Collect the whole output first so that neither a line nor a link that
  // happens to straddle two stream chunks is processed in pieces.
  let generated = ''
  await new Promise<void>((resolve, reject) => {
    changelogPipe.on('data', (chunk: string) => {
      generated += chunk
    })
    changelogPipe.on('error', reject)
    changelogPipe.on('end', () => resolve())
  })
  // The generated links use /commits/, which opens the commit list rather than the commit
  const text = generated.replace(/\/commits\//g, '/commit/')
  const resultArray = ['# 更新日志\n\n']
  for (const line of text.split("\n")) {
    // Keep headings ('# ' also matches deeper levels such as '### ') and tracked entries
    if (line.includes('# ') || isTracked(line)) {
      resultArray.push(line + "\n\n")
    }
  }
  await fse.writeFile(changelogPath, resultArray.join(''))
  cb()
  log.progress('CHANGELOG generation completed')
}
/**
 * `gulp build`: clean the lib folder, bundle with rollup, roll up the
 * declaration files, then print the end marker. Exported with the same
 * ES `export` syntax as `changelog` instead of assigning to CommonJS `exports`.
 */
export const build = series(clearLibFile, buildByRollup, apiExtractorGenerate, complete)

View File

@@ -0,0 +1,4 @@
// Jest configuration: TypeScript tests are handled by the ts-jest preset and run in the node environment.
module.exports = {
preset: 'ts-jest',
testEnvironment: 'node',
}

View File

@@ -0,0 +1,81 @@
{
"name": "@paddle-js-models/ocr",
"version": "4.0.0",
"description": "",
"main": "lib/index.js",
"module": "lib/index.esm.js",
"typings": "lib/index.d.ts",
"files": [
"lib",
"LICENSE",
"CHANGELOG.md",
"README.md",
"README_cn.md"
],
"keywords": [],
"author": "",
"license": "ISC",
"publishConfig": {
"access": "public",
"registry": "https://registry.npmjs.org/"
},
"scripts": {
"dev": "yalc publish --push",
"prepublish": "pnpm lint && pnpm test",
"prepublishOnly": "pnpm build",
"build": "gulp build",
"lint": "eslint --ext .js,.ts src --fix",
"api": "api-extractor run",
"test": "jest --coverage --verbose -u",
"changelog": "gulp changelog"
},
"devDependencies": {
"@babel/core": "^7.19.0",
"@babel/preset-env": "^7.19.0",
"@commitlint/cli": "^17.1.2",
"@commitlint/config-conventional": "^17.1.0",
"@microsoft/api-extractor": "^7.30.0",
"@types/d3-polygon": "^3.0.0",
"@types/fs-extra": "^9.0.13",
"@types/gulp": "^4.0.9",
"@types/jest": "^29.0.1",
"@types/node": "^18.7.16",
"@typescript-eslint/eslint-plugin": "^5.36.2",
"@typescript-eslint/parser": "^5.36.2",
"browserify": "^17.0.0",
"chalk": "4.1.2",
"commitlint": "^17.1.2",
"conventional-changelog-cli": "^2.2.2",
"eslint": "8.22.0",
"eslint-plugin-jest": "^27.0.4",
"fs-extra": "^10.1.0",
"gulp": "^4.0.2",
"gulp-clean": "^0.4.0",
"gulp-typescript": "6.0.0-alpha.1",
"gulp-uglify": "^3.0.2",
"husky": "^8.0.1",
"jest": "^29.0.3",
"lint-staged": "^13.0.3",
"rollup": "^2.79.0",
"rollup-plugin-babel": "^4.4.0",
"rollup-plugin-commonjs": "^10.1.0",
"rollup-plugin-eslint": "^7.0.0",
"rollup-plugin-node-resolve": "^5.2.0",
"rollup-plugin-string": "^3.0.0",
"rollup-plugin-typescript2": "^0.34.0",
"ts-jest": "^29.0.0",
"ts-node": "^10.9.1",
"tsify": "^5.0.4",
"typescript": "^4.8.3",
"vinyl-buffer": "^1.0.1",
"vinyl-source-stream": "^2.0.0"
},
"dependencies": {
"@paddlejs-mediapipe/opencv": "^0.0.4",
"@paddlejs/paddlejs-backend-webgl": "^1.2.9",
"@paddlejs/paddlejs-core": "^2.2.0",
"d3-polygon": "^3.0.1",
"js-clipper": "^1.0.1",
"number-precision": "^1.5.2"
}
}

View File

@@ -0,0 +1,76 @@
import path from 'path'
import { RollupOptions } from 'rollup'
import { string } from "rollup-plugin-string";
import rollupTypescript from 'rollup-plugin-typescript2'
import babel from 'rollup-plugin-babel'
import resolve from 'rollup-plugin-node-resolve'
import commonjs from 'rollup-plugin-commonjs'
import { eslint } from 'rollup-plugin-eslint'
import { DEFAULT_EXTENSIONS } from '@babel/core'
import pkg from './package.json'
import { paths } from "./build_package/util";
// Rollup configuration: one bundle built from `paths.input`, written as a CommonJS
// file and as an ES-module file into `paths.lib`.
const rollupConfig: RollupOptions = {
input: paths.input,
output: [
// CommonJS build
{
file: path.join(paths.lib, 'index.js'),
format: 'cjs',
name: pkg.name,
},
// ES module build
{
file: path.join(paths.lib, 'index.esm.js'),
format: 'es',
name: pkg.name,
},
],
// Modules listed here are left as imports and are not bundled
external: ['@paddlejs-mediapipe/opencv',
'@paddlejs/paddlejs-backend-webgl',
'@paddlejs/paddlejs-core',
'@types/node',
'd3-polygon',
'js-clipper',
'number-precision'],
// The order of the plugins matters
plugins: [
eslint({
throwOnError: true,
throwOnWarning: false,
include: ['src/**/*.ts'],
exclude: ['node_modules/**', 'lib/**', '*.js'],
}),
// Import the txt file as a string
string({
include: "src/ppocr_keys_v1.txt"
}),
// Let rollup understand CommonJS modules and dependencies
commonjs(),
// Works together with commonjs to resolve third-party modules
resolve({
// Custom options passed on to the resolver
customResolveOptions: {
moduleDirectory: 'node_modules',
},
}),
rollupTypescript(),
babel({
runtimeHelpers: true,
// Only transpile our own sources, not external dependencies
exclude: 'node_modules/**',
// Babel does not handle .ts by default, so the extension is added explicitly
extensions: [
...DEFAULT_EXTENSIONS,
'.ts',
],
}),
],
}
export default rollupConfig

View File

@@ -0,0 +1,212 @@
import clipper from 'js-clipper';
import { divide, enableBoundaryChecking, plus } from 'number-precision';
import CV from '@paddlejs-mediapipe/opencv/library/opencv_ocr';
import {clip} from './util';
import * as d3Polygon from 'd3-polygon';
import { BOX, POINT, POINTS } from "./type";
/**
 * Turns the probability map of the text-detection model into text boxes.
 *
 * The map is binarised with `thresh`, contours are extracted with OpenCV,
 * and every contour is converted to its minimum-area rectangle, scored
 * against the probability map, expanded ("unclipped") and clipped to the
 * map bounds.
 *
 * `shape` is read as `[width, height]`; the map is expected in row-major
 * order with `width` values per row.
 * NOTE(review): the only caller visible in this package passes a square
 * shape, for which the previous width/height mix-ups had no effect —
 * confirm the intended order before feeding non-square maps.
 */
export default class DBPostprocess {
  private readonly thresh: number;
  private readonly box_thresh: number;
  private readonly max_candidates: number;
  private readonly unclip_ratio: number;
  private readonly min_size: number;
  private readonly pred: number[];
  private readonly segmentation: number[];
  private readonly width: number;
  private readonly height: number;

  /**
   * @param result       flat probability map produced by the detection model
   * @param shape        `[width, height]` of the map
   * @param thresh       binarisation threshold; a falsy value falls back to 0.3
   * @param box_thresh   minimum mean score of a box; a falsy value falls back to 0.6
   * @param unclip_ratio expansion ratio used by `unclip`; a falsy value falls back to 1.5
   */
  constructor(result: number[], shape: number[], thresh = 0.3, box_thresh = 0.6, unclip_ratio = 1.5) {
    enableBoundaryChecking(false);
    this.thresh = thresh ? thresh : 0.3;
    this.box_thresh = box_thresh ? box_thresh : 0.6;
    this.max_candidates = 1000;
    this.unclip_ratio = unclip_ratio ? unclip_ratio : 1.5;
    this.min_size = 3;
    this.width = shape[0];
    this.height = shape[1];
    this.pred = result;
    // Binary mask: 255 where the probability exceeds the threshold, 0 elsewhere
    this.segmentation = Array.from(result, (item: number) => (item > this.thresh ? 255 : 0));
  }

  /**
   * Extract the text boxes from the binarised map.
   * @returns `boxes` (four corner points each, rounded and clipped to the
   *          map size) and the matching `scores`
   */
  public outputBox() {
    // rows = height, cols = width, matching the row-major layout of the map
    // eslint-disable-next-line new-cap
    const src = new CV.matFromArray(this.height, this.width, CV.CV_8UC1, this.segmentation);
    const contours = new CV.MatVector();
    const hierarchy = new CV.Mat();
    const boxes: BOX = [];
    const scores: number[] = [];
    try {
      // Find the contours of the binary mask
      CV.findContours(src, contours, hierarchy, CV.RETR_LIST, CV.CHAIN_APPROX_SIMPLE);
      const num_contours = Math.min(contours.size(), this.max_candidates);
      for (let i = 0; i < num_contours; i++) {
        // `get` hands out a Mat that has to be released by the caller
        const contour = contours.get(i);
        let boxMap: any = null;
        try {
          const minBox = this.get_mini_boxes(contour);
          if (minBox.side < this.min_size) {
            continue;
          }
          const score = this.box_score_fast(this.pred, minBox.points);
          if (this.box_thresh > score) {
            continue;
          }
          const expanded = this.unclip(minBox.points);
          if (expanded.length === 0) {
            // The offset produced no polygon, so there is nothing to fit a box to
            continue;
          }
          // eslint-disable-next-line new-cap
          boxMap = new CV.matFromArray(expanded.length / 2, 1, CV.CV_32SC2, expanded);
          const resultObj = this.get_mini_boxes(boxMap);
          if (resultObj.side < this.min_size + 2) {
            continue;
          }
          const box = resultObj.points as [number, number][];
          box.forEach(item => {
            item[0] = clip(Math.round(item[0]), 0, this.width);
            item[1] = clip(Math.round(item[1]), 0, this.height);
          });
          boxes.push(box);
          scores.push(score);
        } finally {
          // Runs on `continue` as well, so neither Mat leaks on the skip paths
          if (boxMap) {
            boxMap.delete();
          }
          contour.delete();
        }
      }
    } finally {
      src.delete();
      contours.delete();
      hierarchy.delete();
    }
    return { boxes, scores };
  }

  /**
   * Minimum-area rectangle of a contour.
   * @param contour OpenCV Mat holding the contour points
   * @returns the four corners (the two left-most points first/last, ordered by
   *          y within each side) and the length of the shorter side
   */
  private get_mini_boxes(contour: any) {
    // Minimum-area bounding rectangle
    const bounding_box = CV.minAreaRect(contour);
    const points: POINTS = [];
    const mat = new CV.Mat();
    try {
      // Corner coordinates of the rectangle
      CV.boxPoints(bounding_box, mat);
      for (let i = 0; i < mat.data32F.length; i += 2) {
        points.push([mat.data32F[i], mat.data32F[i + 1]]);
      }
    } finally {
      mat.delete();
    }
    // Sort by x: the first two points form the left side, the last two the right side
    points.sort((a: POINT, b: POINT) => a[0] - b[0]);
    const [index_1, index_4] = points[1][1] > points[0][1] ? [0, 1] : [1, 0];
    const [index_2, index_3] = points[3][1] > points[2][1] ? [2, 3] : [3, 2];
    const box = [
      points[index_1],
      points[index_2],
      points[index_3],
      points[index_4]
    ];
    const side = Math.min(bounding_box.size.height, bounding_box.size.width);
    return { points: box, side };
  }

  /**
   * Mean probability inside a box.
   * @param bitmap flat probability map (row-major, `width` values per row)
   * @param _box   corner points of the box; not modified
   * @returns mean of the map values covered by the filled polygon
   */
  private box_score_fast(bitmap: number[], _box: POINTS) {
    const h = this.height;
    const w = this.width;
    // Work on a copy: the coordinates are shifted below
    const box: POINT[] = _box.map((item): POINT => [item[0], item[1]]);
    const x = box.map(item => item[0]);
    const y = box.map(item => item[1]);
    // Axis-aligned bounds of the box, limited to the map
    const xmin = clip(Math.floor(Math.min(...x)), 0, w - 1);
    const xmax = clip(Math.ceil(Math.max(...x)), 0, w - 1);
    const ymin = clip(Math.floor(Math.min(...y)), 0, h - 1);
    const ymax = clip(Math.ceil(Math.max(...y)), 0, h - 1);
    // eslint-disable-next-line new-cap
    const mask = new CV.Mat.zeros(ymax - ymin + 1, xmax - xmin + 1, CV.CV_8UC1);
    let points: any = null;
    let pts: any = null;
    try {
      // Move the polygon into the coordinate system of the mask
      box.forEach((item: POINT) => {
        item[0] = Math.max(item[0] - xmin, 0);
        item[1] = Math.max(item[1] - ymin, 0);
      });
      const npts = 4;
      // 32-bit integers match CV_32SC2; an 8-bit buffer would wrap every
      // coordinate above 255 and distort boxes wider or taller than that.
      const point_data = new Int32Array(box.flat());
      points = CV.matFromArray(npts, 1, CV.CV_32SC2, point_data);
      pts = new CV.MatVector();
      pts.push_back(points);
      const color = new CV.Scalar(255);
      // Fill the polygon
      CV.fillPoly(mask, pts, color, 1);
      // Copy the same window out of the map, row by row, using the row stride
      // `w` for both ends of the slice
      const region: number[] = [];
      for (let row = ymin; row <= ymax; row++) {
        const rowStart = w * row;
        region.push(...bitmap.slice(rowStart + xmin, rowStart + xmax + 1));
      }
      return this.mean(region, mask.data);
    } finally {
      mask.delete();
      if (points) {
        points.delete();
      }
      if (pts) {
        pts.delete();
      }
    }
  }

  /**
   * Expand a box outwards by `area * unclip_ratio / perimeter`.
   * @param box corner points of the box
   * @returns the expanded polygon as a flat `[x0, y0, x1, y1, ...]` list;
   *          empty when the offset yields no polygon
   */
  private unclip(box: POINTS): number[] {
    const unclip_ratio = this.unclip_ratio;
    const area = Math.abs(d3Polygon.polygonArea(box as [number, number][]));
    const length = d3Polygon.polygonLength(box as [number, number][]);
    const distance = area * unclip_ratio / length;
    const path = box.map(item => ({ X: item[0], Y: item[1] }));
    const offset = new clipper.ClipperOffset();
    offset.AddPath(path, clipper.JoinType.jtRound, clipper.EndType.etClosedPolygon);
    const expanded: { X: number; Y: number; }[][] = [];
    offset.Execute(expanded, distance);
    // Only the first resulting polygon is used
    const flat: number[] = [];
    if (expanded[0]) {
      expanded[0].forEach(item => {
        flat.push(item.X, item.Y);
      });
    }
    return flat;
  }

  /**
   * Mean of the `data` values whose `mask` entry is non-zero.
   * @param data values to average
   * @param mask selection mask with the same layout as `data`
   */
  private mean(data: ArrayLike<number>, mask: ArrayLike<number>) {
    let sum = 0;
    let length = 0;
    for (let i = 0; i < data.length; i++) {
      if (mask[i]) {
        sum = plus(sum, data[i]);
        length++;
      }
    }
    return divide(sum, length);
  }
}

View File

@@ -0,0 +1 @@
declare module '*.txt'

View File

@@ -0,0 +1,326 @@
/**
* @file ocr_rec model
*/
import { Runner, env } from '@paddlejs/paddlejs-core';
import '@paddlejs/paddlejs-backend-webgl';
import { BOX, POINTS, POINT } from "./type"
import DBProcess from './dbPostprocess';
import RecProcess from './recPostprocess';
import cv from '@paddlejs-mediapipe/opencv/library/opencv_ocr';
import { flatten, int, clip } from './util';
/**
* Options for drawing the detected text boxes.
*/
export interface DrawBoxOptions {
// When set, `recognize` draws the image and the detected boxes onto this canvas.
canvas?: HTMLCanvasElement;
// Stroke/fill settings handed to the box drawing.
style?: CanvasStyleOptions;
}
/**
* Canvas styling used when drawing the text boxes.
*/
export interface CanvasStyleOptions {
// Outline colour; '#000' is used when omitted.
strokeStyle?: string;
// Outline width; 1 is used when omitted.
lineWidth?: number;
// Fill colour; 'transparent' is used when omitted.
fillStyle?: string;
}
/**
* Tunable parameters of the detection step.
*/
export interface DetPostConfig {
// Side length of the square detection input; a falsy value selects the module default.
shape: number;
// The three values below are forwarded unchanged to the detection post-processing.
thresh: number;
box_thresh: number;
unclip_ratio: number;
}
// Detection parameters used when the caller does not pass a config.
const defaultPostConfig: DetPostConfig = {shape: 960, thresh: 0.3, box_thresh: 0.6, unclip_ratio:1.5};
// Default detection input size and recognition input size; `init` overwrites
// them with the feed shapes reported by the loaded models when available.
let DEFAULTDETSHAPE = 960;
let RECWIDTH = 320;
let RECHEIGHT = 48;
// Working canvases: canvas_det holds the detection input and later each cropped
// text line, canvas_rec holds one recognition-sized slice of a long text line.
const canvas_det = document.createElement('canvas') as HTMLCanvasElement;
const canvas_rec = document.createElement('canvas') as HTMLCanvasElement;
// Model runners, created by `init`.
let detectRunner = null as Runner;
let recRunner = null as Runner;
// Both canvases are attached to the document but kept invisible (see initCanvas).
initCanvas(canvas_det);
initCanvas(canvas_rec);
/**
 * Attach a working canvas to the page without making it visible: it is
 * pinned to the bottom of the viewport, placed behind the content and made
 * fully transparent.
 * @param canvas canvas to style and append to `document.body`
 */
function initCanvas(canvas: HTMLCanvasElement) {
  Object.assign(canvas.style, {
    position: 'fixed',
    bottom: '0',
    zIndex: '-1',
    opacity: '0',
  });
  document.body.appendChild(canvas);
}
/**
 * Create and initialise the detection and recognition runners.
 *
 * Both models are loaded in parallel. Afterwards the module-level input
 * sizes are replaced by the feed shapes of the loaded models, when the
 * runners report one.
 *
 * @param detCustomModel URL of a custom detection model; empty selects the built-in one
 * @param recCustomModel URL of a custom recognition model; empty selects the built-in one
 */
export async function init(detCustomModel = '', recCustomModel = '') {
  const detModelPath = 'https://js-models.bj.bcebos.com/PaddleOCR/PP-OCRv3/ch_PP-OCRv3_det_infer_js_960/model.json';
  const recModelPath = 'https://js-models.bj.bcebos.com/PaddleOCR/PP-OCRv3/ch_PP-OCRv3_rec_infer_js/model.json';

  env.set('webgl_pack_output', true);

  // Detection runner
  detectRunner = new Runner({
    modelPath: detCustomModel || detModelPath,
    fill: '#fff',
    mean: [0.485, 0.456, 0.406],
    std: [0.229, 0.224, 0.225],
    bgr: true,
    webglFeedProcess: true
  });
  const pending = [detectRunner.init()];

  // Recognition runner
  recRunner = new Runner({
    modelPath: recCustomModel || recModelPath,
    fill: '#000',
    mean: [0.5, 0.5, 0.5],
    std: [0.5, 0.5, 0.5],
    bgr: true,
    webglFeedProcess: true
  });
  pending.push(recRunner.init());

  await Promise.all(pending);

  // Adopt the input sizes of the models that were actually loaded
  if (detectRunner.feedShape) {
    DEFAULTDETSHAPE = detectRunner.feedShape.fw;
  }
  if (recRunner.feedShape) {
    RECWIDTH = recRunner.feedShape.fw;
    RECHEIGHT = recRunner.feedShape.fh;
  }
}
/**
 * Run text detection on an image.
 *
 * The image is letter-boxed (aspect ratio kept, white padding) into a square
 * canvas of the detection size, passed through the detection model and the
 * DB post-processing, and the resulting boxes are mapped back to the
 * coordinates of the original image. Every box is enlarged by a few pixels
 * to make recognition easier.
 *
 * @param image  source image
 * @param Config detection parameters; `shape` falls back to the module default when falsy
 * @returns list of boxes, each made of four `[x, y]` points in image coordinates
 * @throws Error when no 2D context can be obtained for the detection canvas
 */
async function detect(image: HTMLImageElement, Config:DetPostConfig = defaultPostConfig) {
  // Target size
  const DETSHAPE = Config.shape ? Config.shape : DEFAULTDETSHAPE;
  const thresh = Config.thresh;
  const box_thresh = Config.box_thresh;
  const unclip_ratio = Config.unclip_ratio;
  const targetWidth = DETSHAPE;
  const targetHeight = DETSHAPE;
  canvas_det.width = targetWidth;
  canvas_det.height = targetHeight;
  // The canvas scales the uploaded image to the target size
  const ctx = canvas_det.getContext('2d');
  if (!ctx) {
    throw new Error('Unable to get a 2D context for the detection canvas');
  }
  ctx.fillStyle = '#fff';
  // fillRect takes (x, y, width, height)
  ctx.fillRect(0, 0, targetWidth, targetHeight);
  // Size of the scaled image and its offset inside the canvas
  let sw = targetWidth;
  let sh = targetHeight;
  let x = 0;
  let y = 0;
  if (targetWidth / targetHeight * image.naturalHeight / image.naturalWidth >= 1) {
    // The target is relatively wider: fit the image height, centre horizontally
    sw = Math.round(sh * image.naturalWidth / image.naturalHeight);
    x = Math.floor((targetWidth - sw) / 2);
  }
  else {
    // The target is relatively narrower: fit the image width, centre vertically
    sh = Math.round(sw * image.naturalHeight / image.naturalWidth);
    y = Math.floor((targetHeight - sh) / 2);
  }
  ctx.drawImage(image, x, y, sw, sh);
  const shapeList = [DETSHAPE, DETSHAPE];
  const outsDict = await detectRunner.predict(canvas_det);
  const postResult = new DBProcess(outsDict, shapeList, thresh, box_thresh, unclip_ratio);
  // Boxes in canvas coordinates
  const result = postResult.outputBox();
  // Deep copy before converting to image coordinates
  const points = JSON.parse(JSON.stringify(result.boxes));
  // Number of pixels each box is enlarged by on every side
  const adjust = 8;
  const longSide = Math.max(image.naturalWidth, image.naturalHeight);
  points && points.forEach((item: [number, number][]) => {
    item.forEach((point, index) => {
      // Corners 1 and 2 move right, the others left; corners 0 and 1 move up, the others down
      const dx = (index === 1 || index === 2) ? adjust : -adjust;
      const dy = (index === 0 || index === 1) ? -adjust : adjust;
      point[0] = clip(
        (Math.round(point[0] - x) * longSide / DETSHAPE) + dx,
        0,
        image.naturalWidth
      );
      point[1] = clip(
        (Math.round(point[1] - y) * longSide / DETSHAPE) + dy,
        0,
        image.naturalHeight
      );
    });
  });
  return points;
}
/**
 * Draw the image and the outline of every detected box onto a canvas.
 * The canvas is resized to the natural size of the image first.
 *
 * @param points detected boxes, four `[x, y]` corners each
 * @param image  image the boxes belong to
 * @param canvas canvas to draw on
 * @param style  optional stroke/fill settings ('#000', 1 and 'transparent' by default)
 */
function drawBox(
  points: [[number, number], [number, number], [number, number], [number, number]][],
  image: HTMLImageElement,
  canvas: HTMLCanvasElement,
  style?: CanvasStyleOptions
) {
  canvas.width = image.naturalWidth;
  canvas.height = image.naturalHeight;
  const pen = canvas.getContext('2d');
  pen!.drawImage(image, 0, 0, canvas.width, canvas.height);
  if (!points) {
    return;
  }
  for (const quad of points) {
    const [first, second, third, fourth] = quad;
    // Every box is its own path
    pen!.beginPath();
    // Line colour, width and fill colour, each with its default
    pen!.strokeStyle = style?.strokeStyle || '#000';
    pen!.lineWidth = style?.lineWidth || 1;
    pen!.fillStyle = style?.fillStyle || 'transparent';
    // Trace the four corners starting from the first one
    pen!.moveTo(first[0], first[1]);
    pen!.lineTo(second[0], second[1]);
    pen!.lineTo(third[0], third[1]);
    pen!.lineTo(fourth[0], fourth[1]);
    // Fill, close the outline, then stroke it
    pen!.fill();
    pen!.closePath();
    pen!.stroke();
    pen!.restore();
  }
}
/**
 * Detect and recognise the text in an image.
 *
 * @param {HTMLImageElement} image source image
 * @param {Object} options when `options.canvas` is set, the detected boxes are drawn onto it
 * @param detConfig tunable detection parameters
 * @returns the recognised text of every box (in reading order) and the box coordinates
 */
export async function recognize(
  image: HTMLImageElement,
  options?: DrawBoxOptions,
  detConfig:DetPostConfig = defaultPostConfig
) {
  // Coordinates of the detected text boxes
  const point = await detect(image, detConfig);
  // Draw the boxes when a canvas was supplied
  if (options?.canvas) {
    drawBox(point, image, options.canvas, options.style);
  }
  const boxes = sorted_boxes(point);
  const text_list: string[] = [];

  // Runs the recognition model on a canvas and decodes its output to text
  const readText = async (source: HTMLCanvasElement) => {
    const output = await recRunner.predict(source);
    return new RecProcess(output).outputResult().text;
  };

  for (const current of boxes) {
    const tmp_box = JSON.parse(JSON.stringify(current));
    // Leaves the cropped, rectified text line on canvas_det
    get_rotate_crop_image(image, tmp_box);
    // ratio = 3 is an empirical default and can be tuned
    const ratio = 3;
    const width_num = Math.ceil(canvas_det.width / RECWIDTH / ratio);
    let line_text = '';
    if (width_num > 1) {
      // Long line (wider than ratio * RECWIDTH): recognise it slice by slice,
      // each slice ratio * RECWIDTH wide, and join the partial results
      for (let part = 0; part < width_num; part++) {
        resize_norm_img_splice(canvas_det, canvas_det.width, canvas_det.height, part, ratio);
        line_text = line_text.concat(await readText(canvas_rec));
      }
    } else {
      // Short line: recognise it in one go
      line_text = line_text.concat(await readText(canvas_det));
    }
    text_list.push(line_text);
  }
  return { text: text_list, points: point };
}
/**
 * Put the boxes into reading order: top to bottom by the y of their first
 * point, then one pass that swaps neighbours lying on (nearly) the same line
 * — y difference below 10 — when the later one starts further left.
 * The input array is sorted in place and returned.
 * @param box detected boxes
 */
function sorted_boxes(box: BOX) {
  const ordered = box.sort((upper: POINTS, lower: POINTS) => upper[0][1] - lower[0][1]);
  const last = ordered.length - 1;
  for (let cur = 0; cur < last; cur++) {
    const next = cur + 1;
    const sameLine = Math.abs(ordered[next][0][1] - ordered[cur][0][1]) < 10;
    if (sameLine && ordered[next][0][0] < ordered[cur][0][0]) {
      [ordered[cur], ordered[next]] = [ordered[next], ordered[cur]];
    }
  }
  return ordered;
}
/**
 * Cut one text box out of the image, rectify it with a perspective
 * transform, rotate it by 90 degrees when it is clearly vertical
 * (height / width >= 1.5), scale it to the recognition height and draw the
 * result onto `canvas_det`.
 *
 * Every OpenCV Mat created here — including the two transformation
 * matrices — is released when the function exits, also when an OpenCV call
 * throws.
 *
 * @param img    source image or canvas
 * @param points the four corners of the text box in `img` coordinates
 */
function get_rotate_crop_image(img: HTMLCanvasElement | HTMLImageElement, points: POINTS) {
  const img_crop_width = int(Math.max(
    linalg_norm(points[0], points[1]),
    linalg_norm(points[2], points[3])
  ));
  const img_crop_height = int(Math.max(
    linalg_norm(points[0], points[3]),
    linalg_norm(points[1], points[2])
  ));
  const pts_std = [
    [0, 0],
    [img_crop_width, 0],
    [img_crop_width, img_crop_height],
    [0, img_crop_height]
  ];
  // Mats registered here are deleted in the `finally` block below
  const owned: { delete(): void }[] = [];
  const own = <T extends { delete(): void }>(mat: T): T => {
    owned.push(mat);
    return mat;
  };
  try {
    const srcTri = own(cv.matFromArray(4, 1, cv.CV_32FC2, flatten(points)));
    const dstTri = own(cv.matFromArray(4, 1, cv.CV_32FC2, flatten(pts_std)));
    // Perspective matrix mapping the box onto an upright rectangle
    const M = own(cv.getPerspectiveTransform(srcTri, dstTri));
    const src = own(cv.imread(img));
    const dst = own(new cv.Mat());
    const dsize = new cv.Size(img_crop_width, img_crop_height);
    // Perspective warp
    cv.warpPerspective(src, dst, M, dsize, cv.INTER_CUBIC, cv.BORDER_REPLICATE, new cv.Scalar());
    const dst_img_height = dst.matSize[0];
    const dst_img_width = dst.matSize[1];
    // Mat that is scaled in the end: the warped crop, or its rotated version
    let resize_source = dst;
    if (dst_img_height / dst_img_width >= 1.5) {
      // Vertical text: rotate the crop by 90 degrees
      const dst_rot = own(new cv.Mat());
      const dsize_rot = new cv.Size(dst.rows, dst.cols);
      const center = new cv.Point(dst.cols / 2, dst.cols / 2);
      const rotation = own(cv.getRotationMatrix2D(center, 90, 1));
      cv.warpAffine(dst, dst_rot, rotation, dsize_rot, cv.INTER_CUBIC, cv.BORDER_REPLICATE, new cv.Scalar());
      resize_source = dst_rot;
    }
    const dst_resize = own(new cv.Mat());
    const dsize_resize = new cv.Size(0, 0);
    // Scale so that the height equals the recognition input height
    const scale = RECHEIGHT / resize_source.matSize[0];
    cv.resize(resize_source, dst_resize, dsize_resize, scale, scale, cv.INTER_AREA);
    canvas_det.width = dst_resize.matSize[1];
    canvas_det.height = dst_resize.matSize[0];
    cv.imshow(canvas_det, dst_resize);
  } finally {
    owned.forEach(mat => mat.delete());
  }
}
/**
 * Euclidean distance between two points.
 * @param x first point
 * @param y second point
 */
function linalg_norm(x: POINT, y: POINT) {
  const dx = x[0] - y[0];
  const dy = x[1] - y[1];
  return Math.sqrt(dx * dx + dy * dy);
}
/**
 * Draw one slice of a long text line onto `canvas_rec`.
 *
 * `canvas_rec` is set to the recognition input size and cleared, then the
 * source is drawn shifted to the left by `index * RECWIDTH * ratio` pixels so
 * that the requested slice starts at the left edge of the canvas.
 *
 * @param image         source image or canvas holding the whole text line
 * @param origin_width  width the source is drawn with
 * @param origin_height height the source is drawn with
 * @param index         zero-based slice number
 * @param ratio         slice width expressed in multiples of RECWIDTH
 */
function resize_norm_img_splice(
  image: HTMLImageElement | HTMLCanvasElement,
  origin_width: number,
  origin_height: number,
  index = 0,
  ratio = 3,
) {
  canvas_rec.width = RECWIDTH;
  canvas_rec.height = RECHEIGHT;
  const pen = canvas_rec.getContext('2d');
  pen!.fillStyle = '#fff';
  pen!.clearRect(0, 0, canvas_rec.width, canvas_rec.height);
  // Horizontal shift that brings the requested slice to the left edge
  const shift = -index * RECWIDTH * ratio;
  pen!.drawImage(image, shift, 0, origin_width, origin_height);
}

View File

@@ -0,0 +1,65 @@
import character from './ppocr_keys_v1.txt';
/**
 * Decodes the output of the text-recognition model into a string.
 *
 * The model output is read as consecutive time steps of 6625 scores each.
 * For every step the best-scoring class is picked; the sequence of classes is
 * then decoded: ignored tokens (class 0) are dropped, repeated classes are
 * collapsed, and each remaining class `k` is mapped to line `k - 1` of the
 * character dictionary.
 */
export default class recPostprocess {
  private ocr_character: string[];
  private preds_idx: number[];
  private preds_prob: number[];

  /**
   * @param preds flat model output, shape [1, ?, 6625]
   */
  constructor(preds: number[]) {
    this.ocr_character = character.toString().split('\n');
    const preds_idx: number[] = [];
    const preds_prob: number[] = [];
    const pred_len = 6625;
    for (let start = 0; start < preds.length; start += pred_len) {
      // The last score of every step is left out of the search, as before.
      // NOTE(review): confirm that skipping class 6624 is intended.
      const end = Math.min(start + pred_len - 1, preds.length);
      // Single-pass argmax; on ties the first maximum wins, like indexOf(max).
      // No temporary slice and no argument spread of several thousand values.
      let best = start;
      for (let pos = start + 1; pos < end; pos++) {
        if (preds[pos] > preds[best]) {
          best = pos;
        }
      }
      preds_prob.push(preds[best]);
      preds_idx.push(best - start);
    }
    this.preds_idx = preds_idx;
    this.preds_prob = preds_prob;
  }

  /**
   * Turn a class sequence into text.
   * @param text_index          best class of every time step
   * @param text_prob           score of that class for every time step
   * @param is_remove_duplicate collapse a class that repeats its predecessor
   * @returns the decoded text and the mean score of the characters kept
   *          (0 when no character was kept)
   */
  private decode(text_index: number[], text_prob: number[], is_remove_duplicate = false) {
    const ignored_tokens = this.get_ignored_tokens();
    const char_list: string[] = [];
    const conf_list: number[] = [];
    for (let idx = 0; idx < text_index.length; idx++) {
      // Compare by value; `in` on an array would test indices instead
      if (ignored_tokens.includes(text_index[idx])) {
        continue;
      }
      if (is_remove_duplicate && idx > 0 && text_index[idx - 1] === text_index[idx]) {
        continue;
      }
      // Class 0 is the ignored token, so class k maps to dictionary entry k - 1
      char_list.push(this.ocr_character[text_index[idx] - 1]);
      conf_list.push(text_prob ? text_prob[idx] : 1);
    }
    let text = '';
    let mean = 0;
    if (char_list.length) {
      text = char_list.join('');
      const sum = conf_list.reduce((total, item) => total + item, 0);
      mean = sum / conf_list.length;
    }
    return { text, mean };
  }

  /** Classes that never produce a character. */
  private get_ignored_tokens() {
    return [0];
  }

  /**
   * Decode the predictions passed to the constructor.
   * @returns the recognised text and its mean score
   */
  public outputResult() {
    return this.decode(this.preds_idx, this.preds_prob, true);
  }
}

View File

@@ -0,0 +1,3 @@
/** A single 2D point, stored as a two-number tuple. */
export type POINT = [number, number];
/** A list of points, e.g. the corners of one box. */
export type POINTS = POINT[];
/** A list of point lists, e.g. all boxes found in one image. */
export type BOX = POINTS[];

View File

@@ -0,0 +1,11 @@
/**
 * Flatten a (possibly nested) array of numbers into a flat number array.
 *
 * The array is flattened structurally rather than via its string form, so an
 * empty input yields an empty array instead of `[0]` and empty inner arrays
 * contribute nothing.
 *
 * @param arr flat or nested array of numbers
 * @returns all numbers in their original order
 */
export function flatten(arr: number[] | number[][]) {
  return (arr as unknown[]).flat(Infinity).map(item => Number(item));
}
/**
 * Drop the fractional part of a number (round towards zero).
 * @param num value to truncate
 */
export function int(num: number) {
  return Math.trunc(num);
}
/**
 * Limit a value to the range [min, max].
 * @param data value to limit
 * @param min  lower bound, checked first
 * @param max  upper bound
 * @returns `min` when `data` is below it, `max` when `data` is above it, otherwise `data`
 */
export function clip(data: number, min: number, max: number) {
  if (data < min) {
    return min;
  }
  if (data > max) {
    return max;
  }
  return data;
}

View File

@@ -0,0 +1,12 @@
import assert from 'assert'
// Placeholder suite: it contains a single always-true assertion and does not
// exercise any code of the package yet.
describe('Example:', () => {
/**
* Example
*/
describe('ExampleTest', () => {
test('Hello World!', () => {
assert.strictEqual('Hello World!', 'Hello World!')
})
})
})

View File

@@ -0,0 +1,11 @@
{
"compilerOptions": {
"baseUrl": "./",
"resolveJsonModule": true
},
"include": [
"**/*.ts",
"**/*.js",
".eslintrc.js"
]
}

View File

@@ -0,0 +1,31 @@
{
"compilerOptions": {
/* Visit https://aka.ms/tsconfig to read more about this file */
"target": "ESNext", /* Set the JavaScript language version for emitted JavaScript and include compatible library declarations. */
"lib": [
"ESNext",
"DOM"
], /* Specify a set of bundled library declaration files that describe the target runtime environment. */
"moduleResolution": "node", /* Specify how TypeScript looks up a file from a given module specifier. */
"baseUrl": "./", /* Specify the base directory to resolve non-relative module names. */
"resolveJsonModule": true, /* Enable importing .json files. */
"allowJs": true, /* Allow JavaScript files to be a part of your program. Use the 'checkJS' option to get errors from these files. */
"declaration": true, /* Generate .d.ts files from TypeScript and JavaScript files in your project. */
"declarationMap": true, /* Create sourcemaps for d.ts files. */
"sourceMap": true, /* Create source map files for emitted JavaScript files. */
"outDir": "./lib", /* Specify an output folder for all emitted files. */
"removeComments": false, /* Disable emitting comments. */
"allowSyntheticDefaultImports": true, /* Allow 'import x from y' when a module doesn't have a default export. */
"esModuleInterop": true, /* Emit additional JavaScript to ease support for importing CommonJS modules. This enables 'allowSyntheticDefaultImports' for type compatibility. */
"forceConsistentCasingInFileNames": true, /* Ensure that casing is correct in imports. */
"strict": true, /* Enable all strict type-checking options. */
"noImplicitAny": false, /* Enable error reporting for expressions and declarations with an implied 'any' type. */
"skipLibCheck": true /* Skip type checking all .d.ts files. */
},
"include": [
"src"
]
}