Skip to content

Commit

Permalink
feat: 新增 slide_match 滑块验证码的识别
Browse files Browse the repository at this point in the history
  • Loading branch information
renxia committed Dec 29, 2023
1 parent 111596b commit a9ecc26
Show file tree
Hide file tree
Showing 20 changed files with 128 additions and 64 deletions.
60 changes: 31 additions & 29 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,63 +3,65 @@

<p align="center"><b>使用 CV (OpenCV) 和 OCR (Tesseract) 进行验证码识别</b></p>

simplest | grids_and_equations | dots_and_chars | ...
:-: | :-: | :-: | :-:
<img src="./test/example/simplest.jpg" height="20" alt="simplest" align=center> | <img src="./test/example/grids_and_equations.jpg" height="20" alt="grids_and_equations" align=center> | <img src="./test/example/dots_and_chars.gif" height="20" alt="dots_and_chars" align=center> | ...
2348 | 2x6=? | 7RVO | ...
simplest | grids_and_equations | dots_and_chars | slide_match | ...
:-: | :-: | :-: | :-: | :-:
<img src="./test/example/simplest.jpg" height="20" alt="simplest" align=center> | <img src="./test/example/grids_and_equations.jpg" height="20" alt="grids_and_equations" align=center> | <img src="./test/example/dots_and_chars.gif" height="20" alt="dots_and_chars" align=center> | - | ...
2348 | 2x6=? | 7RVO | 滑块匹配 | ...

## 快速入门

### 安装
因为所需的 OpenCV 支持模块 [opencv4nodejs](https://github.com/justadudewhohacks/opencv4nodejs) 体积较大,编译过程复杂,请手动安装,或者参考官方的安装指南:

```bash
npm i @u4/opencv4nodejs -g
```

第三种验证码识别改为用 [sharp](https://github.com/lovell/sharp) 和纯 JavaScript 的 CV 算法来实现,方便在树莓派上运行,但效率相比前两者很低。

第二个 Tesseract 支持模块为 [tesseract.js](https://github.com/naptha/tesseract.js)

直接安装

```bash
npm i captcha-cv-ocr
# use npm, yarn or pnpm
npm add captcha-cv-ocr
```

或者
因为所需的 OpenCV 支持模块 [@u4/opencv4nodejs](https://github.com/UrielCh/opencv4nodejs) 体积较大,编译过程复杂,请手动安装,或者参考官方的安装指南:

```bash
git clone https://github.com/PillarsZhang/captcha-cv-ocr
cd captcha-cv-ocr
npm install
npm link #约等于安装为全局模块
```
### 测试

```bash
node judge_and_test.js
npm i @u4/opencv4nodejs -g
```

### 用法

```javascript
const path = require("path");
const cvocrModule = require("captcha-cv-ocr");
const { Cvocr, getCodesList } = require("captcha-cv-ocr");

var mode = "simplest";

(async () => {
let cvocr = new cvocrModule(mode); // mode 表示验证码的种类
let cvocr = new Cvocr(mode); // mode 表示验证码的种类
await cvocr.init(1); //其中的1表示需要启动的 OCR Worker 数(多线程)
let ans = await cvocr.recognize(path.join(__dirname, "docs/img", mode + ".jpg")); //支持文件地址、Base64、Buffer形式
console.log("ans:", ans)
process.exit(0);
})()
```

### 说明

第三种验证码(`dots_and_chars`)识别改为用 [sharp](https://github.com/lovell/sharp) 和纯 JavaScript 的 CV 算法来实现,方便在树莓派上运行,但效率相比前两者很低。

## 开发

```bash
git clone https://github.com/renxia/captcha-cv-ocr
cd captcha-cv-ocr
npm install
# 约等于安装为全局模块
npm link
```

### 测试

```bash
npm test
```

### 已支持

simplest | grids_and_equations | dots_and_chars
:-: | :-: | :-:
<img src="./test/example/simplest.jpg" height="20" alt="simplest" align=center> | <img src="./test/example/grids_and_equations.jpg" height="20" alt="grids_and_equations" align=center> | <img src="./test/example/dots_and_chars.gif" height="20" alt="dots_and_chars" align=center>
Expand Down Expand Up @@ -88,4 +90,4 @@ codes下的文件夹对应着不同种类的名字(自行命名),你可以
- ./lib/fakeOpenCV
- 个人仿照 OpenCV 重写了一些图像算法

C++ / Python 的 OpenCV 海量资料也非常有帮助, 相应的函数基本都能在 [opencv4nodejs 的 API 文档](https://justadudewhohacks.github.io/opencv4nodejs/docs/Mat/) 里找到
C++ / Python 的 OpenCV 海量资料也非常有帮助, 相应的函数基本都能在 [opencv4nodejs 的 API 文档](https://justadudewhohacks.github.io/opencv4nodejs/docs/Mat/) 里找到
24 changes: 24 additions & 0 deletions codes/slide_match/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/*
* @Author: renxia
* @Date: 2023-12-28 18:53:57
* @LastEditors: renxia
* @LastEditTime: 2023-12-28 19:43:22
* @Description:
*/
/**
* 滑块类型验证码匹配
*/

class SlideMatch {
init() {}
recognize(sliderImage, originalImage) {
const cv = require('@u4/opencv4nodejs');
const sliderMat = cv.imdecode(Buffer.from(sliderImage, 'base64'));
const originalMat = cv.imdecode(Buffer.from(originalImage, 'base64'));
const matched = sliderMat.matchTemplate(originalMat, cv.TM_CCOEFF_NORMED);
const matchedPoints = matched.minMaxLoc();
return matchedPoints; // .maxLoc.x;
}
}

module.exports = new SlideMatch();
34 changes: 21 additions & 13 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,33 +2,41 @@

const path = require("path");
const loadImage = require("./lib/loadImage");
const { logger } = require('./lib/utils');
const { logger, getCodesList } = require('./lib/utils');
const { setLogging } = require("tesseract.js");

var modeModule;
global.debugFlag = 0;
global.debugFlag = +process.env.DEBUG_FLAG || 0;

class Cvocr {
modeModule;
constructor(mode = "simplest") {
try {
if (global.debugFlag) logger.updateOptions({ levelType: 'debug' });
if (global.debugFlag) {
logger.updateOptions({ levelType: 'debug' });
if (global.debugFlag > 15) setLogging(true);
}

logger.debug("Debug Mode On!\n");
modeModule = require(path.join(__dirname, "codes", mode));
this.modeModule = require(path.join(__dirname, "codes", mode));
}
catch (err) {
console.error(`no this mode: ${mode}, path: ${path.join(__dirname, "codes", mode)}`);
console.error(err);
process.exit(1);
}
}
recognize = async (img) => {
var image = await loadImage(img);
logger.debug(`image.length: ${image.length}`);
return await modeModule.recognize(image);
async recognize (...img) {
const images = await Promise.all(img.filter(Boolean).map(d => loadImage(d)));
logger.debug(`image.length:`, images.map(d => d?.length));
return this.modeModule.recognize(...images);
}
init = async (config = [{ num: 2 }, { num: 1 }]) => {
await modeModule.init(config);
async init(config = [{ num: 2 }, { num: 1 }]) {
await this.modeModule.init(config);
}
}

exports.config = require('./lib/config');
exports.Cvocr = Cvocr;
module.exports = {
Cvocr,
getCodesList,
logger,
}
3 changes: 1 addition & 2 deletions lib/TesseractOcr.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
const { createWorker, createScheduler, PSM, OEM } = require('tesseract.js');
const { cpus } = require('node:os');
const {log} = require('./utils');
const { log } = require('./utils');

const defaultWorkerConfig = {
/** worker 标记。默认为 index 次序 */
Expand Down
2 changes: 1 addition & 1 deletion lib/config.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
module.exports = {
codeTypeList: ['simplest', 'dots_and_chars', 'grids_and_equations'],
codeTypeList: ['slide_match', 'simplest', 'dots_and_chars', 'grids_and_equations'],
};
13 changes: 11 additions & 2 deletions lib/utils.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
const { NLogger } = require('@lzwme/fe-utils');
const { NLogger, color } = require('@lzwme/fe-utils');
const { readdirSync } = require('node:fs');
const { resolve } = require('node:path');

const logger = new NLogger('[CCOCR]');
const logger = new NLogger('[CCOCR]', { color });
exports.logger = logger;

exports.log = function log(...msg) {
logger.debug(...msg);
}

// Cached entry names; stays empty until a read finds at least one entry.
let codesList = [];

/**
 * List the entry names found in the `../codes` directory (relative to this file).
 *
 * The directory is read synchronously while the cache is still empty; later
 * calls return the same cached array. Hidden entries (names starting with
 * "."), e.g. a `.DS_Store` file, are left out so that OS metadata files are
 * not reported as captcha modes.
 *
 * @returns {string[]} Names of the non-hidden entries under the codes directory.
 * @throws {Error} Whatever `readdirSync` throws when the directory cannot be read.
 */
function getCodesList() {
    if (codesList.length === 0) {
        codesList = readdirSync(resolve(__dirname, '../codes'))
            .filter((name) => !name.startsWith('.'));
    }
    return codesList;
}
exports.getCodesList = getCodesList;
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
"description": "Verification code identification based on OCR (Tesseract) and CV (OpenCV)",
"main": "index.js",
"scripts": {
"example": "node example.js",
"test": "node test.js"
"example": "node test/example.js",
"test": "node test/judge-case.js"
},
"repository": {
"type": "git",
"url": "git+https://github.com/PillarsZhang/captcha-cv-ocr.git"
"url": "git+https://github.com/renxia/captcha-cv-ocr.git"
},
"keywords": [
"verificationCode",
Expand All @@ -22,7 +22,7 @@
"bugs": {
"url": "https://github.com/PillarsZhang/captcha-cv-ocr/issues"
},
"homepage": "https://github.com/PillarsZhang/captcha-cv-ocr#readme",
"homepage": "https://github.com/renxia/captcha-cv-ocr#readme",
"files": [
"lib",
"codes"
Expand Down
Binary file added test/case/slide_match/original-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/case/slide_match/original-2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/case/slide_match/original-3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/case/slide_match/original-4.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/case/slide_match/slider-1_24x213.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/case/slide_match/slider-2_117x256.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/case/slide_match/slider-3_189x125.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/case/slide_match/slider-4_7x232.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
27 changes: 17 additions & 10 deletions test/example.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
const path = require("path");
const cco = require("../index.js");
const path = require('path');
const { getCodesList, Cvocr, logger } = require('../index.js');

(async () => {
for (const mode of cco.config.codeTypeList) {
let cvocr = new cco.Cvocr(mode); // mode 表示验证码的种类
await cvocr.init(1); //其中的1表示需要启动的 OCR Worker 数(多线程)
let ans = await cvocr.recognize(path.join(__dirname, "example", mode + ".jpg")); //支持文件地址、Base64、Buffer形式
console.log(`[${mode}]ans:`, ans);
}
process.exit(0);
})()
const codesList = getCodesList();
logger.info('codes:', codesList);

for (const mode of codesList) {
logger.info('test for:', mode);
const cvocr = new Cvocr(mode); // mode 表示验证码的种类
await cvocr.init(1); //其中的1表示需要启动的 OCR Worker 数(多线程)
const isSlideMatch = mode === 'slide_match';
const img = path.join(__dirname, `example/${mode}${isSlideMatch ? '-slider.png' : (mode === 'dots_and_chars' ? '.gif' : '.jpg')}`);
const img2 = isSlideMatch ? path.join(__dirname, `example/${mode}-original.png`) : undefined;
const ans = await cvocr.recognize(img, img2); //支持文件地址、Base64、Buffer形式
logger.log(`[${mode}]ans:`, ans);
}
process.exit(0);
})();
File renamed without changes
Binary file added test/example/slide_match-original.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test/example/slide_match-slider.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
21 changes: 18 additions & 3 deletions test/judge-case.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ global.debugFlag = 1;

//验证码种类与各自的评估函数
const modeList = {
slide_match: (ans, rightAns) => ans.minLoc.x === rightAns[0] && ans.maxLoc.x === rightAns[1],
simplest: (ans, rightAns) => ans.result == rightAns,
grids_and_equations: (ans, rightAns) => ans.equation.slice(0, 3) == rightAns,
dots_and_chars: (ans, rightAns) => ans.result == rightAns,
Expand All @@ -16,6 +17,7 @@ const modeList = {
(async () => {
let modeI = 0;
for (const mode in modeList) {
const isSlideMatch = mode === 'slide_match';
const cvocr = new Cvocr(mode);
console.log(`--- ${++modeI}. ${mode} ---\n`);
await cvocr.init(4, 2);
Expand All @@ -24,11 +26,24 @@ const modeList = {
const files = fs.readdirSync(examplePath);
let rightNum = 0;
for (let i = 0; i < files.length; i++) {
let rightAns = files[i].slice(0, -path.extname(files[i]).length);
let ans = await cvocr.recognize(path.join(examplePath, files[i]));
const fileName = files[i];
let rightAns = fileName.slice(0, -path.extname(files[i]).length);
let img1 = path.join(examplePath, fileName);
let originalImg;

if (isSlideMatch) {
if (!fileName.startsWith('slider-')) continue;
const idx = fileName.match(/-(\d)/)?.[0];
if (!idx) continue;
originalImg = path.join(examplePath, `original${idx}.png`);
const m = fileName.match(/_(\d+)x(\d+)/);
rightAns = [+m?.[1], +m?.[2]];
}

let ans = await cvocr.recognize(img1, originalImg);
let judge = modeList[mode](ans, rightAns);
if (judge) rightNum++;
console.log('ans:', ans);
console.log(`[${color.magenta(mode)}]ans:`, ans, color.cyan(fileName));
console.log(judge ? color.green('Right!') : color.red(`Wrong! | the rightAns is : ${rightAns}`), '\n');
}

Expand Down

0 comments on commit a9ecc26

Please sign in to comment.