Skip to content

Commit

Permalink
Implements text_detection_ppocr java demo(opencv#251)
Browse files Browse the repository at this point in the history
  • Loading branch information
bluehatch committed Jun 27, 2024
1 parent ec5b2c8 commit 2a2e544
Show file tree
Hide file tree
Showing 4 changed files with 316 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@
**/__pycache__/**

.vscode
.idea

build/
**/build
**/build/**

target/
**/target
**/target/**
76 changes: 76 additions & 0 deletions models/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>opencv_zoo</groupId>
<artifactId>demo</artifactId>
<version>1.0.0-SNAPSHOT</version>
<name>opencv zoo demo application</name>
<packaging>pom</packaging>

<build>
<sourceDirectory>${project.basedir}</sourceDirectory>
</build>

<modules>
<module>text_detection_ppocr</module>
</modules>

<dependencies>
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>javacv-platform</artifactId>
<version>1.5.10</version>
<exclusions>
<exclusion>
<groupId>org.bytedeco</groupId>
<artifactId>flycapture-platform</artifactId>
</exclusion>
<exclusion>
<groupId>org.bytedeco</groupId>
<artifactId>libdc1394-platform</artifactId>
</exclusion>
<exclusion>
<groupId>org.bytedeco</groupId>
<artifactId>libfreenect-platform</artifactId>
</exclusion>
<exclusion>
<groupId>org.bytedeco</groupId>
<artifactId>libfreenect2-platform</artifactId>
</exclusion>
<exclusion>
<groupId>org.bytedeco</groupId>
<artifactId>librealsense-platform</artifactId>
</exclusion>
<exclusion>
<groupId>org.bytedeco</groupId>
<artifactId>librealsense2-platform</artifactId>
</exclusion>
<exclusion>
<groupId>org.bytedeco</groupId>
<artifactId>videoinput-platform</artifactId>
</exclusion>
<exclusion>
<groupId>org.bytedeco</groupId>
<artifactId>artoolkitplus-platform</artifactId>
</exclusion>
<exclusion>
<groupId>org.bytedeco</groupId>
<artifactId>leptonica-platform</artifactId>
</exclusion>
<exclusion>
<groupId>org.bytedeco</groupId>
<artifactId>tesseract-platform</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.beust</groupId>
<artifactId>jcommander</artifactId>
<version>1.82</version>
</dependency>
</dependencies>
</project>
215 changes: 215 additions & 0 deletions models/text_detection_ppocr/demo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.UnixStyleUsageFormatter;
import org.bytedeco.javacpp.FloatPointer;
import org.bytedeco.javacv.CanvasFrame;
import org.bytedeco.javacv.OpenCVFrameConverter;
import org.bytedeco.opencv.global.opencv_dnn;
import org.bytedeco.opencv.opencv_core.*;
import org.bytedeco.opencv.opencv_dnn.TextDetectionModel_DB;
import org.bytedeco.opencv.opencv_videoio.VideoCapture;

import java.util.AbstractMap;
import java.util.Map;

import static org.bytedeco.opencv.global.opencv_imgcodecs.imwrite;
import static org.bytedeco.opencv.global.opencv_imgproc.*;

public class demo {

// Valid combinations of backends and targets
static int[][] backendTargetPairs = {
{opencv_dnn.DNN_BACKEND_OPENCV, opencv_dnn.DNN_TARGET_CPU},
{opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA},
{opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA_FP16},
{opencv_dnn.DNN_BACKEND_TIMVX, opencv_dnn.DNN_TARGET_NPU},
{opencv_dnn.DNN_BACKEND_CANN, opencv_dnn.DNN_TARGET_NPU}
};

static class Args {
@Parameter(names = {"--help", "-h"}, order = 0, help = true,
description = "Print help message.")
boolean help;
@Parameter(names = {"--model", "-m"}, order = 1,
description = "Set model type.")
String model = "text_detection_en_ppocrv3_2023may.onnx";
@Parameter(names = {"--input", "-i"}, order = 2,
description = "Path to input image or video file. Skip this argument to capture frames from a camera.")
String input;
@Parameter(names = "--width", order = 3,
description = "Resize input image to certain width, It should be multiple by 32.")
int width = 736;
@Parameter(names = "--height", order = 4,
description = "Resize input image to certain height, It should be multiple by 32.")
int height = 736;
@Parameter(names = "--binary_threshold", order = 5,
description = "Threshold of the binary map.")
float binaryThreshold = 0.3f;
@Parameter(names = "--polygon_threshold", order = 6,
description = "Threshold of polygons.")
float polygonThreshold = 0.5f;
@Parameter(names = "--max_candidates", order = 7,
description = "Set maximum number of polygon candidates.")
int maxCandidates = 200;
@Parameter(names = "--unclip_ratio", order = 8,
description = "The unclip ratio of the detected text region, which determines the output size.")
double unclipRatio = 2.0;
@Parameter(names = {"--save", "-s"}, order = 9, arity = 1,
description = "Specify to save file with results (i.e. bounding box, confidence level). Invalid in case of camera input.")
boolean save = true;
@Parameter(names = {"--viz", "-v"}, order = 10, arity = 1,
description = "Specify to open a new window to show results. Invalid in case of camera input.")
boolean viz = true;
@Parameter(names = {"--backend", "-bt"}, order = 11,
description = "Choose one of computation backends:" +
" 0: OpenCV implementation + CPU," +
" 1: CUDA + GPU (CUDA), " +
" 2: CUDA + GPU (CUDA FP16)," +
" 3: TIM-VX + NPU," +
" 4: CANN + NPU")
int backend = 0;
}

static class PPOCRDet {
private final TextDetectionModel_DB model;
private final Size inputSize;

public PPOCRDet(String modelPath, Size inputSize,
float binaryThreshold, float polygonThreshold, int maxCandidates, double unclipRatio,
int backendId, int targetId) {
this.inputSize = inputSize;

model = new TextDetectionModel_DB(modelPath);
model.setPreferableBackend(backendId);
model.setPreferableTarget(targetId);

model.setBinaryThreshold(binaryThreshold);
model.setPolygonThreshold(polygonThreshold);
model.setUnclipRatio(unclipRatio);
model.setMaxCandidates(maxCandidates);

model.setInputParams(1.0 / 255.0, inputSize,
new Scalar(122.67891434, 116.66876762, 104.00698793, 0), true, false);
}

public Map.Entry<PointVectorVector, FloatPointer> infer(Mat image) {
assert image.rows() == inputSize.height() : "height of input image != net input size";
assert image.cols() == inputSize.width() : "width of input image != net input size";
final PointVectorVector pt = new PointVectorVector();
final FloatPointer confidences = new FloatPointer();
model.detect(image, pt, confidences);
return new AbstractMap.SimpleEntry<>(pt, confidences);
}
}

static Mat visualize(Mat image, Map.Entry<PointVectorVector, FloatPointer> results, double fps, Scalar boxColor,
Scalar textColor, boolean isClosed, int thickness) {
final Mat output = new Mat();
image.copyTo(output);
if (fps > 0) {
putText(output, String.format("FPS: %.2f", fps), new Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, textColor);
}
final PointVectorVector pvv = results.getKey();
final MatVector matVector = new MatVector();
for (int i = 0; i < pvv.size(); i++) {
final PointVector pv = pvv.get(i);
final Point pts = new Point(pv.size());
for (int j = 0; j < pv.size(); j++) {
pts.position(j).x(pv.get(j).x()).y(pv.get(j).y());
}
matVector.push_back(new Mat(pts.position(0)));
}
polylines(output, matVector, isClosed, boxColor, thickness, LINE_AA, 0);
matVector.close();
return output;
}

/**
* Execute:
* mvn compile exec:java -Dexec.mainClass=demo -q -Dexec.args="--help"
*/
public static void main(String[] argv) {
final Args args = new Args();
final JCommander jc = JCommander.newBuilder()
.addObject(args)
.build();
jc.setUsageFormatter(new UnixStyleUsageFormatter(jc));
jc.parse(argv);
if (args.help) {
jc.usage();
return;
}
final int[] backendTargetPair = backendTargetPairs[args.backend];
assert args.model != null && !args.model.isEmpty() : "Model name is empty";
final Size inpSize = new Size(args.width, args.height);

final PPOCRDet model = new PPOCRDet(args.model, inpSize,
args.binaryThreshold, args.polygonThreshold, args.maxCandidates, args.unclipRatio,
backendTargetPair[0], backendTargetPair[1]);

final VideoCapture cap = new VideoCapture();
if (args.input != null) {
cap.open(args.input);
} else {
cap.open(0);
}
assert cap.isOpened() : "Cannot open video or file";
Mat originalImage = new Mat();

final OpenCVFrameConverter.ToMat converter = new OpenCVFrameConverter.ToMat();
CanvasFrame mainframe = null;
if (args.input == null || args.viz) {
mainframe = new CanvasFrame(args.model + " Demo", CanvasFrame.getDefaultGamma() / 2.2);
mainframe.setDefaultCloseOperation(javax.swing.JFrame.EXIT_ON_CLOSE);
mainframe.setVisible(true);
}

final Scalar boxColor = new Scalar(0, 255, 0, 0);
final Scalar textColor = new Scalar(0, 0, 255, 0);
final TickMeter tm = new TickMeter();
while (cap.read(originalImage)) {
cap.read(originalImage);

final int originalW = originalImage.cols();
final int originalH = originalImage.rows();
final double scaleHeight = originalH / (double) inpSize.height();
final double scaleWidth = originalW / (double) inpSize.width();
final Mat image = new Mat();
resize(originalImage, image, inpSize);

// inference
tm.start();
Map.Entry<PointVectorVector, FloatPointer> results = model.infer(image);
tm.stop();
// Scale the results bounding box
final PointVectorVector pvv = results.getKey();
for (int i = 0; i < pvv.size(); i++) {
final PointVector pts = pvv.get(i);
for (int j = 0; j < pts.size(); j++) {
pts.get(j).x((int) (pts.get(j).x() * scaleWidth));
pts.get(j).y((int) (pts.get(j).y() * scaleHeight));
}
}

originalImage = visualize(originalImage, results, tm.getFPS(), boxColor, textColor, true, 2);
tm.reset();
if (args.input != null) {
if (args.save) {
System.out.println("Result image saved to result.jpg");
imwrite("result.jpg", originalImage);
}
if (args.viz) {
mainframe.showImage(converter.convert(originalImage));
}
} else {
mainframe.showImage(converter.convert(originalImage));
}

// clear
pvv.close();
image.close();
}
tm.close();
}

}
20 changes: 20 additions & 0 deletions models/text_detection_ppocr/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>opencv_zoo</groupId>
<artifactId>demo</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>

<artifactId>text_detection_ppocr</artifactId>

<build>
<sourceDirectory>${project.basedir}</sourceDirectory>
</build>

</project>

0 comments on commit 2a2e544

Please sign in to comment.