Skip to content

Commit

Permalink
Implements object_tracking_vittrack java demo(opencv#251)
Browse files Browse the repository at this point in the history
  • Loading branch information
bluehatch committed Jun 28, 2024
1 parent 5962eb4 commit 0f69e83
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 17 deletions.
15 changes: 15 additions & 0 deletions models/object_tracking_vittrack/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,21 @@ cmake --build build
./build/opencv_zoo_object_tracking_vittrack -h
```

## Java

Install Maven to get started.

```shell
# tracking on camera input
mvn compile exec:java -q

# tracking on video
mvn compile exec:java -q -Dexec.args="-i /path/to/video"

# get help messages
mvn compile exec:java -q -Dexec.args="-h"
```

# Example outputs

<img src="example_outputs/vittrack_demo.gif" style="zoom:200%;" />
Expand Down
206 changes: 206 additions & 0 deletions models/object_tracking_vittrack/demo.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
import com.beust.jcommander.JCommander;
import com.beust.jcommander.Parameter;
import com.beust.jcommander.UnixStyleUsageFormatter;
import org.bytedeco.javacpp.BytePointer;
import org.bytedeco.opencv.global.opencv_dnn;
import org.bytedeco.opencv.opencv_core.*;
import org.bytedeco.opencv.opencv_video.TrackerVit;
import org.bytedeco.opencv.opencv_videoio.VideoCapture;
import org.bytedeco.opencv.opencv_videoio.VideoWriter;

import static org.bytedeco.opencv.global.opencv_highgui.*;
import static org.bytedeco.opencv.global.opencv_imgproc.*;
import static org.bytedeco.opencv.global.opencv_videoio.CAP_PROP_FPS;

public class demo {

/**
 * Valid combinations of DNN backends and targets.
 * <p>
 * Each row is {@code {backendId, targetId}}. The row index is the value accepted by the
 * {@code --backend_target} option, so the order of the rows must stay in sync with the option's
 * help text. Declared {@code final} because the table is a constant lookup and is never reassigned.
 */
static final int[][] backendTargetPairs = {
        {opencv_dnn.DNN_BACKEND_OPENCV, opencv_dnn.DNN_TARGET_CPU},
        {opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA},
        {opencv_dnn.DNN_BACKEND_CUDA, opencv_dnn.DNN_TARGET_CUDA_FP16},
        {opencv_dnn.DNN_BACKEND_TIMVX, opencv_dnn.DNN_TARGET_NPU},
        {opencv_dnn.DNN_BACKEND_CANN, opencv_dnn.DNN_TARGET_NPU}
};

/**
 * Command-line options of the demo. JCommander fills the fields from the
 * {@link Parameter} annotations; the initial field values act as defaults.
 */
static class Args {
    /** Help text of the backend/target option, kept as one constant for readability. */
    private static final String BACKEND_TARGET_DESCRIPTION =
            "Choose one of the backend-target pair to run this demo:"
                    + " 0: OpenCV implementation + CPU,"
                    + " 1: CUDA + GPU (CUDA), "
                    + " 2: CUDA + GPU (CUDA FP16),"
                    + " 3: TIM-VX + NPU,"
                    + " 4: CANN + NPU";

    /** True when the usage text was requested. */
    @Parameter(order = 0, names = {"--help", "-h"}, description = "Print help message.", help = true)
    boolean help;

    /** Path of the input video; stays {@code null} when the option is omitted. */
    @Parameter(order = 1, names = {"--input", "-i"},
            description = "Set path to the input video. Omit for using default camera.")
    String input;

    /** Path of the ONNX model file. */
    @Parameter(order = 2, names = {"--model_path", "-m"}, description = "Set model path.")
    String modelPath = "object_tracking_vittrack_2023sep.onnx";

    /** Row index into the backend/target table. */
    @Parameter(order = 3, names = {"--backend_target", "-bt"}, description = BACKEND_TARGET_DESCRIPTION)
    int backendTarget = 0;

    /** Whether the annotated frames are written to a video file. */
    @Parameter(order = 4, names = {"--save", "-s"}, description = "Specify to save a file with results.")
    boolean save;

    /** Whether the annotated frames are shown in a window; takes an explicit value (arity 1). */
    @Parameter(order = 5, names = {"--vis", "-v"}, arity = 1,
            description = "Specify to open a new window to show results.")
    boolean vis = true;
}

/**
 * Plain holder for the outcome of a single tracker update on one frame.
 */
static class TrackingResult {
    /** Bounding box written by the tracker's update call. */
    Rect bbox;
    /** Tracking score reported by the tracker after the update. */
    float score;
    /** Value returned by the tracker's update call. */
    boolean isLocated;
}

/**
 * Small wrapper around {@link TrackerVit} that bundles the result of each update
 * into a {@link TrackingResult}.
 */
static class VitTrack {
    private final TrackerVit model;

    /**
     * Creates the underlying tracker.
     *
     * @param modelPath path of the model file handed to the tracker parameters
     * @param backendId DNN backend identifier
     * @param targetId  DNN target identifier
     */
    VitTrack(String modelPath, int backendId, int targetId) {
        final TrackerVit.Params config = new TrackerVit.Params();
        config.net(new BytePointer(modelPath));
        config.backend(backendId);
        config.target(targetId);
        this.model = TrackerVit.create(config);
    }

    /**
     * Initializes the tracker with the target region.
     *
     * @param image frame containing the target
     * @param roi   region of the target inside {@code image}
     */
    void init(Mat image, Rect roi) {
        this.model.init(image, roi);
    }

    /**
     * Updates the tracker with a new frame.
     *
     * @param image frame to run the tracker on
     * @return the update flag, the bounding box filled by the tracker and the tracking score
     */
    TrackingResult infer(Mat image) {
        final Rect box = new Rect();
        final boolean located = this.model.update(image, box);
        final float trackingScore = this.model.getTrackingScore();

        final TrackingResult outcome = new TrackingResult();
        outcome.bbox = box;
        outcome.isLocated = located;
        outcome.score = trackingScore;
        return outcome;
    }
}

/**
 * Draws the tracking result on a copy of the given frame.
 * <p>
 * The FPS value is printed in the top-left corner when it is non-negative. When the target is
 * located with a score of at least 0.3 its bounding box and score are drawn; otherwise a red
 * "Target lost!" message is drawn in the centre of the frame.
 *
 * @param image     frame to annotate; it is cloned and not modified
 * @param bbox      bounding box of the target
 * @param score     tracking score of the target
 * @param isLocated whether the tracker reported the target as located
 * @param fps       frames per second to display; a negative value hides the FPS text
 * @param boxColor  colour of the bounding box
 * @param textColor colour of the FPS and score text
 * @param fontScale font scale passed to the text drawing calls
 * @param fontSize  text thickness used for the score and the "Target lost!" message
 * @return the annotated copy of {@code image}
 */
static Mat visualize(Mat image, Rect bbox, float score, boolean isLocated, double fps, Scalar boxColor,
        Scalar textColor, double fontScale, int fontSize) {
    final Mat output = image.clone();
    final int h = output.rows();
    final int w = output.cols();
    if (fps >= 0) {
        putText(output, String.format("FPS: %.2f", fps), new Point(0, 30), FONT_HERSHEY_DUPLEX, fontScale,
                textColor);
    }

    if (isLocated && score >= 0.3) {
        rectangle(output, bbox, boxColor, 2, LINE_8, 0);
        putText(output, String.format("%.2f", score), new Point(bbox.x(), bbox.y() + 25),
                FONT_HERSHEY_DUPLEX, fontScale, textColor, fontSize, LINE_8, false);
    } else {
        final String message = "Target lost!";
        final Size textSize = getTextSize(message, FONT_HERSHEY_DUPLEX, fontScale, fontSize, new int[]{0});
        final int textX = (w - textSize.width()) / 2;
        // putText anchors the string at its bottom-left corner, so for a vertically centred
        // message that corner must sit half a text height BELOW the frame centre. Subtracting
        // the height instead would draw the text one full text height above the centre.
        final int textY = (h + textSize.height()) / 2;
        putText(output, message, new Point(textX, textY), FONT_HERSHEY_DUPLEX,
                fontScale, new Scalar(0, 0, 255, 0), fontSize, LINE_8, false);
    }

    return output;
}

/**
 * Runs the VitTrack demo: parses the command line, opens the video source, lets the user select
 * a region of interest on the first frame and then tracks it frame by frame until a key is
 * pressed or the source delivers no more frames.
 * <p>
 * Execute: mvn compile exec:java -q -Dexec.args=""
 *
 * @param argv command-line arguments, see {@link Args}
 */
public static void main(String[] argv) {
    final Args args = new Args();
    final JCommander jc = JCommander.newBuilder()
            .addObject(args)
            .build();
    jc.setUsageFormatter(new UnixStyleUsageFormatter(jc));
    jc.parse(argv);
    if (args.help) {
        jc.usage();
        return;
    }

    // The option value is used as an array index below; reject out-of-range values with a
    // readable message instead of failing with an ArrayIndexOutOfBoundsException.
    if (args.backendTarget < 0 || args.backendTarget >= backendTargetPairs.length) {
        System.err.printf("Error: --backend_target must be between 0 and %d, got %d%n",
                backendTargetPairs.length - 1, args.backendTarget);
        return;
    }
    final int backendId = backendTargetPairs[args.backendTarget][0];
    final int targetId = backendTargetPairs[args.backendTarget][1];
    final VitTrack tracker = new VitTrack(args.modelPath, backendId, targetId);

    final String windowName = "VitTrack Demo";
    final Scalar green = new Scalar(0, 255, 0, 0);
    final VideoCapture video = new VideoCapture();
    final VideoWriter outputVideo = new VideoWriter();
    // try/finally so that the capture and the writer are released on every exit path,
    // including the early returns below.
    try {
        if (args.input == null) {
            video.open(0);
        } else {
            video.open(args.input);
        }
        if (!video.isOpened()) {
            System.err.println("Error: Could not open video source");
            return;
        }

        // This Mat is reused for every frame read from the source.
        final Mat frame = new Mat();
        video.read(frame);
        if (frame.empty()) {
            System.err.println("No frames grabbed!");
            return;
        }

        // Draw the instructions on a copy so the tracker is initialized with the clean frame.
        final Mat instructions = frame.clone();
        putText(instructions, "1. Drag a bounding box to track.", new Point(0, 25), FONT_HERSHEY_SIMPLEX, 1,
                green);
        putText(instructions, "2. Press ENTER to confirm", new Point(0, 50), FONT_HERSHEY_SIMPLEX, 1, green);
        final Rect roi = selectROI(windowName, instructions);

        if (roi.area() == 0) {
            System.err.println("No ROI is selected! Exiting...");
            return;
        }
        System.out.printf("Selected ROI: (x: %d, y: %d, width: %d, height: %d)%n", roi.x(), roi.y(),
                roi.width(), roi.height());

        // Create VideoWriter if save option is specified
        if (args.save) {
            double fps = video.get(CAP_PROP_FPS);
            // Some sources (live cameras in particular) may report 0 or a negative rate;
            // fall back to a common default so the writer receives a usable frame rate.
            if (!(fps > 0)) {
                fps = 30.0;
            }
            outputVideo.open("output.mp4", VideoWriter.fourcc((byte) 'm', (byte) 'p', (byte) '4', (byte) 'v'),
                    fps, frame.size());
            if (!outputVideo.isOpened()) {
                System.err.println("Error: Could not create output video stream");
                return;
            }
        }

        // Initialize tracker with ROI
        tracker.init(frame, roi);

        // Track frame by frame; any key press ends the loop.
        final TickMeter tm = new TickMeter();
        while (waitKey(1) < 0) {
            video.read(frame);
            if (frame.empty()) {
                System.out.println("End of video");
                break;
            }

            // Inference (only this call is timed)
            tm.start();
            final TrackingResult result = tracker.infer(frame);
            tm.stop();

            // Visualize. visualize() draws on its own clone of the frame, so no extra copy
            // is needed here.
            final Mat annotated = visualize(frame, result.bbox, result.score, result.isLocated, tm.getFPS(),
                    green, green, 1.0, 1);

            if (args.save) {
                outputVideo.write(annotated);
            }
            if (args.vis) {
                imshow(windowName, annotated);
            }
            // Reset so the next FPS value reflects a single frame only.
            tm.reset();
        }
    } finally {
        if (outputVideo.isOpened()) {
            outputVideo.release();
        }
        if (video.isOpened()) {
            video.release();
        }
    }
}

}
31 changes: 31 additions & 0 deletions models/object_tracking_vittrack/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>opencv_zoo</groupId>
<artifactId>demo</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>

<artifactId>object_tracking_vittrack</artifactId>

<build>
<!-- NOTE(review): this build section looks identical to the one declared in the parent POM
     (models/pom.xml); it can presumably be inherited instead of repeated here. Confirm before
     removing. -->
<!-- The demo source (demo.java) sits next to this pom.xml, so the module directory itself is
     used as the source directory. -->
<sourceDirectory>${project.basedir}</sourceDirectory>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<executable>java</executable>
<!-- Class started by "mvn compile exec:java"; demo.java declares it without a package. -->
<mainClass>demo</mainClass>
</configuration>
</plugin>
</plugins>
</build>

</project>
15 changes: 13 additions & 2 deletions models/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,29 @@
<groupId>opencv_zoo</groupId>
<artifactId>demo</artifactId>
<version>1.0.0-SNAPSHOT</version>
<name>opencv zoo demo application</name>
<name>OpenCV Zoo demo application</name>
<packaging>pom</packaging>

<build>
<sourceDirectory>${project.basedir}</sourceDirectory>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<executable>java</executable>
<mainClass>demo</mainClass>
</configuration>
</plugin>
</plugins>
</build>

<modules>
<module>object_tracking_vittrack</module>
<module>text_detection_ppocr</module>
</modules>


<dependencies>
<dependency>
<groupId>org.bytedeco</groupId>
Expand Down
15 changes: 0 additions & 15 deletions models/text_detection_ppocr/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,4 @@

<artifactId>text_detection_ppocr</artifactId>

<build>
<sourceDirectory>${project.basedir}</sourceDirectory>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<executable>java</executable>
<mainClass>demo</mainClass>
</configuration>
</plugin>
</plugins>
</build>

</project>

0 comments on commit 0f69e83

Please sign in to comment.