Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add openvino support #149

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ and evaluate the MOT challenge benchmark.

## Generating detections

### Procedure without Openvino support
Beside the main tracking application, this repository contains a script to
generate features for person re-identification, suitable to compare the visual
appearance of pedestrian bounding boxes using cosine similarity.
Expand Down Expand Up @@ -88,6 +89,49 @@ descriptor. The files generated by this command can be used as input for the
try passing an absolute path to the ``--model`` argument. This might help in
some cases.

### Notes on Openvino
This branch adds support for Openvino. Obviously this is more useful for online
feature extraction instead of generating detections into a text file for MOT16.

**Performance estimation:** during the [AI hackathon](http://www.ai-hackathon.com/)
we used this to generate embeddings from a video stream on a NCS2. It was able
to sustain around 5fps while tracking more than 15 targets. Note that no batch
processing is available on this device, so for each target inference must be
called sequentially.

On CPU it runs roughly at the same speed as vanilla tensorflow. (Intel) GPU
unfortunately doesn't work currently.

For the feature extraction to use Openvino, a few additional steps have to be
taken:

##### 1) Freeze model for Openvino
This is necessary as the default model includes elements which are incompatible
with Openvino:
```
python tools/freeze_model.py --no_preprocess
```

##### 2) Convert model with Model Optimizer
```
cd resources/networks
mo_tf.py --input_model mars-small128.pb -b 1 --data_type <data_type>
```
As data type you need to use a type which is supported for the device you want
to use. The Movidius NCS2 compute stick for instance needs "FP16", the CPU only
supports the default "FP32".

##### 3) Generate detections
To generate the MOT16 detections you additionally have to supply the OpenVINO
device (e.g. "CPU" or "MYRIAD" for the NCS2):
```
python tools/generate_detections.py \
--model=resources/networks/mars-small128.pb \
--mot_dir=./MOT16/train \
--output_dir=./resources/detections/MOT16_train \
--use_openvino=MYRIAD
```

## Training the model

To train the deep association metric model we used a novel [cosine metric learning](https://github.com/nwojke/cosine_metric_learning) approach which is provided as a separate repository.
Expand Down
15 changes: 14 additions & 1 deletion deep_sort_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import argparse
import os
import time

import cv2
import numpy as np
Expand Down Expand Up @@ -162,9 +163,16 @@ def run(sequence_dir, detection_file, output_file, min_confidence,
"cosine", max_cosine_distance, nn_budget)
tracker = Tracker(metric)
results = []
last_time = time.time()

def frame_callback(vis, frame_idx):
print("Processing frame %05d" % frame_idx)
curr_time = time.time()
if frame_callback.last_time is not None:
fps = 1 / (curr_time - frame_callback.last_time)
else:
fps = 0
frame_callback.last_time = curr_time
print("Processing frame %05d - %.1ffps" % (frame_idx, fps))

# Load image and generate detections.
detections = create_detections(
Expand Down Expand Up @@ -198,6 +206,9 @@ def frame_callback(vis, frame_idx):
results.append([
frame_idx, track.track_id, bbox[0], bbox[1], bbox[2], bbox[3]])

# Store FPS object
frame_callback.last_time = None

# Run tracker.
if display:
visualizer = visualization.Visualization(seq_info, update_ms=5)
Expand Down Expand Up @@ -257,7 +268,9 @@ def parse_args():

if __name__ == "__main__":
args = parse_args()
start_time = time.time()
run(
args.sequence_dir, args.detection_file, args.output_file,
args.min_confidence, args.nms_max_overlap, args.min_detection_height,
args.max_cosine_distance, args.nn_budget, args.display)
print("Processing time: %.2fs" % (time.time() - start_time))
94 changes: 94 additions & 0 deletions ov_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import timeit
import math

from openvino.inference_engine import IENetwork, IEPlugin
import numpy as np
import tensorflow as tf

all_batch_size = 1
np.random.seed(seed=69)

def _run_in_batches(f, data_dict, out, batch_size):
data_len = len(out)
num_batches = int(data_len / batch_size)

s, e = 0, 0
for i in range(num_batches):
s, e = i * batch_size, (i + 1) * batch_size
batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
out[s:e] = f(batch_data_dict)
if e < len(out):
batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
out[e:] = f(batch_data_dict)

modelname = 'resources/networks/mars-small128'

# OV configuration
ov_net = IENetwork(model=modelname + '.xml', weights=modelname + '.bin')
ov_net.batch_size = all_batch_size
ov_plugin = IEPlugin(device='CPU')

# TF configuration
tf_session = tf.Session()
with tf.gfile.GFile(modelname + '.pb', 'rb') as gfile:
tf_graph = tf.GraphDef()
tf_graph.ParseFromString(gfile.read())
tf.import_graph_def(tf_graph, name='net')
tf_input_node = tf.get_default_graph().get_tensor_by_name('net/images:0')
tf_output_node = tf.get_default_graph().get_tensor_by_name('net/features:0')


# ?x128x64x3
testinput = np.random.random_sample((all_batch_size, 128, 64, 3))
testinput2 = testinput[:, :, :, ::-1]
print(testinput - testinput2)
# openvino expects colors major
ov_testinput = np.transpose(testinput, (0, 3, 1, 2))
ov_testinput2 = np.transpose(testinput2, (0, 3, 1, 2))

# run OV
ov_input_blob = next(iter(ov_net.inputs))
ov_out_blob = next(iter(ov_net.outputs))
ov_exec_net = ov_plugin.load(network=ov_net)

def run_ov(inp):
    """Run one OpenVINO inference pass on *inp*; returns the raw
    {output_blob_name: array} dict produced by infer()."""
    feed = {ov_input_blob: inp}
    return ov_exec_net.infer(inputs=feed)

ov_res = next(iter(run_ov(ov_testinput).values()))
ov_res2 = next(iter(run_ov(ov_testinput2).values()))

# run TF
def run_tf(inp):
    """Run the frozen TF graph on *inp* in batches; returns a
    (all_batch_size, 128) float32 feature matrix."""
    features = np.zeros((all_batch_size, 128), np.float32)

    def session_call(feed):
        return tf_session.run(tf_output_node, feed_dict=feed)

    _run_in_batches(session_call, {tf_input_node: inp},
                    features, all_batch_size)
    return features

tf_res = run_tf(testinput)
tf_res2 = run_tf(testinput2)

def compare(vec1, vec2):
    """Print element-wise absolute and relative differences of two feature
    vectors plus a PASSED/FAILED verdict based on np.allclose."""
    print('Diff abs (0.0 is exactly same):\n', vec1 - vec2)
    print('Diff rel (1.0 is exactly same):\n', vec1 / vec2)

    verdict = 'PASSED' if np.allclose(vec1, vec2) else 'FAILED'
    print('Comparison: {}'.format(verdict))

# compare different results
print('TF: RGB vs BGR')
compare(tf_res, tf_res2)
print('')

print('OV: RGB vs BGR')
compare(ov_res, ov_res2)
print('')

print('TF vs OV')
compare(tf_res, ov_res)
print('')

# timing
iterations = int(300 / all_batch_size)
print('Batch size {}, {} iterations:'.format(all_batch_size, iterations))
print(' OV: {:.5f}s'.format(timeit.timeit('run_ov(ov_testinput)', number=iterations, globals=globals())))
print(' TF: {:.5f}s'.format(timeit.timeit('run_tf(testinput)', number=iterations, globals=globals())))
15 changes: 12 additions & 3 deletions tools/freeze_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,12 @@ def parse_args():
parser.add_argument(
"--graphdef_out",
default="resources/networks/mars-small128.pb")
parser.add_argument(
"--no_preprocess",
default=False,
action='store_true',
help="Do not include preprocessing in model (to avoid compatibility "
"issues)")
return parser.parse_args()


Expand All @@ -197,9 +203,12 @@ def main():
with tf.Session(graph=tf.Graph()) as session:
input_var = tf.placeholder(
tf.uint8, (None, 128, 64, 3), name="images")
image_var = tf.map_fn(
lambda x: _preprocess(x), tf.cast(input_var, tf.float32),
back_prop=False)
if args.no_preprocess:
image_var = tf.cast(input_var, tf.float32)
else:
image_var = tf.map_fn(
lambda x: _preprocess(x), tf.cast(input_var, tf.float32),
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _preprocess() function changes input order (BGR to RGB). Is this handled anywhere in the code?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No... but it seems to not matter for the computation.

I wrote a test script for that:
https://gist.github.com/r-or/e1b85c47e1906763b6e0e7a209812dda

TF: RGB vs BGR
Diff abs (0.0 is exactly same):
 [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
  0. 0. 0. 0. 0. 0. 0. 0.]]
Diff rel (1.0 is exactly same):
 [[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
  1. 1. 1. 1. 1. 1. 1. 1.]]
Comparison: PASSED

OV: RGB vs BGR
Diff abs (0.0 is exactly same):
 [[-7.09891319e-05 -2.31117010e-05 -9.74312425e-05 -1.97231770e-04
   1.63659453e-04  1.75914727e-04 -2.75910832e-04 -1.31983310e-04
  -5.64876944e-04 -1.39862299e-04  3.16947699e-05  9.46968794e-06
  -1.95568660e-04 -2.75567174e-04  1.23247504e-04 -2.15724111e-04
   4.99635935e-05 -5.26588410e-05 -3.16016376e-04 -1.02594495e-05
  -2.16595829e-04 -6.73830509e-05  1.24670565e-04 -9.22754407e-05
  -4.97810543e-05 -2.27943063e-04 -1.59956515e-04 -1.92910433e-04
  -4.10407782e-04  2.03438103e-05  6.73681498e-05 -2.15888023e-04
   2.65173614e-04  2.46569514e-04 -1.86443329e-04  1.69944018e-04
  -2.33516097e-04  3.09579074e-04 -3.15740705e-04  6.92084432e-05
   1.41568482e-04 -2.09584832e-04 -7.94976950e-05 -2.32093036e-04
  -7.49334693e-04  1.75776891e-04 -2.28881836e-04  1.02892518e-05
   3.79905105e-05  1.49380416e-04 -1.06245279e-05  2.01821327e-04
  -5.26068732e-04  3.78191471e-04  4.58016992e-04 -3.39105725e-04
   2.95236707e-04  1.17262825e-05  9.52333212e-05 -1.28746033e-05
  -3.78444791e-04  2.62483954e-05  4.32081521e-04  5.81145287e-07
  -2.42659822e-04  4.62397933e-04  9.70438123e-05 -1.38171017e-05
   2.75548548e-04 -1.63108110e-04  1.20360419e-04  5.72502613e-05
   1.78739429e-04 -1.81391835e-04 -3.15383077e-04  7.68899918e-05
   2.03009695e-05  6.54584961e-04 -2.84455717e-04 -9.59448516e-05
  -6.94066286e-04 -5.18411398e-05 -5.67063689e-05  1.15118921e-04
  -1.28496438e-04  3.14921141e-04 -2.13776948e-04  1.39519572e-04
  -2.22593546e-04  9.27075744e-05 -5.28363511e-04  1.63061544e-04
   1.62258744e-04  3.24718654e-04  5.15423715e-04  2.54072249e-04
  -2.00938433e-04  9.00402665e-05 -1.41721219e-04  1.78741291e-04
   4.18379903e-04 -3.99368349e-04 -2.76640058e-05 -2.14301050e-04
   5.50225377e-06  4.53330576e-05  4.59015369e-04 -2.36004591e-04
  -3.93390656e-06  2.04637647e-04  1.68181956e-04  6.85453415e-07
  -7.73668289e-05 -8.13156366e-05  8.03060830e-05 -3.39727849e-04
   2.70333141e-04 -2.06910074e-04 -2.38791108e-05 -2.48849392e-06
  -1.09579414e-04  2.18413770e-05 -2.44807452e-04  2.52965838e-04
  -1.14329159e-05  2.36977823e-04  2.89959833e-04  1.11445785e-04]]
Diff rel (1.0 is exactly same):
 [[0.99967253 1.0002847  1.0013707  1.000923   1.0048432  1.0253918
  1.0197521  0.99684745 1.1119983  1.0031053  0.99962395 0.99975747
  1.0616565  1.010521   0.998444   1.0013002  1.0002184  0.9978832
  0.9966819  1.0001087  1.0198045  0.9997458  1.0013885  0.9991812
  0.99891114 1.0071045  1.0025008  0.99804175 0.99195236 0.9995756
  0.9990167  0.9979031  1.0026736  1.0044912  0.99593943 1.0042726
  0.997915   0.9958782  0.997491   0.99848914 1.0017109  1.0035508
  1.0006112  0.9962664  0.98856336 1.0163243  0.99638605 0.9998841
  1.0005648  1.0043436  1.0000782  0.9924133  1.0327789  1.0026767
  1.0035827  0.99799013 0.9976437  1.0007793  1.001019   1.0001298
  0.9959444  1.00028    1.0035039  0.9999948  0.9903539  1.0029724
  0.9961764  1.0004318  0.98782665 0.9987333  0.38110238 0.9992418
  0.9938017  1.0022285  0.9964751  1.0004631  0.9993321  0.7935085
  0.99424076 1.0016934  0.9955157  0.99962413 1.0005273  1.0375509
  1.0025675  0.9942383  1.0960878  1.0009598  0.9956771  0.99913466
  1.0227439  1.0079089  1.0007166  0.99399155 1.007452   1.0029502
  1.0042864  0.99888796 1.0130587  0.9905349  1.0048269  0.86005217
  1.0004     0.9964666  0.99983996 0.99923646 0.97229576 0.8474253
  1.0000467  1.0014483  0.9952575  0.9999925  0.99120194 0.99910206
  1.0022343  1.0710478  1.0043408  1.0027815  1.0002646  1.0000274
  1.0019089  0.9993802  0.9919843  0.99496603 0.99971646 0.98349696
  1.0108225  0.99822414]]
Comparison: FAILED

TF vs OV
Diff abs (0.0 is exactly same):
 [[-3.94286215e-03 -4.32543457e-04  5.99548221e-05 -5.05328178e-04
   9.27921385e-04 -4.12844960e-03  2.05636956e-03  7.24088401e-04
   1.61289563e-03  3.92809510e-03  1.66483223e-04 -6.79396465e-03
  -6.77901274e-03  1.13993883e-04  2.14674324e-03 -4.47931886e-03
  -3.22103500e-04 -2.19973177e-03 -3.49406153e-03 -4.19918448e-03
  -6.94096927e-03  4.93526459e-04  2.00904161e-03 -2.98864394e-03
   2.46691331e-03  1.02963299e-04  5.58376312e-04  7.87702948e-03
   1.99251249e-03  5.59294969e-03 -2.11878866e-03 -4.32344526e-03
   1.49095058e-03  6.78264722e-03 -6.30068034e-03 -6.66405261e-03
  -8.42780620e-03 -8.48181546e-04 -7.32316077e-03  5.94160333e-03
   1.05012730e-02 -6.90795481e-04  5.66825271e-04 -7.10094348e-03
  -2.20379978e-03  7.70511758e-03 -9.60171223e-03  6.82964921e-04
  -8.90789181e-03 -1.15070492e-03  1.48507953e-03 -2.18164176e-03
   1.88763440e-03  5.90819120e-03 -5.21732867e-03 -1.01997554e-02
   1.66078657e-03  2.63114460e-04 -4.62488085e-03  7.21380115e-04
   6.82404637e-03 -1.24016032e-02  4.28882986e-03  9.12263989e-04
  -5.39921224e-03  6.79710507e-03  2.50028446e-04  3.97107191e-03
   6.00080565e-03 -9.64917988e-03 -6.06814865e-09  3.70991230e-03
  -3.86371464e-03  6.10897690e-03 -1.71685219e-03  9.97103751e-03
  -1.18000209e-02 -7.65118748e-05 -4.82698902e-03 -1.86500326e-03
  -3.85785103e-03  2.82938778e-03  1.72593445e-03 -1.04905255e-02
  -4.40701842e-04  2.87934020e-03 -6.38315035e-03  5.70577383e-03
   5.37855923e-03 -1.30973756e-04  2.21801363e-03 -6.44079037e-03
   1.08246654e-02 -4.04008850e-03 -1.19326711e-02 -5.89519739e-04
  -4.79451939e-03 -2.68968195e-03  2.76577100e-03  2.17592716e-03
   1.15049183e-02  4.09299321e-03  1.38435513e-03  1.79736689e-02
   1.96448714e-03  2.19316036e-03 -3.39902006e-03  4.71524755e-03
  -9.59008932e-04 -2.22636759e-03  3.81218269e-03  1.75344944e-03
  -6.80692634e-03  2.39557773e-03  7.68467784e-04  4.61338321e-03
  -1.34492777e-02 -4.74420190e-03  2.16958672e-03 -2.16118991e-03
   5.91486692e-04  1.67673826e-03  1.77322514e-03  2.77697667e-03
   2.28755921e-03  1.66156795e-03 -1.35983313e-02  2.19796225e-03]]
Diff rel (1.0 is exactly same):
 [[ 0.9818045   1.0053259   0.9991577   1.0023628   1.0273277   0.41884965
   0.8556379   1.0173502   0.71241933  0.9130556   0.9980241   1.1740589
   3.0130827   0.9956931   0.9728552   1.0269623   0.9985928   0.91138774
   0.96319115  1.0444852   1.6223245   1.0018622   1.0223445   0.9734597
   1.0540177   0.9968135   0.9912921   1.0801181   1.0393881   0.8832715
   1.0309559   0.9579188   1.0149928   1.1229923   0.86221653  0.83317214
   0.92459404  1.0113395   0.9416606   0.87009555  1.1266977   1.011662
   0.99564457  0.88534284  0.96597564  1.704076    0.84784424  0.9923086
   0.8676459   0.9666845   0.98906654  1.0826377   0.8861161   1.0417054
   0.95933443  0.93942523  0.9867138   1.0174726   0.9505658   0.99272746
   1.0734274   0.86775905  1.0346589   0.99188757  0.7832818   1.043563
   0.99011093  0.87596244  0.7316255   0.924968    1.0000819   0.950832
   1.1348207   0.92511624  0.98074347  1.0600327   1.3884995   1.0304168
   0.9017044   1.0328609   0.9749625   1.0205228   0.9839614  -2.298085
   1.0087833   0.9470151   3.6175644   1.0392108   1.1049081   1.0012236
   0.9066464   0.69005466  1.047769    1.0752078   0.8287526   0.99317485
   1.1018409   1.0332566   0.748437    0.88367426  1.1320976   2.6676648
   0.9799915   1.2974048   0.94286215  0.9630336   1.2109962   4.5972047
   1.011381    0.98426676  0.89198875  0.98080915  0.21905166  1.0264783
   1.0213337   0.09919678  0.78497773  1.0636002   0.97595745  1.0237603
   0.989716    0.9523874   1.0585294   0.9444593   1.0567468   0.88234735
   0.49788728  0.9649135 ]]
Comparison: FAILED

Batch size 1, 300 iterations:
 OV: 1.66366s
 TF: 4.50380s

Apart from tensorflow and openvino not creating exactly the same results (which is to be expected) swapping BGR and RGB seems to not do anything.

Anyway, even though the results are slightly different, the tracker worked perfectly fine

Copy link
Owner

@nwojke nwojke May 7, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But it wouldn't hurt to convert from BGR to RGB before handing the image over from NumPy to TensorFlow, right?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, thanks for posting test results.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, it wouldn't hurt either. I'll add it for consistency.

back_prop=False)

factory_fn = _network_factory()
features, _ = factory_fn(image_var, reuse=None)
Expand Down
89 changes: 78 additions & 11 deletions tools/generate_detections.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,17 @@
import os
import errno
import argparse
import time
import numpy as np
import cv2
import tensorflow as tf

try:
from openvino.inference_engine import IENetwork, IEPlugin
USE_DYN_BATCH = False
except ImportError:
pass


def _run_in_batches(f, data_dict, out, batch_size):
data_len = len(out)
Expand Down Expand Up @@ -71,7 +78,20 @@ def extract_image_patch(image, bbox, patch_shape):
class ImageEncoder(object):

def __init__(self, checkpoint_filename, input_name="images",
output_name="features"):
output_name="features", openvino_device=None):

self.openvino_device=openvino_device
if openvino_device is not None:
# setup device
model_base = os.path.splitext(checkpoint_filename)[0]
self._net = IENetwork(
model=model_base + '.xml',
weights=model_base + '.bin')
self._plugin = IEPlugin(device=openvino_device)
self._input_blob = next(iter(self._net.inputs))
self._out_blob = next(iter(self._net.outputs))
self._reload_openvino_net(1)

self.session = tf.Session()
with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
graph_def = tf.GraphDef()
Expand All @@ -87,20 +107,52 @@ def __init__(self, checkpoint_filename, input_name="images",
self.feature_dim = self.output_var.get_shape().as_list()[-1]
self.image_shape = self.input_var.get_shape().as_list()[1:]

def _reload_openvino_net(self, batch_size):
    """(Re)load the executable network onto the OpenVINO device.

    MYRIAD devices (and builds without dynamic-batch support) cannot
    batch, so the network is pinned to batch size 1 there; otherwise the
    network is reloaded with the requested batch size and dynamic
    batching enabled.
    """
    dynamic_ok = USE_DYN_BATCH and self.openvino_device != "MYRIAD"
    if not dynamic_ok:
        self._net.batch_size = 1
        self._exec_net = self._plugin.load(network=self._net)
        return
    self._net.batch_size = batch_size
    self._exec_net = self._plugin.load(
        network=self._net, config={'DYN_BATCH_ENABLED': 'YES'})

def __call__(self, data_x, batch_size=32):
    """Compute appearance features for a batch of image patches.

    Parameters
    ----------
    data_x : ndarray
        Image patches in NHWC layout (num_patches, height, width, channels).
    batch_size : int
        Inference batch size for the batched code paths.

    Returns
    -------
    ndarray
        (num_patches, feature_dim) float32 feature matrix.
    """
    # Only consider reloading the OpenVINO network when OpenVINO is in use
    # and dynamic batching applies. The original guard also ran on the
    # plain-TensorFlow path (openvino_device is None) and crashed with an
    # AttributeError, because self._net only exists in OpenVINO mode.
    if (self.openvino_device is not None
            and self.openvino_device != "MYRIAD"
            and USE_DYN_BATCH
            and batch_size != self._net.batch_size):
        self._reload_openvino_net(batch_size)

    out = np.zeros((len(data_x), self.feature_dim), np.float32)
    if self.openvino_device:
        def reorder(tensor):
            # OpenVINO expects channels-first (NCHW) input.
            return np.transpose(tensor, (0, 3, 1, 2))

        if self.openvino_device == "MYRIAD" or not USE_DYN_BATCH:
            # doesn't support dynamic batch size: infer one patch at a time.
            for patch in range(len(data_x)):
                out[patch] = next(iter(self._exec_net.infer(
                    inputs={self._input_blob: reorder(
                        data_x[patch:patch + 1])}).values()))
        else:
            # infer() returns a {blob_name: array} dict; unwrap the single
            # output value so _run_in_batches can assign it into `out`
            # (the original passed the dict through and would fail on the
            # slice assignment).
            _run_in_batches(
                lambda x: next(iter(
                    self._exec_net.infer(inputs=x).values())),
                {self._input_blob: reorder(data_x)}, out, batch_size)
    else:
        # Plain TensorFlow path.
        _run_in_batches(
            lambda x: self.session.run(self.output_var, feed_dict=x),
            {self.input_var: data_x}, out, batch_size)
    return out


def create_box_encoder(model_filename, input_name="images",
output_name="features", batch_size=32):
image_encoder = ImageEncoder(model_filename, input_name, output_name)
output_name="features", batch_size=32,
openvino_device=None):
image_encoder = ImageEncoder(
model_filename, input_name, output_name,
openvino_device=openvino_device)
image_shape = image_encoder.image_shape

def encoder(image, boxes):
encoder.batch_size = batch_size
image_patches = []
for box in boxes:
patch = extract_image_patch(image, box, image_shape[:2])
Expand Down Expand Up @@ -132,7 +184,6 @@ def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
Path to custom detections. The directory structure should be the default
MOTChallenge structure: `[sequence]/det/det.txt`. If None, uses the
standard MOTChallenge detections.

"""
if detection_dir is None:
detection_dir = mot_dir
Expand Down Expand Up @@ -162,8 +213,12 @@ def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
frame_indices = detections_in[:, 0].astype(np.int)
min_frame_idx = frame_indices.astype(np.int).min()
max_frame_idx = frame_indices.astype(np.int).max()
last_frame_time = 0
for frame_idx in range(min_frame_idx, max_frame_idx + 1):
print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
curr_frame_time = time.time()
print("Frame %05d/%05d - %.2ffps"
% (frame_idx, max_frame_idx, 1 / (curr_frame_time - last_frame_time)))
last_frame_time = curr_frame_time
mask = frame_indices == frame_idx
rows = detections_in[mask]

Expand Down Expand Up @@ -199,14 +254,26 @@ def parse_args():
parser.add_argument(
"--output_dir", help="Output directory. Will be created if it does not"
" exist.", default="detections")
parser.add_argument(
"--use_openvino", help="Use Openvino. Can be any available device as "
"long as it is compatible. Model & weights are expected to be inside "
"the folder specified with '-model' and end with '.xml' and '.bin' "
"respectively. Supply the device identifier (CPU, GPU, MYRIAD etc.)",
default="CPU")
return parser.parse_args()


def main():
    """Script entry point: build a feature encoder and generate detections."""
    args = parse_args()
    if args.use_openvino:
        # IENetwork is only bound if the top-of-file openvino import
        # succeeded; the original `assert IENetwork` raised a confusing
        # NameError (not AssertionError) when openvino is absent, and
        # asserts are stripped under `python -O` anyway.
        if "IENetwork" not in globals():
            raise ImportError(
                "Openvino could not be imported. "
                "Make sure it is installed correctly.")
        # Device identifiers (CPU, GPU, MYRIAD, ...) are upper-case.
        args.use_openvino = args.use_openvino.upper()

    encoder = create_box_encoder(
        args.model, batch_size=32, openvino_device=args.use_openvino)
    generate_detections(
        encoder, args.mot_dir, args.output_dir, args.detection_dir)


if __name__ == "__main__":
Expand Down