Skip to content

Commit f8b595a

Browse files
authored
862 add json output video demo (open-mmlab#906)
* add json output for video demo * add json output for long_video_demo * add json output video demo
1 parent 9afe256 commit f8b595a

File tree

2 files changed

+66
-23
lines changed

2 files changed

+66
-23
lines changed

demo/README.md

+9-2
Original file line numberDiff line numberDiff line change
@@ -282,7 +282,7 @@ python demo/long_video_demo.py ${CONFIG_FILE} ${CHECKPOINT_FILE} ${VIDEO_FILE} $
282282
283283
Optional arguments:
284284
285-
- `OUT_FILE`: Path to the output video file.
285+
- `OUT_FILE`: Path to the output file, which can be either a video file or a `json` file.
286286
- `INPUT_STEP`: Input step for sampling frames, which can help to get more sparse input. If not specified, it will be set to 1.
287287
- `DEVICE_TYPE`: Type of device to run the demo. Allowed values are cuda device like `cuda:0` or `cpu`. If not specified, it will be set to `cuda:0`.
288288
- `THRESHOLD`: Threshold of prediction score for action recognition. Only label with score higher than the threshold will be shown. If not specified, it will be set to 0.01.
@@ -325,7 +325,14 @@ or use checkpoint url from `configs/` to directly load corresponding checkpoint,
325325
326326
```shell
327327
python demo/long_video_demo.py configs/recognition/i3d/i3d_r50_video_inference_32x2x1_100e_kinetics400_rgb.py \
328-
checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO demo/label_map_k400.txt PATH_TO_SAVED_VIDEO \
328+
checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO demo/label_map_k400.txt PATH_TO_SAVED_VIDEO
329+
```
330+
331+
5. Predict different labels in a long video by using an I3D model on GPU and save the results as a `json` file
332+
333+
```shell
334+
python demo/long_video_demo.py configs/recognition/i3d/i3d_r50_video_inference_32x2x1_100e_kinetics400_rgb.py \
335+
checkpoints/i3d_r50_256p_32x2x1_100e_kinetics400_rgb_20200801-7d9f44de.pth PATH_TO_LONG_VIDEO demo/label_map_k400.txt ./results.json
329336
```
330337
331338
## SpatioTemporal Action Detection Webcam Demo

demo/long_video_demo.py

+57-21
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import argparse
2+
import json
23
import random
34
from collections import deque
45
from operator import itemgetter
@@ -32,7 +33,7 @@ def parse_args():
3233
parser.add_argument('checkpoint', help='checkpoint file/url')
3334
parser.add_argument('video_path', help='video file/url')
3435
parser.add_argument('label', help='label file')
35-
parser.add_argument('out_file', help='output filename')
36+
parser.add_argument('out_file', help='output result file in video/json')
3637
parser.add_argument(
3738
'--input-step',
3839
type=int,
@@ -58,6 +59,47 @@ def parse_args():
5859
return args
5960

6061

62+
def show_results_video(result_queue, text_info, thr, msg, frame, video_writer):
    """Draw the latest recognition results onto ``frame`` and write it out.

    If new results are queued, captions for every label whose score reaches
    ``thr`` are rendered and remembered in ``text_info``; otherwise the
    previously remembered captions (or the placeholder ``msg`` when none
    exist yet) are re-drawn so the overlay persists between predictions.

    Args:
        result_queue (deque): Queue holding at most one list of
            ``(label, score)`` tuples from the recognizer.
        text_info (dict): Captions drawn for the previous prediction, keyed
            by their ``(x, y)`` pixel location.
        thr (float): Minimum score for a label to be shown.
        msg (str): Placeholder text shown before the first prediction.
        frame (ndarray): BGR frame to annotate (modified in place).
        video_writer (cv2.VideoWriter): Writer receiving the annotated frame.

    Returns:
        dict: The captions drawn on this frame, for reuse on later frames.
    """
    if result_queue:
        # Fresh prediction available: rebuild the caption dict from scratch.
        text_info = {}
        for idx, (label, score) in enumerate(result_queue.popleft()):
            if score < thr:
                # Results are sorted by score, so nothing later can pass.
                break
            pos = (0, 40 + idx * 20)
            caption = label + ': ' + str(round(score, 2))
            text_info[pos] = caption
            cv2.putText(frame, caption, pos, FONTFACE, FONTSCALE, FONTCOLOR,
                        THICKNESS, LINETYPE)
    elif text_info:
        # No new prediction: keep showing the last known captions.
        for pos, caption in text_info.items():
            cv2.putText(frame, caption, pos, FONTFACE, FONTSCALE, FONTCOLOR,
                        THICKNESS, LINETYPE)
    else:
        # Nothing predicted yet: show the "preparing" placeholder.
        cv2.putText(frame, msg, (0, 40), FONTFACE, FONTSCALE, MSGCOLOR,
                    THICKNESS, LINETYPE)
    video_writer.write(frame)
    return text_info
84+
85+
86+
def get_results_json(result_queue, text_info, thr, msg, ind, out_json):
    """Record the recognition results for frame ``ind`` into ``out_json``.

    If new results are queued, labels whose score reaches ``thr`` are
    formatted as ``'label: score'`` strings keyed by rank (starting at 1)
    and stored both in ``text_info`` and under ``out_json[ind]``; otherwise
    the previous ``text_info`` (or the placeholder ``msg`` when none exists
    yet) is stored for this frame.

    Args:
        result_queue (deque): Queue holding at most one list of
            ``(label, score)`` tuples from the recognizer.
        text_info (dict): Formatted results from the previous prediction,
            keyed by rank.
        thr (float): Minimum score for a label to be recorded.
        msg (str): Placeholder text used before the first prediction.
        ind (int): Index of the current frame.
        out_json (dict): Accumulated per-frame results, keyed by frame index.

    Returns:
        tuple: ``(text_info, out_json)`` with this frame's entry added.
    """
    if result_queue:
        # Fresh prediction available: rebuild the rank -> caption mapping.
        labelled = {}
        rank = 1
        for label, score in result_queue.popleft():
            if score < thr:
                # Results are sorted by score, so nothing later can pass.
                break
            labelled[rank] = label + ': ' + str(round(score, 2))
            rank += 1
        text_info = labelled
        out_json[ind] = labelled
    elif text_info:
        # No new prediction: carry the last known results forward.
        out_json[ind] = text_info
    else:
        # Nothing predicted yet: record the placeholder message.
        out_json[ind] = msg
    return text_info, out_json
101+
102+
61103
def show_results(model, data, label, args):
62104
frame_queue = deque(maxlen=args.sample_length)
63105
result_queue = deque(maxlen=1)
@@ -70,11 +112,13 @@ def show_results(model, data, label, args):
70112

71113
msg = 'Preparing action recognition ...'
72114
text_info = {}
115+
out_json = {}
73116
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
74117
frame_size = (frame_width, frame_height)
75118

76119
ind = 0
77-
video_writer = cv2.VideoWriter(args.out_file, fourcc, fps, frame_size)
120+
video_writer = None if args.out_file.endswith('.json') \
121+
else cv2.VideoWriter(args.out_file, fourcc, fps, frame_size)
78122
prog_bar = mmcv.ProgressBar(num_frames)
79123
backup_frames = []
80124

@@ -108,28 +152,20 @@ def show_results(model, data, label, args):
108152
results = scores_sorted[:num_selected_labels]
109153
result_queue.append(results)
110154

111-
if len(result_queue) != 0:
112-
text_info = {}
113-
results = result_queue.popleft()
114-
for i, result in enumerate(results):
115-
selected_label, score = result
116-
if score < args.threshold:
117-
break
118-
location = (0, 40 + i * 20)
119-
text = selected_label + ': ' + str(round(score, 2))
120-
text_info[location] = text
121-
cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
122-
FONTCOLOR, THICKNESS, LINETYPE)
123-
elif len(text_info):
124-
for location, text in text_info.items():
125-
cv2.putText(frame, text, location, FONTFACE, FONTSCALE,
126-
FONTCOLOR, THICKNESS, LINETYPE)
155+
if args.out_file.endswith('.json'):
156+
text_info, out_json = get_results_json(result_queue, text_info,
157+
args.threshold, msg, ind,
158+
out_json)
127159
else:
128-
cv2.putText(frame, msg, (0, 40), FONTFACE, FONTSCALE, MSGCOLOR,
129-
THICKNESS, LINETYPE)
130-
video_writer.write(frame)
160+
text_info = show_results_video(result_queue, text_info,
161+
args.threshold, msg, frame,
162+
video_writer)
163+
131164
cap.release()
132165
cv2.destroyAllWindows()
166+
if args.out_file.endswith('.json'):
167+
with open(args.out_file, 'w') as js:
168+
json.dump(out_json, js)
133169

134170

135171
def inference(model, data, args, frame_queue):

0 commit comments

Comments
 (0)