Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update video rotation when doing detection #1124

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def detect(save_img=False):
t0 = time.time()
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
for path, img, im0s, vid_cap in dataset:
for path, img, im0s, vid_cap, rotation in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
Expand Down Expand Up @@ -131,8 +131,8 @@ def detect(save_img=False):

fourcc = 'mp4v' # output video codec
fps = vid_cap.get(cv2.CAP_PROP_FPS)
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH)) if not rotation or rotation == '180' else int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) if not rotation or rotation == '180' else int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
vid_writer.write(im0)

Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ tensorboard>=2.2
torch>=1.6.0
torchvision>=0.7.0
tqdm>=4.41.0
scikit-video
ffmpeg

# logging -------------------------------------
# wandb
Expand Down
27 changes: 24 additions & 3 deletions utils/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from threading import Thread

import cv2
import math
import skvideo.io
import numpy as np
import torch
from PIL import Image, ExifTags
Expand Down Expand Up @@ -128,9 +130,19 @@ def __init__(self, path, img_size=640):
images = [x for x in files if os.path.splitext(x)[-1].lower() in img_formats]
videos = [x for x in files if os.path.splitext(x)[-1].lower() in vid_formats]
ni, nv = len(images), len(videos)
videos_rotation = [None for _ in videos]
for index in range(nv):
metadata = skvideo.io.ffprobe(videos[index])
if 'video' in metadata and 'tag' in metadata['video']:
tags = metadata['video']['tag']

for tag in tags:
if tag['@key'] == 'rotate':
videos_rotation[index] = tag['@value']

self.img_size = img_size
self.files = images + videos
self.rotation = [None for _ in images] + videos_rotation
self.nf = ni + nv # number of files
self.video_flag = [False] * ni + [True] * nv
self.mode = 'images'
Expand All @@ -149,6 +161,7 @@ def __next__(self):
if self.count == self.nf:
raise StopIteration
path = self.files[self.count]
rotation = self.rotation[self.count]

if self.video_flag[self.count]:
# Read video
Expand All @@ -174,14 +187,22 @@ def __next__(self):
assert img0 is not None, 'Image Not Found ' + path
print('image %g/%g %s: ' % (self.count, self.nf, path), end='')

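# Apply the rotation read from the video's 'rotate' metadata tag ('90', '180' or '270'); rotation is None for images, which are left unchanged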
if rotation == '90':
img0 = cv2.rotate(img0, 0)
if rotation == '180':
img0 = cv2.rotate(img0, 1)
if rotation == '270':
img0 = cv2.rotate(img0, 2)

# Padded resize
img = letterbox(img0, new_shape=self.img_size)[0]

# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)

return path, img, img0, self.cap
return path, img, img0, self.cap, rotation

def new_video(self, path):
self.frame = 0
Expand Down Expand Up @@ -243,7 +264,7 @@ def __next__(self):
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)

return img_path, img, img0, None
return img_path, img, img0, None, None

def __len__(self):
return 0
Expand Down Expand Up @@ -316,7 +337,7 @@ def __next__(self):
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)

return self.sources, img, img0, None
return self.sources, img, img0, None, None

def __len__(self):
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
Expand Down