Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make UCF101 dataset *loading* more efficient #2475

Closed
wants to merge 5 commits into from
Closed
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 30 additions & 25 deletions torchvision/datasets/ucf101.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import glob
import os

from .utils import list_dir
Expand Down Expand Up @@ -50,17 +49,28 @@ def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
if not 1 <= fold <= 3:
raise ValueError("fold should be between 1 and 3, got {}".format(fold))

extensions = ('avi',)
self.fold = fold
self.train = train
self.transform = transform

classes = list(sorted(list_dir(root)))
class_to_idx = {classes[i]: i for i in range(len(classes))}
self.samples = make_dataset(self.root, class_to_idx, extensions, is_valid_file=None)
self.classes = classes
video_list = [x[0] for x in self.samples]
video_clips = VideoClips(
video_list,
# Create class to index mapping with sorted class names
self.classes = list(sorted(list_dir(root)))
class_to_idx = {c: i for i, c in enumerate(self.classes)}

# Iterate through root directory to retrieve the path and the labels
# for each dataset example
self.samples = make_dataset(
self.root, class_to_idx, ('avi',), is_valid_file=None)

# Get the video paths that belong to the selected fold and split
_video_paths_in_fold = self._fold_paths(annotation_path, fold, train)
# Filter the dataset samples so only the video paths belonging to the
# selected fold are processed
self.samples = [o for o in self.samples if o[0] in _video_paths_in_fold]

# At this point, only the needed videos' path are selected
self.video_clips = VideoClips(
[x[0] for x in self.samples],
frames_per_clip,
step_between_clips,
frame_rate,
Expand All @@ -71,35 +81,30 @@ def __init__(self, root, annotation_path, frames_per_clip, step_between_clips=1,
_video_min_dimension=_video_min_dimension,
_audio_samples=_audio_samples,
)
self.video_clips_metadata = video_clips.metadata
self.indices = self._select_fold(video_list, annotation_path, fold, train)
self.video_clips = video_clips.subset(self.indices)
self.transform = transform
self.video_clips_metadata = self.video_clips.metadata
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unfortunately a BC-breaking change as other downstream projects rely on this behavior, see ClassyVision for example.

The original thinking behind this approach was that one could cache the metadata and re-use it over different dataset invocations, so that the creation time would be amortized.

It would indeed have been possible to create separate metadata for each fold from the beginning, but now that it's been done as is, we unfortunately have to keep it for backwards-compatibility reasons.


@property
def metadata(self):
    """Return the cached metadata of the underlying ``VideoClips`` object."""
    cached = self.video_clips_metadata
    return cached

def _select_fold(self, video_list, annotation_path, fold, train):
name = "train" if train else "test"
name = "{}list{:02d}.txt".format(name, fold)
def _fold_paths(self, annotation_path, fold, train):
split = 'train' if train else 'test'
name = f'{split}list{fold:02d}.txt'
f = os.path.join(annotation_path, name)
selected_files = []

with open(f, "r") as fid:
data = fid.readlines()
data = [x.strip().split(" ") for x in data]
data = [os.path.join(self.root, x[0]) for x in data]
selected_files.extend(data)
selected_files = set(selected_files)
indices = [i for i in range(len(video_list)) if video_list[i] in selected_files]
return indices
video_files = fid.readlines()
video_files = [o.strip().split(" ")[0] for o in video_files]
video_files = [os.path.join(self.root, o) for o in video_files]
video_files = set(video_files)
return video_files

def __len__(self):
return self.video_clips.num_clips()

def __getitem__(self, idx):
video, audio, info, video_idx = self.video_clips.get_clip(idx)
label = self.samples[self.indices[video_idx]][1]
label = self.samples[video_idx][1]

if self.transform is not None:
video = self.transform(video)
Expand Down