UCF101 fails if root directory ends in "/" #1703

Closed · wants to merge 4 commits
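The root cause: `UCF101._select_fold` strips the root prefix from each video path by string length, `video_list[i][len(self.root) + 1:]`, where the `+ 1` is meant to skip the path separator. When `root` already ends in "/", the slice also swallows the first character of the class directory, so no entry ever matches the annotation lists. A minimal sketch of the failure (paths are illustrative):

```python
root = "data/ucf101/"  # note the trailing slash
video = "data/ucf101/ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01.avi"

# The "+ 1" assumes root does NOT end with a separator; here it does,
# so the slice starts one character too far into the relative path.
print(video[len(root) + 1:])  # -> "pplyEyeMakeup/v_ApplyEyeMakeup_g01_c01.avi"

# The UCF101 split files list entries such as
# "ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01.avi", so the mangled key
# never matches and every video is filtered out of the fold.
```

The diffs below replace the slicing with `pathlib`, deriving the `<class>/<file>` key from the video path itself.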
torchvision/datasets/hmdb51.py (17 changes: 14 additions & 3 deletions)
```diff
@@ -1,8 +1,11 @@
 import glob
 import os
+from pathlib import Path
+
+import torch
 
-from .utils import list_dir
 from .folder import make_dataset
+from .utils import list_dir
 from .video_utils import VideoClips
 from .vision import VisionDataset
```

```diff
@@ -99,7 +102,11 @@ def _select_fold(self, video_list, annotation_path, fold, train):
                 data = [x[0] for x in data if int(x[1]) == target_tag]
                 selected_files.extend(data)
         selected_files = set(selected_files)
-        indices = [i for i in range(len(video_list)) if os.path.basename(video_list[i]) in selected_files]
+        indices = []
+        for i in range(len(video_list)):
+            path = Path(video_list[i])
+            if str(path.relative_to(path.parent.parent)) in selected_files:
+                indices.append(i)
         return indices
 
     def __len__(self):
```
```diff
@@ -110,6 +117,10 @@ def __getitem__(self, idx):
         label = self.samples[self.indices[video_idx]][1]
 
         if self.transform is not None:
-            video = self.transform(video)
+            transformed_video = []
+            for counter, image in enumerate(video):
+                image = self.transform(image)
+                transformed_video.append(image)
+            video = torch.stack(transformed_video)
 
         return video, audio, label
```
A reviewer (Member) commented on lines +120 to +124:
Those are unrelated changes that shouldn't be handled this way.
Instead, we are working on video transforms that can be applied to a whole video clip right away. They are currently private functions because their API will change and the default transforms will eventually support video clips natively; the private functions live in https://github.com/pytorch/vision/blob/master/torchvision/transforms/_transforms_video.py


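As context for that suggestion, a sketch of the clip-level approach, assuming the private module keeps the class names it currently has on master (`ToTensorVideo`, `CenterCropVideo`). The module is private and its API may change, so treat this as illustrative:

```python
import torch
from torchvision.transforms import Compose
from torchvision.transforms import _transforms_video as T  # private, subject to change

# Compose clip-level transforms instead of mapping an image transform
# over every frame in Python.
transform = Compose([
    T.ToTensorVideo(),       # uint8 (T, H, W, C) -> float (C, T, H, W) in [0, 1]
    T.CenterCropVideo(112),  # crops every frame of the clip in one call
])

clip = torch.randint(0, 256, (16, 240, 320, 3), dtype=torch.uint8)  # fake 16-frame clip
out = transform(clip)
print(out.shape)  # torch.Size([3, 16, 112, 112])
```

Passed as `transform=` to the dataset, something like this would make the per-frame loop and the `torch.stack` call above unnecessary.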
torchvision/datasets/kinetics.py (8 changes: 6 additions & 2 deletions)
```diff
@@ -1,5 +1,5 @@
-from .utils import list_dir
 from .folder import make_dataset
+from .utils import list_dir
 from .video_utils import VideoClips
 from .vision import VisionDataset
 
```

```diff
@@ -74,6 +74,10 @@ def __getitem__(self, idx):
         label = self.samples[video_idx][1]
 
         if self.transform is not None:
-            video = self.transform(video)
+            transformed_video = []
+            for counter, image in enumerate(video):
+                image = self.transform(image)
+                transformed_video.append(image)
+            video = torch.stack(transformed_video)
 
         return video, audio, label
```
torchvision/datasets/ucf101.py (18 changes: 14 additions & 4 deletions)
```diff
@@ -1,8 +1,11 @@
 import glob
 import os
+from pathlib import Path
+
+import torch
 
-from .utils import list_dir
 from .folder import make_dataset
+from .utils import list_dir
 from .video_utils import VideoClips
 from .vision import VisionDataset
```

```diff
@@ -91,9 +94,12 @@ def _select_fold(self, video_list, annotation_path, fold, train):
                 data = [x[0] for x in data]
                 selected_files.extend(data)
         selected_files = set(selected_files)
-        indices = [i for i in range(len(video_list)) if video_list[i][len(self.root) + 1:] in selected_files]
+        indices = []
+        for i in range(len(video_list)):
+            path = Path(video_list[i])
+            if str(path.relative_to(path.parent.parent)) in selected_files:
+                indices.append(i)
         return indices
 
     def __len__(self):
         return self.video_clips.num_clips()
```
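After this change the membership key is computed from the video path alone (two directory levels up from the file), so it no longer depends on how `self.root` was spelled. A small illustrative check (made-up paths):

```python
from pathlib import Path

for root in ("data/ucf101", "data/ucf101/"):  # with and without trailing slash
    video = Path(root) / "ApplyEyeMakeup" / "v_ApplyEyeMakeup_g01_c01.avi"
    print(video.relative_to(video.parent.parent))
# Both iterations print: ApplyEyeMakeup/v_ApplyEyeMakeup_g01_c01.avi
```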
```diff
@@ -102,6 +108,10 @@ def __getitem__(self, idx):
         label = self.samples[self.indices[video_idx]][1]
 
         if self.transform is not None:
-            video = self.transform(video)
+            transformed_video = []
+            for counter, image in enumerate(video):
+                image = self.transform(image)
+                transformed_video.append(image)
+            video = torch.stack(transformed_video)
 
         return video, audio, label
```