dataloader_avvp.py
import os

import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
def ids_to_multinomial(ids):
    """Label encoding.

    Returns:
        1d array, multi-hot (multinomial) representation, e.g. [1, 0, 1, 0, 0, ...]
    """
    categories = ['Speech', 'Car', 'Cheering', 'Dog', 'Cat', 'Frying_(food)',
                  'Basketball_bounce', 'Fire_alarm', 'Chainsaw', 'Cello', 'Banjo',
                  'Singing', 'Chicken_rooster', 'Violin_fiddle', 'Vacuum_cleaner',
                  'Baby_laughter', 'Accordion', 'Lawn_mower', 'Motorcycle', 'Helicopter',
                  'Acoustic_guitar', 'Telephone_bell_ringing', 'Baby_cry_infant_cry', 'Blender',
                  'Clapping']
    id_to_idx = {cid: index for index, cid in enumerate(categories)}
    # Set a 1 at the index of every category present in `ids`.
    y = np.zeros(len(categories))
    for cid in ids:
        y[id_to_idx[cid]] = 1
    return y
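
# Quick sanity check of the encoding (illustrative values, not part of the
# original file): 'Speech' and 'Dog' sit at indices 0 and 3 of `categories`,
# so those two positions of the 25-dim vector are set to 1:
#
#     >>> ids_to_multinomial(['Speech', 'Dog'])
#     array([1., 0., 0., 1., 0., ...])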

class LLP_dataset(Dataset):
    """Look, Listen, and Parse (LLP) dataset of pre-extracted audio/visual features."""

    def __init__(self, label, audio_dir, video_dir, st_dir, transform=None):
        self.df = pd.read_csv(label, header=0, sep='\t')
        self.filenames = self.df["filename"]
        self.audio_dir = audio_dir
        self.video_dir = video_dir
        self.st_dir = st_dir
        self.transform = transform

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        row = self.df.loc[idx, :]
        # The first 11 characters of the filename are the video id, which names
        # the per-video feature files. Use .iloc for positional access; plain
        # integer indexing on a Series is deprecated in recent pandas.
        name = row.iloc[0][:11]
        audio = np.load(os.path.join(self.audio_dir, name + '.npy'))
        video_s = np.load(os.path.join(self.video_dir, name + '.npy'))
        video_st = np.load(os.path.join(self.st_dir, name + '.npy'))
        # The last column holds comma-separated event labels.
        ids = row.iloc[-1].split(',')
        label = ids_to_multinomial(ids)

        sample = {'audio': audio, 'video_s': video_s, 'video_st': video_st, 'label': label}
        if self.transform:
            sample = self.transform(sample)
        return sample

class ToTensor(object):
    """Convert the numpy arrays in a sample dict to torch tensors."""

    def __call__(self, sample):
        if len(sample) == 2:
            # Audio-only sample: {'audio', 'label'}.
            audio = sample['audio']
            label = sample['label']
            return {'audio': torch.from_numpy(audio),
                    'label': torch.from_numpy(label)}
        else:
            # Full sample: audio, spatial and spatio-temporal visual features, label.
            audio = sample['audio']
            video_s = sample['video_s']
            video_st = sample['video_st']
            label = sample['label']
            return {'audio': torch.from_numpy(audio),
                    'video_s': torch.from_numpy(video_s),
                    'video_st': torch.from_numpy(video_st),
                    'label': torch.from_numpy(label)}
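
# Usage sketch (the paths below are placeholders, not part of the original
# file): build the dataset from a tab-separated label file and three feature
# directories, wrap it in a DataLoader, and convert each sample to tensors.
if __name__ == '__main__':
    dataset = LLP_dataset(label='data/AVVP_train.csv',
                          audio_dir='data/feats/audio',
                          video_dir='data/feats/video',
                          st_dir='data/feats/video_st',
                          transform=transforms.Compose([ToTensor()]))
    loader = DataLoader(dataset, batch_size=16, shuffle=True, num_workers=4)
    for batch in loader:
        print(batch['audio'].shape, batch['video_s'].shape, batch['label'].shape)
        break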