-
Notifications
You must be signed in to change notification settings - Fork 3
/
dataset.py
67 lines (53 loc) · 2.17 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
"""PyTorch Dataset for CLIP Bangla :: https://github.com/zabir-nabil/bangla-image-search"""
from logging import config
import os
import cv2
import torch
import albumentations as A
import config as CFG
from normalizer import normalize # pip install git+https://github.com/csebuetnlp/normalizer
class CLIPDataset(torch.utils.data.Dataset):
    """Dataset of (image, caption) pairs for CLIP Bangla.

    Indexing returns a tuple ``(image, caption)`` where ``image`` is a float
    tensor whose last source axis (channels) has been moved to the front, and
    ``caption`` is the normalized caption text at the same position.
    """

    def __init__(self, image_filenames, captions, tokenizer, transforms):
        """Store image paths, pre-normalized captions and the image transform.

        Args:
            image_filenames: Iterable of image file paths, aligned by
                position with ``captions``.
            captions: Iterable of caption strings. Each one is passed through
                ``normalize`` once, here, rather than on every item access.
            tokenizer: Not used by this class; accepted so that existing
                callers keep working.
            transforms: Callable invoked as ``transforms(image=array)`` that
                returns a mapping with an ``'image'`` entry.
        """
        # Materialize the paths so they are indexed by position, exactly like
        # the captions list (a label-indexed container would otherwise be
        # looked up by label while captions are looked up by position).
        self.image_filenames = list(image_filenames)
        self.captions = [normalize(caption) for caption in captions]
        self.transforms = transforms

    def __getitem__(self, idx):
        """Load, transform and return the ``idx``-th (image, caption) pair.

        Raises:
            FileNotFoundError: If the image file is missing or cannot be
                decoded.
        """
        path = f"{self.image_filenames[idx]}"
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cvtColor.
            raise FileNotFoundError(
                f"Image could not be read (missing or not decodable): {path}"
            )
        # OpenCV decodes to BGR channel order; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transforms(image=image)['image']
        # Move the channel axis first: (H, W, C) -> (C, H, W).
        image = torch.tensor(image).permute(2, 0, 1).float()
        caption = self.captions[idx]
        return image, caption

    def __len__(self):
        """Return the number of captions (one per sample)."""
        return len(self.captions)
def get_transforms(mode="train"):
    """Build the albumentations pipeline for the given mode.

    Args:
        mode: ``"train"`` selects the augmenting pipeline; any other value
            selects the plain resize + normalize pipeline.

    Returns:
        An ``A.Compose`` that always ends with a resize to
        ``CFG.size`` x ``CFG.size`` followed by normalization.
    """
    if mode != "train":
        # Evaluation path: no augmentation, only resize and normalize.
        return A.Compose(
            [
                A.Resize(CFG.size, CFG.size, always_apply=True),
                A.Normalize(max_pixel_value=255.0, always_apply=True),
            ]
        )

    settings = {
        'aug_prob' : 0.2
    }
    prob = settings['aug_prob']

    # Augmentations first, in the order they are applied.
    steps = [
        A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=prob),
        A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=prob),
        A.CoarseDropout(p=prob),
        A.GaussNoise(p=prob),
        A.ZoomBlur(p=prob),
        A.RandomFog(p=prob),
        A.Rotate((-20., 20.), p = 0.5),
        A.MotionBlur(p=prob),
    ]
    # Then the same resize + normalize tail used by the evaluation path.
    steps.append(A.Resize(CFG.size, CFG.size, always_apply=True))
    steps.append(A.Normalize(max_pixel_value=255.0, always_apply=True))
    return A.Compose(steps)