-
Notifications
You must be signed in to change notification settings - Fork 1
/
utils.py
64 lines (43 loc) · 1.54 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import torch
from PIL import Image
from config import cfg
from transformers import AutoTokenizer
class AvgMeter:
    """Keep a running, count-weighted average of a numeric metric.

    The statistics are plain attributes: ``avg`` (current weighted mean),
    ``sum`` (weighted total of all values seen) and ``count`` (total weight
    seen since the last reset).
    """

    def __init__(self, name="Metric"):
        """Create a meter labelled ``name`` with all statistics at zero."""
        self.name = name
        self.reset()

    def reset(self):
        """Set ``avg``, ``sum`` and ``count`` back to zero."""
        self.avg, self.sum, self.count = [0] * 3

    def update(self, val, count=1):
        """Fold ``val``, weighted by ``count``, into the running average.

        Args:
            val: The value to record.
            count: The weight of ``val`` (how many samples it stands for).
        """
        self.count += count
        self.sum += val * count
        # A zero-weight update on an empty meter would otherwise divide by
        # zero; with nothing recorded yet the average is reported as 0.
        self.avg = self.sum / self.count if self.count else 0

    def __repr__(self):
        """Return ``"<name>: <avg>"`` with the average shown to 4 decimals."""
        return f"{self.name}: {self.avg:.4f}"
def get_lr(optimizer):
    """Return the ``"lr"`` entry of the optimizer's first parameter group.

    Only the first element of ``optimizer.param_groups`` is consulted. When
    there are no parameter groups at all the result is ``None``.
    """
    learning_rates = (group["lr"] for group in optimizer.param_groups)
    return next(learning_rates, None)
class CLIPDataset(torch.utils.data.Dataset):
    """Dataset that pairs image files with their tokenized captions.

    All captions are tokenized once in the constructor. ``__getitem__`` then
    picks row ``idx`` out of that encoding and opens the image file with the
    same index.
    """

    def __init__(self, image_path, image_filenames, captions, tokenizer, transforms):
        """Store the inputs and tokenize every caption up front.

        Args:
            image_path: Directory prefix joined with each filename using "/".
            image_filenames: Indexable collection of image file names; entry
                ``i`` is used together with caption ``i``.
            captions: Iterable of caption strings.
            tokenizer: Callable invoked as ``tokenizer(list_of_captions,
                padding=True, truncation=True, return_tensors='pt',
                max_length=...)``; its result must provide ``.items()``
                yielding indexable tensors.
            transforms: Callable applied to the opened PIL image.
        """
        self.image_pth = image_path
        self.image_filenames = image_filenames
        # Materialize the captions exactly once and reuse that list for the
        # tokenizer: calling list() a second time on a one-shot iterable
        # (e.g. a generator) would hand the tokenizer an empty batch.
        self.captions = list(captions)
        self.tokenizer = tokenizer
        self.encoded_captions = self.tokenizer(
            self.captions,
            padding=True,
            truncation=True,
            return_tensors='pt',
            max_length=cfg.train.max_length,
        )
        self.transforms = transforms

    def __getitem__(self, idx):
        """Return one sample as a dict.

        The dict holds a detached copy of row ``idx`` for every key of the
        tokenizer output, plus ``'image'`` (the transformed image) and
        ``'caption'`` (the raw caption text).
        """
        item = {
            key: values[idx].clone().detach()
            for key, values in self.encoded_captions.items()
        }
        # Use a context manager so the underlying file handle is released
        # right away instead of lingering until garbage collection.
        with Image.open(f"{self.image_pth}/{self.image_filenames[idx]}") as img:
            # Read the pixel data while the file is still open so the image
            # stays usable even if `transforms` returns it unchanged.
            img.load()
            image = self.transforms(img)
        item['image'] = image
        item['caption'] = self.captions[idx]
        return item

    def __len__(self):
        """Return the number of captions, which is the dataset size."""
        return len(self.captions)