-
Notifications
You must be signed in to change notification settings - Fork 10
/
imagenet_dataset.py
65 lines (56 loc) · 2.09 KB
/
imagenet_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import os.path as osp
import numpy as np
import io
from PIL import Image
import logging
import random
from torch.utils.data import Dataset
# Module-level logger. It is looked up under the fixed name 'global' rather
# than this module's own name.
logger = logging.getLogger('global')
def pil_loader(path):
    """Read the image file at *path* and return it as an RGB PIL image.

    The file is opened explicitly in binary mode and handed to PIL as a file
    object; the conversion to RGB happens while the file is still open, and
    the handle is closed when the ``with`` block exits.
    """
    with open(path, "rb") as handle:
        return Image.open(handle).convert("RGB")
class ImageNetDataset(Dataset):
    """Image classification dataset described by a plain-text meta file.

    Every non-blank line of ``meta_file`` must contain two
    whitespace-separated fields: an image path relative to ``root_dir`` and
    an integer class label.

    Args:
        root_dir: Prefix joined in front of every filename from the meta file.
        meta_file: Path of the text file listing ``<filename> <label>`` pairs.
        transform: Optional callable applied to the loaded RGB image.
        use_ceph: When true, image bytes are fetched through a
            ``petrel_client`` client instead of the local filesystem.

    Attributes:
        max_retries: How many randomly chosen replacement samples
            ``__getitem__`` may try after a sample fails to load before it
            gives up and raises.
    """

    max_retries = 10

    def __init__(self, root_dir, meta_file, transform=None, use_ceph=False):
        self.root_dir = root_dir
        self.meta_file = meta_file
        self.transform = transform
        self.initialized = False
        self.use_ceph = use_ceph

        metas_names = []
        metas_labels = []
        with open(meta_file) as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing newline at EOF)
                    # instead of failing on the two-value unpack below.
                    continue
                filename, label = line.split()
                metas_names.append(osp.join(self.root_dir, filename))
                metas_labels.append(int(label))

        # Count parsed entries, not raw lines, so __len__ always matches the
        # arrays that __getitem__ indexes.
        self.num = len(metas_names)
        # Paths are kept as a fixed-width bytes array. They are encoded as
        # UTF-8 explicitly because __getitem__ decodes them as UTF-8;
        # np.string_ was removed in NumPy 2.0 and encoded as ASCII only.
        self.metas_names = np.array(
            [name.encode('utf-8') for name in metas_names], dtype=np.bytes_
        )
        self.metas_labels = np.array(metas_labels, dtype=np.int_)

        if self.use_ceph:
            # Imported lazily so the dependency is only needed for ceph use.
            from petrel_client.client import Client as CephClient
            self.mclient = CephClient()
            self.initialized = True

    def __len__(self):
        """Return the number of samples parsed from the meta file."""
        return self.num

    def _load_sample(self, idx):
        """Load, convert to RGB and transform the sample stored at *idx*."""
        img_path = str(self.metas_names[idx], encoding='utf-8')
        label = self.metas_labels[idx]
        if self.use_ceph:
            value = self.mclient.Get(img_path)
            # np.frombuffer is the documented replacement for the deprecated
            # binary mode of np.fromstring.
            img_bytes = np.frombuffer(value, np.uint8)
            buff = io.BytesIO(img_bytes)
            with Image.open(buff) as img:
                img = img.convert('RGB')
        else:
            img = pil_loader(img_path)
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __getitem__(self, idx):
        """Return ``(image, label)`` for *idx*.

        If the requested sample cannot be loaded, the error is logged and a
        randomly chosen sample is returned in its place.

        Raises:
            IndexError: If *idx* is outside ``[-len(self), len(self))``.
            RuntimeError: If the requested sample and ``max_retries``
                random replacements all fail to load.
        """
        # Reject bad indices up front. If an IndexError were swallowed by the
        # retry logic below, plain iteration over the dataset (which stops on
        # IndexError) would never terminate.
        if not -self.num <= idx < self.num:
            raise IndexError(
                f'index {idx} is out of range for dataset of size {self.num}'
            )

        last_error = None
        # A bounded loop replaces the original unbounded recursion, which
        # could only end in a RecursionError when every sample was unreadable.
        for _ in range(self.max_retries + 1):
            try:
                return self._load_sample(idx)
            except Exception as e:
                logger.info(f'Error when load {idx}')
                logger.info(e)
                last_error = e
                idx = random.randint(0, self.num - 1)
        raise RuntimeError(
            f'failed to load a sample after {self.max_retries + 1} attempts'
        ) from last_error