diff --git a/ptsemseg/loader/pascal_voc_loader.py b/ptsemseg/loader/pascal_voc_loader.py index 2f610964..8afec524 100644 --- a/ptsemseg/loader/pascal_voc_loader.py +++ b/ptsemseg/loader/pascal_voc_loader.py @@ -1,21 +1,23 @@ +# DATASET + import os from os.path import join as pjoin import collections import json import torch import numpy as np -import scipy.misc as m import scipy.io as io import matplotlib.pyplot as plt import glob +import cv2 from PIL import Image from tqdm import tqdm -from torch.utils import data +from torch.utils import data as dt from torchvision import transforms +import warnings - -class pascalVOCLoader(data.Dataset): +class pascalVOCLoader(dt.Dataset): """Data loader for the Pascal VOC semantic segmentation dataset. Annotations from both the original VOC data (which consist of RGB images @@ -28,79 +30,90 @@ class pascalVOCLoader(data.Dataset): is added as a subdirectory of the `SegmentationClass` folder in the original Pascal VOC data layout. - A total of five data splits are provided for working with the VOC data: + A total of four data splits are provided for working with the VOC data: train: The original VOC 2012 training data - 1464 images val: The original VOC 2012 validation data - 1449 images - trainval: The combination of `train` and `val` - 2913 images train_aug: The unique images present in both the train split and training images from SBD: - 8829 images (the unique members of the result of combining lists of length 1464 and 8498) - train_aug_val: The original VOC 2012 validation data minus the images - present in `train_aug` (This is done with the same logic as - the validation set used in FCN PAMI paper, but with VOC 2012 - rather than VOC 2011) - 904 images + test: an arbitrary set of images from the "val" set, written in a text file test.txt in the same directory """ def __init__( self, - root, - sbd_path=None, - split="train_aug", - is_transform=False, - img_size=512, + root="/gpfs/home/fnouraei/data/fnouraei/VOC/VOCdevkit/VOC2012/", # choose your main path as default + sbd_path="/gpfs/home/fnouraei/data/fnouraei/VOC/benchmark_RELEASE", # choose your SBD dir as default + split="train", # loader split {"train", "val", "test", "train_aug"} + is_transform=True, + img_size=(128,128), # choose default input size augmentations=None, img_norm=True, - test_mode=False, + aug_with_sbd = False, # choose whether or not to add Berkeley images to val and train sets by default ): self.root = root self.sbd_path = sbd_path + self.aug_with_sbd = aug_with_sbd self.split = split self.is_transform = is_transform self.augmentations = augmentations self.img_norm = img_norm - self.test_mode = test_mode self.n_classes = 21 self.mean = np.array([104.00699, 116.66877, 122.67892]) self.files = collections.defaultdict(list) self.img_size = img_size if isinstance(img_size, tuple) else (img_size, img_size) - if not self.test_mode: - for split in ["train", "val", "trainval"]: - path = pjoin(self.root, "ImageSets/Segmentation", split + ".txt") - file_list = tuple(open(path, "r")) - file_list = [id_.rstrip() for id_ in file_list] - self.files[split] = file_list - self.setup_annotations() - - self.tf = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), - ] - ) + + for split in ["train", "val", "test"]: + path = pjoin(self.root, "ImageSets","Segmentation", split + ".txt") + file_list = tuple(open(path, "r")) + file_list = [id_.rstrip() for id_ in file_list] + self.files[split] = file_list + self.setup_annotations() + + if not self.img_norm: + self.tf = transforms.ToTensor() + else: + self.tf = transforms.Compose( + [ + + transforms.ToTensor() + ,transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) + ]) + + def __len__(self): return len(self.files[self.split]) def __getitem__(self, index): + im_name = self.files[self.split][index] im_path = pjoin(self.root, "JPEGImages", im_name + ".jpg") lbl_path = pjoin(self.root, "SegmentationClass/pre_encoded", im_name + ".png") - im = Image.open(im_path) - lbl = Image.open(lbl_path) + if self.aug_with_sbd: + lbl_path = pjoin(self.root, "SegmentationClass/pre_encoded_aug", im_name + ".png") + im = cv2.imread(im_path, cv2.IMREAD_UNCHANGED) + im = cv2.cvtColor(np.array(im), cv2.COLOR_BGR2RGB) + lbl = cv2.imread(lbl_path, cv2.IMREAD_UNCHANGED) + + if self.augmentations is not None: im, lbl = self.augmentations(im, lbl) if self.is_transform: im, lbl = self.transform(im, lbl) + return im, lbl def transform(self, img, lbl): if self.img_size == ("same", "same"): pass else: - img = img.resize((self.img_size[0], self.img_size[1])) # uint8 with RGB mode - lbl = lbl.resize((self.img_size[0], self.img_size[1])) + img = cv2.resize(img, (self.img_size[0], self.img_size[1]), interpolation =cv2.INTER_NEAREST) + lbl = cv2.resize(lbl, (self.img_size[0], self.img_size[1]), interpolation =cv2.INTER_NEAREST) + + img = self.tf(img) + lbl = torch.from_numpy(np.array(lbl)).long() lbl[lbl == 255] = 0 return img, lbl @@ -111,32 +124,29 @@ def get_pascal_labels(self): Returns: np.ndarray with dimensions (21, 3) """ - return np.asarray( - [ - [0, 0, 0], - [128, 0, 0], - [0, 128, 0], - [128, 128, 0], - [0, 0, 128], - [128, 0, 128], - [0, 128, 128], - [128, 128, 128], - [64, 0, 0], - [192, 0, 0], - [64, 128, 0], - [192, 128, 0], - [64, 0, 128], - [192, 0, 128], - [64, 128, 128], - [192, 128, 128], - [0, 64, 0], - [128, 64, 0], - [0, 192, 0], - [128, 192, 0], - [0, 64, 128], - ] - ) - + return np.asarray( [ + [0, 0, 0], #0: bg + [128, 0, 0], #1: aeroplane + [0, 128, 0], #2: bicycle + [128, 128, 0], #3: bird + [0, 0, 128], #4: boat + [128, 0, 128], #5: bottle + [0, 128, 128], #6: bus + [128, 128, 128], #7: car + [64, 0, 0], #8: cat + [192, 0, 0], #9: chair + [64, 128, 0], #10: cow + [192, 128, 0], #11: dining table + [64, 0, 128], #12: dog + [192, 0, 128], #13: horse + [64, 128, 128], #14: motorbike + [192, 128, 128], #15: person + [0, 64, 0], #16: potted plant + [128, 64, 0], #17: sheep + [0, 192, 0], #18: sofa + [128, 192, 0], #19: train + [0, 64, 128], #20: tv/monitor + ] ) def encode_segmap(self, mask): """Encode segmentation label images as pascal classes @@ -153,6 +163,7 @@ def encode_segmap(self, mask): for ii, label in enumerate(self.get_pascal_labels()): label_mask[np.where(np.all(mask == label, axis=-1))[:2]] = ii label_mask = label_mask.astype(int) + return label_mask def decode_segmap(self, label_mask, plot=False): @@ -186,68 +197,137 @@ def decode_segmap(self, label_mask, plot=False): return rgb def setup_annotations(self): - """Sets up Berkley annotations by adding image indices to the - `train_aug` split and pre-encode all segmentation labels into the - common label_mask format (if this has not already been done). This - function also defines the `train_aug` and `train_aug_val` data splits - according to the description in the class docstring + """ + pre-encode all segmentation labels into the + common label_mask format (if this has not already been done). """ sbd_path = self.sbd_path - target_path = pjoin(self.root, "SegmentationClass/pre_encoded") + if not self.aug_with_sbd: + target_path = pjoin(self.root, "SegmentationClass/pre_encoded") + if self.aug_with_sbd: + target_path = pjoin(self.root, "SegmentationClass/pre_encoded_aug") + if not os.path.exists(target_path): os.makedirs(target_path) - path = pjoin(sbd_path, "dataset/train.txt") + + path = pjoin(sbd_path, "dataset", "train.txt") sbd_train_list = tuple(open(path, "r")) sbd_train_list = [id_.rstrip() for id_ in sbd_train_list] - train_aug = self.files["train"] + sbd_train_list + + if not self.aug_with_sbd: + train = self.files["train"] + + if self.aug_with_sbd: + train_aug = self.files["train"] + sbd_train_list + + val = self.files["val"] + test = self.files["test"] + + if self.aug_with_sbd: + # keep unique elements (stable) + train_aug = [train_aug[i] for i in sorted(np.unique(train_aug, return_index=True)[1])] + self.files["train_aug"] = train_aug + + pre_encoded = glob.glob(pjoin(target_path, "*.png")) + + if not self.aug_with_sbd: + expected = len(list(set().union(val,test,train))) + # make sure validation set is held out + #print("[Debug] val and train intersection: ", len(list(set().intersection(val,train)))) + else: + expected = len(list(set().union(val,test,train_aug))) + # make sure validation set is held out + #print("[Debug] val and train intersection: ", len(list(set().intersection(val,train_aug)))) + + + + if len(pre_encoded) < expected: + + if self.aug_with_sbd: + for ii in tqdm(sbd_train_list, desc="pre-encode SBD"): + fname = ii + ".png" + lbl_path = pjoin(sbd_path, "dataset/cls", ii + ".mat") + data = io.loadmat(lbl_path) + lbl = data["GTcls"][0]["Segmentation"][0].astype(np.int32) + cv2.imwrite(pjoin(target_path, fname), lbl) + + for split in ["train", "val", "test"]: + + for ii in tqdm(self.files[split], desc="pre-encode "+split): + fname = ii + ".png" + lbl_path = pjoin(self.root, "SegmentationClass", fname) + lbl = cv2.imread(lbl_path, cv2.IMREAD_UNCHANGED) + lbl = cv2.cvtColor(np.array(lbl), cv2.COLOR_BGR2RGB) + lbl = self.encode_segmap(lbl) + #print("[Debug] pre-encoded label before saving as png: ", np.unique(lbl)) + cv2.imwrite(pjoin(target_path, fname), lbl) + + pre_encoded = glob.glob(pjoin(target_path, "*.png")) + print("[info] num expected labels: {} num pre-encoded labels: {}".format(expected,len(pre_encoded))) - # keep unique elements (stable) - train_aug = [train_aug[i] for i in sorted(np.unique(train_aug, return_index=True)[1])] - self.files["train_aug"] = train_aug - set_diff = set(self.files["val"]) - set(train_aug) # remove overlap - self.files["train_aug_val"] = list(set_diff) + +""" +# Test Pascal Dataloader (JUPYTER NOTEBOOK) - pre_encoded = glob.glob(pjoin(target_path, "*.png")) - expected = np.unique(self.files["train_aug"] + self.files["val"]).size - - if len(pre_encoded) != expected: - print("Pre-encoding segmentation masks...") - for ii in tqdm(sbd_train_list): - lbl_path = pjoin(sbd_path, "dataset/cls", ii + ".mat") - data = io.loadmat(lbl_path) - lbl = data["GTcls"][0]["Segmentation"][0].astype(np.int32) - lbl = m.toimage(lbl, high=lbl.max(), low=lbl.min()) - m.imsave(pjoin(target_path, ii + ".png"), lbl) - - for ii in tqdm(self.files["trainval"]): - fname = ii + ".png" - lbl_path = pjoin(self.root, "SegmentationClass", fname) - lbl = self.encode_segmap(m.imread(lbl_path)) - lbl = m.toimage(lbl, high=lbl.max(), low=lbl.min()) - m.imsave(pjoin(target_path, fname), lbl) - - assert expected == 9733, "unexpected dataset sizes" - - -# Leave code for debugging purposes -# import ptsemseg.augmentations as aug -# if __name__ == '__main__': -# # local_path = '/home/meetshah1995/datasets/VOCdevkit/VOC2012/' -# bs = 4 -# augs = aug.Compose([aug.RandomRotate(10), aug.RandomHorizontallyFlip()]) -# dst = pascalVOCLoader(root=local_path, is_transform=True, augmentations=augs) -# trainloader = data.DataLoader(dst, batch_size=bs) -# for i, data in enumerate(trainloader): -# imgs, labels = data -# imgs = imgs.numpy()[:, ::-1, :, :] -# imgs = np.transpose(imgs, [0,2,3,1]) -# f, axarr = plt.subplots(bs, 2) -# for j in range(bs): -# axarr[j][0].imshow(imgs[j]) -# axarr[j][1].imshow(dst.decode_segmap(labels.numpy()[j])) -# plt.show() -# a = raw_input() -# if a == 'ex': -# break -# else: -# plt.close() +%matplotlib inline +import matplotlib.pyplot as plt +from ptsemseg.augmentations.augmentations import ( + AdjustContrast, + AdjustGamma, + AdjustBrightness, + AdjustSaturation, + AdjustHue, + RandomCrop, + RandomHorizontallyFlip, + RandomVerticallyFlip, + Scale, + RandomSized, + RandomSizedCrop, + RandomRotate, + RandomTranslate, + CenterCrop, + Compose, +) + +if __name__ == '__main__': + + local_path = '/gpfs/home/fnouraei/data/fnouraei/VOC/VOCdevkit/VOC2012/' + bs = BATCH_SIZE + if OVERFIT: + dset = pascalVOCLoader_reduced(root=local_path, split = 'train',img_size=(INPUT_SIZE,INPUT_SIZE) + , is_transform=True , aug_with_sbd=False + , augmentations= None) + else: + dset = pascalVOCLoader(root=local_path, split = 'train',img_size=(INPUT_SIZE,INPUT_SIZE) + , is_transform=True , aug_with_sbd=True + , augmentations= Compose([RandomRotate(10), RandomHorizontallyFlip(p=0.5) + ,RandomVerticallyFlip(p=0.5),RandomSizedCrop(125) + ,AdjustGamma(gamma = 2.2),AdjustContrast(cf=0.4)])) + + + print("[Debug] length of dataset: ",len(dset)) + + trainloader = dt.DataLoader(dset, batch_size=bs, shuffle=True) + + for i, data in enumerate(trainloader): # i = batch idx , data = (img , label) (as tensors - first dim is batch size) + img, label = data + img = img.numpy() + img = np.transpose(img, [0,2,3,1]) + print("[Debug] label values:",label.unique()) + + f, axarr = plt.subplots(bs, 2, figsize=(15, 15), dpi=80) + + for j in range(bs): + print("[Debug] batch item {} img shape: {} label shape: {}".format(bs,img[j].shape,label.numpy()[j].shape)) + axarr[j,0].imshow(img[j]) + axarr[j,1].imshow(dset.decode_segmap((label[j].numpy()))) + plt.show() + + plt.close() +""" + + + + + +