From 7acde28b6d908a89213804eb03350faab42303bc Mon Sep 17 00:00:00 2001 From: soumith Date: Thu, 17 Nov 2016 21:22:41 -0800 Subject: [PATCH] adding utils to save image to disk, and to create grid of images --- README.md | 25 ++++++++++++++++++------- torchvision/utils.py | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+), 7 deletions(-) create mode 100644 torchvision/utils.py diff --git a/README.md b/README.md index 9732aa4616c..d8d449f4263 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,7 @@ This repository consists of: - [vision.datasets](#datasets) : Data loaders for popular vision datasets - [vision.transforms](#transforms) : Common image transformations such as random crop, rotations etc. +- [vision.utils](#utils) : Useful stuff such as saving tensor (3 x H x W) as image to disk, given a mini-batch creating a grid of images, etc. - `[WIP] vision.models` : Model definitions and Pre-trained models for popular models such as AlexNet, VGG, ResNet etc. # Installation @@ -60,8 +61,8 @@ Example: ```python import torchvision.datasets as dset import torchvision.transforms as transforms -cap = dset.CocoCaptions(root = 'dir where images are', - annFile = 'json annotation file', +cap = dset.CocoCaptions(root = 'dir where images are', + annFile = 'json annotation file', transform=transforms.ToTensor()) print('Number of samples: ', len(cap)) @@ -76,10 +77,10 @@ Output: ``` Number of samples: 82783 Image Size: (3L, 427L, 640L) -[u'A plane emitting smoke stream flying over a mountain.', -u'A plane darts across a bright blue sky behind a mountain covered in snow', -u'A plane leaves a contrail above the snowy mountain top.', -u'A mountain that has a plane flying overheard in the distance.', +[u'A plane emitting smoke stream flying over a mountain.', +u'A plane darts across a bright blue sky behind a mountain covered in snow', +u'A plane leaves a contrail above the snowy mountain top.', +u'A mountain that has a plane flying overheard in the 
distance.', u'A mountain view with a plume of smoke in the background'] ``` @@ -174,7 +175,7 @@ rescaled to (size * height / width, size) Crops the given PIL.Image at the center to have a region of the given size. size can be a tuple (target_height, target_width) or an integer, in which case the target will be of a square shape (size, size) - + ### `RandomCrop(size)` Crops the given PIL.Image at a random location to have a region of the given size. size can be a tuple (target_height, target_width) @@ -200,3 +201,13 @@ Given mean: (R, G, B) and std: (R, G, B), will normalize each channel of the tor - `ToTensor()` - Converts a PIL.Image (RGB) or numpy.ndarray (H x W x C) in the range [0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] - `ToPILImage()` - Converts a torch.*Tensor of range [0, 1] and shape C x H x W or numpy ndarray of dtype=uint8, range[0, 255] and shape H x W x C to a PIL.Image of range [0, 255] + +# Utils + +### make_grid(tensor, nrow=8, padding=2) +Given a 4D mini-batch Tensor of shape (B x C x H x W), makes a grid of images + +### save_image(tensor, filename, nrow=8, padding=2) +Saves a given Tensor into an image file. + +If given a mini-batch tensor, will save the tensor as a grid of images. 
# Contents of torchvision/utils.py (added as a new file by this patch).


def make_grid(tensor, nrow=8, padding=2):
    """
    Tile a mini-batch of images into a single grid image.

    Args:
        tensor: 4D mini-batch Tensor of shape (B x C x H x W). A 3D tensor
            (a single image) is returned unchanged.
        nrow: maximum number of images placed side by side in one grid row.
        padding: total number of border pixels added to each image along
            each spatial axis (split between the two sides of the image).

    Returns:
        A tensor of shape (C, (H + padding) * rows, (W + padding) * cols),
        where cols = min(nrow, B) and rows = ceil(B / cols). Border pixels
        and unused cells are filled with the maximum value of ``tensor``.
    """
    if tensor.dim() == 3:  # single image
        return tensor
    # make the mini-batch of images into a grid
    nmaps = tensor.size(0)
    xmaps = min(nrow, nmaps)
    # Integer ceiling division: unlike math.ceil(nmaps / xmaps) this does not
    # truncate under Python 2, so the last partial row is never dropped.
    ymaps = (nmaps + xmaps - 1) // xmaps
    height = int(tensor.size(2) + padding)
    width = int(tensor.size(3) + padding)
    # Use the batch's own channel count instead of assuming 3 channels.
    grid = tensor.new(tensor.size(1), height * ymaps, width * xmaps)
    grid.fill_(tensor.max())
    # Integer, 0-based offset of the image inside its cell. narrow() needs
    # ints, and no extra "+1" is required because indexing starts at 0.
    offset = padding // 2
    for k in range(nmaps):
        y, x = divmod(k, xmaps)
        grid.narrow(1, y * height + offset, height - padding) \
            .narrow(2, x * width + offset, width - padding) \
            .copy_(tensor[k])
    return grid


def save_image(tensor, filename, nrow=8, padding=2):
    """
    Saves a given Tensor into an image file.
    If given a mini-batch tensor, will save the tensor as a grid of images
    (see ``make_grid`` for the meaning of ``nrow`` and ``padding``).

    Pixel values are rescaled as ``(x * 0.5 + 0.5) * 255`` before being
    converted to bytes, i.e. -1 maps to 0 and 1 maps to 255.
    NOTE(review): this implies inputs in [-1, 1]; tensors already in [0, 1]
    will come out brightened -- confirm the intended input range.

    Args:
        tensor: image tensor (C x H x W) or mini-batch (B x C x H x W).
        filename: destination passed to ``PIL.Image.save``.
        nrow: forwarded to ``make_grid``.
        padding: forwarded to ``make_grid``.
    """
    from PIL import Image
    tensor = tensor.cpu()
    grid = make_grid(tensor, nrow=nrow, padding=padding)
    # Clamp before the byte conversion so out-of-range values saturate
    # instead of wrapping around.
    scaled = grid.mul(0.5).add(0.5).mul(255).clamp(0, 255).byte()
    # C x H x W -> H x W x C, the layout expected by Image.fromarray.
    ndarr = scaled.permute(1, 2, 0).numpy()
    if ndarr.shape[2] == 1:
        # Drop the channel axis so a single-channel image is passed as a
        # plain 2D (H x W) array.
        ndarr = ndarr[:, :, 0]
    im = Image.fromarray(ndarr)
    im.save(filename)