Initial commit

juingzhou · web-flow · commit d9e6ad194392 · 2019-07-03T20:50:43.000+08:00
diff --git a/README.md b/README.md
@@ -0,0 +1,87 @@
+# ESPCN
+
+This repository is an implementation of ["Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network"](https://arxiv.org/abs/1609.05158).
+
+<center><img src="./thumbnails/fig1.png"></center>
+
+## Requirements
+
+- PyTorch 1.1.0
+- Numpy 1.15.4
+- Pillow 6.0.0
+- h5py 2.8.0
+- tqdm 4.30.0
+
+## Train
+
+The 91-image and Set5 datasets, converted to HDF5, can be downloaded from the links below.
+
+| Dataset | Scale | Type | Link |
+|---------|-------|------|------|
+| 91-image | 3 | Train | [Link](/BLAH_BLAH/) |
+| Set5 | 3 | Eval | [Link](/BLAH_BLAH/) |
+
+Otherwise, you can use `prepare.py` to create a custom dataset.
+
+```bash
+bash run.sh               
+```
+
+## Test
+
+Pre-trained weights can be downloaded from the links below.
+
+| Model | Scale | Link |
+|-------|-------|------|
+| ESPCN (91) | 3 | [Link](/BLAH_BLAH/outputs/x3/best.pth) |
+
+The results are stored in the same path as the query image.
+
+```bash
+bash run.sh
+```
+
+## Results
+
+PSNR was calculated on the Y channel.
+
+### Set5
+
+| Eval. Mat | Scale | Paper (91) | Ours (91) |
+|-----------|-------|-------|-----------------|
+| PSNR | 3 | 32.55 | 32.88 |
+
+<table>
+    <tr>
+        <td><center>Original</center></td>
+        <td><center>BICUBIC x3</center></td>
+        <td><center>ESPCN x3 (23.84 dB)</center></td>
+    </tr>
+    <tr>
+    	<td>
+    		<center><img src="./data/baboon.bmp"></center>
+    	</td>
+    	<td>
+    		<center><img src="./data/baboon_bicubic_x3.bmp"></center>
+    	</td>
+    	<td>
+    		<center><img src="./data/baboon_espcn_x3.bmp"></center>
+    	</td>
+    </tr>
+    <tr>
+        <td><center>Original</center></td>
+        <td><center>BICUBIC x3</center></td>
+        <td><center>ESPCN x3 (25.32 dB)</center></td>
+    </tr>
+    <tr>
+    	<td>
+    		<center><img src="./data/comic.bmp"></center>
+    	</td>
+    	<td>
+    		<center><img src="./data/comic_bicubic_x3.bmp"></center>
+    	</td>
+    	<td>
+    		<center><img src="./data/comic_espcn_x3.bmp"></center>
+    	</td>
+    </tr>  
+</table>
diff --git a/datasets.py b/datasets.py
@@ -0,0 +1,31 @@
+import h5py
+import numpy as np
+from torch.utils.data import Dataset
+
+
+class TrainDataset(Dataset):
+    """Training pairs read from the ``lr`` and ``hr`` datasets of an HDF5 file.
+
+    Only the file path is stored; the file is reopened for every access.
+    """
+
+    def __init__(self, h5_file):
+        super(TrainDataset, self).__init__()
+        self.h5_file = h5_file
+
+    def __getitem__(self, idx):
+        """Return ``(lr, hr)`` for entry ``idx``, each divided by 255 with a new leading axis."""
+        with h5py.File(self.h5_file, 'r') as h5:
+            low_res = h5['lr'][idx] / 255.
+            high_res = h5['hr'][idx] / 255.
+        return np.expand_dims(low_res, 0), np.expand_dims(high_res, 0)
+
+    def __len__(self):
+        """Return the number of entries in the ``lr`` dataset."""
+        with h5py.File(self.h5_file, 'r') as h5:
+            return len(h5['lr'])
+
+
+class EvalDataset(Dataset):
+    """Evaluation pairs read from the ``lr`` and ``hr`` groups of an HDF5 file.
+
+    Each group is indexed by the string form of the integer index. Only the
+    file path is stored; the file is reopened for every access.
+    """
+
+    def __init__(self, h5_file):
+        super(EvalDataset, self).__init__()
+        self.h5_file = h5_file
+
+    def __getitem__(self, idx):
+        """Return ``(lr, hr)`` for entry ``idx``, each divided by 255 with a new leading axis."""
+        key = str(idx)
+        with h5py.File(self.h5_file, 'r') as h5:
+            low_res = h5['lr'][key][:, :] / 255.
+            high_res = h5['hr'][key][:, :] / 255.
+        return np.expand_dims(low_res, 0), np.expand_dims(high_res, 0)
+
+    def __len__(self):
+        """Return the number of entries in the ``lr`` group."""
+        with h5py.File(self.h5_file, 'r') as h5:
+            return len(h5['lr'])
diff --git a/models.py b/models.py
@@ -0,0 +1,36 @@
+import math
+from torch import nn
+
+
+class ESPCN(nn.Module):
+    """Three-convolution network ending in a pixel shuffle.
+
+    Layout: 5x5 conv (64 maps) -> tanh -> 3x3 conv (32 maps) -> tanh ->
+    3x3 conv (``num_channels * scale_factor ** 2`` maps) -> ``PixelShuffle``.
+    All convolutions are padded so the spatial size is unchanged before the
+    shuffle.
+    """
+
+    def __init__(self, scale_factor, num_channels=1):
+        super(ESPCN, self).__init__()
+        feature_layers = [
+            nn.Conv2d(num_channels, 64, kernel_size=5, padding=5//2),
+            nn.Tanh(),
+            nn.Conv2d(64, 32, kernel_size=3, padding=3//2),
+            nn.Tanh(),
+        ]
+        upscale_layers = [
+            nn.Conv2d(32, num_channels * (scale_factor ** 2), kernel_size=3, padding=3 // 2),
+            nn.PixelShuffle(scale_factor),
+        ]
+        self.first_part = nn.Sequential(*feature_layers)
+        self.last_part = nn.Sequential(*upscale_layers)
+
+        self._initialize_weights()
+
+    def _initialize_weights(self):
+        """Draw conv weights from zero-mean normals and zero every conv bias."""
+        for layer in self.modules():
+            if not isinstance(layer, nn.Conv2d):
+                continue
+            if layer.in_channels == 32:
+                # Convolutions fed by 32 channels get a fixed, small spread.
+                std = 0.001
+            else:
+                # Spread derived from the output channel count and kernel area.
+                kernel_area = layer.weight.data[0][0].numel()
+                std = math.sqrt(2/(layer.out_channels*kernel_area))
+            nn.init.normal_(layer.weight.data, mean=0.0, std=std)
+            nn.init.zeros_(layer.bias.data)
+
+    def forward(self, x):
+        """Run ``x`` through the feature layers, then the upscaling layers."""
+        return self.last_part(self.first_part(x))
+
+
diff --git a/prepare.py b/prepare.py
@@ -0,0 +1,76 @@
+import argparse
+import glob
+import h5py
+import numpy as np
+import PIL.Image as pil_image
+from utils import convert_rgb_to_y
+
+
+def train(args):
+    """Cut aligned LR/HR training patches from a directory of images into an HDF5 file.
+
+    Every file matched by ``<images_dir>/*`` is opened as RGB, resized so both
+    sides are multiples of ``args.scale``, and bicubically downscaled by
+    ``args.scale`` to produce the LR image. Both images are passed through
+    ``convert_rgb_to_y`` (imported from ``utils``) before patches are cut.
+
+    Args:
+        args: Namespace providing ``images_dir``, ``output_path``, ``scale``,
+            ``patch_size`` (side of an LR patch) and ``stride`` (step between
+            LR patches).
+
+    The output file receives two datasets, ``lr`` and ``hr``. Entry ``k`` of
+    each covers the same image region; HR patches are ``scale`` times larger
+    per side.
+    """
+    lr_patches = []
+    hr_patches = []
+    hr_patch_size = args.patch_size * args.scale
+
+    for image_path in sorted(glob.glob('{}/*'.format(args.images_dir))):
+        hr = pil_image.open(image_path).convert('RGB')
+        hr_width = (hr.width // args.scale) * args.scale
+        hr_height = (hr.height // args.scale) * args.scale
+        hr = hr.resize((hr_width, hr_height), resample=pil_image.BICUBIC)
+        lr = hr.resize((hr_width // args.scale, hr_height // args.scale), resample=pil_image.BICUBIC)
+        hr = np.array(hr).astype(np.float32)
+        lr = np.array(lr).astype(np.float32)
+        hr = convert_rgb_to_y(hr)
+        lr = convert_rgb_to_y(lr)
+
+        # Slide over the LR image; the matching HR window starts at the LR
+        # offset multiplied by the scale.
+        for i in range(0, lr.shape[0] - args.patch_size + 1, args.stride):
+            for j in range(0, lr.shape[1] - args.patch_size + 1, args.stride):
+                hr_i = i * args.scale
+                hr_j = j * args.scale
+                lr_patches.append(lr[i:i + args.patch_size, j:j + args.patch_size])
+                hr_patches.append(hr[hr_i:hr_i + hr_patch_size, hr_j:hr_j + hr_patch_size])
+
+    lr_patches = np.array(lr_patches)
+    hr_patches = np.array(hr_patches)
+
+    # Open the output only once all patches exist, and let the context manager
+    # close it: a failure while reading images no longer leaks the handle or
+    # leaves a truncated output file behind.
+    with h5py.File(args.output_path, 'w') as h5_file:
+        h5_file.create_dataset('lr', data=lr_patches)
+        h5_file.create_dataset('hr', data=hr_patches)
+
+
+def eval(args):
+    """Write whole-image LR/HR evaluation pairs from a directory into an HDF5 file.
+
+    Every file matched by ``<images_dir>/*`` is opened as RGB, resized so both
+    sides are multiples of ``args.scale``, and bicubically downscaled by
+    ``args.scale`` to produce the LR image. Both images are passed through
+    ``convert_rgb_to_y`` (imported from ``utils``) and stored uncropped.
+
+    Args:
+        args: Namespace providing ``images_dir``, ``output_path`` and ``scale``.
+
+    The output file receives two groups, ``lr`` and ``hr``; each image is
+    stored under the string form of its position in the sorted file list.
+
+    Note: the name shadows the builtin ``eval``; it is kept because the script
+    entry point calls this function by name.
+    """
+    # The context manager closes the file even if an image fails to load or
+    # convert part-way through the loop.
+    with h5py.File(args.output_path, 'w') as h5_file:
+        lr_group = h5_file.create_group('lr')
+        hr_group = h5_file.create_group('hr')
+
+        for i, image_path in enumerate(sorted(glob.glob('{}/*'.format(args.images_dir)))):
+            hr = pil_image.open(image_path).convert('RGB')
+            hr_width = (hr.width // args.scale) * args.scale
+            hr_height = (hr.height // args.scale) * args.scale
+            hr = hr.resize((hr_width, hr_height), resample=pil_image.BICUBIC)
+            # hr was just resized to (hr_width, hr_height), so hr_width is the
+            # same value as hr.width; use it to mirror train().
+            lr = hr.resize((hr_width // args.scale, hr_height // args.scale), resample=pil_image.BICUBIC)
+            hr = np.array(hr).astype(np.float32)
+            lr = np.array(lr).astype(np.float32)
+            hr = convert_rgb_to_y(hr)
+            lr = convert_rgb_to_y(lr)
+
+            lr_group.create_dataset(str(i), data=lr)
+            hr_group.create_dataset(str(i), data=hr)
+
+
+if __name__ == '__main__':
+    # Command-line entry point: build the training file by default, or the
+    # evaluation file when --eval is given.
+    cli = argparse.ArgumentParser()
+    cli.add_argument('--images-dir', type=str, required=True)
+    cli.add_argument('--output-path', type=str, required=True)
+    cli.add_argument('--scale', type=int, default=3)
+    cli.add_argument('--patch-size', type=int, default=17)
+    cli.add_argument('--stride', type=int, default=13)
+    cli.add_argument('--eval', action='store_true')
+    args = cli.parse_args()
+
+    if args.eval:
+        eval(args)
+    else:
+        train(args)
diff --git a/run.sh b/run.sh
@@ -0,0 +1,4 @@
+# Usage: bash run.sh
+# As written, only the test command below is executed; the training command is
+# commented out and must be uncommented to run training.
+# Train step:
+# python train.py --train-file "BLAH_BLAH/91-image_x3.h5" --eval-file "BLAH_BLAH/Set5_x3.h5" --outputs-dir "BLAH_BLAH/outputs" --scale 3 --lr 1e-3 --batch-size 32 --num-epochs 200 --num-workers 8 --seed 123
+# Test step:
+python test.py --weights-file "BLAH_BLAH/outputs/x3/best.pth" --image-file "data/baboon.bmp" --scale 3
diff --git a/test.py b/test.py
@@ -0,0 +1,58 @@
+import argparse
+
+import torch
+import torch.backends.cudnn as cudnn
+import numpy as np
+import PIL.Image as pil_image
+
+from models import ESPCN
+from utils import convert_ycbcr_to_rgb, preprocess, calc_psnr
+
+
+if __name__ == '__main__':
+    # Script entry point: upscale one image with a trained ESPCN, print the PSNR
+    # against the (cropped) original, and save a bicubic baseline and the ESPCN
+    # result next to the input image.
+
+    # Local import: only this entry point needs it, to split the image path.
+    import os
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights-file', type=str, required=True)
+    parser.add_argument('--image-file', type=str, required=True)
+    parser.add_argument('--scale', type=int, default=3)
+    args = parser.parse_args()
+
+    cudnn.benchmark = True
+    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+
+    model = ESPCN(scale_factor=args.scale).to(device)
+
+    # Copy each stored tensor into the model's own state; a name the model does
+    # not have is treated as an error.
+    state_dict = model.state_dict()
+    for n, p in torch.load(args.weights_file, map_location=lambda storage, loc: storage).items():
+        if n in state_dict:
+            state_dict[n].copy_(p)
+        else:
+            raise KeyError(n)
+
+    model.eval()
+
+    image = pil_image.open(args.image_file).convert('RGB')
+
+    # Crop the working size down to a multiple of the scale.
+    image_width = (image.width // args.scale) * args.scale
+    image_height = (image.height // args.scale) * args.scale
+
+    # Insert output suffixes before the extension only. The previous
+    # str.replace('.', ...) rewrote every dot in the path (e.g. a leading './'
+    # or a dotted directory name), and left extension-less paths unchanged so
+    # the input file itself was overwritten.
+    image_root, image_ext = os.path.splitext(args.image_file)
+
+    hr = image.resize((image_width, image_height), resample=pil_image.BICUBIC)
+    lr = hr.resize((hr.width // args.scale, hr.height // args.scale), resample=pil_image.BICUBIC)
+    bicubic = lr.resize((lr.width * args.scale, lr.height * args.scale), resample=pil_image.BICUBIC)
+    bicubic.save('{}_bicubic_x{}{}'.format(image_root, args.scale, image_ext))
+
+    # NOTE(review): preprocess comes from utils; from its use here it presumably
+    # returns (network input tensor, YCbCr array) - confirm against utils.py.
+    lr, _ = preprocess(lr, device)
+    hr, _ = preprocess(hr, device)
+    _, ycbcr = preprocess(bicubic, device)
+
+    with torch.no_grad():
+        preds = model(lr).clamp(0.0, 1.0)
+
+    psnr = calc_psnr(hr, preds)
+    print('PSNR: {:.2f}'.format(psnr))
+
+    # Back to the 0-255 range and drop the two leading singleton axes.
+    preds = preds.mul(255.0).cpu().numpy().squeeze(0).squeeze(0)
+
+    # Use the prediction as the first channel and take the other two channels
+    # from the bicubic image's YCbCr array, then convert to RGB for saving.
+    output = np.array([preds, ycbcr[..., 1], ycbcr[..., 2]]).transpose([1, 2, 0])
+    output = np.clip(convert_ycbcr_to_rgb(output), 0.0, 255.0).astype(np.uint8)
+    output = pil_image.fromarray(output)
+    output.save('{}_espcn_x{}{}'.format(image_root, args.scale, image_ext))
diff --git a/train.py b/train.py
diff --git a/utils.py b/utils.py