
Commit

pehf committed Apr 23, 2021
2 parents 9273d0f + 65d7967 commit 5bd79c6
Showing 5 changed files with 290 additions and 177 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -14,6 +14,7 @@ examples/data/cifar*
data/plenoptic-test-files
data/ssim_images
data/ssim_analysis.mat
data/msssim_images
data/cat7*
data/elep*
docs/_build
295 changes: 149 additions & 146 deletions examples/09_Original_MAD.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion plenoptic/metric/__init__.py
@@ -1,4 +1,4 @@
from .perceptual_distance import ssim, nlpd, nspd, ssim_map
from .perceptual_distance import ssim, ms_ssim, nlpd, nspd, ssim_map
from .model_metric import model_metric
from .naive import mse
from .classes import NLP
131 changes: 110 additions & 21 deletions plenoptic/metric/perceptual_distance.py
@@ -90,21 +90,16 @@ def _ssim_parts(img1, img2, dynamic_range):
C1 = (0.01 * dynamic_range) ** 2
C2 = (0.03 * dynamic_range) ** 2

v1 = 2.0 * sigma12 + C2
v2 = sigma1_sq + sigma2_sq + C2

# SSIM consists of a luminance component, a contrast component, and a
# structure component. This is the contrast component, which is used to
# compute MS-SSIM.
contrast_map = v1 / v2
# SSIM is the product of a luminance component, a contrast component, and a
# structure component. The contrast-structure component has to be separated
# when computing MS-SSIM.
luminance_map = (2 * mu1_mu2 + C1) / (mu1_sq + mu2_sq + C1)
contrast_structure_map = (2.0 * sigma12 + C2) / (sigma1_sq + sigma2_sq + C2)
map_ssim = luminance_map * contrast_structure_map

# the weight used for stability
weight = torch.log(torch.matmul((1+(sigma1_sq/C2)), (1+(sigma2_sq/C2))))

ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2)
ssim_map = ((2 * mu1_mu2 + C1) / (mu1_sq + mu2_sq + C1)) * contrast_map
return ssim_map, contrast_map, weight
weight = torch.log((1 + sigma1_sq/C2) * (1 + sigma2_sq/C2))
return map_ssim, contrast_structure_map, weight
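For readers following the refactor in this hunk: the returned SSIM map is now the elementwise product of a luminance map and a contrast-structure map, and the contrast-structure term is exactly what MS-SSIM reuses at the finer scales. Here is a minimal, self-contained sketch of that decomposition; it is illustrative only, uses a uniform window instead of the library's Gaussian window, and the helper name is hypothetical.

```python
import torch
import torch.nn.functional as F

def toy_ssim_components(img1, img2, dynamic_range=1, window_size=11):
    """Illustrative split of SSIM into luminance and contrast-structure maps.

    Assumes 4d, single-channel tensors (batch, channel, height, width) and a
    uniform averaging window, unlike the Gaussian window used by plenoptic.
    """
    C1 = (0.01 * dynamic_range) ** 2
    C2 = (0.03 * dynamic_range) ** 2
    win = torch.ones(1, 1, window_size, window_size) / window_size ** 2
    mu1, mu2 = F.conv2d(img1, win), F.conv2d(img2, win)
    sigma1_sq = F.conv2d(img1 * img1, win) - mu1 ** 2
    sigma2_sq = F.conv2d(img2 * img2, win) - mu2 ** 2
    sigma12 = F.conv2d(img1 * img2, win) - mu1 * mu2
    luminance_map = (2 * mu1 * mu2 + C1) / (mu1 ** 2 + mu2 ** 2 + C1)
    contrast_structure_map = (2 * sigma12 + C2) / (sigma1_sq + sigma2_sq + C2)
    return luminance_map * contrast_structure_map, contrast_structure_map
```

MS-SSIM keeps only the second return value at all but the coarsest scale, which is why the two maps are now computed separately.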


def ssim(img1, img2, weighted=False, dynamic_range=1):
@@ -113,7 +108,7 @@ def ssim(img1, img2, weighted=False, dynamic_range=1):
As described in [1]_, the structural similarity index (SSIM) is a
perceptual distance metric, giving the distance between two images. SSIM is
based on three comparison measurements between the two images: luminance,
contrast, and structure. All of these are computed in windows across the
contrast, and structure. All of these are computed convolutionally across the
images. See the references for more information.
This implementation follows the original implementation, as found at [2]_,
@@ -151,8 +146,8 @@ def ssim(img1, img2, weighted=False, dynamic_range=1):
Returns
------
mssim : torch.Tensor
2d tensor containing the mean SSIM for each image, averaged over the
whole image
2d tensor of shape (batch, channel) containing the mean SSIM for each
image, averaged over the whole image
Notes
-----
@@ -169,7 +164,7 @@ def ssim(img1, img2, weighted=False, dynamic_range=1):
----------
.. [1] Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image
quality assessment: From error measurement to structural similarity"
IEEE Transactios on Image Processing, vol. 13, no. 1, Jan. 2004.
IEEE Transactions on Image Processing, vol. 13, no. 1, Jan. 2004.
.. [2] [matlab code](https://www.cns.nyu.edu/~lcv/ssim/ssim_index.m)
.. [3] [project page](https://www.cns.nyu.edu/~lcv/ssim/)
.. [4] Wang, Z., & Simoncelli, E. P. (2008). Maximum differentiation (MAD)
@@ -186,6 +181,10 @@ def ssim(img1, img2, weighted=False, dynamic_range=1):
else:
mssim = (map_ssim*weight).sum((-1, -2)) / weight.sum((-1, -2))

if min(img1.shape[2], img1.shape[3]) < 11:
warnings.warn("SSIM uses 11x11 convolutional kernel, but the height and/or "
"the width of the input image is smaller than 11, so the "
"kernel size is set to be the minimum of these two numbers.")
return mssim
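The warning added here (and mirrored in `ssim_map` and `ms_ssim` below) fires whenever either spatial dimension of the input is smaller than the 11-pixel window. A quick usage sketch with assumed image sizes; `po` is the `plenoptic` package, as in the tests further down.

```python
import torch
import plenoptic as po

img1 = torch.rand(1, 1, 8, 64)   # height 8 < 11, so the kernel-size warning is emitted
img2 = torch.rand(1, 1, 8, 64)
small = po.metric.ssim(img1, img2)   # still returns a (batch, channel) tensor

big = po.metric.ssim(torch.rand(1, 1, 64, 64),
                     torch.rand(1, 1, 64, 64))  # both dimensions >= 11: no warning
```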


@@ -195,7 +194,7 @@ def ssim_map(img1, img2, dynamic_range=1):
As described in [1]_, the structural similarity index (SSIM) is a
perceptual distance metric, giving the distance between two images. SSIM is
based on three comparison measurements between the two images: luminance,
contrast, and structure. All of these are computed in windows across the
contrast, and structure. All of these are computed convolutionally across the
images. See the references for more information.
This implementation follows the original implementation, as found at [2]_,
@@ -238,7 +237,7 @@ def ssim_map(img1, img2, dynamic_range=1):
----------
.. [1] Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image
quality assessment: From error measurement to structural similarity"
IEEE Transactios on Image Processing, vol. 13, no. 1, Jan. 2004.
IEEE Transactions on Image Processing, vol. 13, no. 1, Jan. 2004.
.. [2] [matlab code](https://www.cns.nyu.edu/~lcv/ssim/ssim_index.m)
.. [3] [project page](https://www.cns.nyu.edu/~lcv/ssim/)
.. [4] Wang, Z., & Simoncelli, E. P. (2008). Maximum differentiation (MAD)
@@ -247,9 +246,97 @@ def ssim_map(img1, img2, dynamic_range=1):
http://dx.doi.org/10.1167/8.12.8
"""
if min(img1.shape[2], img1.shape[3]) < 11:
warnings.warn("SSIM uses 11x11 convolutional kernel, but the height and/or "
"the width of the input image is smaller than 11, so the "
"kernel size is set to be the minimum of these two numbers.")
return _ssim_parts(img1, img2, dynamic_range)[0]


def ms_ssim(img1, img2, dynamic_range=1, power_factors=None):
r"""Multiscale structural similarity index (MS-SSIM)
As described in [1]_, the multiscale structural similarity index (MS-SSIM) is
an improvement upon the structural similarity index (SSIM) that takes into
account the perceptual distance between two images at different scales.
SSIM is based on three comparison measurements between the two images:
luminance, contrast, and structure. All of these are computed convolutionally
across the images, producing three maps instead of scalars. The SSIM map is
the elementwise product of these three maps. See `metric.ssim` and
`metric.ssim_map` for a full description of SSIM.
To get images of different scales, average pooling operations with kernel
size 2 are performed recursively on the input images. The product of
contrast map and structure map (the "contrast-structure map") is computed
for all but the coarsest scale, and the overall SSIM map is only computed
for the coarsest scale. Their mean values are raised to exponents and
multiplied to produce MS-SSIM:
.. math::
MSSSIM = {SSIM}_M^{a_M} \prod_{i=1}^{M-1} ({CS}_i)^{a_i}
Here :math:`M` is the number of scales, :math:`{CS}_i` is the mean value
of the contrast-structure map for the i'th finest scale, and :math:`{SSIM}_M`
is the mean value of the SSIM map for the coarsest scale. If at least one
of these terms is negative, the value of MS-SSIM is zero. The values of
:math:`a_i, i=1,...,M` are taken from the argument `power_factors`.
Parameters
----------
img1 : torch.Tensor
4d tensor with first image to compare
img2 : torch.Tensor
4d tensor with second image to compare. Must have the same height and
width (last two dimensions) as `img1`
dynamic_range : int, optional.
dynamic range of the images. Note we assume that both images have the
same dynamic range. 1, the default, is appropriate for float images
between 0 and 1, as is common in synthesis. 2 is appropriate for float
images between -1 and 1, and 255 is appropriate for standard 8-bit
integer images. We'll raise a warning if it looks like your value is
not appropriate for `img1` or `img2`, but will calculate it anyway.
power_factors : 1D array, optional.
power exponents for the mean values of maps, for different scales (from
fine to coarse). The length of this array determines the number of scales.
By default, this is set to [0.0448, 0.2856, 0.3001, 0.2363, 0.1333],
which is what psychophysical experiments in [1]_ found.
Returns
------
msssim : torch.Tensor
2d tensor of shape (batch, channel) containing the MS-SSIM for each image
References
----------
.. [1] Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale
structural similarity for image quality assessment." The Thirty-Seventh
Asilomar Conference on Signals, Systems & Computers, 2003. Vol. 2. IEEE, 2003.
"""
if power_factors is None:
power_factors = [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]

def downsample(img):
img = F.pad(img, (0, img.shape[3] % 2, 0, img.shape[2] % 2), mode="replicate")
img = F.avg_pool2d(img, kernel_size=2)
return img

msssim = 1
for i in range(len(power_factors) - 1):
_, contrast_structure_map, _ = _ssim_parts(img1, img2, dynamic_range)
msssim *= F.relu(contrast_structure_map.mean((-1, -2))).pow(power_factors[i])
img1 = downsample(img1)
img2 = downsample(img2)
map_ssim, _, _ = _ssim_parts(img1, img2, dynamic_range)
msssim *= F.relu(map_ssim.mean((-1, -2))).pow(power_factors[-1])

if min(img1.shape[2], img1.shape[3]) < 11:
warnings.warn("SSIM uses 11x11 convolutional kernel, but for some scales "
"of the input image, the height and/or the width is smaller "
"than 11, so the kernel size in SSIM is set to be the "
"minimum of these two numbers for these scales.")
return msssim
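A usage sketch for the new metric, with illustrative shapes and parameter choices; only `po.metric.ms_ssim` and its arguments come from the diff above, everything else is assumed for the example.

```python
import torch
import plenoptic as po

img1 = torch.rand(1, 1, 256, 256)                          # 4d (batch, channel, height, width)
img2 = (img1 + 0.05 * torch.randn_like(img1)).clamp(0, 1)

score = po.metric.ms_ssim(img1, img2)   # default 5-scale power factors from Wang et al. (2003)
same = po.metric.ms_ssim(img1, img1)    # identical images: every scale contributes 1, so MS-SSIM is 1

# a shorter power_factors list means fewer scales (here two); the values act as exponents
two_scale = po.metric.ms_ssim(img1, img2, power_factors=[0.5, 0.5])
```

Because each per-scale mean is clipped by the ReLU and never exceeds 1, the product stays in [0, 1], consistent with the reference values checked in `test_msssim_analysis` below.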


def normalized_laplacian_pyramid(im):
"""computes the normalized Laplacian Pyramid using pre-optimized parameters
@@ -273,7 +360,8 @@ def normalized_laplacian_pyramid(im):
padd = 2
normalized_laplacian_activations = []
for N_b in range(0, N_scales):
filt = torch.tensor(spatialpooling_filters[N_b], dtype=torch.float32, device=im.device).unsqueeze(0).unsqueeze(0)
filt = torch.tensor(spatialpooling_filters[N_b], dtype=torch.float32,
device=im.device).unsqueeze(0).unsqueeze(0)
filtered_activations = F.conv2d(torch.abs(laplacian_activations[N_b]), filt, padding=padd, groups=channel)
normalized_laplacian_activations.append(laplacian_activations[N_b] / (sigmas[N_b] + filtered_activations))

@@ -314,7 +402,8 @@ def nlpd(IM_1, IM_2):
References
----------
.. [1] Laparra, V., Ballé, J., Berardino, A. and Simoncelli, E.P., 2016. Perceptual image quality assessment using a normalized Laplacian pyramid. Electronic Imaging, 2016(16), pp.1-6.
.. [1] Laparra, V., Ballé, J., Berardino, A. and Simoncelli, E.P., 2016. Perceptual image quality
assessment using a normalized Laplacian pyramid. Electronic Imaging, 2016(16), pp.1-6.
"""

y = normalized_laplacian_pyramid(torch.cat((IM_1, IM_2), 0))
38 changes: 29 additions & 9 deletions tests/test_plenoptic.py
@@ -14,7 +14,7 @@

# If you add anything here, remember to update the docstring in osf_download!
OSF_URL = {'plenoptic-test-files.tar.gz': 'q9kn8', 'ssim_images.tar.gz': 'j65tw',
'ssim_analysis.mat': 'ndtc7', 'MAD_results.tar.gz': 'jwcsr'}
'ssim_analysis.mat': 'ndtc7', 'msssim_images.tar.gz': '5fuba', 'MAD_results.tar.gz': 'jwcsr'}


def osf_download(filename):
@@ -28,7 +28,7 @@ def osf_download(filename):
Parameters
----------
filename : {'plenoptic-test-files.tar.gz', 'ssim_images.tar.gz',
'ssim_analysis.mat', 'MAD_results.tar.gz'}
'ssim_analysis.mat', 'msssim_images.tar.gz', 'MAD_results.tar.gz'}
Which file to download.
Returns
@@ -74,6 +74,11 @@ def ssim_images():
return osf_download('ssim_images.tar.gz')


@pytest.fixture()
def msssim_images():
return osf_download('msssim_images.tar.gz')


@pytest.fixture()
def ssim_analysis():
ssim_analysis = osf_download('ssim_analysis.mat')
@@ -168,20 +173,25 @@ def test_ssim(self, einstein_img, curie_img, weighted):
curie_img.requires_grad_()
assert po.metric.ssim(einstein_img, curie_img, weighted=weighted).requires_grad

@pytest.mark.parametrize('func_name', ['noise', 'mse', 'ssim'])
def test_msssim(self, einstein_img, curie_img):
curie_img.requires_grad_()
assert po.metric.ms_ssim(einstein_img, curie_img).requires_grad

@pytest.mark.parametrize('func_name', ['noise', 'mse', 'ssim', 'ms-ssim'])
@pytest.mark.parametrize('size_A', [1, 3])
@pytest.mark.parametrize('size_B', [1, 2, 3])
def test_batch_handling(self, einstein_img, curie_img, func_name, size_A, size_B):
if func_name == 'noise':
func = po.add_noise
A = einstein_img.repeat(size_A, 1, 1, 1)
B = size_B * [4]
elif func_name == 'mse':
func = po.metric.mse
A = einstein_img.repeat(size_A, 1, 1, 1)
B = curie_img.repeat(size_B, 1, 1, 1)
elif func_name == 'ssim':
func = po.metric.ssim
else:
if func_name == 'mse':
func = po.metric.mse
elif func_name == 'ssim':
func = po.metric.ssim
elif func_name == 'ms-ssim':
func = po.metric.ms_ssim
A = einstein_img.repeat(size_A, 1, 1, 1)
B = curie_img.repeat(size_B, 1, 1, 1)
if size_A != size_B and size_A != 1 and size_B != 1:
@@ -238,6 +248,16 @@ def test_ssim_analysis(self, weighted, other_img, ssim_images,
print(plen_val-mat_val, plen_val, mat_val)
assert torch.allclose(plen_val, mat_val.view_as(plen_val), atol=1e-5)

def test_msssim_analysis(self, msssim_images):
# True values are defined by https://ece.uwaterloo.ca/~z70wang/research/iwssim/msssim.zip
true_values = torch.tensor([1.0000000, 0.9112161, 0.7699084, 0.8785111, 0.9488805], device=DEVICE)
computed_values = torch.zeros_like(true_values)
base_img = po.load_images(op.join(msssim_images, "samp0.tiff")).to(DEVICE)
for i in range(len(true_values)):
other_img = po.load_images(op.join(msssim_images, f"samp{i}.tiff")).to(DEVICE)
computed_values[i] = po.metric.ms_ssim(base_img, other_img)
assert torch.allclose(true_values, computed_values)

def test_nlpd(self, einstein_img, curie_img):
curie_img.requires_grad_()
assert po.metric.nlpd(einstein_img, curie_img).requires_grad
