plenoptic-org · pehf · Mar 18, 2021 · Mar 18, 2021 · Mar 18, 2021 · Mar 22, 2021
@@ -1,4 +1,4 @@
-from .perceptual_distance import ssim, ms_ssim, nlpd, nspd, ssim_map
+from .perceptual_distance import ssim, nlpd, nspd, ssim_map
 from .model_metric import model_metric
 from .naive import mse
 from .classes import NLP
@@ -90,16 +90,21 @@ def _ssim_parts(img1, img2, dynamic_range):
     C1 = (0.01 * dynamic_range) ** 2
     C2 = (0.03 * dynamic_range) ** 2
 
-    # SSIM is the product of a luminance component, a contrast component, and a
-    # structure component. The contrast-structure component has to be separated
-    # when computing MS-SSIM.
-    luminance_map = (2 * mu1_mu2 + C1) / (mu1_sq + mu2_sq + C1)
-    contrast_structure_map = (2.0 * sigma12 + C2) / (sigma1_sq + sigma2_sq + C2)
-    map_ssim = luminance_map * contrast_structure_map
+    v1 = 2.0 * sigma12 + C2
+    v2 = sigma1_sq + sigma2_sq + C2
+
+    # SSIM consists of a luminance component, a contrast component, and a
+    # structure component. This is the contrast component, which is used to
+    # compute MS-SSIM This is the contrast component, which is used to compute
+    # MS-SSIM.
+    contrast_map = v1 / v2
 
     # the weight used for stability
-    weight = torch.log((1 + sigma1_sq/C2) * (1 + sigma2_sq/C2))
-    return map_ssim, contrast_structure_map, weight
+    weight = torch.log(torch.matmul((1+(sigma1_sq/C2)), (1+(sigma2_sq/C2))))
+
+    ssim_map = ((2 * mu1_mu2 + C1) * v1) / ((mu1_sq + mu2_sq + C1) * v2)
+    ssim_map = ((2 * mu1_mu2 + C1) / (mu1_sq + mu2_sq + C1)) * contrast_map
+    return ssim_map, contrast_map, weight
 
 
 def ssim(img1, img2, weighted=False, dynamic_range=1):
@@ -108,7 +113,7 @@ def ssim(img1, img2, weighted=False, dynamic_range=1):
     As described in [1]_, the structural similarity index (SSIM) is a
     perceptual distance metric, giving the distance between two images. SSIM is
     based on three comparison measurements between the two images: luminance,
-    contrast, and structure. All of these are computed convolutionally across the
+    contrast, and structure. All of these are computed in windows across the
     images. See the references for more information.
 
     This implementation follows the original implementation, as found at [2]_,
@@ -146,8 +151,8 @@ def ssim(img1, img2, weighted=False, dynamic_range=1):
     Returns
     ------
     mssim : torch.Tensor
-        2d tensor of shape (batch, channel) containing the mean SSIM for each
-        image, averaged over the whole image
+        2d tensor containing the mean SSIM for each image, averaged over the
+        whole image
 
     Notes
     -----
@@ -164,7 +169,7 @@ def ssim(img1, img2, weighted=False, dynamic_range=1):
     ----------
     .. [1] Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image
        quality assessment: From error measurement to structural similarity"
-       IEEE Transactions on Image Processing, vol. 13, no. 1, Jan. 2004.
+       IEEE Transactios on Image Processing, vol. 13, no. 1, Jan. 2004.
     .. [2] [matlab code](https://www.cns.nyu.edu/~lcv/ssim/ssim_index.m)
     .. [3] [project page](https://www.cns.nyu.edu/~lcv/ssim/)
     .. [4] Wang, Z., & Simoncelli, E. P. (2008). Maximum differentiation (MAD)
@@ -181,10 +186,6 @@ def ssim(img1, img2, weighted=False, dynamic_range=1):
     else:
         mssim = (map_ssim*weight).sum((-1, -2)) / weight.sum((-1, -2))
 
-    if min(img1.shape[2], img1.shape[3]) < 11:
-        warnings.warn("SSIM uses 11x11 convolutional kernel, but the height and/or "
-                      "the width of the input image is smaller than 11, so the "
-                      "kernel size is set to be the minimum of these two numbers.")
     return mssim
 
 
@@ -194,7 +195,7 @@ def ssim_map(img1, img2, dynamic_range=1):
     As described in [1]_, the structural similarity index (SSIM) is a
     perceptual distance metric, giving the distance between two images. SSIM is
     based on three comparison measurements between the two images: luminance,
-    contrast, and structure. All of these are computed convolutionally across the
+    contrast, and structure. All of these are computed in windows across the
     images. See the references for more information.
 
     This implementation follows the original implementation, as found at [2]_,
@@ -237,7 +238,7 @@ def ssim_map(img1, img2, dynamic_range=1):
     ----------
     .. [1] Z. Wang, A. C. Bovik, H. R. Sheikh, and E. P. Simoncelli, "Image
        quality assessment: From error measurement to structural similarity"
-       IEEE Transactions on Image Processing, vol. 13, no. 1, Jan. 2004.
+       IEEE Transactios on Image Processing, vol. 13, no. 1, Jan. 2004.
     .. [2] [matlab code](https://www.cns.nyu.edu/~lcv/ssim/ssim_index.m)
     .. [3] [project page](https://www.cns.nyu.edu/~lcv/ssim/)
     .. [4] Wang, Z., & Simoncelli, E. P. (2008). Maximum differentiation (MAD)
@@ -246,97 +247,9 @@ def ssim_map(img1, img2, dynamic_range=1):
        http://dx.doi.org/10.1167/8.12.8
 
     """
-    if min(img1.shape[2], img1.shape[3]) < 11:
-        warnings.warn("SSIM uses 11x11 convolutional kernel, but the height and/or "
-                      "the width of the input image is smaller than 11, so the "
-                      "kernel size is set to be the minimum of these two numbers.")
     return _ssim_parts(img1, img2, dynamic_range)[0]
 
 
-def ms_ssim(img1, img2, dynamic_range=1, power_factors=None):
-    r"""Multiscale structural similarity index (MS-SSIM)
-
-    As described in [1]_, multiscale structural similarity index (MS-SSIM) is
-    an improvement upon structural similarity index (SSIM) that takes into
-    account the perceptual distance between two images on different scales.
-
-    SSIM is based on three comparison measurements between the two images:
-    luminance, contrast, and structure. All of these are computed convolutionally
-    across the images, producing three maps instead of scalars. The SSIM map is
-    the elementwise product of these three maps. See `metric.ssim` and
-    `metric.ssim_map` for a full description of SSIM.
-
-    To get images of different scales, average pooling operations with kernel
-    size 2 are performed recursively on the input images. The product of
-    contrast map and structure map (the "contrast-structure map") is computed
-    for all but the coarsest scales, and the overall SSIM map is only computed
-    for the coarsest scale. Their mean values are raised to exponents and
-    multiplied to produce MS-SSIM:
-    .. math::
-        MSSSIM = {SSIM}_M^{a_M} \prod_{i=1}^{M-1} ({CS}_i)^{a_i}
-    Here :math: `M` is the number of scales, :math: `{CS}_i` is the mean value
-    of the contrast-structure map for the i'th finest scale, and :math: `{SSIM}_M`
-    is the mean value of the SSIM map for the coarsest scale. If at least one
-    of these terms are negative, the value of MS-SSIM is zero. The values of
-    :math: `a_i, i=1,...,M` are taken from the argument `power_factors`.
-
-    Parameters
-    ----------
-    img1 : torch.Tensor
-        4d tensor with first image to compare
-    img2 : torch.Tensor
-        4d tensor with second image to compare. Must have the same height and
-        width (last two dimensions) as `img1`
-    dynamic_range : int, optional.
-        dynamic range of the images. Note we assume that both images have the
-        same dynamic range. 1, the default, is appropriate for float images
-        between 0 and 1, as is common in synthesis. 2 is appropriate for float
-        images between -1 and 1, and 255 is appropriate for standard 8-bit
-        integer images. We'll raise a warning if it looks like your value is
-        not appropriate for `img1` or `img2`, but will calculate it anyway.
-    power_factors : 1D array, optional.
-        power exponents for the mean values of maps, for different scales (from
-        fine to coarse). The length of this array determines the number of scales.
-        By default, this is set to [0.0448, 0.2856, 0.3001, 0.2363, 0.1333],
-        which is what psychophysical experiments in [1]_ found.
-
-    Returns
-    ------
-    msssim : torch.Tensor
-        2d tensor of shape (batch, channel) containing the MS-SSIM for each image
-
-    References
-    ----------
-    .. [1] Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale
-       structural similarity for image quality assessment." The Thrity-Seventh
-       Asilomar Conference on Signals, Systems & Computers, 2003. Vol. 2. IEEE, 2003.
-
-    """
-    if power_factors is None:
-        power_factors = [0.0448, 0.2856, 0.3001, 0.2363, 0.1333]
-
-    def downsample(img):
-        img = F.pad(img, (0, img.shape[3] % 2, 0, img.shape[2] % 2), mode="replicate")
-        img = F.avg_pool2d(img, kernel_size=2)
-        return img
-
-    msssim = 1
-    for i in range(len(power_factors) - 1):
-        _, contrast_structure_map, _ = _ssim_parts(img1, img2, dynamic_range)
-        msssim *= F.relu(contrast_structure_map.mean((-1, -2))).pow(power_factors[i])
-        img1 = downsample(img1)
-        img2 = downsample(img2)
-    map_ssim, _, _ = _ssim_parts(img1, img2, dynamic_range)
-    msssim *= F.relu(map_ssim.mean((-1, -2))).pow(power_factors[-1])
-
-    if min(img1.shape[2], img1.shape[3]) < 11:
-        warnings.warn("SSIM uses 11x11 convolutional kernel, but for some scales "
-                      "of the input image, the height and/or the width is smaller "
-                      "than 11, so the kernel size in SSIM is set to be the "
-                      "minimum of these two numbers for these scales.")
-    return msssim
-
-
 def normalized_laplacian_pyramid(im):
     """computes the normalized Laplacian Pyramid using pre-optimized parameters
 
@@ -360,8 +273,7 @@ def normalized_laplacian_pyramid(im):
     padd = 2
     normalized_laplacian_activations = []
     for N_b in range(0, N_scales):
-        filt = torch.tensor(spatialpooling_filters[N_b], dtype=torch.float32,
-                            device=im.device).unsqueeze(0).unsqueeze(0)
+        filt = torch.tensor(spatialpooling_filters[N_b], dtype=torch.float32, device=im.device).unsqueeze(0).unsqueeze(0)
         filtered_activations = F.conv2d(torch.abs(laplacian_activations[N_b]), filt, padding=padd, groups=channel)
         normalized_laplacian_activations.append(laplacian_activations[N_b] / (sigmas[N_b] + filtered_activations))
 
@@ -402,8 +314,7 @@ def nlpd(IM_1, IM_2):
 
     References
     ----------
-    .. [1] Laparra, V., Ballé, J., Berardino, A. and Simoncelli, E.P., 2016. Perceptual image quality
-       assessment using a normalized Laplacian pyramid. Electronic Imaging, 2016(16), pp.1-6.
+    .. [1] Laparra, V., Ballé, J., Berardino, A. and Simoncelli, E.P., 2016. Perceptual image quality assessment using a normalized Laplacian pyramid. Electronic Imaging, 2016(16), pp.1-6.
     """
 
     y = normalized_laplacian_pyramid(torch.cat((IM_1, IM_2), 0))

@@ -3,18 +3,18 @@
 from ...tools.signal import rectangular_to_polar, polar_to_rectangular
 
 
-def rectangular_to_polar_dict(coeff_dict, dim=-1, residuals=False):
-    """Return the complex modulus and the phase of each complex tensor
-    in a dictionary.
+def rectangular_to_polar_dict(coeff_dict, residuals=True):
+    """Wraps the rectangular to polar transform to act on all
+    the values in a dictionary.
 
     Parameters
     ----------
     coeff_dict : dict
        A dictionary containing complex tensors.
-    dim : int, optional
-       The dimension that contains the real and imaginary components.
     residuals: bool, optional
         An option to carry around residuals in the energy branch.
+        Note that the transformation is not applied to the residuals,
+        that is dictionary elements with a key starting in "residual".
 
     Returns
     -------
@@ -27,24 +27,12 @@ def rectangular_to_polar_dict(coeff_dict, dim=-1, residuals=False):
 
     Note
     ----
-    Since complex numbers are not supported by pytorch, we represent
-    complex tensors as having an extra dimension with two slices, where
-    one contains the real and the other contains the imaginary
-    components. E.g., ``1+2j`` would be represented as
-    ``torch.tensor([1, 2])`` and ``[1+2j, 4+5j]`` would be
-    ``torch.tensor([[1, 2], [4, 5]])``. In the cases represented here,
-    this "complex dimension" is the last one, and so the default
-    argument ``dim=-1`` would work.
-
-    Note that energy and state is not computed on the residuals.
-
     Computing the state is local gain control in disguise, see
-    ``rectangular_to_polar_real`` and ``local_gain_control``.
+    ``local_gain_control`` and ``local_gain_control_dict``.
     """
     energy = {}
     state = {}
     for key in coeff_dict.keys():
-        # ignore residuals
         if isinstance(key, tuple) or not key.startswith('residual'):
             energy[key], state[key] = rectangular_to_polar(coeff_dict[key])
 
@@ -56,7 +44,8 @@ def rectangular_to_polar_dict(coeff_dict, dim=-1, residuals=False):
 
 
 def polar_to_rectangular_dict(energy, state, residuals=True):
-    """Return the real and imaginary part  tensor in a dictionary.
+    """Wraps the polar to rectangular transform to act on all
+    the values in a matching pair of dictionaries.
 
     Parameters
     ----------
@@ -65,10 +54,10 @@ def polar_to_rectangular_dict(energy, state, residuals=True):
         modulus.
     state : dict
         The dictionary of torch.Tensors containing the local phase.
-    dim : int, optional
-       The dimension that contains the real and imaginary components.
     residuals: bool, optional
         An option to carry around residuals in the energy branch.
+        Note that the transformation is not applied to the residuals,
+        that is dictionary elements with a key starting in "residual".
 
     Returns
     -------
@@ -78,8 +67,6 @@ def polar_to_rectangular_dict(energy, state, residuals=True):
 
     coeff_dict = {}
     for key in energy.keys():
-        # ignore residuals
-
         if isinstance(key, tuple) or not key.startswith('residual'):
             coeff_dict[key] = polar_to_rectangular(energy[key], state[key])
 

@@ -1,2 +1,3 @@
 from .frontend import *
 from .naive import *
+from .factorized_pyramid import *
@@ -0,0 +1,89 @@
+import torch.nn as nn
+from plenoptic.simulate.canonical_computations.non_linearities import (
+    local_gain_control, local_gain_control_dict, local_gain_release,
+    local_gain_release_dict, polar_to_rectangular_dict,
+    rectangular_to_polar_dict)
+from plenoptic.simulate.canonical_computations.steerable_pyramid_freq import \
+    Steerable_Pyramid_Freq
+from plenoptic.tools.signal import polar_to_rectangular, rectangular_to_polar
+
+
+class Factorized_Pyramid(nn.Module):
+    """
+    An non-linear transform which factorizes signal and is exactely invertible.
+
+    Loosely partitions things and stuff.
+
+    Analogous to Fourier amplitude and phase for a localized multiscale
+    and oriented transform.
+
+    Notes
+    -----
+    residuals are stored in amplitude
+
+    by default the not downsampled version also returns a tensor,
+    which allows easy further processing
+        eg. recursive Factorized Pyr
+        (analogous to the scattering transform)
+
+    TODO
+    ----
+    flesh out the relationship btw real and complex cases
+
+    handle multi channel input
+        eg. from front end, or from recursive calls
+        hack: fold channels into batch dim and then back out
+
+    cross channel processing - thats next level
+    """
+    def __init__(self, image_size, n_ori=4, n_scale='auto',
+                 downsample_dict=True, is_complex=True):
+        super().__init__()
+
+        self.downsample_dict = downsample_dict
+        self.is_complex = is_complex
+
+        pyr = Steerable_Pyramid_Freq(image_size,
+                                     order=n_ori-1,
+                                     height=n_scale,
+                                     is_complex=is_complex,
+                                     downsample=downsample_dict)
+        self.n_ori = pyr.num_orientations
+        self.n_scale = pyr.num_scales
+
+        if downsample_dict:
+            self.pyramid_analysis  = lambda x: pyr.forward(x)
+            self.pyramid_synthesis = lambda y: pyr.recon_pyr(y)
+            if is_complex:
+                self.decomposition = rectangular_to_polar_dict
+                self.recomposition = polar_to_rectangular_dict
+            else:
+                self.decomposition = local_gain_control_dict
+                self.recomposition = local_gain_release_dict
+        else:
+            def stash(y, info):
+                self.pyr_info = info
+                return y
+            self.pyramid_analysis  = lambda x: stash(*pyr.convert_pyr_to_tensor(
+                                                           pyr.forward(x)))
+            self.pyramid_synthesis = lambda y: pyr.recon_pyr(
+                                 pyr.convert_tensor_to_pyr(y, *self.pyr_info))
+            if is_complex:
+                self.decomposition = rectangular_to_polar
+                self.recomposition = polar_to_rectangular
+            else:
+                self.decomposition = local_gain_control
+                self.recomposition = local_gain_release
+
+    def analysis(self, x):
+        y = self.pyramid_analysis(x)
+        energy, state = self.decomposition(y)
+        return energy, state
+
+    def synthesis(self, energy, state):
+        y = self.recomposition(energy, state)
+        x = self.pyramid_synthesis(y)
+        return x
+
+    def forward(self, x):
+        return self.analysis(x)
@@ -168,7 +168,6 @@ def convert_float_to_int(im, dtype=np.uint8):
     return (im * np.iinfo(dtype).max).astype(dtype)
 
 
-
 def make_synthetic_stimuli(size=256, requires_grad=True):
     r""" Make a set of basic stimuli, useful for developping and debugging models
Original file line number	Diff line number	Diff line change
Expand Up		@@ -168,7 +168,6 @@ def convert_float_to_int(im, dtype=np.uint8):
		return (im * np.iinfo(dtype).max).astype(dtype)



		def make_synthetic_stimuli(size=256, requires_grad=True):
		r""" Make a set of basic stimuli, useful for developping and debugging models

Expand Down