Refactor Portilla-Simoncelli model #225

Merged

merged 119 commits on Feb 29, 2024
Changes from 112 commits

Commits (119)
cd7ac47
run black on portilla_simoncelli.py
billbrod Aug 25, 2023
a88e266
updates docstring
billbrod Aug 25, 2023
9ab9ef7
cleans up _get_rep_scales
billbrod Aug 25, 2023
e037240
ran black on steerpyr
billbrod Aug 25, 2023
6bbfdc4
adds type annotations to steerpyr
billbrod Aug 25, 2023
3b50b39
fix typo
billbrod Aug 25, 2023
1a576e9
adds type annotation for PS init
billbrod Aug 25, 2023
6e085ed
Merge branch 'psMinStats' of github.com:LabForComputationalVision/ple…
billbrod Aug 25, 2023
0731e9d
more type annotations
billbrod Aug 25, 2023
f58e529
fixes see also
billbrod Aug 28, 2023
7727b0b
updates type annotations
billbrod Aug 28, 2023
e936099
fix typo
billbrod Aug 28, 2023
b65e87e
updates docstrings in stats.py
billbrod Sep 5, 2023
1f3dbee
PS can use stats.skew/kurtosis rather than having own version
billbrod Sep 5, 2023
5e02309
adds support for multi-batch, multi-channel
billbrod Sep 14, 2023
eecb9a3
steerpyr.recon_pyr should return same dtype as it got
billbrod Sep 18, 2023
d9f3290
adds modulate_phase function
billbrod Sep 18, 2023
8645c0a
adds build/ dir to gitignore
billbrod Sep 18, 2023
f6c24a7
big refactor of PS model
billbrod Sep 18, 2023
911d438
functions in signal.py
billbrod Sep 18, 2023
ecdbb91
shrink uses torch.where now
billbrod Sep 19, 2023
67929fa
fix shrink for gpus
billbrod Sep 19, 2023
1df0617
Merge branch 'main' of github.com:LabForComputationalVision/plenoptic…
billbrod Sep 19, 2023
5632008
Merge branch 'ps_refactor' of github.com:LabForComputationalVision/pl…
billbrod Sep 19, 2023
276ac25
corrects type annotation for autocorrelation
billbrod Sep 20, 2023
11ddd7b
use non-downsampled pyramid
billbrod Sep 20, 2023
19cf521
makes new ps version gpu-compliant
billbrod Sep 20, 2023
b2813f4
go back to indexing method for shrink
billbrod Sep 20, 2023
9c7f2db
overhaul again! now lists of tensors
billbrod Sep 20, 2023
3d74ed9
fixes for plot_representation
billbrod Sep 20, 2023
8483277
removes attributes that are no longer here
billbrod Sep 20, 2023
fb7a924
modulate_phase: only call atan2 once
billbrod Sep 21, 2023
ad1b532
remove ues_true_correlation
billbrod Sep 21, 2023
b399ffd
remove always unnecessary stats
billbrod Sep 21, 2023
d13c2e0
cleans up plotting code
billbrod Sep 21, 2023
a3bf090
fix for multi-channel
billbrod Oct 30, 2023
3c50cc2
Adds scales_shape_dict
billbrod Oct 31, 2023
3e69429
Adds necessary stats mask
billbrod Nov 2, 2023
6675871
modulate_phase: only compute x.abs() once
billbrod Nov 2, 2023
c943577
updates PS tests to (mostly) work with refactor
billbrod Nov 3, 2023
723fe0b
fixes scale test
billbrod Nov 6, 2023
68ae172
remove torchvision: use center_crop
billbrod Nov 6, 2023
d55a5a2
remove negative of phase doubled real
billbrod Nov 6, 2023
6bfbc9a
adds torchvision back as nb dependency
billbrod Nov 6, 2023
efd69e7
use tools.center_crop in notebooks
billbrod Nov 6, 2023
cb52c38
adds PS tests for differently-shaped images
billbrod Nov 6, 2023
efdf4fc
tests, changes for plotting
billbrod Nov 6, 2023
cedec11
adds tests for shape and redundancies
billbrod Nov 7, 2023
f61d67d
adds tests for expand and shrink
billbrod Nov 7, 2023
fe3c1d8
updates tools tests
billbrod Nov 8, 2023
062a30a
bugfix for PS update_plot
billbrod Nov 8, 2023
9e0290b
updates docstrings and comments
billbrod Nov 8, 2023
6359d6c
Merge branch 'main' of github.com:LabForComputationalVision/plenoptic…
billbrod Nov 8, 2023
916bea5
updates metamer and display notebooks for new PS
billbrod Nov 8, 2023
9745dff
corrects indent
billbrod Nov 8, 2023
4a97218
reruns PS notebook through final section
billbrod Nov 8, 2023
c1817f0
Merge branch 'ps_refactor' of github.com:LabForComputationalVision/pl…
billbrod Nov 8, 2023
b602367
starts changing to true cross-corr
billbrod Nov 13, 2023
a3adb92
adds new section to tips
billbrod Nov 14, 2023
f5cc7fe
adds mags_std to representation
billbrod Nov 14, 2023
315e0f4
updates tests for real cross-corrs
billbrod Nov 14, 2023
f24caae
adds test for cross correlations
billbrod Nov 15, 2023
42a33c9
reruns most of PS notebook
billbrod Nov 15, 2023
71c23f2
Literal comes from typing_extensions in python 3.7
billbrod Nov 15, 2023
fe9eac9
Merge branch 'ps_refactor' of github.com:LabForComputationalVision/pl…
billbrod Nov 15, 2023
38c392e
Literal comes from typing_extensions in python 3.7
billbrod Nov 15, 2023
4afc98b
fixes some failing tests
billbrod Nov 16, 2023
18f1996
adds url for ps_synth_gpu refactor
billbrod Nov 16, 2023
25d3510
fixes to make tests run on GPU
billbrod Nov 16, 2023
e2e663d
Merge branch 'ps_refactor' of github.com:LabForComputationalVision/pl…
billbrod Nov 16, 2023
3b1b391
Merge branch 'ps_refactor' of github.com:LabForComputationalVision/pl…
billbrod Nov 16, 2023
a7a8061
fix: center_crop accepts only single int
billbrod Nov 16, 2023
fdc99ae
testing.array_equal for arrays, not testing.equal
billbrod Nov 16, 2023
eb1bd17
make shrink and expand error messages the same
billbrod Nov 16, 2023
e13b3b3
updates some tolerances
billbrod Nov 16, 2023
f60198a
fix failing test
billbrod Nov 16, 2023
3f78113
fixes failing tests
billbrod Nov 17, 2023
aef0696
fixes failing tests
billbrod Nov 17, 2023
d2ca2a8
fix for gpu tests
billbrod Nov 17, 2023
a9bdc65
Merge branch 'main' of github.com:LabForComputationalVision/plenoptic…
billbrod Nov 17, 2023
e709187
Merge branch 'main' of github.com:LabForComputationalVision/plenoptic…
billbrod Dec 11, 2023
fb479d3
Merge branch 'main' of github.com:LabForComputationalVision/plenoptic…
billbrod Dec 11, 2023
a2ab279
Make PS statistics tutorial work for refactored code
dherrera1911 Dec 14, 2023
b8cb360
Fix variable name typo
dherrera1911 Dec 14, 2023
f9f230a
Apply Edoardo's suggestions from code review
billbrod Jan 3, 2024
be7909d
add update_plot test
billbrod Jan 3, 2024
02a72ea
adds test for convert to dict errors
billbrod Jan 3, 2024
9327bcd
test even spatial_corr_width
billbrod Jan 3, 2024
493eb40
updates test_ps_torch_output
billbrod Jan 3, 2024
2dde6a9
adds test_vectors_refactor.tar.gz to osf_download
billbrod Jan 3, 2024
ce13d8b
updates test_ps_scales to include even spatial corr width
billbrod Jan 3, 2024
aebfcca
Merge branch 'ps_refactor' of github.com:LabForComputationalVision/pl…
billbrod Jan 3, 2024
919a0eb
runs isort
billbrod Jan 3, 2024
95ca562
makes pyramid attribute private
billbrod Jan 3, 2024
514acf3
replace B,C,S / B,C,H,W style notation
billbrod Jan 3, 2024
856df9d
adds Raises to PS.forward()
billbrod Jan 3, 2024
be22a16
breaks up line
billbrod Jan 3, 2024
f7a575d
change type annotations for Figure/Axes
billbrod Jan 3, 2024
c4ee4bd
adds explanation for einops.rearrange
billbrod Jan 4, 2024
15fbf5b
tries to straighten out type annotations
billbrod Jan 4, 2024
15dddf7
fix for last commit
billbrod Jan 4, 2024
78144d2
change threshold to be based on dtype.resolution
billbrod Jan 4, 2024
b23b5c0
switch from single letter var
billbrod Jan 4, 2024
28b3874
adds module level docstrings
billbrod Jan 4, 2024
ea17809
fix failing test on gpu
billbrod Jan 5, 2024
28ff190
fix failing notebook
billbrod Jan 5, 2024
bbbd2ac
Merge branch 'ps_refactor' of github.com:LabForComputationalVision/pl…
billbrod Jan 5, 2024
9f323f3
rename vector -> tensor, update notebook
billbrod Jan 5, 2024
9b09d27
Updates PS notebook with magnitude means
billbrod Jan 9, 2024
76e59fa
rearrange cells
billbrod Jan 11, 2024
29d46fd
adds more detailed explanation of scales shape dict
billbrod Jan 12, 2024
191bbe3
updates language in notebook
billbrod Jan 12, 2024
1bf81ef
updates description of expand/shrink
billbrod Feb 26, 2024
de5c970
removes unnecessary comments
billbrod Feb 26, 2024
9817859
updates PS notebook
billbrod Feb 28, 2024
f692242
Merge branch 'main' of github.com:LabForComputationalVision/plenoptic…
billbrod Feb 28, 2024
ea86a6a
fixes some rendering issues
billbrod Feb 28, 2024
99cdcae
fix typo
billbrod Feb 28, 2024
cad4606
fixes failing things from merge
billbrod Feb 28, 2024
3 changes: 3 additions & 0 deletions .gitignore
@@ -36,3 +36,6 @@ docs/tutorials/assets

# created automatically by setuptools.scm, don't track
src/plenoptic/version.py

# automatically created by something
build/
38 changes: 36 additions & 2 deletions docs/tips.rst
@@ -36,7 +36,7 @@ methods.
- For metamers, this means double-checking that the difference between the model
representation of the metamer and the target image is small enough. If your
model's representation is multi-scale, trying coarse-to-fine optimization may
help (see `notebook <tutorials/06_Metamer.html#Coarse-to-fine-optimization>`_
help (see `notebook <tutorials/intro/06_Metamer.html#Coarse-to-fine-optimization>`_
for details).
- For MAD competition, this means double-checking that the reference metric is
constant and that the optimized metric has converged at a lower or higher
@@ -59,6 +59,40 @@ Additionally, it may be helpful to visualize the progression of synthesis, using
each synthesis method's ``animate`` or ``plot_synthesis_status`` helper
functions (e.g., :func:`plenoptic.synthesize.metamer.plot_synthesis_status`).

Tweaking the model
------------------

You can also improve your chances of finding a good synthesis by tweaking the
model. For example, the loss function used for metamer synthesis by default is
mean-squared error. This implicitly weights all aspects of the model's
representation equally. Thus, portions of the representation whose magnitudes
are significantly smaller than the rest might not be matched at the same rate.
You can address this by using coarse-to-fine synthesis or picking a more
suitable loss function, but it's generally a good
idea for all of a model's representation to have roughly the same magnitude. You
can do this in a principled or empirical manner:

- Principled: compose your representation of statistics that you know lie within
the same range. For example, use correlations instead of covariances (see the
Portilla-Simoncelli model, and in particular `how plenoptic's implementation
differs from matlab
<tutorials/models/Metamer-Portilla-Simoncelli#7.-Notable-differences-between-Matlab-and-Python-Implementations>`_
for an example of this).
- Empirical: measure your model's representation on a dataset of relevant
natural images and then use this output to z-score your model's representation
on each pass (see [Ziemba2021]_ for an example; this is what the Van Hateren
database is used for).
- In the middle: normalize statistics based on their value in the original image
  (note: not the image the model takes as input during synthesis; normalizing
  by that changing input would likely make optimization very difficult).
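The empirical option can be sketched as follows. The function names (`fit_zscore_stats`, `zscored`) and the assumption that your model maps an image to a representation tensor are illustrative; plenoptic does not ship these helpers:

```python
import torch

def fit_zscore_stats(model, images):
    """Measure per-element mean and std of a model's representation over a
    dataset of natural images with shape (n_images, channel, height, width)."""
    with torch.no_grad():
        reps = torch.stack([model(im.unsqueeze(0)).flatten() for im in images])
    return reps.mean(0), reps.std(0)

def zscored(model, image, mean, std, epsilon=1e-8):
    """Z-score the representation so every element has comparable magnitude."""
    return (model(image).flatten() - mean) / (std + epsilon)
```

After fitting `mean` and `std` once on a dataset such as the Van Hateren images, the z-scored output can serve as the representation that synthesis optimizes.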

If you are computing a multi-channel representation, you may have a similar
problem where one channel is larger or smaller than the others. Here, tweaking
the loss function might be more useful. Using something like `logsumexp` (the
log of the sum of exponentials, a smooth approximation of the maximum function)
to combine across channels after using something like L2-norm to compute the
loss within each channel might help.
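That combination can be sketched as a loss function; the name and exact reduction below are illustrative, not a plenoptic API:

```python
import torch

def logsumexp_channel_loss(rep_a, rep_b):
    """Combine per-channel L2 errors with logsumexp.

    rep_a, rep_b: representations of shape (batch, channel, ...).
    logsumexp is a smooth approximation to the max, so the worst-matched
    channel dominates the loss while keeping useful gradients everywhere.
    """
    # L2 norm of the error within each channel -> (batch, channel)
    per_channel = (rep_a - rep_b).flatten(start_dim=2).pow(2).sum(-1).sqrt()
    # smooth-max across channels, then average over the batch
    return torch.logsumexp(per_channel, dim=-1).mean()
```

Note that at zero error this evaluates to log(n_channels) rather than zero, a constant offset that does not affect optimization.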

None of the existing synthesis methods meet my needs
====================================================

@@ -79,4 +113,4 @@ methods.

If you extend a method successfully or would like help making it work, please
let us know by posting a `discussion!
<https://github.com/Flatiron-CCN/plenoptic/discussions>`_
<https://github.com/LabForComputationalVision/plenoptic/discussions>`_
20 changes: 5 additions & 15 deletions examples/02_Eigendistortions.ipynb
@@ -566,20 +566,10 @@
"source": [
"# a couple helper functions\n",
"\n",
"def center_crop(im, n):\n",
" \"\"\"Crop an nxn image from the center of im\"\"\"\n",
" im_height, im_width = im.shape[:2]\n",
" assert n<im_height and n<im_width\n",
"\n",
" im_crop = im[im_height//2-n//2:im_height//2+n//2,\n",
" im_width//2-n//2:im_width//2+n//2]\n",
" return im_crop\n",
"\n",
"n = 128 # this will be the img_height and width of the input, you can change this to accommodate your machine\n",
"img = plt.imread(op.join('../data', 'color_wheel.jpg')).copy()\n",
"\n",
"# center crop the image to nxn\n",
"img = center_crop(img, n)\n",
"img = po.tools.center_crop(img, n)\n",
"\n",
"img = po.tools.rescale(img) # rescale to [0, 1]\n",
"\n",
Expand Down Expand Up @@ -907,7 +897,7 @@
"img = plt.imread(op.join('../data', '256/curie.pgm')).copy()\n",
"\n",
"# center crop the image to nxn\n",
"img = center_crop(img, n)\n",
"img = po.tools.center_crop(img, n)\n",
"\n",
"img = po.tools.rescale(img) # rescale to [0, 1]\n",
"\n",
Expand Down Expand Up @@ -1009,9 +999,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:plen_3.10]",
"display_name": "plenoptic",
"language": "python",
"name": "conda-env-plen_3.10-py"
"name": "plenoptic"
},
"language_info": {
"codemirror_mode": {
@@ -1023,7 +1013,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.10.13"
}
},
"nbformat": 4,
2 changes: 1 addition & 1 deletion examples/03_Steerable_Pyramid.ipynb
@@ -107,7 +107,7 @@
"for k in pyr_coeffs.keys():\n",
" # we ignore the residual_highpass and residual_lowpass, since we're focusing on the filters here\n",
" if isinstance(k, tuple):\n",
" reconList.append(pyr.recon_pyr(pyr_coeffs, k[0], k[1]))\n",
" reconList.append(pyr.recon_pyr(pyr_coeffs, [k[0]], [k[1]]))\n",
" \n",
"po.imshow(reconList, col_wrap=order+1, vrange='indep1', zoom=2);"
]
9 changes: 4 additions & 5 deletions examples/05_Geodesics.ipynb
@@ -57,7 +57,6 @@
" !pip install torchvision\n",
" import torchvision\n",
"import torchvision.transforms as transforms\n",
"from torchvision.transforms.functional import center_crop\n",
"from torchvision import models\n",
"\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
Expand Down Expand Up @@ -108,7 +107,7 @@
"einstein = po.load_images('../data/256/einstein.pgm')\n",
"einstein = po.tools.conv.blur_downsample(einstein, n_scales=2)\n",
"vid = po.tools.translation_sequence(einstein, n_steps=20)\n",
"vid = center_crop(vid, image_size // 2)\n",
"vid = po.tools.center_crop(vid, image_size // 2)\n",
"vid = po.tools.rescale(vid, 0, 1)\n",
"\n",
"imgA = vid[0:1]\n",
Expand Down Expand Up @@ -1059,9 +1058,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:plen_3.10]",
"display_name": "plenoptic",
"language": "python",
"name": "conda-env-plen_3.10-py"
"name": "plenoptic"
},
"language_info": {
"codemirror_mode": {
@@ -1073,7 +1072,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
"version": "3.10.13"
},
"toc-autonumbering": true,
"toc-showtags": true
27,533 changes: 12,942 additions & 14,591 deletions examples/06_Metamer.ipynb

Large diffs are not rendered by default.

19,129 changes: 9,125 additions & 10,004 deletions examples/Display.ipynb

Large diffs are not rendered by default.

1,326 changes: 497 additions & 829 deletions examples/Metamer-Portilla-Simoncelli.ipynb
@EricThomson (Collaborator), Nov 16, 2023:

Big-picture impression: this is very cool. The paper is very interesting, and it will be great to have a notebook illustrating these ideas and the Python implementation! The stuff on blended textures is really cool! Generally the flow is good, and it is very clear. My comments below are in the spirit of trying to find something to give constructive criticism about. But honestly, you could hit commit and it would be fine.

At a finer level, and this is coming from an outsider, I am a little less sure of how "metamer" is defined when it comes to texture, because I can clearly distinguish many of the examples that I thought were supposed to be metamers from the original. Please forgive me for being dense here; maybe I'm just missing some explanations -- I skimmed this relatively quickly to get the gist!

I would consider perhaps addressing this or putting more compelling cases in there earlier on (e.g., in the paper, some of the cases, like Figures 12, 13, and 15, seem much more compelling). You are clearly being quantitative throughout, talking about errors for different models rather than absolute matching. So maybe it isn't about exact matching, but degree of closeness for these models. Maybe you could emphasize this a bit more -- though ultimately with metamers we evaluate perceptually, right? I am out of my field here. 🤣

I'm not sure if there is a good alternative to the name "man-made"; I couldn't think of one that was really natural and left out gendered jargon. But that does read a bit dated now to my ears. Though I'm sure some people would fight back against me on this -- feel free to ignore. 😄

Collaborator Author:

Good point, I'll try to clarify what's going on. In brief: we're overloading the word "metamer". Generally, people use it to mean "perceptual metamer", i.e., two images that are physically distinct but that humans perceive as identical. Typically in plenoptic, we use it to mean "model metamer", i.e., two images with different pixel values but (near-)identical model outputs. What that means with respect to perception depends on your model: if your model predicts single-cell responses, there might be no relationship with perception! For the texture model, humans should perceive model metamers as belonging to the same texture class. I'll make this more explicit in the notebook.

Re: "man-made". How about "artificial"? (I don't want to use synthetic, because we mean something specific with that)

@EricThomson (Collaborator), Jan 11, 2024:

Thanks for the clarification: I realize that is a thing in plenoptic, but I had assumed (without reading the paper) that it was giving a model of human vision, and so trying to produce perceptual metamers, which is one reason I was concerned (speaking from the perspective of psychophysics). But if it is all about metamers with respect to some model (that may or may not be a good model of human vision), then I'm fine with that (e.g., I assume that for texture they can be distinguishable on some irrelevant dimension like a phase shift or whatever). But this last point on textures is important: I was probably not thinking enough about textures (vs. pure perceptual similarity) when evaluating the metamers; I just went back and looked -- no, I was pretty accurate in my assessment 😄 many of the later ones do seem much better at capturing textural features.

But one thing I did not pay attention to was discussion of different classes of models, whether some were intentionally meant to be worse than others, etc.

With the jargon, I got stuck: I realize it can't be 'synthetic', but 'artificial' is already taken in the nb by the clearly fake line drawings. I considered just 'human-made', but that can come off as heavy-handed to some people. 🤷

Collaborator Author:

Alright, excellent. This "what do we mean by metamers" question will be a sticking point for people -- it's an important distinction to get! I should probably just discuss it in more places; no harm in restating it.

Collaborator Author:

For the jargon: I'm going to try replacing "man-made" with "artificial" and replace what we had called "artificial" with "hand-drawn / computer-generated", which is more precise.

Collaborator:

That sounds good.

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -49,14 +49,14 @@ dev = [
"pytest>=5.1.2",
'pytest-cov',
'pytest-xdist',
"torchvision>=0.3",
"requests>=2.21",
]

nb = [
'jupyter',
'ipywidgets',
'nbclient>=0.5.5',
"torchvision>=0.3",
]

[project.urls]
@@ -38,7 +38,7 @@ def forward(self, x):

Parameters
----------
x: torch.Tensor of shape (B, C, H, W)
x: torch.Tensor of shape (batch, channel, height, width)
Image, or batch of images. If there are multiple channels,
the Laplacian is computed separately for each of them

Expand Down Expand Up @@ -71,7 +71,7 @@ def recon_pyr(self, y):

Returns
-------
x: torch.Tensor of shape (B, C, H, W)
x: torch.Tensor of shape (batch, channel, height, width)
Image, or batch of images
"""

@@ -79,7 +79,7 @@ def local_gain_control(x, epsilon=1e-8):
Parameters
----------
x : torch.Tensor
Tensor of shape (B,C,H,W)
Tensor of shape (batch, channel, height, width)
epsilon: float, optional
Small constant to avoid division by zero.

@@ -134,7 +134,7 @@ def local_gain_release(norm, direction, epsilon=1e-8):
Returns
-------
x : torch.Tensor
Tensor of shape (B,C,H,W)
Tensor of shape (batch, channel, height, width)

Notes
-----
@@ -163,7 +163,7 @@ def local_gain_control_dict(coeff_dict, residuals=True):
Parameters
----------
coeff_dict : dict
A dictionary containing tensors of shape (B,C,H,W)
A dictionary containing tensors of shape (batch, channel, height, width)
residuals: bool, optional
An option to carry around residuals in the energy dict.
Note that the transformation is not applied to the residuals,
@@ -219,7 +219,7 @@ def local_gain_release_dict(energy, state, residuals=True):
Returns
-------
coeff_dict : dict
A dictionary containing tensors of shape (B,C,H,W)
A dictionary containing tensors of shape (batch, channel, height, width)

Notes
-----
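As context for the docstrings in this diff: local gain control factors a response into a non-negative norm and a direction that together reconstruct the input, and gain release inverts that decomposition. The sketch below is a simplified pointwise version with hypothetical names; it ignores any spatial pooling the library's functions may perform:

```python
import torch

def pointwise_gain_control(x, epsilon=1e-8):
    """Split x of shape (batch, channel, height, width) into norm and direction.

    epsilon keeps the division well-defined where x is zero.
    """
    norm = torch.sqrt(x.pow(2) + epsilon)  # smooth, strictly positive magnitude
    direction = x / norm
    return norm, direction

def pointwise_gain_release(norm, direction):
    """Invert the decomposition: recombine norm and direction."""
    return norm * direction
```

The round trip `pointwise_gain_release(*pointwise_gain_control(x))` recovers `x` up to the smoothing introduced by `epsilon`.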