Generalized Proximal Gradient Fix to match Proximal Gradient #143

Merged 8 commits on Nov 29, 2023
86 changes: 54 additions & 32 deletions pyproximal/optimization/primal.py
@@ -102,8 +102,9 @@ def ProximalPoint(prox, x0, tau, niter=10, callback=None, show=False):
return x


def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
epsg=1., niter=10, niterback=100,
def ProximalGradient(proxf, proxg, x0, epsg=1.,
tau=None, beta=0.5, eta=1.,
niter=10, niterback=100,
acceleration=None,
callback=None, show=False):
r"""Proximal gradient (optionally accelerated)
@@ -127,17 +128,19 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
Proximal operator of g function
x0 : :obj:`numpy.ndarray`
Initial vector
epsg : :obj:`float` or :obj:`np.ndarray`, optional
Scaling factor of g function
tau : :obj:`float` or :obj:`numpy.ndarray`, optional
Positive scalar weight, which should satisfy the following condition
to guarantee convergence: :math:`\tau \in (0, 1/L]` where ``L`` is
the Lipschitz constant of :math:`\nabla f`. When ``tau=None``,
backtracking is used to adaptively estimate the best tau at each
iteration. Finally note that :math:`\tau` can be chosen to be a vector
iteration. Finally, note that :math:`\tau` can be chosen to be a vector
when dealing with problems with multiple right-hand sides
beta : :obj:`float`, optional
Backtracking parameter (must be between 0 and 1)
epsg : :obj:`float` or :obj:`np.ndarray`, optional
Scaling factor of g function
eta : :obj:`float`, optional
Relaxation parameter (must be between 0 and 1, 0 excluded).
niter : :obj:`int`, optional
Number of iterations of iterative scheme
niterback : :obj:`int`, optional
@@ -161,9 +164,8 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,

.. math::


\mathbf{x}^{k+1} = \prox_{\tau^k \epsilon g}(\mathbf{y}^{k+1} -
\tau^k \nabla f(\mathbf{y}^{k+1})) \\
\mathbf{x}^{k+1} = \mathbf{y}^k + \eta (\prox_{\tau^k \epsilon g}(\mathbf{y}^k -
\tau^k \nabla f(\mathbf{y}^k)) - \mathbf{y}^k) \\
\mathbf{y}^{k+1} = \mathbf{x}^k + \omega^k
(\mathbf{x}^k - \mathbf{x}^{k-1})
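To make the relaxed update above concrete, here is a minimal NumPy sketch of a single iteration (illustrative only: ``prox_g`` stands for any proximal operator with signature ``prox_g(v, sigma)``, and ``relaxed_pg_step`` is not part of pyproximal):

```python
import numpy as np

def relaxed_pg_step(y, grad_f, prox_g, tau, epsg=1., eta=1.):
    # x^{k+1} = y^k + eta * (prox_{tau*epsg*g}(y^k - tau*grad_f(y^k)) - y^k)
    # eta = 1 recovers the plain (unrelaxed) proximal-gradient step
    return y + eta * (prox_g(y - tau * grad_f(y), epsg * tau) - y)
```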

@@ -187,7 +189,7 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
Different accelerations are provided:

- ``acceleration=None``: :math:`\omega^k = 0`;
- `acceleration=vandenberghe`` [1]_: :math:`\omega^k = k / (k + 3)`;
- ``acceleration=vandenberghe`` [1]_: :math:`\omega^k = k / (k + 3)`;
- ``acceleration=fista``: :math:`\omega^k = (t_{k-1}-1)/t_k` where
:math:`t_k = (1 + \sqrt{1+4t_{k-1}^{2}}) / 2` [2]_
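A small helper computing these momentum schedules could look as follows (a sketch; ``momentum`` is a hypothetical name, not a pyproximal function):

```python
import numpy as np

def momentum(acceleration, k, told):
    """Return (omega_k, t_k) given iteration k and previous FISTA t_{k-1}."""
    if acceleration is None:
        return 0., told
    if acceleration == 'vandenberghe':
        return k / (k + 3.), told
    if acceleration == 'fista':
        t = (1. + np.sqrt(1. + 4. * told ** 2)) / 2.
        return (told - 1.) / t, t
    raise ValueError(f'unknown acceleration: {acceleration}')
```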

@@ -197,7 +199,7 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
Imaging Sciences, vol. 2, pp. 183-202. 2009.

"""
# check if epgs is a ve
# check if epsg is a vector
if np.asarray(epsg).size == 1.:
epsg_print = str(epsg)
else:
@@ -218,7 +220,7 @@ def ProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
'niterback = %d\tacceleration = %s\n' % (type(proxf), type(proxg),
'Adaptive' if tau is None else str(tau), beta,
epsg_print, niter, niterback, acceleration))
head = ' Itn x[0] f g J=f+eps*g'
head = ' Itn x[0] f g J=f+eps*g tau'
print(head)

backtracking = False
@@ -237,10 +239,15 @@

# proximal step
if not backtracking:
x = proxg.prox(y - tau * proxf.grad(y), epsg * tau)
if eta == 1.:
x = proxg.prox(y - tau * proxf.grad(y), epsg * tau)
else:
# relaxed update around the extrapolated point y (matches the docstring formula)
x = y + eta * (proxg.prox(y - tau * proxf.grad(y), epsg * tau) - y)
else:
x, tau = _backtracking(y, tau, proxf, proxg, epsg,
beta=beta, niterback=niterback)
if eta != 1.:
# relax the backtracked proximal step around the extrapolated point y
x = y + eta * (x - y)
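The ``_backtracking`` helper itself is not shown in this diff; the sketch below shows the usual structure of such a routine, assuming the standard Beck-Teboulle sufficient-decrease test (names and signature are illustrative, not the module's internals):

```python
import numpy as np

def backtrack(y, tau, f, grad_f, prox_g, epsg, beta=0.5, niterback=100):
    # Shrink tau by beta until the sufficient-decrease condition holds:
    #   f(x) <= f(y) + <grad_f(y), x - y> + ||x - y||^2 / (2 * tau)
    fy, g = f(y), grad_f(y)
    x = y
    for _ in range(niterback):
        x = prox_g(y - tau * g, epsg * tau)
        dx = x - y
        if f(x) <= fy + np.dot(g, dx) + np.dot(dx, dx) / (2. * tau):
            break
        tau *= beta
    return x, tau
```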

# update internal parameters for bilinear operator
if isinstance(proxf, BilinearOperator):
@@ -264,10 +271,11 @@
if show:
if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
pf, pg = proxf(x), proxg(x)
msg = '%6g %12.5e %10.3e %10.3e %10.3e' % \
msg = '%6g %12.5e %10.3e %10.3e %10.3e %10.3e' % \
(iiter + 1, np.real(to_numpy(x[0])) if x.ndim == 1 else np.real(to_numpy(x[0, 0])),
pf, pg[0] if epsg_print == 'Multi' else pg,
pf + np.sum(epsg * pg))
pf + np.sum(epsg * pg),
tau)
print(msg)
if show:
print('\nTotal time (s) = %.2f' % (time.time() - tstart))
@@ -296,8 +304,9 @@ def AcceleratedProximalGradient(proxf, proxg, x0, tau=None, beta=0.5,
callback=callback, show=show)


def GeneralizedProximalGradient(proxfs, proxgs, x0, tau=None,
epsg=1., niter=10,
def GeneralizedProximalGradient(proxfs, proxgs, x0, tau,
epsg=1., weights=None,
eta=1., niter=10,
acceleration=None,
callback=None, show=False):
r"""Generalized Proximal gradient
@@ -316,24 +325,27 @@ def GeneralizedProximalGradient(proxfs, proxgs, x0, tau=None,

Parameters
----------
proxfs : :obj:`List of pyproximal.ProxOperator`
proxfs : :obj:`list of pyproximal.ProxOperator`
Proximal operators of the :math:`f_i` functions (must have ``grad`` implemented)
proxgs : :obj:`List of pyproximal.ProxOperator`
proxgs : :obj:`list of pyproximal.ProxOperator`
Proximal operators of the :math:`g_j` functions
x0 : :obj:`numpy.ndarray`
Initial vector
tau : :obj:`float` or :obj:`numpy.ndarray`, optional
tau : :obj:`float`
Positive scalar weight, which should satisfy the following condition
to guarantee convergence: :math:`\tau \in (0, 1/L]` where ``L`` is
the Lipschitz constant of :math:`\sum_{i=1}^n \nabla f_i`. When ``tau=None``,
backtracking is used to adaptively estimate the best tau at each
iteration.
the Lipschitz constant of :math:`\sum_{i=1}^n \nabla f_i`.
epsg : :obj:`float` or :obj:`np.ndarray`, optional
Scaling factor(s) of ``g`` function(s)
weights : :obj:`list` or :obj:`np.ndarray`, optional
Weighting factors of ``g`` functions. Must sum to 1.
eta : :obj:`float`, optional
Relaxation parameter (must be between 0 and 1, 0 excluded). Note that
this will only be used when ``acceleration=None``.
niter : :obj:`int`, optional
Number of iterations of iterative scheme
acceleration : :obj:`str`, optional
Acceleration (``vandenberghe`` or ``fista``)
Acceleration (``None``, ``vandenberghe`` or ``fista``)
callback : :obj:`callable`, optional
Function with signature (``callback(x)``) to call after each iteration
where ``x`` is the current model vector
@@ -352,16 +364,27 @@

.. math::
\text{for } j=1,\cdots,n, \\
~~~~\mathbf z_j^{k+1} = \mathbf z_j^{k} + \epsilon_j
\left[prox_{\frac{\tau^k}{\omega_j} g_j}\left(2 \mathbf{x}^{k} - \mathbf{z}_j^{k}
~~~~\mathbf z_j^{k+1} = \mathbf z_j^{k} + \eta
\left[prox_{\frac{\tau^k \epsilon_j}{w_j} g_j}\left(2 \mathbf{x}^{k} - \mathbf{z}_j^{k}
- \tau^k \sum_{i=1}^n \nabla f_i(\mathbf{x}^{k})\right) - \mathbf{x}^{k} \right] \\
\mathbf{x}^{k+1} = \sum_{j=1}^n \omega_j f_j \\

where :math:`\sum_{j=1}^n \omega_j=1`. In the current implementation :math:`\omega_j=1/n`.
\mathbf{x}^{k+1} = \sum_{j=1}^n w_j \mathbf z_j^{k+1} \\

where :math:`\sum_{j=1}^n w_j=1`. In the current implementation, :math:`w_j=1/n` when
not provided.
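Translating the update above into array operations, one iteration without acceleration (so :math:`\mathbf{y}^k = \mathbf{x}^k`) might be sketched as follows; ``gpg_step`` and its arguments are illustrative, not the module's internals:

```python
import numpy as np

def gpg_step(x, zs, grad_fs, prox_gs, tau, epsg, weights, eta=1.):
    # z_j <- z_j + eta * (prox_{tau*eps_j/w_j g_j}(2x - z_j - tau*sum_i grad f_i(x)) - x)
    # x   <- sum_j w_j z_j
    grad = sum(g(x) for g in grad_fs)
    xnew = np.zeros_like(x)
    for j, prox in enumerate(prox_gs):
        zs[j] = zs[j] + eta * (prox(2 * x - zs[j] - tau * grad,
                                    tau * epsg[j] / weights[j]) - x)
        xnew += weights[j] * zs[j]
    return xnew, zs
```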

"""
# check if weights sum to 1
if weights is None:
weights = np.ones(len(proxgs)) / len(proxgs)
if len(weights) != len(proxgs) or not np.isclose(np.sum(weights), 1.):
raise ValueError(f'weights={weights} must be an array of size {len(proxgs)} '
f'summing to 1')

# check if epgs is a vector
if np.asarray(epsg).size == 1.:
epsg_print = str(epsg)
epsg = epsg * np.ones(len(proxgs))
else:
epsg_print = 'Multi'

Expand Down Expand Up @@ -403,9 +426,9 @@ def GeneralizedProximalGradient(proxfs, proxgs, x0, tau=None,
x = np.zeros_like(x)
for i, proxg in enumerate(proxgs):
ztmp = 2 * y - zs[i] - tau * grad
ztmp = proxg.prox(ztmp, tau * len(proxgs))
zs[i] += epsg * (ztmp - y)
x += zs[i] / len(proxgs)
ztmp = proxg.prox(ztmp, tau * epsg[i] / weights[i])
zs[i] += eta * (ztmp - y)
x += weights[i] * zs[i]

# update y
if acceleration == 'vandenberghe':
@@ -416,7 +439,6 @@
omega = ((told - 1.) / t)
else:
omega = 0

y = x + omega * (x - xold)

# run callback
74 changes: 74 additions & 0 deletions pytests/test_solver.py
@@ -0,0 +1,74 @@
import pytest

import numpy as np
from numpy.testing import assert_array_almost_equal
from pylops.basicoperators import MatrixMult
from pyproximal.proximal import L1, L2
from pyproximal.optimization.primal import ProximalGradient, GeneralizedProximalGradient

par1 = {'n': 8, 'm': 10, 'dtype': 'float32'}  # float32
par2 = {'n': 8, 'm': 10, 'dtype': 'float64'}  # float64


@pytest.mark.parametrize("par", [(par1), (par2)])
def test_GPG_weights(par):
"""Check GPG raises error if weight is not summing to 1
"""
with pytest.raises(ValueError):
np.random.seed(0)
n, m = par['n'], par['m']

# Random mixing matrix
R = np.random.normal(0., 1., (n, m))
Rop = MatrixMult(R)

# Model and data
x = np.zeros(m)
y = Rop @ x

# Operators
l2 = L2(Op=Rop, b=y, niter=10, warm=True)
l1 = L1(sigma=5e-1)
_ = GeneralizedProximalGradient([l2, ], [l1, ],
x0=np.zeros(m),
tau=1.,
weights=[1., 1.])


@pytest.mark.parametrize("par", [(par1), (par2)])
def test_PG_GPG(par):
"""Check equivalency of ProximalGradient and GeneralizedProximalGradient when using
a single regularization term
"""
np.random.seed(0)
n, m = par['n'], par['m']

# Define sparse model
x = np.zeros(m)
x[2], x[4] = 1, 0.5

# Random mixing matrix
R = np.random.normal(0., 1., (n, m))
Rop = MatrixMult(R)

y = Rop @ x

# Step size
L = (Rop.H * Rop).eigs(1).real
tau = 0.99 / L

# PG
l2 = L2(Op=Rop, b=y, niter=10, warm=True)
l1 = L1(sigma=5e-1)
xpg = ProximalGradient(l2, l1, x0=np.zeros(m),
tau=tau, niter=100,
acceleration='fista')

# GPG
l2 = L2(Op=Rop, b=y, niter=10, warm=True)
l1 = L1(sigma=5e-1)
xgpg = GeneralizedProximalGradient([l2, ], [l1, ], x0=np.zeros(m),
tau=tau, niter=100,
acceleration='fista')

assert_array_almost_equal(xpg, xgpg, decimal=2)
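For completeness, here is a short sketch of how the new ``weights`` and ``eta`` arguments could be exercised with two regularization terms (the step size ``tau=1e-2`` and the sigma values are arbitrary illustrative choices, not tuned):

```python
import numpy as np
from pylops.basicoperators import MatrixMult
from pyproximal.proximal import L1, L2
from pyproximal.optimization.primal import GeneralizedProximalGradient

np.random.seed(0)
Rop = MatrixMult(np.random.normal(0., 1., (8, 10)))
y = Rop @ np.random.normal(0., 1., 10)

# one differentiable term, two proximable terms with equal weights
l2 = L2(Op=Rop, b=y, niter=10, warm=True)
l1a, l1b = L1(sigma=5e-1), L1(sigma=1e-1)
xinv = GeneralizedProximalGradient([l2], [l1a, l1b], x0=np.zeros(10),
                                   tau=1e-2, niter=100,
                                   weights=[0.5, 0.5], eta=1.)
```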