diff --git a/experiments/cubic_sparse_logreg/runme.jl b/experiments/cubic_sparse_logreg/runme.jl index 69d2d22..a50b8a9 100644 --- a/experiments/cubic_sparse_logreg/runme.jl +++ b/experiments/cubic_sparse_logreg/runme.jl @@ -66,61 +66,73 @@ function run_cubic_logreg_data( f = Cubic(Q, q, lam) g = ProximalCore.Zero() + + x0 = zeros(n) + x_pert = x0 + randn(size(x0)) + + grad_x, _ = ProximalCore.gradient(f, x0) + grad_x_pert, _ = ProximalCore.gradient(f, x_pert) + gam_init = norm(x0 - x_pert)^2 / dot(grad_x - grad_x_pert, x0 - x_pert) + # run algorithm with 1/10 the tolerance to get "accurate" solution sol, numit = AdaProx.adaptive_proxgrad( - zeros(n), + x0, f = f, g = g, - rule = AdaProx.OurRule(gamma = 1.0), + rule = AdaProx.OurRule(gamma = gam_init), tol = tol / 10, maxit = maxit * 10, name = nothing, ) - - sol, numit = AdaProx.backtracking_proxgrad( - zeros(n), - f = AdaProx.Counting(f), - g = g, - gamma0 = 1.0, - tol = tol, - maxit = maxit, - name = "PGM (backtracking)", - ) + xi_values = [1, 1.5, 2] + for xi = xi_values + sol, numit = AdaProx.backtracking_proxgrad( + x0, + f = AdaProx.Counting(f), + g = g, + gamma0 = gam_init, + xi = xi, + tol = tol, + maxit = maxit, + name = "PGM (backtracking)-(xi=$(xi))", + ) + end sol, numit = AdaProx.backtracking_nesterov( - zeros(n), + x0, f = AdaProx.Counting(f), g = g, - gamma0 = 1.0, + gamma0 = gam_init, tol = tol, maxit = maxit, name = "Nesterov (backtracking)", ) sol, numit = AdaProx.adaptive_proxgrad( - zeros(n), + x0, f = AdaProx.Counting(f), g = g, - rule = AdaProx.MalitskyMishchenkoRule(gamma = 1.0), + rule = AdaProx.MalitskyMishchenkoRule(gamma = gam_init), tol = tol, maxit = maxit, name = "AdaPGM (MM)", ) sol, numit = AdaProx.adaptive_proxgrad( - zeros(n), + x0, f = AdaProx.Counting(f), g = g, - rule = AdaProx.OurRule(gamma = 1.0), + rule = AdaProx.OurRule(gamma = gam_init), tol = tol, maxit = maxit, name = "AdaPGM (Ours)", ) sol, numit = AdaProx.agraal( - zeros(n), + x0, f = AdaProx.Counting(f), g = g, + gamma0 = gam_init, tol = tol, maxit = maxit, name = "aGRAAL" diff --git a/experiments/lasso/runme.jl b/experiments/lasso/runme.jl index f09f138..82088c5 100644 --- a/experiments/lasso/runme.jl +++ b/experiments/lasso/runme.jl @@ -90,7 +90,7 @@ function run_random_lasso(; gamma = gam_init, tol = tol, maxit = maxit, - name = "PGM (1/Lf)" + name = "PGM (fixed)" ) xi_values = [1, 1.5, 2] @@ -117,6 +117,17 @@ function run_random_lasso(; name = "Nesterov (backtracking)" ) + + sol, numit = AdaProx.fixed_nesterov( + zeros(n), + f = AdaProx.Counting(f), + g = g, + gamma = gam_init, + tol = tol, + maxit = maxit, + name = "Nesterov (fixed)" + ) + sol, numit = AdaProx.adaptive_proxgrad( zeros(n), f = AdaProx.Counting(f), @@ -142,6 +153,7 @@ function run_random_lasso(; zeros(n), f = AdaProx.Counting(f), g = g, + gamma0 = gam_init, tol = tol, maxit = maxit, name = "aGRAAL" diff --git a/experiments/sparse_logreg/runme.jl b/experiments/sparse_logreg/runme.jl index 1923e2d..06317d5 100644 --- a/experiments/sparse_logreg/runme.jl +++ b/experiments/sparse_logreg/runme.jl @@ -56,12 +56,14 @@ function run_logreg_l1_data( Lf = norm(X1 * X1') / 4 / m x0 = zeros(n) + + gam_init = 1.0 / Lf # run algorithm with 1/10 the tolerance to get "accurate" solution sol, numit = AdaProx.adaptive_proxgrad( x0, f = f, g = g, - rule = AdaProx.OurRule(gamma = 1.0), + rule = AdaProx.OurRule(gamma = gam_init), tol = tol / 10, maxit = maxit * 10, name = nothing @@ -71,7 +73,7 @@ function run_logreg_l1_data( x0, f = AdaProx.Counting(f), g = g, - gamma = 1.0 / Lf, + gamma = gam_init, tol = tol, maxit = maxit, name = "PGM (1/Lf)" @@ -83,7 +85,7 @@ function run_logreg_l1_data( zeros(n), f = AdaProx.Counting(f), g = g, - gamma0 = 5.0, + gamma0 = gam_init, xi = xi, #increase in stepsize tol = tol, maxit = maxit/2, @@ -95,17 +97,26 @@ function run_logreg_l1_data( x0, f = AdaProx.Counting(f), g = g, - gamma0 = 5.0, + gamma0 = gam_init, tol = tol, maxit = maxit/2, name = "Nesterov (backtracking)" ) + sol, numit = AdaProx.fixed_nesterov( + x0, + f = AdaProx.Counting(f), + g = g, + gamma = gam_init, + tol = tol, + maxit = maxit/2, + name = "Nesterov (fixed)" + ) sol, numit = AdaProx.adaptive_proxgrad( x0, f = AdaProx.Counting(f), g = g, - rule = AdaProx.MalitskyMishchenkoRule(gamma = 1.0), + rule = AdaProx.MalitskyMishchenkoRule(gamma = gam_init), tol = tol, maxit = maxit, name = "AdaPGM (MM)" @@ -115,7 +126,7 @@ function run_logreg_l1_data( x0, f = AdaProx.Counting(f), g = g, - rule = AdaProx.OurRule(gamma = 1.0), + rule = AdaProx.OurRule(gamma = gam_init), tol = tol, maxit = maxit, name = "AdaPGM (Ours)" @@ -125,6 +136,7 @@ function run_logreg_l1_data( x0, f = AdaProx.Counting(f), g = g, + gamma0 = gam_init, tol = tol, maxit = maxit, name = "aGRAAL"