From 1d48b748d1ad5c786683e88652c773b1ce27c73f Mon Sep 17 00:00:00 2001
From: Michel Schanen
Date: Thu, 10 Nov 2022 10:59:54 -0600
Subject: [PATCH] Adding tron_qp_kernel (#45)

---
 Project.toml               |   3 +-
 src/KA/ExaTronKAKernels.jl |   4 +-
 src/KA/tron_qp_kernel.jl   | 149 +++++++++++++++++++++++++++++++++++++
 3 files changed, 154 insertions(+), 2 deletions(-)
 create mode 100644 src/KA/tron_qp_kernel.jl

diff --git a/Project.toml b/Project.toml
index f516fda..d3d388e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -5,6 +5,7 @@ version = "2.1.0"
 
 [deps]
 DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
+KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
@@ -12,9 +13,9 @@ Requires = "ae029012-a4dd-5104-9daa-d747884805df"
 TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
 
 [compat]
-julia = "1.7"
 Requires = "1"
 TOML = "1"
+julia = "1.7"
 
 [extras]
 AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
diff --git a/src/KA/ExaTronKAKernels.jl b/src/KA/ExaTronKAKernels.jl
index a1c6319..1bb456e 100644
--- a/src/KA/ExaTronKAKernels.jl
+++ b/src/KA/ExaTronKAKernels.jl
@@ -23,4 +23,6 @@ module ExaTronKAKernels
     include("dtrpcg.jl")
     include("dspcg.jl")
     include("dtron.jl")
-end
\ No newline at end of file
+    include("tron_qp_kernel.jl")
+end
+
diff --git a/src/KA/tron_qp_kernel.jl b/src/KA/tron_qp_kernel.jl
new file mode 100644
index 0000000..04c3040
--- /dev/null
+++ b/src/KA/tron_qp_kernel.jl
@@ -0,0 +1,149 @@
+# Assume that the scaling factor has already been applied to Q and c.
+@inline function eval_qp_f_kernel(n::Int, x, Q, c, tx)
+    # f = x'*Q*x/2 + c'*x; every thread evaluates the full objective redundantly.
+    f = 0.0
+    @inbounds begin
+        for j=1:n
+            for i=1:n
+                f += x[i]*Q[i,j]*x[j]
+            end
+        end
+        f *= 0.5
+        for j=1:n
+            f += c[j]*x[j]
+        end
+    end
+    return f
+end
+
+@inline function eval_qp_grad_f_kernel(n::Int, x, g, Q, c, tx)
+    # g = Q*x + c; thread tx computes the single entry g[tx].
+
+    @inbounds begin
+        if tx <= n
+            g[tx] = c[tx]
+        end
+        @synchronize
+        if tx <= n
+            for j=1:n
+                g[tx] += Q[tx,j]*x[j]
+            end
+        end
+        @synchronize
+    end
+    return
+end
+
+@inline function tron_qp_kernel(n::Int, max_feval::Int, max_minor::Int, gtol::Float64, scale::Float64,
+                                x, xl, xu,
+                                A, c, tx)
+
+    g = @localmem Float64 (n,)
+    xc = @localmem Float64 (n,)
+    s = @localmem Float64 (n,)
+    wa = @localmem Float64 (n,)
+    wa1 = @localmem Float64 (n,)
+    wa2 = @localmem Float64 (n,)
+    wa3 = @localmem Float64 (n,)
+    wa4 = @localmem Float64 (n,)
+    wa5 = @localmem Float64 (n,)
+    gfree = @localmem Float64 (n,)
+    dsave = @localmem Float64 (3,)
+    indfree = @localmem Int (n,)
+    iwa = @localmem Int (2*n,)
+    isave = @localmem Int (3,)
+
+    B = @localmem Float64 (n,n)
+    L = @localmem Float64 (n,n)
+
+    if tx <= n
+        @inbounds begin
+            for j=1:n
+                B[tx,j] = 0.0
+                L[tx,j] = 0.0
+            end
+        end
+    end
+    @synchronize
+
+    task = 0
+    status = 0
+
+    delta = 0.0
+    fatol = 0.0
+    frtol = 1e-12
+    fmin = -1e32
+    cgtol = 0.1
+    cg_itermax = n
+
+    f = 0.0
+    nfev = 0
+    ngev = 0
+    nhev = 0
+    minor_iter = 0
+    search = true
+
+    while search
+
+        # [0|1]: Evaluate function.
+
+        if task == 0 || task == 1
+            f = eval_qp_f_kernel(n, x, A, c, tx)
+            nfev += 1
+            if nfev >= max_feval
+                search = false
+            end
+        end
+
+        # [2] G or H: Evaluate gradient and Hessian.
+
+        if task == 0 || task == 2
+            eval_qp_grad_f_kernel(n, x, g, A, c, tx)
+            # We do not have to evaluate the Hessian since A does not change.
+            ngev += 1
+            nhev += 1
+            minor_iter += 1
+        end
+
+        # Initialize the trust-region bound to the norm of the initial gradient.
+
+        if task == 0
+            gnorm0 = dnrm2(n, g, 1, tx)
+            delta = gnorm0
+        end
+
+        # Call Tron.
+
+        if search
+            delta, task = dtron(n, x, xl, xu, f, g, A, frtol, fatol, fmin, cgtol,
+                                cg_itermax, delta, task, B, L, xc, s, indfree, gfree,
+                                isave, dsave, wa, iwa, wa1, wa2, wa3, wa4, wa5, tx)
+        end
+
+        # [3] NEWX: a new point was computed.
+
+        if task == 3
+            gnorm_inf = dgpnorm(n, x, xl, xu, g, tx)
+
+            if gnorm_inf <= gtol
+                task = 4
+            end
+
+            if minor_iter >= max_minor
+                status = 1
+                search = false
+            end
+        end
+
+        # [4] CONV: convergence was achieved.
+        # [10]: warning, fval is less than fmin.
+
+        if task == 4 || task == 10
+            search = false
+        end
+    end
+
+    @synchronize
+
+    return status, minor_iter
+end
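
A brief usage note may help readers of this patch. tron_qp_kernel and its helpers are device-side routines: they expect to run inside a KernelAbstractions kernel, with all threads of a workgroup entering them cooperatively and tx holding each thread's local index. The sketch below shows one plausible way to wrap and launch them, solving one dense n-by-n QP per workgroup with one thread per variable. It is a minimal sketch, not API introduced by this patch: the wrapper name batch_tron_qp!, the batched array layout (one column or slice per QP), and the CPU() backend are illustrative assumptions.

    using KernelAbstractions

    @kernel function batch_tron_qp!(status, minor_iter, n::Int, max_feval::Int,
                                    max_minor::Int, gtol::Float64,
                                    x, xl, xu, A, c)
        blk = @index(Group, Linear)   # one QP instance per workgroup
        tx  = @index(Local, Linear)   # one thread per variable, tx in 1:n

        # scale = 1.0 assumes A and c were pre-scaled, per the header comment
        # of tron_qp_kernel.jl. The module qualification assumes ExaTron's KA
        # module is in scope.
        st, mi = ExaTronKAKernels.tron_qp_kernel(
            n, max_feval, max_minor, gtol, 1.0,
            view(x, :, blk), view(xl, :, blk), view(xu, :, blk),
            view(A, :, :, blk), view(c, :, blk), tx)

        if tx == 1
            status[blk] = st
            minor_iter[blk] = mi
        end
    end

    # Hypothetical launch over nblk independent QPs (workgroup size = n):
    #
    #   kernel! = batch_tron_qp!(CPU(), n)
    #   kernel!(status, minor_iter, n, 500, 200, 1e-6, x, xl, xu, A, c;
    #           ndrange = n * nblk)
    #   KernelAbstractions.synchronize(CPU())   # API of recent KA releases

Because B, L, and all of the work arrays live in @localmem, each workgroup's QP must fit within the backend's local-memory budget; the kernel is therefore suited to batches of many small QPs rather than one large problem.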