Skip to content

Commit

Permalink
Adding tron_qp_kernel (#45)
Browse files Browse the repository at this point in the history
  • Loading branch information
michel2323 authored Nov 10, 2022
1 parent 0c4c6e0 commit 1d48b74
Show file tree
Hide file tree
Showing 3 changed files with 154 additions and 2 deletions.
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@ version = "2.1.0"

[deps]
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
TOML = "fa267f1f-6049-4f14-aa54-33bafae1ed76"

[compat]
julia = "1.7"
Requires = "1"
TOML = "1"
julia = "1.7"

[extras]
AMDGPU = "21141c5a-9bdb-4563-92ae-f87d6854732e"
Expand Down
4 changes: 3 additions & 1 deletion src/KA/ExaTronKAKernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,6 @@ module ExaTronKAKernels
include("dtrpcg.jl")
include("dspcg.jl")
include("dtron.jl")
end
include("tron_qp_kernel.jl")
end

149 changes: 149 additions & 0 deletions src/KA/tron_qp_kernel.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
# Assume that scaling factor has already been applied to Q and c.
@inline function eval_qp_f_kernel(n::Int, x, Q, c, tx)
    # Objective value f(x) = (1/2)*xᵀQx + cᵀx, computed as a plain serial
    # reduction. The thread index `tx` is unused; it is kept so every
    # kernel helper shares a uniform signature.
    quad = 0.0
    @inbounds for col = 1:n
        for row = 1:n
            quad += x[row] * Q[row, col] * x[col]
        end
    end
    # Halve the quadratic term, then fold in the linear term column by column.
    f = 0.5 * quad
    @inbounds for col = 1:n
        f += c[col] * x[col]
    end
    return f
end

# Compute the QP gradient g = Qx + c cooperatively: thread `tx` owns entry
# g[tx]; threads with tx > n do no work but must still reach the barriers.
# NOTE(review): uses KernelAbstractions' @synchronize, so this must only be
# called from inside a @kernel body with workgroup size >= n — TODO confirm
# at the call sites.
@inline function eval_qp_grad_f_kernel(n::Int, x, g, Q, c, tx)
    # g = Qx + c
    @inbounds begin
        if tx <= n
            # Seed with the linear term before accumulating the Q*x row product.
            g[tx] = c[tx]
        end
        # Barrier: all entries of g initialized before any thread updates them.
        @synchronize
        if tx <= n
            # Row tx of Q dotted with x, accumulated into g[tx].
            for j=1:n
                g[tx] += Q[tx,j]*x[j]
            end
        end
        # Barrier: gradient fully formed before callers read g.
        @synchronize
    end
    return
end

# Solve the bound-constrained QP  min (1/2)xᵀAx + cᵀx  s.t.  xl <= x <= xu
# with ExaTron's trust-region Newton method (TRON), executed cooperatively by
# the threads of one workgroup; `tx` is this thread's index.
# The top-of-file comment states A (=Q) and c are pre-scaled by the caller;
# NOTE(review): the `scale` parameter is not read anywhere in this body —
# confirm whether it is intentionally unused or dead.
# Returns (status, minor_iter): status == 1 when the max_minor limit was hit,
# status == 0 otherwise. The solution is left in `x` (mutated in place).
@inline function tron_qp_kernel(n::Int, max_feval::Int, max_minor::Int, gtol::Float64, scale::Float64,
    x, xl, xu,
    A, c, tx)

    # Workgroup-shared scratch required by ExaTron.dtron: gradient, iterate
    # copy, step, work vectors, free-variable bookkeeping, and reverse-
    # communication save areas (isave/dsave).
    g = @localmem Float64 (n,)
    xc = @localmem Float64 (n,)
    s = @localmem Float64 (n,)
    wa = @localmem Float64 (n,)
    wa1 = @localmem Float64 (n,)
    wa2 = @localmem Float64 (n,)
    wa3 = @localmem Float64 (n,)
    wa4 = @localmem Float64 (n,)
    wa5 = @localmem Float64 (n,)
    gfree = @localmem Float64 (n,)
    dsave = @localmem Float64 (3,)
    indfree = @localmem Int (n,)
    iwa = @localmem Int (2*n,)
    isave = @localmem Int (3,)

    # Shared n×n factorization workspace: B holds the (Hessian-related)
    # matrix dtron works on, L its factor.
    B = @localmem Float64 (n,n)
    L = @localmem Float64 (n,n)

    # Zero the shared matrices: thread tx clears row tx of B and L.
    if tx <= n
        @inbounds begin
            for j=1:n
                B[tx,j] = 0.0
                L[tx,j] = 0.0
            end
        end
    end
    # Barrier: B and L fully zeroed before dtron touches them.
    @synchronize

    # Reverse-communication task code (0 = start; see branch comments below)
    # and our own exit status. Each thread keeps its own copy of these
    # scalars; they stay in lockstep because all threads run the same
    # scalar code on the same data.
    task = 0
    status = 0

    # TRON tolerances: trust-region radius (set from ||g|| on first pass),
    # absolute/relative f tolerances, lower bound on f, CG forcing term,
    # and CG iteration cap.
    delta = 0.0
    fatol = 0.0
    frtol = 1e-12
    fmin = -1e32
    cgtol = 0.1
    cg_itermax = n

    # Counters: function/gradient/Hessian evaluations and minor iterations.
    f = 0.0
    nfev = 0
    ngev = 0
    nhev = 0
    minor_iter = 0
    search = true

    while search

        # [0|1]: Evaluate function.

        if task == 0 || task == 1
            f = eval_qp_f_kernel(n, x, A, c, tx)
            nfev += 1
            # NOTE(review): hitting max_feval only stops the loop after this
            # iteration completes (gradient eval and dtron below still run
            # unless search is rechecked) — confirm this is intended.
            if nfev >= max_feval
                search = false
            end
        end

        # [2] G or H: Evaluate gradient and Hessian.

        if task == 0 || task == 2
            eval_qp_grad_f_kernel(n, x, g, A, c, tx)
            # We do not have to evaluate Hessian since A does not change.
            ngev += 1
            nhev += 1
            minor_iter += 1
        end

        # Initialize the trust region bound.

        if task == 0
            # First pass only: delta starts at ||g||_2.
            gnorm0 = dnrm2(n, g, 1, tx)
            delta = gnorm0
        end

        # Call Tron.

        if search
            # Reverse-communication step: dtron advances its internal state,
            # updates delta, and hands back the next task code.
            delta, task = ExaTron.dtron(n, x, xl, xu, f, g, A, frtol, fatol, fmin, cgtol,
                cg_itermax, delta, task, B, L, xc, s, indfree, gfree,
                isave, dsave, wa, iwa, wa1, wa2, wa3, wa4, wa5, tx)
        end

        # [3] NEWX: a new point was computed.

        if task == 3
            # Projected-gradient infinity norm — the convergence measure for
            # the bound-constrained problem.
            gnorm_inf = ExaTron.dgpnorm(n, x, xl, xu, g, tx)

            if gnorm_inf <= gtol
                # Force the convergence branch below.
                task = 4
            end

            if minor_iter >= max_minor
                # Iteration limit reached: report via status and stop.
                status = 1
                search = false
            end
        end

        # [4] CONV: convergence was achieved.
        # [10] : warning fval is less than fmin

        if task == 4 || task == 10
            search = false
        end
    end

    # Barrier: all threads leave with x (and shared scratch) in a consistent
    # state before the caller proceeds.
    @synchronize

    return status, minor_iter
end

0 comments on commit 1d48b74

Please sign in to comment.