diff --git a/.coin-or/projDesc.xml b/.coin-or/projDesc.xml index 79a87a858..5bce386cb 100644 --- a/.coin-or/projDesc.xml +++ b/.coin-or/projDesc.xml @@ -199,7 +199,7 @@ - A sparse linear solver (MA27, MA57, HSL_MA86, HSL_MA97, WSMP, Pardiso, MUMPS) + A sparse linear solver (MA27, MA57, HSL_MA86, HSL_MA97, WSMP, Pardiso, MUMPS, SPRAL) diff --git a/COMPILE.md b/COMPILE.md new file mode 100644 index 000000000..06acbea7d --- /dev/null +++ b/COMPILE.md @@ -0,0 +1,88 @@ +This is a guide detailing the compilation of Ipopt with SPRAL as a linear solver. +It was developed assuming a standard installation of Ubuntu 18.04 LTS. +To begin, first, compile the [LANL ANSI version of SPRAL](https://github.com/lanl-ansi/spral) using the [compilation suggestions described therein](https://github.com/lanl-ansi/spral/blob/master/COMPILE.md). + +## Cloning the Repository +First, create a directory where Ipopt will be compiled from source (not via `coinbrew`), e.g., +```bash +mkdir -p ${HOME}/Software +``` +The remainder of this guide assumes such a directory has been created. +Then, clone the Ipopt repository via +```bash +cd ${HOME}/Software +git clone https://github.com/lanl-ansi/Ipopt.git +``` + +## Rebuilding Configuration Files (optional) +To rebuild configuration files for Ipopt, if needed (e.g., during development), execute +```bash +cd ${HOME}/Software/Ipopt +git clone https://github.com/coin-or-tools/BuildTools.git +export COIN_AUTOTOOLS_DIR="${HOME}/local2" +./BuildTools/install_autotools.sh +./BuildTools/run_autotools +``` +If you are not modifying the directory or source structure of Ipopt, this step is not required. + +## Compilation with SPRAL +### Multicore CPUs Only +To compile Ipopt with SPRAL (CPU support only), specify `${SPRALDIR}` as the directory containing `lib/libspral.a`, then execute +```bash +cd ${HOME}/Software/Ipopt +mkdir build +cd build +../configure --prefix=${PWD} --with-spral="-L${SPRALDIR}/lib -L${METISDIR}/lib \ + -lspral -lgfortran -lhwloc -lm -lcoinmetis -lopenblas -lstdc++ -fopenmp" \ + --with-lapack-lflags="-llapack -lopenblas" +make && make install +``` + +### Multicore CPUs and NVIDIA GPUs +To compile with GPU support, execute +```bash +cd ${HOME}/Software/Ipopt +mkdir build +cd build +../configure --prefix=${PWD} --with-spral="-L${SPRALDIR}/lib -L${METISDIR}/lib \ + -lspral -lgfortran -lhwloc -lm -lcoinmetis -lopenblas -lstdc++ -fopenmp \ + -lcudadevrt -lcudart -lcuda -lcublas" --with-lapack-lflags="-llapack -lopenblas" +make && make install +``` + +## Usage +Ensure the following environment variables are set when using the SPRAL library: +```bash +export OMP_CANCELLATION=TRUE +export OMP_NESTED=TRUE +export OMP_PROC_BIND=TRUE +``` + +## Testing +Within the `build` directory created above, the `examples/ScalableProblems` directory contains a set of scalable test problems. +After compilation of Ipopt, these examples can be compiled via +```bash +cd ${HOME}/Software/Ipopt/build +cd examples/ScalableProblems && make +``` +As an example, if Ipopt was compiled with SPRAL support, try creating a file named `ipopt.opt` in this directory with the contents +``` +linear_solver spral +spral_use_gpu no +``` +Then, solve a test problem, e.g., +```bash +time ./solve_problem MBndryCntrl1 768 +``` +If SPRAL was compiled with GPU support, next try modifying the `ipopt.opt` file to contain +``` +linear_solver spral +spral_use_gpu yes +``` +Then, solve the same test problem, e.g., +```bash +time ./solve_problem MBndryCntrl1 768 +``` +The `real` time required by the solver should typically decrease on very large, dense problems, compared with a solve using `spral_use_gpu no`. +If this is not the case, ensure your GPU is actually being recognized by SPRAL. +This issue is briefly discussed near the end of the associated [SPRAL compilation guide](https://github.com/lanl-ansi/spral/blob/master/COMPILE.md#multicore-cpus-and-nvidia-gpus-optional). diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 000000000..5a93c110e --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,13 @@ +Copyright (c) 2020. Triad National Security, LLC. All rights reserved. + +This program was produced under U.S. Government contract 89233218CNA000001 for Los Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC for the U.S. Department of Energy/National Nuclear Security Administration. All rights in the program are reserved by Triad National Security, LLC, and the U.S. Department of Energy/National Nuclear Security Administration. The Government is granted for itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide license in this material to reproduce, prepare derivative works, distribute copies to the public, perform publicly and display publicly, and to permit others to do so. + +This program is open source under the BSD-3 License. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. +3. Neither the name of the the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/configure b/configure index e5ac5b106..4b0ff7769 100755 --- a/configure +++ b/configure @@ -667,6 +667,8 @@ BIT32FCOMMENT BITS_PER_POINTER HAVE_WSMP_FALSE HAVE_WSMP_TRUE +HAVE_SPRAL_FALSE +HAVE_SPRAL_TRUE HAVE_PARDISO_FALSE HAVE_PARDISO_TRUE HAVE_MA28_FALSE @@ -881,6 +883,7 @@ with_hsl with_hsl_lflags with_hsl_cflags with_pardiso +with_spral with_wsmp enable_inexact_solver enable_java @@ -1608,6 +1611,7 @@ Optional Packages: directories.) --with-pardiso specify Pardiso library (>= 4.0) from pardiso-project.org + --with-spral specify SPRAL library --with-wsmp specify WSMP library Some influential environment variables: @@ -24258,6 +24262,107 @@ else fi +######### +# SPRAL # +######### + + +# Check whether --with-spral was given. +if test "${with_spral+set}" = set; then : + withval=$with_spral; have_spral=yes; spral_lflags=$withval +else + have_spral=no +fi + + +if test "$have_spral" = "yes"; then + + ac_save_LIBS="$LIBS" + LIBS="$spral_lflags $LIBS" + + + spral_ssids_analyse_namemangling=unknown + + for ac_extra in "no extra underscore" ; do + for ac_case in "lower case" "upper case" ; do + for ac_trail in "underscore" "no underscore" ; do + case $ac_case in + "lower case") + ac_name=spral_ssids_analyse + ;; + "upper case") + ac_name=SPRAL_SSIDS_ANALYSE + ;; + esac + if test "$ac_trail" = underscore ; then + ac_name=${ac_name}_ + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: checking for function $ac_name in $LIBS" >&5 +$as_echo_n "checking for function $ac_name in $LIBS... " >&6; } + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. */ +#ifdef __cplusplus +extern "C" +#endif +char $ac_name (); +#ifdef F77_DUMMY_MAIN + +# ifdef __cplusplus + extern "C" +# endif + int F77_DUMMY_MAIN() { return 1; } + +#endif +int +main () +{ +return $ac_name (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + spral_ssids_analyse_namemangling="${ac_case}, ${ac_trail}, ${ac_extra}" + ac_success=yes +else + ac_success=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_success" >&5 +$as_echo "$ac_success" >&6; } + if test $ac_success = yes ; then + break 3 + fi + done + done + done + LIBS=$ac_save_LIBS + + if test $ac_success = yes ; then + IPOPTLIB_LFLAGS="$spral_lflags $IPOPTLIB_LFLAGS" + +$as_echo "#define IPOPT_HAS_SPRAL 1" >>confdefs.h + + + else as_fn_error $? "Symbol ssids_solve not found with SPRAL flags $spral_lflags." "$LINENO" 5 + fi + +fi + + if test $have_spral = yes; then + HAVE_SPRAL_TRUE= + HAVE_SPRAL_FALSE='#' +else + HAVE_SPRAL_TRUE='#' + HAVE_SPRAL_FALSE= +fi + + ######## # WSMP # ######## @@ -24898,7 +25003,7 @@ else JAVA_TEST=Test.java CLASS_TEST=Test.class cat << \EOF > $JAVA_TEST -/* #line 24901 "configure" */ +/* #line 25006 "configure" */ public class Test { } EOF @@ -25384,7 +25489,7 @@ else JAVA_TEST=Test.java CLASS_TEST=Test.class cat << \EOF > $JAVA_TEST -/* #line 25387 "configure" */ +/* #line 25492 "configure" */ public class Test { } EOF @@ -25418,7 +25523,7 @@ JAVA_TEST=Test.java CLASS_TEST=Test.class TEST=Test cat << \EOF > $JAVA_TEST -/* [#]line 25421 "configure" */ +/* [#]line 25526 "configure" */ public class Test { public static void main (String args[]) { System.exit (0); @@ -26434,6 +26539,10 @@ if test -z "${HAVE_PARDISO_TRUE}" && test -z "${HAVE_PARDISO_FALSE}"; then as_fn_error $? "conditional \"HAVE_PARDISO\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi +if test -z "${HAVE_SPRAL_TRUE}" && test -z "${HAVE_SPRAL_FALSE}"; then + as_fn_error $? "conditional \"HAVE_SPRAL\" was never defined. +Usually this means the macro was only invoked conditionally." "$LINENO" 5 +fi if test -z "${HAVE_WSMP_TRUE}" && test -z "${HAVE_WSMP_FALSE}"; then as_fn_error $? "conditional \"HAVE_WSMP\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 diff --git a/configure.ac b/configure.ac index 567f28570..ba0faff41 100644 --- a/configure.ac +++ b/configure.ac @@ -147,6 +147,25 @@ fi AM_CONDITIONAL([HAVE_PARDISO],[test "$have_pardiso_mkl$have_pardiso_project" != nono]) +######### +# SPRAL # +######### + +AC_ARG_WITH([spral], + AC_HELP_STRING([--with-spral],[specify SPRAL library]), + [have_spral=yes; spral_lflags=$withval], + [have_spral=no]) + +if test "$have_spral" = "yes"; then + AC_COIN_TRY_LINK([spral_ssids_analyse],[$spral_lflags],[], + [IPOPTLIB_LFLAGS="$spral_lflags $IPOPTLIB_LFLAGS" + AC_DEFINE(IPOPT_HAS_SPRAL,1,[Define to 1 if SPRAL is available]) + ], + [AC_MSG_ERROR([Symbol ssids_solve not found with SPRAL flags $spral_lflags.])]) +fi + +AM_CONDITIONAL([HAVE_SPRAL],[test $have_spral = yes]) + ######## # WSMP # ######## diff --git a/doc/install.dox b/doc/install.dox index f2274d083..2cb3dd3b7 100644 --- a/doc/install.dox +++ b/doc/install.dox @@ -339,6 +339,39 @@ that is included in Intel MKL, it is sufficient to ensure that MKL is used for the linear algebra routines (Blas/Lapack), see \ref EXTERNALCODE_LINALG. +\subsection DOWNLOAD_SPRAL SPRAL (Sparse Parallel Robust Algorithms Library) + +To compile \Ipopt with the linear solver SPRAL, the [LANL ANSI version of +SPRAL](https://github.com/lanl-ansi/spral) should first be downloaded and +compiled according to the associated SPRAL +compilation guide. After compilation of SPRAL, while configuring \Ipopt, +you need to specify the link flags for the library with the `--with-spral` +flag, including required additional libraries and flags. For example, if you +want to compile \Ipopt with a purely CPU-parallel version of SPRAL (located in +`$HOME/lib`) on a GNU/Linux system, you should add the following flags to the +`configure` command: +\verbatim +--with-spral="-L${HOME}/lib -lspral -lgfortran -lhwloc -lm -lcoinmetis \ + -lopenblas -lstdc++ -fopenmp" +\endverbatim +For a CPU and graphics processing unit- (GPU-) parallel version of SPRAL +(located in `$HOME/lib`), the following flags should be used, instead: +\verbatim +--with-spral="-L${HOME}/lib -lspral -lgfortran -lhwloc -lm -lcoinmetis \ + -lopenblas -lstdc++ -fopenmp -lcudadevrt -lcudart -lcuda -lcublas" +\endverbatim +The above assume a SPRAL compilation using OpenBLAS libraries. For best +performance, use SPRAL together with linear algebra routines from the Intel MKL +(see \ref EXTERNALCODE_LINALG). + +Finally, ensure the following environment variables are employed when using SPRAL +\verbatim +export OMP_CANCELLATION=TRUE +export OMP_NESTED=TRUE +export OMP_PROC_BIND=TRUE +\endverbatim + \subsection DOWNLOAD_WSMP WSMP (Watson Sparse Matrix Package) If you would like to compile \Ipopt with WSMP, you need to obtain the @@ -399,7 +432,6 @@ to `libhsl.so`. If you have problems compiling this feature, you can disable this by specifying `--disable-linear-solver-loader` for the `configure` script. - \section GETIPOPT Getting the Ipopt Code \Ipopt is available from the COIN-OR group at GitHub. You can either diff --git a/doc/main.dox b/doc/main.dox index 25e3b262e..8409d372b 100644 --- a/doc/main.dox +++ b/doc/main.dox @@ -143,6 +143,8 @@ required: or Intel MKL. Note that current versions from Pardiso Project typically offer much better performance than the one from Intel MKL. + - [SPRAL](https://github.com/lanl-ansi/spral) (Sparse Parallel Robust Algorithms Library) + - [WSMP](http://researcher.ibm.com/view_project.php?id=1426) (Watson Sparse Matrix Package) You must include at least one of the linear solvers above in order @@ -164,8 +166,8 @@ required: this in mind, particularly when you are comparing \Ipopt with other optimization codes. - If you are compiling MA57, HSL_MA77, HSL_MA86, HSL_MA97, or MUMPS - within the \Ipopt build system, you should also include + If you are compiling MA57, HSL_MA77, HSL_MA86, HSL_MA97, MUMPS, + or SPRAL within the \Ipopt build system, you should also include the METIS linear system ordering package. Interfaces to other linear solvers might be added in the future; if diff --git a/doc/options.dox b/doc/options.dox index e4aae8fc5..92b4644e0 100644 --- a/doc/options.dox +++ b/doc/options.dox @@ -780,6 +780,7 @@ Possible values: - ma86: use the Harwell routine HSL_MA86 - ma97: use the Harwell routine HSL_MA97 - pardiso: use the Pardiso package + - spral: use the SPRAL package - wsmp: use WSMP package - mumps: use MUMPS package - custom: use custom linear solver @@ -1513,6 +1514,222 @@ Possible values: - five: undocumented +\subsection OPT_SPRAL_Linear_Solver SPRAL Linear Solver + +\anchor OPT_spral_print_level + spral_print_level: Print level for the linear solver SPRAL. +
+ The valid range for this integer option is unrestricted, and its default value is 0. + + Possible values: + - < 0 No printing. + - 0 Error and warning messages only. + - 1 Limited diagnostic printing. + - > 1 Additional diagnostic printing. + +
+ +\anchor OPT_spral_nemin + spral_nemin: Node amalgamation parameter. +
+ Two nodes in the elimination tree are merged if the result has fewer than + spral_nemin variables. The lower bound for this integer option is 1 and its + default value is 32. + +
+ +\anchor OPT_spral_order + spral_order: Controls type of ordering used by SPRAL. +
+ The default value for this string option is "matching". + +Possible values: + - metis: Use METIS with default settings. + - matching: Use matching-based elimination ordering. +
+ +\anchor OPT_spral_scaling + spral_scaling: Specifies strategy for scaling in SPRAL linear solver. +
+ The default value for this string option is "matching". + +Possible values: + - none: Do not scale the linear system matrix. + - mc64: Scale using weighted bipartite matching (MC64). + - auction: Scale using the auction algorithm. + - matching: Scale using the matching-based ordering. + - ruiz: Scale using the norm-equilibration algorithm of Ruiz (MC77). + - dynamic: Dynamically select scaling according to switch options. +
+ +\anchor OPT_spral_use_gpu + spral_use_gpu: Specifies whether or not graphics processing units (GPUs) are used by the SPRAL linear solver. +
+ The default value for this string option is "yes". + +Possible values: + - no: Do not use GPUs, even if present. + - yes: Use GPUs if present. +
+ +\anchor OPT_spral_gpu_perf_coeff + spral_gpu_perf_coeff: GPU performance coefficient. +
+ How many times faster a GPU is than a CPU at factoring a subtree. + The lower bound for this real option is 0 and its default value is 1. +
+ +\anchor OPT_spral_min_gpu_work + spral_min_gpu_work: Minimum GPU work. +
+ Minimum number of FLOPS in subtree before scheduling on GPU. + The strict lower bound for this real option is 0 and its default value is 5 * 109. +
+ +\anchor OPT_spral_ignore_numa + spral_ignore_numa: Non-uniform memory access (NUMA) region setting. +
+ The default value for this string option is "yes". + +Possible values: + - no: Do not treat CPUs and GPUs as belonging to a single NUMA region. + - yes: Treat CPUs and GPUs as belonging to a single NUMA region. +
+ +\anchor OPT_spral_cpu_block_size + spral_cpu_block_size: CPU parallelization block size. +
+ Block size to use for parallelization of large nodes on CPU resources. + The lower bound for this integer option is 1 and its default value is 256. +
+ +\anchor OPT_spral_max_load_inbalance + spral_max_load_inbalance: Maximum permissible load. +
+ Maximum permissible load inbalance for leaf subtree allocations. + The lower bound for this real option is 1 and its default value is 1.2. +
+ +\anchor OPT_spral_small + spral_small: Zero pivot threshold. +
+ Any pivot less than spral_small is treated as zero. The lower bound for this real option is 0 and its default value is 10-20. +
+ +\anchor OPT_spral_u + spral_u: Pivoting threshold +
+ Relative pivot threshold. See SPRAL documentation. The valid range for this + real option is 0 ≤ spral_u ≤ 0.5 and its default value is + 10-8. +
+ +\anchor OPT_spral_umax + spral_umax: Maximum pivoting threshold +
+ See SPRAL documentation. The valid range for this real option is 0 ≤ spral_umax ≤ 0.5 and its default value is 0.0001. +
+ +\anchor OPT_spral_scaling_1 + spral_scaling_1: First scaling. +
+ If spral_scaling = dynamic, this scaling is used according to the trigger + spral_switch_1. If spral_switch_2 is triggered, it is disabled. The default + value for this string option is "matching". + +Possible values: + - none: Do not scale the linear system matrix. + - mc64: Scale using weighted bipartite matching (MC64). + - auction: Scale using the auction algorithm. + - matching: Scale using the matching-based ordering. + - ruiz: Scale using the norm-equilibration algorithm of Ruiz (MC77). +
+ +\anchor OPT_spral_scaling_2 + spral_scaling_2: Second scaling. +
+ If spral_scaling = dynamic, this scaling is used according to the trigger + spral_switch_2. If spral_switch_3 is triggered, it is disabled. The default + value for this string option is "mc64". + +Possible values: + - none: Do not scale the linear system matrix. + - mc64: Scale using weighted bipartite matching (MC64). + - auction: Scale using the auction algorithm. + - matching: Scale using the matching-based ordering. + - ruiz: Scale using the norm-equilibration algorithm of Ruiz (MC77). +
+ +\anchor OPT_spral_scaling3 + spral_scaling3: Third scaling. +
+ If spral_scaling = dynamic, this scaling is used according to the trigger + spral_switch_3. The default value for this string option is "none". + +Possible values: + - none: Do not scale the linear system matrix. + - mc64: Scale using weighted bipartite matching (MC64). + - auction: Scale using the auction algorithm. + - matching: Scale using the matching-based ordering. + - ruiz: Scale using the norm-equilibration algorithm of Ruiz (MC77). +
+ +\anchor OPT_spral_switch_1 + spral_switch_1: First switch, determining when spral_scaling_1 is enabled. +
+ If spral_scaling = dynamic, spral_scaling_1 is enabled according to this + condition. If spral_switch_2 occurs, this option is henceforth ignored. + The default value for this string option is "at_start". + +Possible values: + - never: Scaling is never enabled. + - at_start: Scaling to be used from the very start. + - at_start_reuse: Scaling is used on the first iteration, then reused thereafter. + - on_demand: Scaling is used when iterative refinement has failed. + - on_demand_reuse: As on_demand, but scaling from previous iteration is reused. + - high_delay: Scaling is used after more than 0.05*n delays are present. + - high_delay_reuse: Scaling is used only when previous iteration created more that 0.05*n additional delays; otherwise, reuse scaling from the previous iteration. + - od_hd: Combination of on_demand and high_delay. + - od_hd_reuse: Combination of on_demand_reuse and high_delay_reuse +
+ +\anchor OPT_spral_switch_2 + spral_switch_2: Second switch, determining when spral_scaling_2 is enabled. +
+ If spral_scaling = dynamic, spral_scaling_2 is enabled according to this + condition. If spral_switch_3 occurs, this option is henceforth ignored. + The default value for this string option is "on_demand". + +Possible values: + - never: Scaling is never enabled. + - at_start: Scaling to be used from the very start. + - at_start_reuse: Scaling is used on the first iteration, then reused thereafter. + - on_demand: Scaling is used when iterative refinement has failed. + - on_demand_reuse: As on_demand, but scaling from previous iteration is reused. + - high_delay: Scaling is used after more than 0.05*n delays are present. + - high_delay_reuse: Scaling is used only when previous iteration created more that 0.05*n additional delays; otherwise, reuse scaling from the previous iteration. + - od_hd: Combination of on_demand and high_delay. + - od_hd_reuse: Combination of on_demand_reuse and high_delay_reuse +
+ +\anchor OPT_spral_switch_3 + spral_switch_3: Third switch, determining when spral_scaling_3 is enabled. +
+ If spral_scaling = dynamic, spral_scaling_3 is enabled according to this + condition. The default value for this string option is "never". + +Possible values: + - never: Scaling is never enabled. + - at_start: Scaling to be used from the very start. + - at_start_reuse: Scaling is used on the first iteration, then reused thereafter. + - on_demand: Scaling is used when iterative refinement has failed. + - on_demand_reuse: As on_demand, but scaling from previous iteration is reused. + - high_delay: Scaling is used after more than 0.05*n delays are present. + - high_delay_reuse: Scaling is used only when previous iteration created more that 0.05*n additional delays; otherwise, reuse scaling from the previous iteration. + - od_hd: Combination of on_demand and high_delay. + - od_hd_reuse: Combination of on_demand_reuse and high_delay_reuse +
+ \section OPTIONS_AMPL Options available via the AMPL Interface The following is a list of options that is available via AMPL: diff --git a/doc/special.dox b/doc/special.dox index 2970ca6cb..d0b793d1e 100644 --- a/doc/special.dox +++ b/doc/special.dox @@ -266,15 +266,14 @@ descent direction for the objective function when the constraint violation is sufficiently small, which in turn is necessary to guarantee global convergence. -To detect the presence of negative curvature, the default method -implemented in IPOPT requires inertia information of the augmented -system. The inertia of the augmented system is the number of positive, -negative, and zero eigenvalues. Inertia is currently estimated using -symmetric indefinite factorization routines implemented in powerful -packages such as MA27, MA57, or Pardiso. When more general linear -algebra strategies/packages are used (e.g., iterative, parallel -decomposition), however, inertia information is difficult (if not -impossible) to obtain. +To detect the presence of negative curvature, the default method implemented in +IPOPT requires inertia information of the augmented system. The inertia of the +augmented system is the number of positive, negative, and zero eigenvalues. +Inertia is currently estimated using symmetric indefinite factorization +routines implemented in powerful packages such as MA27, MA57, Pardiso, or +SPRAL. When more general linear algebra strategies/packages are used (e.g., +iterative, parallel decomposition), however, inertia information is difficult +(if not impossible) to obtain. In \cite ChiangZavala2014, we present acceptance tests for the search step that do not require inertia information of the linear system and prove diff --git a/src/Algorithm/IpAlgBuilder.cpp b/src/Algorithm/IpAlgBuilder.cpp index ddfdea485..3900e6bef 100644 --- a/src/Algorithm/IpAlgBuilder.cpp +++ b/src/Algorithm/IpAlgBuilder.cpp @@ -62,6 +62,9 @@ #include "IpPardisoSolverInterface.hpp" #include "IpSlackBasedTSymScalingMethod.hpp" +#ifdef IPOPT_HAS_SPRAL +# include "IpSpralSolverInterface.hpp" +#endif #ifdef IPOPT_HAS_WSMP # include "IpWsmpSolverInterface.hpp" # include "IpIterativeWsmpSolverInterface.hpp" @@ -92,7 +95,7 @@ void AlgorithmBuilder::RegisterOptions( ) { roptions->SetRegisteringCategory("Linear Solver"); - roptions->AddStringOption9( + roptions->AddStringOption10( "linear_solver", "Linear solver used for step computations.", #ifdef COINHSL_HAS_MA27 @@ -105,21 +108,25 @@ void AlgorithmBuilder::RegisterOptions( "ma97", #else # ifdef COINHSL_HAS_MA86 - "ma86", + "ma86", # else # ifdef IPOPT_HAS_PARDISO "pardiso", # else -# ifdef IPOPT_HAS_WSMP - "wsmp", +# ifdef IPOPT_HAS_SPRAL + "spral", # else -# ifdef IPOPT_HAS_MUMPS - "mumps", +# ifdef IPOPT_HAS_WSMP + "wsmp", # else -# ifdef COINHSL_HAS_MA77 - "ma77", +# ifdef IPOPT_HAS_MUMPS + "mumps", # else - "ma27", +# ifdef COINHSL_HAS_MA77 + "ma77", +# else + "ma27", +# endif # endif # endif # endif @@ -134,6 +141,7 @@ void AlgorithmBuilder::RegisterOptions( "ma86", "use the Harwell routine HSL_MA86", "ma97", "use the Harwell routine HSL_MA97", "pardiso", "use the Pardiso package", + "spral", "use the SPRAL package", "wsmp", "use WSMP package", "mumps", "use MUMPS package", "custom", "use custom linear solver", @@ -380,6 +388,15 @@ SmartPtr AlgorithmBuilder::SymLinearSolverFactory( SolverInterface = new PardisoSolverInterface(); #endif + } + else if( linear_solver == "spral" ) + { +#ifdef IPOPT_HAS_SPRAL + SolverInterface = new SpralSolverInterface(); +#else + THROW_EXCEPTION(OPTION_INVALID, "Selected linear solver SPRAL not available."); +#endif + } else if( linear_solver == "ma97" ) { diff --git a/src/Algorithm/LinearSolvers/IpLinearSolversRegOp.cpp b/src/Algorithm/LinearSolvers/IpLinearSolversRegOp.cpp index c33d126ac..ddd0acc0c 100644 --- a/src/Algorithm/LinearSolvers/IpLinearSolversRegOp.cpp +++ b/src/Algorithm/LinearSolvers/IpLinearSolversRegOp.cpp @@ -22,6 +22,9 @@ #ifdef IPOPT_HAS_MUMPS # include "IpMumpsSolverInterface.hpp" #endif +#ifdef IPOPT_HAS_SPRAL +# include "IpSpralSolverInterface.hpp" +#endif #ifdef IPOPT_HAS_WSMP # include "IpWsmpSolverInterface.hpp" # include "IpIterativeWsmpSolverInterface.hpp" @@ -67,6 +70,11 @@ void RegisterOptions_LinearSolvers( PardisoSolverInterface::RegisterOptions(roptions); #endif +#ifdef IPOPT_HAS_SPRAL + roptions->SetRegisteringCategory("SPRAL Linear Solver"); + SpralSolverInterface::RegisterOptions(roptions); +#endif + #ifdef IPOPT_HAS_WSMP roptions->SetRegisteringCategory("WSMP Linear Solver"); WsmpSolverInterface::RegisterOptions(roptions); diff --git a/src/Algorithm/LinearSolvers/IpSpralSolverInterface.cpp b/src/Algorithm/LinearSolvers/IpSpralSolverInterface.cpp new file mode 100644 index 000000000..a822abc96 --- /dev/null +++ b/src/Algorithm/LinearSolvers/IpSpralSolverInterface.cpp @@ -0,0 +1,710 @@ +// Copyright (C) 2020, Los Alamos National Laboratory +// Copyright (C) 2012, The Science and Technology Facilities Council. +// Copyright (C) 2009, Jonathan Hogg . +// Copyright (C) 2004, 2007 International Business Machines and others. +// All Rights Reserved. +// This code is published under the Eclipse Public License. +// +// $Id: IpSpralSolverInterface.cpp 2020-03-21 00:00:00Z tasseff $ +// +// Authors: Byron Tasseff LANL 2020-03-21 +// Jonathan Hogg STFC 2012-12-21 +// Jonathan Hogg 2009-07-29 +// Carl Laird, Andreas Waechter IBM 2004-03-17 + +#include "IpoptConfig.h" +#include "IpSpralSolverInterface.hpp" +#include +#include +#include +#include + +using namespace std; + +namespace Ipopt +{ + +SpralSolverInterface::~SpralSolverInterface() +{ + delete[] val_; + + if ( scaling_ ) + { + delete[] scaling_; + } + + spral_ssids_free(&akeep_, &fkeep_); +} + +void SpralSolverInterface::RegisterOptions( + SmartPtr roptions +) +{ + roptions->AddLowerBoundedIntegerOption( + "spral_cpu_block_size", "CPU Parallelization Block Size", 1, 256, + "Block size to use for parallelization of large nodes on CPU resources."); + + roptions->AddLowerBoundedNumberOption( + "spral_gpu_perf_coeff", "GPU Performance Coefficient", 0.0, true, 1.0, + "How many times faster a GPU is than CPU at factoring a subtree."); + + roptions->AddStringOption2( + "spral_ignore_numa", "Non-uniform memory access (NUMA) region setting.", + "yes", "no", "Do not treat CPUs and GPUs as belonging to a single NUMA region.", + "yes", "Treat CPUs and GPUs as belonging to a single NUMA region.", ""); + + roptions->AddLowerBoundedNumberOption( + "spral_max_load_inbalance", + "Maximum Permissible Load", 1.0, true, 1.2, + "Maximum permissible load inbalance for leaf subtree allocations."); + + roptions->AddLowerBoundedNumberOption( + "spral_min_gpu_work", "Minimum GPU Work", 0.0, false, 5.0e9, + "Minimum number of flops in subtree before scheduling on GPU."); + + roptions->AddLowerBoundedIntegerOption( + "spral_nemin", "Node Amalgamation Parameter", 1, 32, + "Two nodes in elimination tree are merged if result has fewer than " + "spral_nemin variables."); + + roptions->AddStringOption2( + "spral_order", + "Controls type of ordering used by SPRAL", "matching", + "metis", "Use METIS with default settings.", + "matching", "Use matching-based elimination ordering.", ""); + + roptions->AddStringOption3( + "spral_pivot_method", + "Specifies strategy for scaling in SPRAL linear solver.", "block", + "aggressive", "Aggressive a posteori pivoting.", + "block", "Block a posteori pivoting.", + "threshold", "Threshold partial pivoting (not parallel).", ""); + + roptions->AddIntegerOption( + "spral_print_level", "Print level for the linear solver SPRAL", -1, "" + /* + "<0 No printing.\n" + "0 Error and warning messages only.\n" + "=1 Limited diagnostic printing.\n" + ">1 Additional diagnostic printing."*/); + + roptions->AddStringOption6( + "spral_scaling", + "Specifies strategy for scaling in SPRAL linear solver.", "matching", + "none", "Do not scale the linear system matrix.", + "mc64", "Scale using weighted bipartite matching (MC64).", + "auction", "Scale using the auction algorithm.", + "matching", "Scale using the matching-based ordering.", + "ruiz", "Scale using the norm-equilibration algorithm of Ruiz (MC77).", + "dynamic", "Dynamically select scaling according to switch options.", ""); + + roptions->AddStringOption5( + "spral_scaling_1", + "First scaling strategy.", "matching", + "none", "Do not scale the linear system matrix.", + "mc64", "Scale using weighted bipartite matching (MC64).", + "auction", "Scale using the auction algorithm.", + "matching", "Scale using the matching-based ordering.", + "ruiz", "Scale using the norm-equilibration algorithm of Ruiz (MC77)." + "If spral_scaling = dynamic, this scaling is used according to the trigger " + "spral_switch_1. If spral_switch_2 is triggered, it is disabled."); + + roptions->AddStringOption5( + "spral_scaling_2", + "Second scaling strategy.", "mc64", + "none", "Do not scale the linear system matrix.", + "mc64", "Scale using weighted bipartite matching (MC64).", + "auction", "Scale using the auction algorithm.", + "matching", "Scale using the matching-based ordering.", + "ruiz", "Scale using the norm-equilibration algorithm of Ruiz (MC77)." + "If spral_scaling = dynamic, this scaling is used according to the trigger " + "spral_switch_2. If spral_switch_3 is triggered, it is disabled."); + + roptions->AddStringOption5( + "spral_scaling_3", + "Third scaling strategy.", "none", + "none", "Do not scale the linear system matrix.", + "mc64", "Scale using weighted bipartite matching (MC64).", + "auction", "Scale using the auction algorithm.", + "matching", "Scale using the matching-based ordering.", + "ruiz", "Scale using the norm-equilibration algorithm of Ruiz (MC77)." + "If spral_scaling = dynamic, this scaling is used according to the trigger " + "spral_switch_3."); + + roptions->AddLowerBoundedNumberOption( + "spral_small", "Zero Pivot Threshold", 0.0, true, 1.0e-20, + "Any pivot less than spral_small is treated as zero."); + + roptions->AddLowerBoundedNumberOption( + "spral_small_subtree_threshold", "Small Subtree Threshold", 0.0, true, 4.0e6, + "Maximum number of flops in a subtree treated as a single task."); + + roptions->AddStringOption9( + "spral_switch_1", + "First switch, determining when spral_scaling_1 is enabled.", "at_start", + "never", "Scaling is never enabled.", + "at_start", "Scaling is used from the very start.", + "at_start_reuse", "Scaling is used on the first iteration, then reused thereafter.", + "on_demand", "Scaling is used when iterative refinement has failed.", + "on_demand_reuse", "As on_demand, but scaling from previous iteration is reused.", + "high_delay", "Scaling is used after more than 0.05*n delays are present.", + "high_delay_reuse", "Scaling is used only when previous iteration created " + "more that 0.05*n additional delays; otherwise, reuse scaling from the previous iteration.", + "od_hd", "Combination of on_demand and high_delay.", + "od_hd_reuse", "Combination of on_demand_reuse and high_delay_reuse", + "If spral_scaling = dynamic, spral_scaling_1 is enabled according to this " + "condition. If spral_switch_2 occurs this option is henceforth ignored."); + + roptions->AddStringOption9( + "spral_switch_2", + "Second switch, determining when spral_scaling_2 is enabled.", "on_demand", + "never", "Scaling is never enabled.", + "at_start", "Scaling is used from the very start.", + "at_start_reuse", "Scaling is used on the first iteration, then reused thereafter.", + "on_demand", "Scaling is used when iterative refinement has failed.", + "on_demand_reuse", "As on_demand, but scaling from previous iteration is reused.", + "high_delay", "Scaling is used after more than 0.05*n delays are present", + "high_delay_reuse", "Scaling is used only when previous iteration created " + "more that 0.05*n additional delays; otherwise, reuse scaling from the previous iteration.", + "od_hd", "Combination of on_demand and high_delay", + "od_hd_reuse", "Combination of on_demand_reuse and high_delay_reuse", + "If spral_scaling = dynamic, spral_scaling_2 is enabled according to this " + "condition. If spral_switch_3 occurs this option is henceforth ignored."); + + roptions->AddStringOption9( + "spral_switch_3", + "Third switch, determining when spral_scaling_3 is enabled.", "never", + "never", "Scaling is never enabled.", + "at_start", "Scaling is used from the very start.", + "at_start_reuse", "Scaling is used on the first iteration, then reused thereafter.", + "on_demand", "Scaling is used when iterative refinement has failed.", + "on_demand_reuse", "As on_demand, but scaling from previous iteration is reused.", + "high_delay", "Scaling is used after more than 0.05*n delays are present", + "high_delay_reuse", "Scaling is used only when previous iteration created " + "more that 0.05*n additional delays; otherwise, reuse scaling from the previous iteration.", + "od_hd", "Combination of on_demand and high_delay", + "od_hd_reuse", "Combination of on_demand_reuse and high_delay_reuse", + "If spral_scaling = dynamic, spral_scaling_3 is enabled according to this condition."); + + roptions->AddBoundedNumberOption( + "spral_u", + "Pivoting Threshold", 0.0, true, 0.5, false, 1.0e-8, + "Relative pivot threshold used in symmetric indefinite case."); + + roptions->AddBoundedNumberOption( + "spral_umax", "Maximum Pivoting Threshold", 0.0, true, 0.5, false, 1.0e-4, + "See SPRAL documentation."); + + roptions->AddStringOption2( + "spral_use_gpu", "GPU Setting", + "yes", "no", "Do not use NVIDIA GPUs.", + "yes", "Use NVIDIA GPUs if present.", ""); +} + +int SpralSolverInterface::PivotMethodNameToNum( + const std::string& name +) +{ + if ( name == "aggressive" ) + { + return 0; + } + else if ( name == "block" ) + { + return 1; + } + else if ( name == "threshold" ) + { + return 2; + } + else + { + assert(0); + return -1; + } +} + +int SpralSolverInterface::ScaleNameToNum( + const std::string& name +) +{ + if ( name == "none" ) + { + return 0; + } + else if ( name == "mc64" ) + { + return 1; + } + else if ( name == "auction" ) + { + return 2; + } + else if ( name == "matching" ) + { + return 3; + } + else if ( name == "ruiz" ) + { + return 4; + } + else + { + assert(0); + return -1; + } +} + +bool SpralSolverInterface::InitializeImpl( + const OptionsList& options, + const std::string& prefix +) +{ + spral_ssids_default_options(&control_); + control_.array_base = 0; // Use Fortran numbering (documentation incorrect). + control_.action = true; // Continue factorization on discovery of a zero pivot. + /* Note: we can't set control_.action = false as we need to know the + * inertia. (Otherwise we just enter the restoration phase and fail.) */ + + options.GetBoolValue("spral_ignore_numa", control_.ignore_numa, prefix); + options.GetBoolValue("spral_use_gpu", control_.use_gpu, prefix); + options.GetIntegerValue("spral_cpu_block_size", control_.cpu_block_size, prefix); + options.GetIntegerValue("spral_nemin", control_.nemin, prefix); + options.GetIntegerValue("spral_print_level", control_.print_level, prefix); + options.GetNumericValue("spral_small", control_.small, prefix); + options.GetNumericValue("spral_u", control_.u, prefix); + options.GetNumericValue("spral_umax", umax_, prefix); + + // Set gpu_perf_coeff. + double gpu_perf_coeff_tmp = 1.0; + options.GetNumericValue("spral_gpu_perf_coeff", gpu_perf_coeff_tmp, prefix); + control_.gpu_perf_coeff = (float)gpu_perf_coeff_tmp; + + // Set max_load_inbalance. + double max_load_inbalance_tmp = 1.2; + options.GetNumericValue("spral_max_load_inbalance", max_load_inbalance_tmp, prefix); + control_.max_load_inbalance = (float)max_load_inbalance_tmp; + + // Set min_gpu_work. + double min_gpu_work_tmp = 5.0e9; + options.GetNumericValue("spral_min_gpu_work", min_gpu_work_tmp, prefix); + control_.min_gpu_work = (long)min_gpu_work_tmp; + + // Set the pivot method. + std::string pivot_method; + options.GetStringValue("spral_pivot_method", pivot_method, prefix); + control_.pivot_method = PivotMethodNameToNum(pivot_method); + + // Set small_subtree_threshold. + double small_subtree_threshold_tmp = 4.0e6; + options.GetNumericValue("spral_small_subtree_threshold", small_subtree_threshold_tmp, prefix); + control_.small_subtree_threshold = (long)small_subtree_threshold_tmp; + + // Reset all private data. + pivtol_changed_ = false; + + std::string order_method; + options.GetStringValue("spral_order", order_method, prefix); + + if (order_method == "metis") + { + control_.ordering = 1; + } + else if (order_method == "matching") + { + control_.ordering = 2; + } + + std::string scaling_method; + options.GetStringValue("spral_scaling", scaling_method, prefix); + current_level_ = 0; + + if ( scaling_method == "dynamic" ) + { + scaling_type_ = 0; + std::string switch_val[3], scaling_val[3]; + + options.GetStringValue("spral_switch_1", switch_val[0], prefix); + options.GetStringValue("spral_scaling_1", scaling_val[0], prefix); + options.GetStringValue("spral_switch_2", switch_val[1], prefix); + options.GetStringValue("spral_scaling_2", scaling_val[1], prefix); + options.GetStringValue("spral_switch_3", switch_val[2], prefix); + options.GetStringValue("spral_scaling_3", scaling_val[2], prefix); + + for ( int i = 0; i < 3; i++ ) + { + scaling_val_[i] = ScaleNameToNum(scaling_val[i]); + + if ( switch_val[i] == "never" ) + { + switch_[i] = SWITCH_NEVER; + } + else if ( switch_val[i] == "at_start" ) + { + switch_[i] = SWITCH_AT_START; + scaling_type_ = scaling_val_[i]; + current_level_ = i; + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "SPRAL: Enabled " + "scaling level %d on initialization\n", current_level_); + } + else if( switch_val[i] == "at_start_reuse" ) + { + switch_[i] = SWITCH_AT_START_REUSE; + scaling_type_ = scaling_val_[i]; + current_level_ = i; + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "SPRAL: Enabled " + "scaling level %d on initialization\n", current_level_); + } + else if( switch_val[i] == "on_demand" ) + { + switch_[i] = SWITCH_ON_DEMAND; + } + else if( switch_val[i] == "on_demand_reuse" ) + { + switch_[i] = SWITCH_ON_DEMAND_REUSE; + } + else if( switch_val[i] == "high_delay" ) + { + switch_[i] = SWITCH_NDELAY; + } + else if( switch_val[i] == "high_delay_reuse" ) + { + switch_[i] = SWITCH_NDELAY_REUSE; + } + else if( switch_val[i] == "od_hd" ) + { + switch_[i] = SWITCH_OD_ND; + } + else if( switch_val[i] == "od_hd_reuse" ) + { + switch_[i] = SWITCH_OD_ND_REUSE; + } + } + } + else + { + switch_[0] = SWITCH_AT_START; + switch_[1] = SWITCH_NEVER; + switch_[2] = SWITCH_NEVER; + scaling_type_ = ScaleNameToNum(scaling_method); + } + + // Set whether we scale on first iteration or not + switch ( switch_[current_level_] ) + { + case SWITCH_NEVER: + case SWITCH_ON_DEMAND: + case SWITCH_ON_DEMAND_REUSE: + case SWITCH_NDELAY: + case SWITCH_NDELAY_REUSE: + case SWITCH_OD_ND: + case SWITCH_OD_ND_REUSE: + rescale_ = false; + break; + case SWITCH_AT_START: + case SWITCH_AT_START_REUSE: + rescale_ = true; + break; + } + + // Set scaling and ordering. + control_.scaling = scaling_type_; + control_.ordering = scaling_type_ != 3 ? 1 : 2; + + return true; // All is well. +} + +ESymSolverStatus SpralSolverInterface::InitializeStructure( + Index dim, + Index nonzeros, + const Index* ia, + const Index* ja +) +{ + struct spral_ssids_inform info; + + // Store size for later use + ndim_ = dim; + + // Setup memory for values + if ( val_ != NULL ) + { + delete[] val_; + } + + val_ = new double[nonzeros]; + + // Correct scaling and ordering if necessary. + if ( control_.ordering == 2 && control_.scaling != 3 ) + { + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "In SpralSolverInterface, " + "matching-based ordering was used, but matching-based scaling was " + "not. Setting scaling using the matching-based ordering.\n"); + control_.scaling = scaling_type_ = 3; + } + + if ( control_.ordering != 2 && control_.scaling == 3 ) + { + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "In SpralSolverInterface, " + "matching-based scaling was used, but matching-based ordering was " + "not. Setting ordering using the matching-based algorithm.\n"); + control_.ordering = 2; + } + + // Perform analyse. + if ( !( control_.ordering == 2 && control_.scaling == 3 ) ) + { + if ( HaveIpData() ) + { + IpData().TimingStats().LinearSystemSymbolicFactorization().Start(); + } + + spral_ssids_analyse_ptr32(false, ndim_, NULL, ia, ja, NULL, &akeep_, &control_, &info); + + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "nfactor = %d, nflops = %d:\n", + info.num_factor, info.num_flops); + + if ( HaveIpData() ) + { + IpData().TimingStats().LinearSystemSymbolicFactorization().End(); + } + + if ( info.flag >= 0 ) + { + return SYMSOLVER_SUCCESS; + } + else + { + return SYMSOLVER_FATAL_ERROR; + } + } + else + { + return SYMSOLVER_SUCCESS; + } +} + +ESymSolverStatus SpralSolverInterface::MultiSolve( + bool new_matrix, + const Index* ia, + const Index* ja, + Index nrhs, + double* rhs_vals, + bool check_NegEVals, + Index numberOfNegEVals +) +{ + struct spral_ssids_inform info; + Number t1 = 0, t2; + + if ( new_matrix || pivtol_changed_ ) + { + // Set scaling option + if ( rescale_ ) + { + control_.scaling = scaling_type_; + control_.ordering = scaling_type_ != 3 ? 1 : 2; + + if ( scaling_type_ != 0 && scaling_ == NULL ) + { + scaling_ = new double[ndim_]; + } + } + else + { + control_.scaling = 0; // None or user (depends if scaling_ is allocated). + } + + if ( control_.ordering == 2 && control_.scaling == 3 && rescale_ ) + { + if ( HaveIpData() ) + { + IpData().TimingStats().LinearSystemSymbolicFactorization().Start(); + } + + spral_ssids_analyse_ptr32(false, ndim_, NULL, ia, ja, val_, &akeep_, &control_, &info); + + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "nfactor = %d, nflops = %d:\n", + info.num_factor, info.num_flops); + + if ( HaveIpData() ) + { + IpData().TimingStats().LinearSystemSymbolicFactorization().End(); + } + + if ( info.flag == 6 || info.flag == -5 ) + { + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "In SpralSolverInterface::Factorization: " + "Singular system, estimated rank %d of %d\n", info.matrix_rank, ndim_); + return SYMSOLVER_SINGULAR; + } + else if ( info.flag < 0 ) + { + return SYMSOLVER_FATAL_ERROR; + } + } + + if ( HaveIpData() ) + { + t1 = IpData().TimingStats().LinearSystemFactorization().TotalWallclockTime(); + IpData().TimingStats().LinearSystemFactorization().Start(); + } + + spral_ssids_factor_ptr32(false, ia, ja, val_, scaling_, akeep_, &fkeep_, &control_, &info); + + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "SPRAL: delays %d, nfactor %d, " + "nflops %ld, maxfront %d\n", info.num_delay, info.num_factor, info.num_flops, + info.maxfront); + + if ( HaveIpData() ) + { + IpData().TimingStats().LinearSystemFactorization().End(); + t2 = IpData().TimingStats().LinearSystemFactorization().TotalWallclockTime(); + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "SpralSolverInterface::Factorization: " + "spral_factor_solve took %10.3f\n", t2 - t1); + } + + if ( info.flag == 7 || info.flag == 6 || info.flag == -5 ) + { + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "In SpralSolverInterface::Factorization: " + "Singular system, estimated rank %d of %d\n", info.matrix_rank, ndim_); + return SYMSOLVER_SINGULAR; + } + + for ( int i = current_level_; i < 3; i++ ) + { + switch ( switch_[i] ) + { + case SWITCH_NEVER: + case SWITCH_AT_START: + case SWITCH_ON_DEMAND: + // Nothing to do here. + break; + case SWITCH_AT_START_REUSE: + // Scaled exactly once, never changed again. + rescale_ = false; + break; + case SWITCH_ON_DEMAND_REUSE: + if ( i == current_level_ && rescale_ ) + { + rescale_ = false; + } + + break; + case SWITCH_NDELAY_REUSE: + case SWITCH_OD_ND_REUSE: + if ( rescale_ ) + { + // Need to do this before we reset rescale. + numdelay_ = info.num_delay; + } + + if ( i == current_level_ && rescale_ ) + { + rescale_ = false; + } + // Falls through to: + case SWITCH_NDELAY: + case SWITCH_OD_ND: + if ( rescale_ ) + { + numdelay_ = info.num_delay; + } + + if ( info.num_delay - numdelay_ > 0.05 * ndim_ ) + { + // Number of delays has signficantly increased, so trigger. + current_level_ = i; + scaling_type_ = scaling_val_[i]; + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "SPRAL: " + "Enabling scaling %d due to excess delays\n", i); + rescale_ = true; + } + + break; + } + } + + if ( info.flag < 0 ) + { + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "In SpralSolverInterface::Factorization: " + "Unhandled error. info.flag = %d\n", info.flag); + return SYMSOLVER_FATAL_ERROR; + } + + if ( check_NegEVals && info.num_neg != numberOfNegEVals ) + { + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "In SpralSolverInterface::Factorization: " + "info.num_neg = %d, but numberOfNegEVals = %d\n", info.num_neg, numberOfNegEVals); + return SYMSOLVER_WRONG_INERTIA; + } + + if ( HaveIpData() ) + { + IpData().TimingStats().LinearSystemBackSolve().Start(); + } + + spral_ssids_solve(0, nrhs, rhs_vals, ndim_, akeep_, fkeep_, &control_, &info); + + if ( HaveIpData() ) + { + IpData().TimingStats().LinearSystemBackSolve().End(); + } + + numneg_ = info.num_neg; + + pivtol_changed_ = false; + } + else + { + if ( HaveIpData() ) + { + IpData().TimingStats().LinearSystemBackSolve().Start(); + } + + spral_ssids_solve(0, nrhs, rhs_vals, ndim_, akeep_, fkeep_, &control_, &info); + + if ( HaveIpData() ) + { + IpData().TimingStats().LinearSystemBackSolve().End(); + } + } + + return SYMSOLVER_SUCCESS; +} + +bool SpralSolverInterface::IncreaseQuality() +{ + for ( int i = current_level_; i < 3; i++ ) + { + switch ( switch_[i] ) + { + case SWITCH_ON_DEMAND: + case SWITCH_ON_DEMAND_REUSE: + case SWITCH_OD_ND: + case SWITCH_OD_ND_REUSE: + rescale_ = true; + current_level_ = i; + scaling_type_ = scaling_val_[i]; + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "SPRAL: Enabling scaling " + "%d due to failure of iterative refinement\n", current_level_); + break; + default: + ; + } + } + + if ( control_.u >= umax_ ) + { + return false; + } + + pivtol_changed_ = true; + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "Increasing pivot tolerance " + "for SPRAL from %7.2e ", control_.u); + control_.u = Min(umax_, pow(control_.u, 0.75)); + Jnlst().Printf(J_DETAILED, J_LINEAR_ALGEBRA, "to %7.2e.\n", control_.u); + return true; +} + +} // namespace Ipopt diff --git a/src/Algorithm/LinearSolvers/IpSpralSolverInterface.hpp b/src/Algorithm/LinearSolvers/IpSpralSolverInterface.hpp new file mode 100644 index 000000000..09a75a9bd --- /dev/null +++ b/src/Algorithm/LinearSolvers/IpSpralSolverInterface.hpp @@ -0,0 +1,165 @@ +// Copyright (C) 2020, Los Alamos National Laboratory +// Copyright (C) 2012, The Science and Technology Facilities Council (STFC) +// Copyright (C) 2009, Jonathan Hogg +// Copyright (C) 2004, 2007 International Business Machines and others. +// All Rights Reserved. +// This code is published under the Eclipse Public License. +// +// Authors: Byron Tasseff LANL 2020-03-21 +// Jonathan Hogg STFC 2012-12-21 +// Jonathan Hogg 2009-07-29 +// Carl Laird, Andreas Waechter IBM 2004-03-17 + +#ifndef __IPSPRALSOLVERINTERFACE_HPP__ +#define __IPSPRALSOLVERINTERFACE_HPP__ + +#include "IpSparseSymLinearSolverInterface.hpp" + +extern "C" +{ +#include "spral_ssids.h" +} + +namespace Ipopt +{ + +class SpralSolverInterface: public SparseSymLinearSolverInterface +{ +private: + enum scaling_opts + { + SWITCH_NEVER, + SWITCH_AT_START, + SWITCH_AT_START_REUSE, + SWITCH_ON_DEMAND, + SWITCH_ON_DEMAND_REUSE, + SWITCH_NDELAY, + SWITCH_NDELAY_REUSE, + SWITCH_OD_ND, + SWITCH_OD_ND_REUSE + }; + + int ndim_; ///< Number of dimensions + double* val_; ///< Storage for variables + int numneg_; ///< Number of negative pivots in last factorization + int numdelay_; ///< Number of delayed pivots last time we scaled + void* akeep_; ///< Stores pointer to factors + void* fkeep_; ///< Stores pointer to factors + bool pivtol_changed_; ///< indicates if pivtol has been changed + bool rescale_; ///< Indicates if we should rescale next factorization + double* scaling_; ///< Store scaling for reuse if doing dynamic scaling + int fctidx_; ///< Current factorization number to dump to + + /* Options */ + struct spral_ssids_options control_; + double umax_; + int ordering_; + int scaling_type_; + enum scaling_opts switch_[3]; + int scaling_val_[3]; + int current_level_; + bool dump_; + +public: + + SpralSolverInterface() + : val_(NULL), + numdelay_(0), + akeep_(NULL), + fkeep_(NULL), + pivtol_changed_(false), + rescale_(false), + scaling_(NULL), + fctidx_(0), + scaling_type_(0), + dump_(false) + { } + + ~SpralSolverInterface(); + + static void RegisterOptions( + SmartPtr roptions + ); + + bool InitializeImpl( + const OptionsList& options, + const std::string& prefix + ); + + /** @name Methods for requesting solution of the linear system. */ + //@{ + ESymSolverStatus InitializeStructure( + Index dim, + Index nonzeros, + const Index* ia, + const Index* ja + ); + + double* GetValuesArrayPtr() + { + return val_; + } + + ESymSolverStatus MultiSolve( + bool new_matrix, + const Index* ia, + const Index* ja, + Index nrhs, + double* rhs_vals, + bool check_NegEVals, + Index numberOfNegEVals + ); + + Index NumberOfNegEVals() const + { + return numneg_; + } + //@} + + //* @name Options of Linear solver */ + //@{ + bool IncreaseQuality(); + + bool ProvidesInertia() const + { + return true; + } + + EMatrixFormat MatrixFormat() const + { + return CSR_Format_1_Offset; + } + //@} + + /** @name Methods related to the detection of linearly dependent + * rows in a matrix */ + //@{ + bool ProvidesDegeneracyDetection() const + { + return false; + } + + ESymSolverStatus DetermineDependentRows( + const Index* ia, + const Index* ja, + std::list& c_deps + ) + { + return SYMSOLVER_FATAL_ERROR; + } + //@} + + /** converts a scaling option name to its spral option number */ + static int ScaleNameToNum( + const std::string& name + ); + + /** converts a pivot method option name to its spral option number */ + static int PivotMethodNameToNum( + const std::string& name + ); +}; + +} // namespace Ipopt + +#endif diff --git a/src/Algorithm/LinearSolvers/Makefile.am b/src/Algorithm/LinearSolvers/Makefile.am index 07e95beb1..15c6f16d4 100644 --- a/src/Algorithm/LinearSolvers/Makefile.am +++ b/src/Algorithm/LinearSolvers/Makefile.am @@ -45,6 +45,10 @@ if COIN_HAS_MUMPS liblinsolvers_la_SOURCES += IpMumpsSolverInterface.cpp endif +if HAVE_SPRAL + liblinsolvers_la_SOURCES += IpSpralSolverInterface.cpp +endif + AM_CPPFLAGS = \ -I$(srcdir)/../../Common \ -I$(srcdir)/../../LinAlg \ diff --git a/src/Algorithm/LinearSolvers/Makefile.in b/src/Algorithm/LinearSolvers/Makefile.in index 9b69b7f6d..605855191 100644 --- a/src/Algorithm/LinearSolvers/Makefile.in +++ b/src/Algorithm/LinearSolvers/Makefile.in @@ -101,6 +101,7 @@ host_triplet = @host@ @HAVE_MA28_TRUE@ IpMa28Partition.F @HAVE_WSMP_TRUE@am__append_4 = IpWsmpSolverInterface.cpp IpIterativeWsmpSolverInterface.cpp @COIN_HAS_MUMPS_TRUE@am__append_5 = IpMumpsSolverInterface.cpp +@HAVE_SPRAL_TRUE@am__append_6 = IpSpralSolverInterface.cpp subdir = src/Algorithm/LinearSolvers ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/configure.ac @@ -122,6 +123,7 @@ liblinsolvers_la_LIBADD = @HAVE_WSMP_TRUE@am__objects_4 = IpWsmpSolverInterface.lo \ @HAVE_WSMP_TRUE@ IpIterativeWsmpSolverInterface.lo @COIN_HAS_MUMPS_TRUE@am__objects_5 = IpMumpsSolverInterface.lo +@HAVE_SPRAL_TRUE@am__objects_6 = IpSpralSolverInterface.lo am_liblinsolvers_la_OBJECTS = IpLinearSolversRegOp.lo \ IpSlackBasedTSymScalingMethod.lo IpTripletToCSRConverter.lo \ IpTSymDependencyDetector.lo IpTSymLinearSolver.lo \ @@ -129,7 +131,7 @@ am_liblinsolvers_la_OBJECTS = IpLinearSolversRegOp.lo \ IpMa86SolverInterface.lo IpMa97SolverInterface.lo \ IpMc19TSymScalingMethod.lo IpMa77SolverInterface.lo \ $(am__objects_1) $(am__objects_2) $(am__objects_3) \ - $(am__objects_4) $(am__objects_5) + $(am__objects_4) $(am__objects_5) $(am__objects_6) liblinsolvers_la_OBJECTS = $(am_liblinsolvers_la_OBJECTS) AM_V_lt = $(am__v_lt_@AM_V@) am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) @@ -162,6 +164,7 @@ am__depfiles_remade = ./$(DEPDIR)/IpIterativeWsmpSolverInterface.Plo \ ./$(DEPDIR)/IpMumpsSolverInterface.Plo \ ./$(DEPDIR)/IpPardisoSolverInterface.Plo \ ./$(DEPDIR)/IpSlackBasedTSymScalingMethod.Plo \ + ./$(DEPDIR)/IpSpralSolverInterface.Plo \ ./$(DEPDIR)/IpTSymDependencyDetector.Plo \ ./$(DEPDIR)/IpTSymLinearSolver.Plo \ ./$(DEPDIR)/IpTripletToCSRConverter.Plo \ @@ -455,7 +458,7 @@ liblinsolvers_la_SOURCES = IpLinearSolversRegOp.cpp \ IpMa86SolverInterface.cpp IpMa97SolverInterface.cpp \ IpMc19TSymScalingMethod.cpp IpMa77SolverInterface.cpp \ $(am__append_1) $(am__append_2) $(am__append_3) \ - $(am__append_4) $(am__append_5) + $(am__append_4) $(am__append_5) $(am__append_6) AM_CPPFLAGS = \ -I$(srcdir)/../../Common \ -I$(srcdir)/../../LinAlg \ @@ -531,6 +534,7 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/IpMumpsSolverInterface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/IpPardisoSolverInterface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/IpSlackBasedTSymScalingMethod.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/IpSpralSolverInterface.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/IpTSymDependencyDetector.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/IpTSymLinearSolver.Plo@am__quote@ # am--include-marker @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/IpTripletToCSRConverter.Plo@am__quote@ # am--include-marker @@ -710,6 +714,7 @@ distclean: distclean-am -rm -f ./$(DEPDIR)/IpMumpsSolverInterface.Plo -rm -f ./$(DEPDIR)/IpPardisoSolverInterface.Plo -rm -f ./$(DEPDIR)/IpSlackBasedTSymScalingMethod.Plo + -rm -f ./$(DEPDIR)/IpSpralSolverInterface.Plo -rm -f ./$(DEPDIR)/IpTSymDependencyDetector.Plo -rm -f ./$(DEPDIR)/IpTSymLinearSolver.Plo -rm -f ./$(DEPDIR)/IpTripletToCSRConverter.Plo @@ -771,6 +776,7 @@ maintainer-clean: maintainer-clean-am -rm -f ./$(DEPDIR)/IpMumpsSolverInterface.Plo -rm -f ./$(DEPDIR)/IpPardisoSolverInterface.Plo -rm -f ./$(DEPDIR)/IpSlackBasedTSymScalingMethod.Plo + -rm -f ./$(DEPDIR)/IpSpralSolverInterface.Plo -rm -f ./$(DEPDIR)/IpTSymDependencyDetector.Plo -rm -f ./$(DEPDIR)/IpTSymLinearSolver.Plo -rm -f ./$(DEPDIR)/IpTripletToCSRConverter.Plo diff --git a/src/Algorithm/LinearSolvers/spral_ssids.h b/src/Algorithm/LinearSolvers/spral_ssids.h new file mode 100644 index 000000000..1b2224b3c --- /dev/null +++ b/src/Algorithm/LinearSolvers/spral_ssids.h @@ -0,0 +1,122 @@ +#ifndef SPRAL_SSIDS_H +#define SPRAL_SSIDS_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/************************************ + * Derived types + ************************************/ + +struct spral_ssids_options +{ + int array_base; // Not in Fortran type + int print_level; + int unit_diagnostics; + int unit_error; + int unit_warning; + int ordering; + int nemin; + bool ignore_numa; + bool use_gpu; + long min_gpu_work; + float max_load_inbalance; + float gpu_perf_coeff; + int scaling; + long small_subtree_threshold; + int cpu_block_size; + bool action; + int pivot_method; + double small; + double u; + char unused[80]; // Allow for future expansion +}; + +struct spral_ssids_inform +{ + int flag; + int matrix_dup; + int matrix_missing_diag; + int matrix_outrange; + int matrix_rank; + int maxdepth; + int maxfront; + int num_delay; + long num_factor; + long num_flops; + int num_neg; + int num_sup; + int num_two; + int stat; + int cuda_error; + int cublas_error; + char unused[80]; // Allow for future expansion +}; + +/************************************ + * Basic subroutines + ************************************/ + +/* Initialize options to defaults */ +void spral_ssids_default_options(struct spral_ssids_options* options); +/* Perform analysis phase for CSC data */ +void spral_ssids_analyse(bool check, int n, int* order, const long* ptr, + const int* row, const double* val, void** akeep, + const struct spral_ssids_options* options, + struct spral_ssids_inform* inform); +void spral_ssids_analyse_ptr32(bool check, int n, int* order, const int* ptr, + const int* row, const double* val, void** akeep, + const struct spral_ssids_options* options, + struct spral_ssids_inform* inform); +/* Perform analysis phase for coordinate data */ +void spral_ssids_analyse_coord(int n, int* order, long ne, const int* row, + const int* col, const double* val, void** akeep, + const struct spral_ssids_options* options, + struct spral_ssids_inform* inform); +/* Perform numerical factorization */ +void spral_ssids_factor(bool posdef, const long* ptr, const int* row, + const double* val, double* scale, void* akeep, void** fkeep, + const struct spral_ssids_options* options, + struct spral_ssids_inform* inform); +void spral_ssids_factor_ptr32(bool posdef, const int* ptr, const int* row, + const double* val, double* scale, void* akeep, void** fkeep, + const struct spral_ssids_options* options, + struct spral_ssids_inform* inform); +/* Perform triangular solve(s) for single rhs */ +void spral_ssids_solve1(int job, double* x1, void* akeep, void* fkeep, + const struct spral_ssids_options* options, + struct spral_ssids_inform* inform); +/* Perform triangular solve(s) for one or more rhs */ +void spral_ssids_solve(int job, int nrhs, double* x, int ldx, void* akeep, + void* fkeep, const struct spral_ssids_options* options, + struct spral_ssids_inform* inform); +/* Free memory */ +int spral_ssids_free_akeep(void** akeep); +int spral_ssids_free_fkeep(void** fkeep); +int spral_ssids_free(void** akeep, void** fkeep); + +/************************************ + * Advanced subroutines + ************************************/ + +/* Retrieve information on pivots (positive-definite case) */ +void spral_ssids_enquire_posdef(const void* akeep, const void* fkeep, + const struct spral_ssids_options* options, + struct spral_ssids_inform* inform, double* d); +/* Retrieve information on pivots (indefinite case) */ +void spral_ssids_enquire_indef(const void* akeep, const void* fkeep, + const struct spral_ssids_options* options, + struct spral_ssids_inform* inform, int* piv_order, double* d); +/* Alter pivots (indefinite case only) */ +void spral_ssids_alter(const double* d, const void* akeep, void* fkeep, + const struct spral_ssids_options* options, + struct spral_ssids_inform* inform); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif // SPRAL_SSIDS_H diff --git a/src/Common/config.h.in b/src/Common/config.h.in index acb0d722d..532c6d56a 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -135,6 +135,9 @@ /* Define to 1 if function rand is available */ #undef IPOPT_HAS_RAND +/* Define to 1 if SPRAL is available */ +#undef IPOPT_HAS_SPRAL + /* Define to 1 if function std::rand is available */ #undef IPOPT_HAS_STD__RAND diff --git a/src/Common/config_default.h b/src/Common/config_default.h index c5aa9b1ad..b5b27e9e2 100644 --- a/src/Common/config_default.h +++ b/src/Common/config_default.h @@ -50,6 +50,9 @@ /* Define to 1 if you are using the parallel version of Pardiso */ /* #undef IPOPT_HAS_PARDISO_PARALLEL */ +/* Define to 1 if SPRAL is available */ +/* #undef IPOPT_HAS_SPRAL */ + /* Define to 1 if WSMP is available */ /* #undef IPOPT_HAS_WSMP */ @@ -68,5 +71,6 @@ #define IPOPT_BLAS_FUNC(name,NAME) F77_FUNC(name,NAME) #define IPOPT_LAPACK_FUNC(name,NAME) F77_FUNC(name,NAME) #define IPOPT_PARDISO_FUNC(name,NAME) F77_FUNC(name,NAME) +#define IPOPT_SPRAL_FUNC(name,NAME) F77_FUNC(name,NAME) #define IPOPT_WSMP_FUNC(name,NAME) F77_FUNC(name,NAME) #define IPOPT_WSMP_FUNC_(name,NAME) F77_FUNC_(name,NAME) diff --git a/src/Interfaces/IpIpoptApplication.cpp b/src/Interfaces/IpIpoptApplication.cpp index b764c756f..2e01a8524 100644 --- a/src/Interfaces/IpIpoptApplication.cpp +++ b/src/Interfaces/IpIpoptApplication.cpp @@ -468,6 +468,32 @@ ApplicationReturnStatus IpoptApplication::Initialize( //options_to_print.push_back("pardiso_out_of_core_power"); #endif +#ifdef IPOPT_HAS_SPRAL + + options_to_print.push_back("#SPRAL Linear Solver"); + options_to_print.push_back("spral_cpu_block_size"); + options_to_print.push_back("spral_gpu_perf_coeff"); + options_to_print.push_back("spral_ignore_numa"); + options_to_print.push_back("spral_max_load_inbalance"); + options_to_print.push_back("spral_min_gpu_work"); + options_to_print.push_back("spral_nemin"); + options_to_print.push_back("spral_order"); + options_to_print.push_back("spral_pivot_method"); + options_to_print.push_back("spral_print_level"); + options_to_print.push_back("spral_scaling"); + options_to_print.push_back("spral_scaling_1"); + options_to_print.push_back("spral_scaling_2"); + options_to_print.push_back("spral_scaling_3"); + options_to_print.push_back("spral_small"); + options_to_print.push_back("spral_small_subtree_threshold"); + options_to_print.push_back("spral_switch_1"); + options_to_print.push_back("spral_switch_2"); + options_to_print.push_back("spral_switch_3"); + options_to_print.push_back("spral_u"); + options_to_print.push_back("spral_umax"); + options_to_print.push_back("spral_use_gpu"); +#endif + #ifdef IPOPT_HAS_WSMP options_to_print.push_back("#WSMP Linear Solver"); @@ -508,6 +534,10 @@ ApplicationReturnStatus IpoptApplication::Initialize( categories.push_back("MA27 Linear Solver"); categories.push_back("MA57 Linear Solver"); categories.push_back("Pardiso Linear Solver"); +#ifdef IPOPT_HAS_SPRAL + + categories.push_back("SPRAL Linear Solver"); +#endif #ifdef IPOPT_HAS_WSMP categories.push_back("WSMP Linear Solver");