diff --git a/benchmarks/README.md b/benchmarks/README.md index 1280356b9..1e56f9dfb 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,3 +1,26 @@ # Benchmarks Scripts -This directory contains the scripts for the benchmarks. +This directory contains the scripts for running benchmarks. Currently it supports running benchmarks on LP with and MILP. + + +# Linear Programming Benchmarking + + +- Mittelmann LP benchmark + +```bash +benchmarks/linear_programming/utils/benchmark_lp_mittelmann.sh +``` + +- MIPLIB Benchmark + +``` +mkdir miplib_data +mkdir miplib_result +wget https://miplib.zib.de/downloads/benchmark.zip -O miplib_data/benchmark.zip +unzip miplib_data/benchmark.zip -d miplib_data +find miplib_data -name "*.gz" -exec gunzip {} \; +find miplib_data -name "*.gz" -delete + +benchmarks/linear_programming/run_mps_files.sh --path miplib_data/ --write-log-file --log-to-console false --output-dir miplib_result --time-limit 600 --presolve t > miplib_result/output.log 2>&1 +``` diff --git a/benchmarks/linear_programming/utils/get_datasets.py b/benchmarks/linear_programming/utils/get_datasets.py index 447ac2e99..01ed86dc9 100644 --- a/benchmarks/linear_programming/utils/get_datasets.py +++ b/benchmarks/linear_programming/utils/get_datasets.py @@ -21,11 +21,11 @@ import subprocess -# From: http://plato.asu.edu/bench.html +# From: https://plato.asu.edu/bench.html # Folder containg instances: # - https://miplib2010.zib.de/miplib2010.php # - https://www.netlib.org/lp/data/ -# - http://old.sztaki.hu/~meszaros/public_ftp/lptestset/ (and it's subfolders) +# - https://old.sztaki.hu/~meszaros/public_ftp/lptestset/ (and it's subfolders) # - https://plato.asu.edu/ftp/lptestset/ (and it's subfolders) # - https://miplib.zib.de/tag_benchmark.html # - https://miplib.zib.de/tag_collection.html @@ -83,7 +83,7 @@ ] MittelmannInstances = { - "emps": "http://old.sztaki.hu/~meszaros/public_ftp/lptestset/emps.c", + "emps": "https://old.sztaki.hu/~meszaros/public_ftp/lptestset/emps.c", "problems" : { "irish-electricity" : [ "https://plato.asu.edu/ftp/lptestset/irish-electricity.mps.bz2", @@ -94,88 +94,88 @@ "mps" ], "16_n14" : [ - "http://plato.asu.edu/ftp/lptestset/network/16_n14.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/network/16_n14.mps.bz2", "mps" ], "Dual2_5000" : [ - "http://plato.asu.edu/ftp/lptestset/Dual2_5000.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/Dual2_5000.mps.bz2", "mps" ], "L1_six1000" : [ - "http://plato.asu.edu/ftp/lptestset/L1_sixm1000obs.bz2", + "https://plato.asu.edu/ftp/lptestset/L1_sixm1000obs.bz2", "netlib" ], "L1_sixm" : ["", "mps"], "L1_sixm1000obs" : [ - "http://plato.asu.edu/ftp/lptestset/L1_sixm1000obs.bz2", + "https://plato.asu.edu/ftp/lptestset/L1_sixm1000obs.bz2", "netlib" ], "L1_sixm250" : ["", "netlib"], "L1_sixm250obs" : [ - "http://plato.asu.edu/ftp/lptestset/L1_sixm250obs.bz2", + "https://plato.asu.edu/ftp/lptestset/L1_sixm250obs.bz2", "netlib" ], "L2CTA3D" : [ - "http://plato.asu.edu/ftp/lptestset/L2CTA3D.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/L2CTA3D.mps.bz2", "mps" ], "Linf_520c" : [ - "http://plato.asu.edu/ftp/lptestset/Linf_520c.bz2", + "https://plato.asu.edu/ftp/lptestset/Linf_520c.bz2", "netlib" ], "Primal2_1000" : [ - "http://plato.asu.edu/ftp/lptestset/Primal2_1000.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/Primal2_1000.mps.bz2", "mps" ], "a2864" : [ - "http://plato.asu.edu/ftp/lptestset/a2864.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/a2864.mps.bz2", "mps" ], "bdry2" : [ - "http://plato.asu.edu/ftp/lptestset/bdry2.bz2", + "https://plato.asu.edu/ftp/lptestset/bdry2.bz2", "netlib" ], "braun" : ["", "mps"], "cont1" : [ - "http://plato.asu.edu/ftp/lptestset/misc/cont1.bz2", + "https://plato.asu.edu/ftp/lptestset/misc/cont1.bz2", "netlib" ], "cont11" : [ - "http://plato.asu.edu/ftp/lptestset/misc/cont11.bz2", + "https://plato.asu.edu/ftp/lptestset/misc/cont11.bz2", "netlib" ], "datt256" : [ - "http://plato.asu.edu/ftp/lptestset/datt256_lp.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/datt256_lp.mps.bz2", "mps" ], "datt256_lp" : [ - "http://plato.asu.edu/ftp/lptestset/datt256_lp.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/datt256_lp.mps.bz2", "mps" ], "degme" : [ - "http://old.sztaki.hu/~meszaros/public_ftp/lptestset/New/degme.gz", + "https://old.sztaki.hu/~meszaros/public_ftp/lptestset/New/degme.gz", "netlib" ], "dlr1" : [ - "http://plato.asu.edu/ftp/lptestset/dlr1.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/dlr1.mps.bz2", "mps" ], "dlr2" : [ - "http://plato.asu.edu/ftp/lptestset/dlr2.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/dlr2.mps.bz2", "mps" ], "energy1" : ["", "mps"], # Kept secret by Mittlemman "energy2" : ["", "mps"], "ex10" : [ - "http://plato.asu.edu/ftp/lptestset/ex10.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/ex10.mps.bz2", "mps" ], "fhnw-binschedule1" : [ - "http://plato.asu.edu/ftp/lptestset/fhnw-binschedule1.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/fhnw-binschedule1.mps.bz2", "mps" ], "fome13" : [ - "http://plato.asu.edu/ftp/lptestset/fome/fome13.bz2", + "https://plato.asu.edu/ftp/lptestset/fome/fome13.bz2", "netlib" ], "gamora" : ["", "mps"], # Kept secret by Mittlemman @@ -195,106 +195,106 @@ "goto32_512_4" : ["", "mps"], "goto32_512_5" : ["", "mps"], "graph40-40" : [ - "http://plato.asu.edu/ftp/lptestset/graph40-40.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/graph40-40.mps.bz2", "mps" ], "graph40-40_lp" : [ - "http://plato.asu.edu/ftp/lptestset/graph40-40.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/graph40-40.mps.bz2", "mps" ], "groot" : ["", "mps"], # Kept secret by Mittlemman "heimdall" : ["", "mps"], # Kept secret by Mittlemman "hulk" : ["", "mps"], # Kept secret by Mittlemman "i_n13" : [ - "http://plato.asu.edu/ftp/lptestset/network/i_n13.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/network/i_n13.mps.bz2", "mps" ], "irish-e" : ["", "mps"], "karted" : [ - "http://old.sztaki.hu/~meszaros/public_ftp/lptestset/New/karted.gz", + "https://old.sztaki.hu/~meszaros/public_ftp/lptestset/New/karted.gz", "netlib" ], "lo10" : [ - "http://plato.asu.edu/ftp/lptestset/network/lo10.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/network/lo10.mps.bz2", "mps" ], "loki" : ["", "mps"], # Kept secret by Mittlemman "long15" : [ - "http://plato.asu.edu/ftp/lptestset/network/long15.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/network/long15.mps.bz2", "mps" ], "nebula" : ["", "mps"], # Kept secret by Mittlemman "neos" : [ - "http://plato.asu.edu/ftp/lptestset/misc/neos.bz2", + "https://plato.asu.edu/ftp/lptestset/misc/neos.bz2", "netlib" ], "neos-3025225" : [ - "http://plato.asu.edu/ftp/lptestset/neos-3025225.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/neos-3025225.mps.bz2", "mps" ], "neos-3025225_lp" : [ - "http://plato.asu.edu/ftp/lptestset/neos-3025225.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/neos-3025225.mps.bz2", "mps" ], "neos-5251015" : [ - "http://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", "mps" ], "neos-5251015_lp" : [ - "http://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", "mps" ], "neos3" : [ - "http://plato.asu.edu/ftp/lptestset/misc/neos3.bz2", + "https://plato.asu.edu/ftp/lptestset/misc/neos3.bz2", "netlib" ], "neos-5052403-cygnet" : [ - "http://plato.asu.edu/ftp/lptestset/neos-5052403-cygnet.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/neos-5052403-cygnet.mps.bz2", "mps" ], "neos5251015_lp" : [ - "http://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", "mps" ], "neos5251915" : [ - "http://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/neos-5251015.mps.bz2", "mps" ], "netlarge1" : [ - "http://plato.asu.edu/ftp/lptestset/network/netlarge1.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/network/netlarge1.mps.bz2", "mps" ], "netlarge2" : [ - "http://plato.asu.edu/ftp/lptestset/network/netlarge2.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/network/netlarge2.mps.bz2", "mps" ], "netlarge3" : [ - "http://plato.asu.edu/ftp/lptestset/network/netlarge3.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/network/netlarge3.mps.bz2", "mps" ], "netlarge6" : [ - "http://plato.asu.edu/ftp/lptestset/network/netlarge6.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/network/netlarge6.mps.bz2", "mps" ], "ns1687037" : [ - "http://plato.asu.edu/ftp/lptestset/misc/ns1687037.bz2", + "https://plato.asu.edu/ftp/lptestset/misc/ns1687037.bz2", "netlib" ], "ns1688926" : [ - "http://plato.asu.edu/ftp/lptestset/misc/ns1688926.bz2", + "https://plato.asu.edu/ftp/lptestset/misc/ns1688926.bz2", "netlib" ], "nug08-3rd" : [ - "http://plato.asu.edu/ftp/lptestset/nug/nug08-3rd.bz2", + "https://plato.asu.edu/ftp/lptestset/nug/nug08-3rd.bz2", "netlib" ], "pds-100" : [ - "http://plato.asu.edu/ftp/lptestset/pds/pds-100.bz2", + "https://plato.asu.edu/ftp/lptestset/pds/pds-100.bz2", "netlib" ], "psched3-3" : ["", "mps"], "qap15" : [ - "http://plato.asu.edu/ftp/lptestset/qap15.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/qap15.mps.bz2", "mps" ], "rail02" : [ @@ -302,35 +302,35 @@ "mps" ], "rail4284" : [ - "http://plato.asu.edu/ftp/lptestset/rail/rail4284.bz2", + "https://plato.asu.edu/ftp/lptestset/rail/rail4284.bz2", "netlib" ], "rmine15" : [ - "http://plato.asu.edu/ftp/lptestset/rmine15.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/rmine15.mps.bz2", "mps" ], "s100" : [ - "http://plato.asu.edu/ftp/lptestset/s100.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/s100.mps.bz2", "mps" ], "s250r10" : [ - "http://plato.asu.edu/ftp/lptestset/s250r10.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/s250r10.mps.bz2", "mps" ], "s82" : [ - "http://plato.asu.edu/ftp/lptestset/s82.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/s82.mps.bz2", "mps" ], "savsched1" : [ - "http://plato.asu.edu/ftp/lptestset/savsched1.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/savsched1.mps.bz2", "mps" ], "scpm1" : [ - "http://plato.asu.edu/ftp/lptestset/scpm1.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/scpm1.mps.bz2", "mps" ], "set-cover-model" : [ - "http://plato.asu.edu/ftp/lptestset/set-cover-model.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/set-cover-model.mps.bz2", "mps" ], "shs1023" : [ @@ -338,19 +338,19 @@ "mps" ], "square15" : [ - "http://plato.asu.edu/ftp/lptestset/network/square15.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/network/square15.mps.bz2", "mps" ], "square41" : [ - "http://plato.asu.edu/ftp/lptestset/square41.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/square41.mps.bz2", "mps" ], "stat96v2" : [ - "http://old.sztaki.hu/~meszaros/public_ftp/lptestset/misc/stat96v2.gz", + "https://old.sztaki.hu/~meszaros/public_ftp/lptestset/misc/stat96v2.gz", "netlib" ], "stormG2_1000" : [ - "http://plato.asu.edu/ftp/lptestset/misc/stormG2_1000.bz2", + "https://plato.asu.edu/ftp/lptestset/misc/stormG2_1000.bz2", "netlib" ], "storm_1000" : ["", "mps"], @@ -359,15 +359,15 @@ "mps" ], "supportcase10" : [ - "http://plato.asu.edu/ftp/lptestset/supportcase10.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/supportcase10.mps.bz2", "mps" ], "support19" : [ - "http://plato.asu.edu/ftp/lptestset/supportcase19.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/supportcase19.mps.bz2", "mps" ], "supportcase19" : [ - "http://plato.asu.edu/ftp/lptestset/supportcase19.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/supportcase19.mps.bz2", "mps" ], "test03" : ["", "mps"], # Kept secret by Mittlemman @@ -388,15 +388,15 @@ "thor" : ["", "mps"], # Kept secret by Mittlemman "tpl-tub-ws" : ["", "mps"], "tpl-tub-ws1617" : [ - "http://plato.asu.edu/ftp/lptestset/tpl-tub-ws1617.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/tpl-tub-ws1617.mps.bz2", "mps" ], "wide15" : [ - "http://plato.asu.edu/ftp/lptestset/network/wide15.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/network/wide15.mps.bz2", "mps" ], "woodlands09" : [ - "http://plato.asu.edu/ftp/lptestset/woodlands09.mps.bz2", + "https://plato.asu.edu/ftp/lptestset/woodlands09.mps.bz2", "mps" ] }, @@ -550,7 +550,7 @@ "square15", "wide15" ], - # <=100s in bench: http://plato.asu.edu/ftp/lpbar.html + # <=100s in bench: https://plato.asu.edu/ftp/lpbar.html "L0" : [ "ex10", "datt256", @@ -693,11 +693,11 @@ def extract(file, dir, type): raise Exception(f"Unknown file extension found for extraction {file}") # download emps and compile # Disable emps for now - if type == "netlib" and False: + if type == "netlib": url = MittelmannInstances["emps"] file = os.path.join(dir, "emps.c") download(url, file) - subprocess.run(f"cd {dir} && gcc emps.c -o emps", + subprocess.run(f"cd {dir} && gcc -Wno-implicit-int emps.c -o emps", shell=True) # determine output file and run emps subprocess.run(f"cd {dir} && ./emps {unzippedfile} > {outfile}", diff --git a/build.sh b/build.sh index a5f57f9c9..ffb9e1a0f 100755 --- a/build.sh +++ b/build.sh @@ -27,7 +27,7 @@ REPODIR=$(cd "$(dirname "$0")"; pwd) LIBCUOPT_BUILD_DIR=${LIBCUOPT_BUILD_DIR:=${REPODIR}/cpp/build} LIBMPS_PARSER_BUILD_DIR=${LIBMPS_PARSER_BUILD_DIR:=${REPODIR}/cpp/libmps_parser/build} -VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" +VALIDARGS="clean libcuopt libmps_parser cuopt_mps_parser cuopt cuopt_server cuopt_sh_client docs deb -a -b -g -fsanitize -v -l= --verbose-pdlp --build-lp-only --no-fetch-rapids --skip-c-python-adapters --skip-tests-build --skip-routing-build --skip-fatbin-write --host-lineinfo [--cmake-args=\\\"\\\"] [--cache-tool=] -n --allgpuarch --ci-only-arch --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) @@ -54,6 +54,7 @@ HELP="$0 [ ...] [ ...] --skip-tests-build - disable building of all tests --skip-routing-build - skip building routing components --skip-fatbin-write - skip the fatbin write + --host-lineinfo - build with debug line information for host code --cache-tool= - pass the build cache tool (eg: ccache, sccache, distcc) that will be used to speedup the build process. --cmake-args=\\\"\\\" - pass arbitrary list of CMake configuration options (escape all quotes in argument) @@ -91,6 +92,7 @@ SKIP_C_PYTHON_ADAPTERS=0 SKIP_TESTS_BUILD=0 SKIP_ROUTING_BUILD=0 WRITE_FATBIN=1 +HOST_LINEINFO=0 CACHE_ARGS=() PYTHON_ARGS_FOR_INSTALL=("-m" "pip" "install" "--no-build-isolation" "--no-deps") LOGGING_ACTIVE_LEVEL="INFO" @@ -133,7 +135,7 @@ function cacheTool { } function loggingArgs { - if [[ $(echo "$ARGS" | { grep -Eo "\-l" || true; } | wc -l ) -gt 1 ]]; then + if [[ $(echo "$ARGS" | { grep -Eo "\-l=" || true; } | wc -l ) -gt 1 ]]; then echo "Multiple -l logging options were provided, please provide only one: ${ARGS}" exit 1 fi @@ -141,7 +143,7 @@ function loggingArgs { LOG_LEVEL_LIST=("TRACE" "DEBUG" "INFO" "WARN" "ERROR" "CRITICAL" "OFF") # Check for logging option - if [[ -n $(echo "$ARGS" | { grep -E "\-l" || true; } ) ]]; then + if [[ -n $(echo "$ARGS" | { grep -E "\-l=" || true; } ) ]]; then LOGGING_ARGS=$(echo "$ARGS" | { grep -Eo "\-l=\S+" || true; }) if [[ -n ${LOGGING_ARGS} ]]; then # Remove the full log argument from list of args so that it passes validArgs function @@ -252,6 +254,9 @@ fi if hasArg --skip-fatbin-write; then WRITE_FATBIN=0 fi +if hasArg --host-lineinfo; then + HOST_LINEINFO=1 +fi function contains_string { local search_string="$1" @@ -355,6 +360,7 @@ if buildAll || hasArg libcuopt; then -DBUILD_TESTS=$((1 - ${SKIP_TESTS_BUILD})) \ -DSKIP_ROUTING_BUILD=${SKIP_ROUTING_BUILD} \ -DWRITE_FATBIN=${WRITE_FATBIN} \ + -DHOST_LINEINFO=${HOST_LINEINFO} \ "${CACHE_ARGS[@]}" \ "${EXTRA_CMAKE_ARGS[@]}" \ "${REPODIR}"/cpp diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 2a3120f5a..e173a0410 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -30,7 +30,6 @@ dependencies: - gcc_linux-aarch64=14.* - gmock - gtest -- httpx - ipython - jsonref==1.1.0 - libcurand-dev diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index da6d19c19..7767e5a95 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -30,7 +30,6 @@ dependencies: - gcc_linux-64=14.* - gmock - gtest -- httpx - ipython - jsonref==1.1.0 - libcurand-dev diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index cf306f74e..e8b8ab2b5 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -30,7 +30,6 @@ dependencies: - gcc_linux-aarch64=14.* - gmock - gtest -- httpx - ipython - jsonref==1.1.0 - libcurand-dev diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index c9ba5beba..66b5f8f4b 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -30,7 +30,6 @@ dependencies: - gcc_linux-64=14.* - gmock - gtest -- httpx - ipython - jsonref==1.1.0 - libcurand-dev diff --git a/conda/recipes/cuopt-server/recipe.yaml b/conda/recipes/cuopt-server/recipe.yaml index ad9952f25..32c9efa0f 100644 --- a/conda/recipes/cuopt-server/recipe.yaml +++ b/conda/recipes/cuopt-server/recipe.yaml @@ -34,11 +34,11 @@ requirements: - cuopt =${{ version }} - fastapi >=0.104.1 - jsonref =1.1.0 - - httpx - msgpack-python =1.1.0 - msgpack-numpy =0.4.8 - numpy >=1.23,<3.0a0 - pandas>=2 + - psutil>=5.9,<6.0a0 - python - uvicorn ${{ uvicorn_version }} diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 45789cedd..b377f659b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -56,6 +56,7 @@ option(BUILD_LP_ONLY "Build only linear programming components, exclude routing option(SKIP_C_PYTHON_ADAPTERS "Skip building C and Python adapter files (cython_solve.cu and cuopt_c.cpp)" OFF) option(SKIP_ROUTING_BUILD "Skip building routing components" OFF) option(WRITE_FATBIN "Enable fatbin writing" ON) +option(HOST_LINEINFO "Build with debug line information for host code" OFF) message(VERBOSE "cuOpt: Enable nvcc -lineinfo: ${CMAKE_CUDA_LINEINFO}") message(VERBOSE "cuOpt: Build cuOpt unit-tests: ${BUILD_TESTS}") @@ -64,6 +65,7 @@ message(VERBOSE "cuOpt: Disable OpenMP: ${DISABLE_OPENMP}") message(VERBOSE "cuOpt: Build LP-only mode: ${BUILD_LP_ONLY}") message(VERBOSE "cuOpt: Skip C/Python adapters: ${SKIP_C_PYTHON_ADAPTERS}") message(VERBOSE "cuOpt: Skip routing build: ${SKIP_ROUTING_BUILD}") +message(VERBOSE "cuOpt: Build with debug line information for host code: ${HOST_LINEINFO}") message(VERBOSE "cuOpt: fatbin: ${WRITE_FATBIN}") # ################################################################################################## @@ -203,6 +205,9 @@ endif() set(CUOPT_SRC_FILES ) add_subdirectory(src) +if (HOST_LINEINFO) + set_source_files_properties(${CUOPT_SRC_FILES} DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTIES COMPILE_OPTIONS "-g1") +endif() add_library(cuopt SHARED ${CUOPT_SRC_FILES} ) diff --git a/cpp/src/mip/CMakeLists.txt b/cpp/src/mip/CMakeLists.txt index e694a9f6f..2d11a8dcb 100644 --- a/cpp/src/mip/CMakeLists.txt +++ b/cpp/src/mip/CMakeLists.txt @@ -48,7 +48,7 @@ set(MIP_NON_LP_FILES ${CMAKE_CURRENT_SOURCE_DIR}/presolve/trivial_presolve.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump.cu ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/feasibility_jump_kernels.cu -) + ${CMAKE_CURRENT_SOURCE_DIR}/feasibility_jump/fj_cpu.cu) # Choose which files to include based on build mode if(BUILD_LP_ONLY) diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu index 0fb3f9fc0..5da9dff84 100644 --- a/cpp/src/mip/diversity/diversity_manager.cu +++ b/cpp/src/mip/diversity/diversity_manager.cu @@ -229,13 +229,16 @@ bool diversity_manager_t::run_presolve(f_t time_limit) trivial_presolve(*problem_ptr); if (!problem_ptr->empty && !check_bounds_sanity(*problem_ptr)) { return false; } } - if (!problem_ptr->empty) { - // do the resizing no-matter what, bounds presolve might not change the bounds but initial - // trivial presolve might have - ls.constraint_prop.bounds_update.resize(*problem_ptr); - ls.constraint_prop.conditional_bounds_update.update_constraint_bounds( - *problem_ptr, ls.constraint_prop.bounds_update); - if (!check_bounds_sanity(*problem_ptr)) { return false; } + // May overconstrain if Papilo presolve has been run before + if (!context.settings.presolve) { + if (!problem_ptr->empty) { + // do the resizing no-matter what, bounds presolve might not change the bounds but initial + // trivial presolve might have + ls.constraint_prop.bounds_update.resize(*problem_ptr); + ls.constraint_prop.conditional_bounds_update.update_constraint_bounds( + *problem_ptr, ls.constraint_prop.bounds_update); + if (!check_bounds_sanity(*problem_ptr)) { return false; } + } } stats.presolve_time = presolve_timer.elapsed_time(); lp_optimal_solution.resize(problem_ptr->n_variables, problem_ptr->handle_ptr->get_stream()); @@ -314,6 +317,13 @@ void diversity_manager_t::run_fp_alone(solution_t& solution) CUOPT_LOG_INFO("FP alone finished!"); } +template +struct ls_cpufj_raii_guard_t { + ls_cpufj_raii_guard_t(local_search_t& ls) : ls(ls) {} + ~ls_cpufj_raii_guard_t() { ls.stop_cpufj_scratch_threads(); } + local_search_t& ls; +}; + // returns the best feasible solution template solution_t diversity_manager_t::run_solver() @@ -342,8 +352,15 @@ solution_t diversity_manager_t::run_solver() "The problem must not be ii"); population.initialize_population(); if (check_b_b_preemption()) { return population.best_feasible(); } + + // Run CPUFJ early to find quick initial solutions + population.allocate_solutions(); + ls_cpufj_raii_guard_t ls_cpufj_raii_guard(ls); // RAII to stop cpufj threads on solve stop + ls.start_cpufj_scratch_threads(population); + // before probing cache or LP, run FJ to generate initial primal feasible solution - if (!from_dir && !fj_only_run) { generate_quick_feasible_solution(); } + // TODO: commenting this out decreases the gap on trento1.mps dramatically. figure out why? + // if (!from_dir && !fj_only_run) { generate_quick_feasible_solution(); } const f_t time_ratio_of_probing_cache = diversity_config.time_ratio_of_probing_cache; const f_t max_time_on_probing = diversity_config.max_time_on_probing; f_t time_for_probing_cache = @@ -412,8 +429,8 @@ solution_t diversity_manager_t::run_solver() clamp_within_var_bounds(lp_optimal_solution, problem_ptr, problem_ptr->handle_ptr); } - population.allocate_solutions(); population.add_solutions_from_vec(std::move(initial_sol_vector)); + if (check_b_b_preemption()) { return population.best_feasible(); } if (context.settings.benchmark_info_ptr != nullptr) { diff --git a/cpp/src/mip/diversity/population.cu b/cpp/src/mip/diversity/population.cu index 63f4b3c85..17c88df3f 100644 --- a/cpp/src/mip/diversity/population.cu +++ b/cpp/src/mip/diversity/population.cu @@ -142,13 +142,36 @@ size_t population_t::get_external_solution_size() } template -void population_t::add_external_solution(std::vector& solution, f_t objective) +void population_t::add_external_solution(const std::vector& solution, + f_t objective, + solution_origin_t origin) { std::lock_guard lock(solution_mutex); - CUOPT_LOG_INFO("B&B added a solution to population, solution queue size %lu with objective %g", + + if (origin == solution_origin_t::CPUFJ) { + external_solution_queue_cpufj.emplace_back(solution, objective, origin); + } else { + external_solution_queue.emplace_back(solution, objective, origin); + } + + // Prevent CPUFJ scratch solutions from flooding the queue + if (external_solution_queue_cpufj.size() >= 10) { + auto worst_obj_it = + std::max_element(external_solution_queue_cpufj.begin(), + external_solution_queue_cpufj.end(), + [](const external_solution_t& a, const external_solution_t& b) { + return a.objective < b.objective; + }); + if (objective > worst_obj_it->objective) return; + auto worst_obj_idx = std::distance(external_solution_queue_cpufj.begin(), worst_obj_it); + + external_solution_queue_cpufj.erase(external_solution_queue_cpufj.begin() + worst_obj_idx); + } + + CUOPT_LOG_INFO("%s added a solution to population, solution queue size %lu with objective %g", + solution_origin_to_string(origin), external_solution_queue.size(), problem_ptr->get_user_obj_from_solver_obj(objective)); - external_solution_queue.emplace_back(solution); if (external_solution_queue.size() >= 5) { early_exit_primal_generation = true; } } @@ -166,18 +189,49 @@ std::vector> population_t::get_external_solutions { std::lock_guard lock(solution_mutex); std::vector> return_vector; - for (auto h_solution_vec : external_solution_queue) { - solution_t sol(*problem_ptr); - sol.copy_new_assignment(h_solution_vec); - sol.compute_feasibility(); - sol.handle_ptr->sync_stream(); - return_vector.emplace_back(std::move(sol)); + i_t counter = 0; + f_t new_best_feasible_objective = best_feasible_objective; + for (auto& queue : {external_solution_queue, external_solution_queue_cpufj}) { + for (auto& h_entry : queue) { + // ignore CPUFJ solutions if they're not better than the best feasible. + // It seems they worsen results on some instances despite the potential for improved diversity + if (h_entry.origin == solution_origin_t::CPUFJ && + h_entry.objective > new_best_feasible_objective) { + continue; + } else if (h_entry.origin != solution_origin_t::CPUFJ && + h_entry.objective > new_best_feasible_objective) { + new_best_feasible_objective = h_entry.objective; + } + + solution_t sol(*problem_ptr); + sol.copy_new_assignment(h_entry.solution); + sol.compute_feasibility(); + if (!sol.get_feasible()) { + CUOPT_LOG_ERROR( + "External solution %d is infeasible, excess %g, obj %g, int viol %g, var viol %g, cstr " + "viol %g, n_feasible %d/%d, integers %d/%d", + counter, + sol.get_total_excess(), + sol.get_user_objective(), + sol.compute_max_int_violation(), + sol.compute_max_variable_violation(), + sol.compute_max_constraint_violation(), + sol.n_feasible_constraints.value(sol.handle_ptr->get_stream()), + problem_ptr->n_constraints, + sol.compute_number_of_integers(), + problem_ptr->n_integer_vars); + } + sol.handle_ptr->sync_stream(); + return_vector.emplace_back(std::move(sol)); + counter++; + } } if (external_solution_queue.size() > 0) { CUOPT_LOG_INFO("Consuming B&B solutions, solution queue size %lu", external_solution_queue.size()); external_solution_queue.clear(); } + external_solution_queue_cpufj.clear(); return return_vector; } @@ -276,7 +330,8 @@ void population_t::run_solution_callbacks(solution_t& sol) cuopt_assert(std::abs(outside_sol.get_user_objective() - outside_sol_objective) <= 1e-6, "External solution objective mismatch"); auto h_outside_sol = outside_sol.get_host_assignment(); - add_external_solution(h_outside_sol, outside_sol.get_objective()); + add_external_solution( + h_outside_sol, outside_sol.get_objective(), solution_origin_t::EXTERNAL); } } } @@ -319,7 +374,7 @@ i_t population_t::add_solution(solution_t&& sol) raft::common::nvtx::range fun_scope("add_solution"); population_hash_map.insert(sol); double sol_cost = sol.get_quality(weights); - CUOPT_LOG_TRACE("Adding solution with quality %f and objective %f n_integers %d!", + CUOPT_LOG_DEBUG("Adding solution with quality %f and objective %f n_integers %d!", sol_cost, sol.get_user_objective(), sol.n_assigned_integers); @@ -366,6 +421,7 @@ i_t population_t::add_solution(solution_t&& sol) int inserted_pos = insert_index(std::pair((size_t)hint, sol_cost)); cuopt_assert(test_invariant(), "Population invariant doesn't hold"); + test_invariant(); return inserted_pos; } else if (sol_cost + OBJECTIVE_EPSILON < indices[index].second) { @@ -379,10 +435,12 @@ i_t population_t::add_solution(solution_t&& sol) int inserted_pos = insert_index(std::pair((size_t)free, sol_cost)); cuopt_assert(test_invariant(), "Population invariant doesn't hold"); + test_invariant(); return inserted_pos; } CUOPT_LOG_TRACE("Adding solution failed!"); cuopt_assert(test_invariant(), "Population invariant doesn't hold"); + test_invariant(); return -1; } diff --git a/cpp/src/mip/diversity/population.cuh b/cpp/src/mip/diversity/population.cuh index 532b9f24d..79b2040ff 100644 --- a/cpp/src/mip/diversity/population.cuh +++ b/cpp/src/mip/diversity/population.cuh @@ -35,6 +35,18 @@ namespace cuopt::linear_programming::detail { template class diversity_manager_t; +enum class solution_origin_t { BRANCH_AND_BOUND, CPUFJ, EXTERNAL }; + +constexpr const char* solution_origin_to_string(solution_origin_t origin) +{ + switch (origin) { + case solution_origin_t::BRANCH_AND_BOUND: return "B&B"; + case solution_origin_t::CPUFJ: return "CPUFJ"; + case solution_origin_t::EXTERNAL: return "injected"; + default: return "unknown"; + } +} + template class population_t { public: @@ -101,7 +113,9 @@ class population_t { * \return { -1 = not inserted , others = inserted index} */ i_t add_solution(solution_t&& sol); - void add_external_solution(std::vector& solution, f_t objective); + void add_external_solution(const std::vector& solution, + f_t objective, + solution_origin_t origin); std::vector> get_external_solutions(); size_t get_external_solution_size(); void preempt_heuristic_solver(); @@ -172,7 +186,20 @@ class population_t { weight_t weights; std::vector> indices; std::vector>> solutions; - std::vector> external_solution_queue; + + struct external_solution_t { + external_solution_t() = default; + external_solution_t(const std::vector& solution, f_t objective, solution_origin_t origin) + : solution(solution), objective(objective), origin(origin) + { + } + std::vector solution; + f_t objective; + solution_origin_t origin; + }; + + std::vector external_solution_queue; + std::vector external_solution_queue_cpufj; std::mt19937 rng; i_t update_iter = 0; std::mutex solution_mutex; diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump.cu b/cpp/src/mip/feasibility_jump/feasibility_jump.cu index a30a9b182..ddb45200f 100644 --- a/cpp/src/mip/feasibility_jump/feasibility_jump.cu +++ b/cpp/src/mip/feasibility_jump/feasibility_jump.cu @@ -80,11 +80,13 @@ fj_t::fj_t(mip_solver_context_t& context_, fj_settings_t in_ TPB_update_changed_constraints, pb_ptr->handle_ptr); resetmoves_launch_dims = get_launch_dims_max_occupancy( - (void*)compute_mtm_moves_kernel, TPB_resetmoves, pb_ptr->handle_ptr); - resetmoves_bin_launch_dims = - get_launch_dims_max_occupancy((void*)compute_mtm_moves_kernel, - TPB_resetmoves, - pb_ptr->handle_ptr); + (void*)compute_mtm_moves_kernel, + TPB_resetmoves, + pb_ptr->handle_ptr); + resetmoves_bin_launch_dims = get_launch_dims_max_occupancy( + (void*)compute_mtm_moves_kernel, + TPB_resetmoves, + pb_ptr->handle_ptr); update_weights_launch_dims = get_launch_dims_max_occupancy( (void*)handle_local_minimum_kernel, TPB_localmin, pb_ptr->handle_ptr); lift_move_launch_dims = get_launch_dims_max_occupancy( @@ -149,6 +151,10 @@ void fj_t::randomize_weights(const raft::handle_t* handle_ptr) f_t h_max_weight = *std::max_element(h_cstr_vec.begin(), h_cstr_vec.end()); max_cstr_weight.set_value_async(h_max_weight, handle_ptr->get_stream()); raft::copy(cstr_weights.data(), h_cstr_vec.data(), h_cstr_vec.size(), handle_ptr->get_stream()); + raft::copy( + cstr_left_weights.data(), h_cstr_vec.data(), h_cstr_vec.size(), handle_ptr->get_stream()); + raft::copy( + cstr_right_weights.data(), h_cstr_vec.data(), h_cstr_vec.size(), handle_ptr->get_stream()); handle_ptr->sync_stream(); } @@ -722,7 +728,7 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream } else { if (is_binary_pb) { cudaLaunchCooperativeKernel( - (void*)compute_mtm_moves_kernel, + (void*)compute_mtm_moves_kernel, grid_resetmoves_bin, blocks_resetmoves_bin, reset_moves_args, @@ -730,7 +736,7 @@ void fj_t::run_step_device(const rmm::cuda_stream_view& climber_stream climber_stream); } else { cudaLaunchCooperativeKernel( - (void*)compute_mtm_moves_kernel, + (void*)compute_mtm_moves_kernel, grid_resetmoves, blocks_resetmoves, reset_moves_args, @@ -1061,6 +1067,7 @@ i_t fj_t::solve(solution_t& solution) resize_vectors(solution.handle_ptr); bool is_initial_feasible = solution.compute_feasibility(); + auto initial_solution = solution; // if we're in rounding mode, split the time/iteration limit between the first and second stage cuopt_assert(settings.parameters.rounding_second_stage_split >= 0 && settings.parameters.rounding_second_stage_split <= 1, @@ -1133,11 +1140,11 @@ i_t fj_t::solve(solution_t& solution) bool is_new_feasible = solution.compute_feasibility(); if (is_initial_feasible && !is_new_feasible) { - CUOPT_LOG_ERROR( - "Feasibility jump caused feasible solution to become infeasible\n" - "Best excess is %g", + CUOPT_LOG_WARN( + "Feasibility jump caused feasible solution to become infeasible: Best excess is %g", climbers[0]->best_excess.value(handle_ptr->get_stream())); - cuopt_assert(false, "Feasibility jump caused feasible solution to become infeasible"); + solution.copy_from(initial_solution); + cuopt_assert(solution.compute_feasibility(), "Reverted solution should be feasible"); } return is_new_feasible; diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump.cuh b/cpp/src/mip/feasibility_jump/feasibility_jump.cuh index e5e7c6d97..c7e999509 100644 --- a/cpp/src/mip/feasibility_jump/feasibility_jump.cuh +++ b/cpp/src/mip/feasibility_jump/feasibility_jump.cuh @@ -57,7 +57,7 @@ struct fj_hyper_parameters_t { int global_move_update_period = 10; int heavy_move_update_period = 50; int sync_period = 200; - int lhs_refresh_period = 1000; + int lhs_refresh_period = 500; int allow_infeasibility_iterations = 200; // The value added to the objective weight everytime a new best solution is // found in order to move towards better solutions @@ -98,6 +98,8 @@ enum class fj_mode_t { EXIT_NON_IMPROVING // iterate until we are don't improve the best }; +enum class MTMMoveType { FJ_MTM_VIOLATED, FJ_MTM_SATISFIED, FJ_MTM_ALL }; + enum class fj_load_balancing_mode_t { ALWAYS_ON, AUTO, ALWAYS_OFF }; enum class fj_candidate_selection_t { WEIGHTED_SCORE, FEASIBLE_FIRST }; @@ -121,7 +123,16 @@ struct fj_move_t { int var_idx; double value; - bool operator<(const fj_move_t& rhs) const { return value < rhs.value; } + bool operator<(const fj_move_t& rhs) const + { + if (var_idx == rhs.var_idx) return value < rhs.value; + return var_idx < rhs.var_idx; + } + bool operator==(const fj_move_t& rhs) const + { + return var_idx == rhs.var_idx && value == rhs.value; + } + bool operator!=(const fj_move_t& rhs) const { return !(*this == rhs); } }; // TODO: use 32bit integers instead, @@ -187,6 +198,9 @@ struct fj_move_candidate_t { f_t delta; }; +template +struct fj_cpu_climber_t; + template class fj_t { public: @@ -198,6 +212,15 @@ class fj_t { ~fj_t(); void reset_cuda_graph(); i_t solve(solution_t& solution); + std::unique_ptr> create_cpu_climber( + solution_t& solution, + const std::vector& left_weights, + const std::vector& right_weights, + f_t objective_weight, + fj_settings_t settings = fj_settings_t{}, + bool randomize_params = false); + bool cpu_solve(fj_cpu_climber_t& fj_cpu, + f_t time_limit = +std::numeric_limits::infinity()); i_t alloc_max_climbers(i_t desired_climbers); void resize_vectors(const raft::handle_t* handle_ptr); void device_init(const rmm::cuda_stream_view& stream); @@ -501,41 +524,52 @@ class fj_t { fj_settings_t* settings; - DI f_t lower_excess_score(i_t cstr, f_t lhs) const + HDI f_t lower_excess_score(i_t cstr, f_t lhs, f_t c_lb) const { - return raft::min(lhs - pb.constraint_lower_bounds[cstr], (f_t)0); + return raft::min(lhs - c_lb, (f_t)0); } - DI f_t upper_excess_score(i_t cstr, f_t lhs) const + HDI f_t upper_excess_score(i_t cstr, f_t lhs, f_t c_ub) const { - return raft::min(pb.constraint_upper_bounds[cstr] - lhs, (f_t)0); + return raft::min(c_ub - lhs, (f_t)0); } // Computes the constraint's contribution to the feasibility score: // If the constraint is satisfied by the given LHS value, returns 0. // If the constraint is violated by the given LHS value, returns -|lhs-rhs|. // caution: is inverted compared to solution_t's excess convention - DI f_t excess_score(i_t cstr, f_t lhs) const + HDI f_t excess_score(i_t cstr, f_t cnstr_value, f_t c_lb, f_t c_ub) const + { + f_t right_score = upper_excess_score(cstr, cnstr_value, c_ub); + if (right_score < 0.) { return right_score; } + return lower_excess_score(cstr, cnstr_value, c_lb); + } + HDI f_t excess_score(i_t cstr, f_t lhs) const { - f_t right_score = upper_excess_score(cstr, lhs); + f_t c_lb = pb.constraint_lower_bounds[cstr]; + f_t c_ub = pb.constraint_upper_bounds[cstr]; + f_t right_score = upper_excess_score(cstr, lhs, c_ub); if (right_score < 0.) { return right_score; } - return lower_excess_score(cstr, lhs); + return lower_excess_score(cstr, lhs, c_lb); } // FJ relies on maintaining a running LHS value for each constraint // which may suffer from numerical errors and lead to very slight (~machine epsilon) // violations of the actual bounds. // Use a slightly tightened tolerance in FJ to account for this. - DI f_t get_corrected_tolerance(i_t cstr) const + HDI f_t get_corrected_tolerance(i_t cstr, f_t c_lb, f_t c_ub) const { - f_t cstr_tolerance = get_cstr_tolerance(pb.constraint_lower_bounds[cstr], - pb.constraint_upper_bounds[cstr], - pb.tolerances.absolute_tolerance, - pb.tolerances.relative_tolerance); - return max((f_t)0, cstr_tolerance - MACHINE_EPSILON); + f_t cstr_tolerance = get_cstr_tolerance( + c_lb, c_ub, pb.tolerances.absolute_tolerance, pb.tolerances.relative_tolerance); + return max((f_t)1e-12, cstr_tolerance - MACHINE_EPSILON); + } + HDI f_t get_corrected_tolerance(i_t cstr) const + { + return get_corrected_tolerance( + cstr, pb.constraint_lower_bounds[cstr], pb.constraint_upper_bounds[cstr]); } - DI bool cstr_satisfied(i_t cstr, f_t lhs) const + HDI bool cstr_satisfied(i_t cstr, f_t lhs) const { f_t cstr_tolerance = get_cstr_tolerance(pb.constraint_lower_bounds[cstr], pb.constraint_upper_bounds[cstr], @@ -544,7 +578,7 @@ class fj_t { return excess_score(cstr, lhs) >= -cstr_tolerance; } - DI bool move_numerically_stable(f_t old_val, f_t new_val, f_t infeasibility) const + HDI bool move_numerically_stable(f_t old_val, f_t new_val, f_t infeasibility) const { return fabs(new_val - old_val) < 1e6 && fabs(new_val) < 1e20 && fabs(*violation_score - infeasibility) < 1e20; diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump_impl_common.cuh b/cpp/src/mip/feasibility_jump/feasibility_jump_impl_common.cuh new file mode 100644 index 000000000..71513a2ec --- /dev/null +++ b/cpp/src/mip/feasibility_jump/feasibility_jump_impl_common.cuh @@ -0,0 +1,221 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "feasibility_jump.cuh" + +namespace cuopt::linear_programming::detail { + +// Returns the current slack, and the variable delta that would nullify this slack ("tighten" it) +template +HDI thrust::tuple get_mtm_for_bound( + const typename fj_t::climber_data_t::view_t& fj, + i_t var_idx, + i_t cstr_idx, + f_t cstr_coeff, + f_t bound, + f_t sign) +{ + f_t delta_ij = 0; + f_t slack = 0; + f_t old_val = fj.incumbent_assignment[var_idx]; + + f_t lhs = fj.incumbent_lhs[cstr_idx] * sign; + f_t rhs = bound * sign; + slack = rhs - lhs; // bound might be infinite. let the caller handle this case + + delta_ij = slack / (cstr_coeff * sign); + + return {delta_ij, slack}; +} + +template +HDI thrust::tuple get_mtm_for_constraint( + const typename fj_t::climber_data_t::view_t& fj, + i_t var_idx, + i_t cstr_idx, + f_t cstr_coeff, + f_t c_lb, + f_t c_ub) +{ + f_t sign = -1; + f_t delta_ij = 0; + f_t slack = 0; + + f_t cstr_tolerance = fj.get_corrected_tolerance(cstr_idx); + + f_t old_val = fj.incumbent_assignment[var_idx]; + + // process each bound as two separate constraints + f_t bounds[2] = {c_lb, c_ub}; + cuopt_assert(isfinite(bounds[0]) || isfinite(bounds[1]), "bounds are not finite"); + + for (i_t bound_idx = 0; bound_idx < 2; ++bound_idx) { + if (!isfinite(bounds[bound_idx])) continue; + + // factor to correct the lhs/rhs to turn a lb <= lhs <= ub constraint into + // two virtual constraints lhs <= ub and -lhs <= -lb + sign = bound_idx == 0 ? -1 : 1; + f_t lhs = fj.incumbent_lhs[cstr_idx] * sign; + f_t rhs = bounds[bound_idx] * sign; + slack = rhs - lhs; + + // skip constraints that are violated/satisfied based on the MTM move type + bool violated = slack < -cstr_tolerance; + if (move_type == MTMMoveType::FJ_MTM_VIOLATED ? !violated : violated) continue; + + f_t new_val = old_val; + + delta_ij = slack / (cstr_coeff * sign); + break; + } + + return {delta_ij, sign, slack, cstr_tolerance}; +} + +template +HDI std::pair feas_score_constraint( + const typename fj_t::climber_data_t::view_t& fj, + i_t var_idx, + f_t delta, + i_t cstr_idx, + f_t cstr_coeff, + f_t c_lb, + f_t c_ub, + f_t current_lhs) +{ + cuopt_assert(isfinite(delta), "invalid delta"); + cuopt_assert(cstr_coeff != 0 && isfinite(cstr_coeff), "invalid coefficient"); + + f_t base_feas = 0; + f_t bonus_robust = 0; + + f_t bounds[2] = {c_lb, c_ub}; + cuopt_assert(isfinite(c_lb) || isfinite(c_ub), "no range"); + for (i_t bound_idx = 0; bound_idx < 2; ++bound_idx) { + if (!isfinite(bounds[bound_idx])) continue; + + // factor to correct the lhs/rhs to turn a lb <= lhs <= ub constraint into + // two virtual leq constraints "lhs <= ub" and "-lhs <= -lb" in order to match + // the convention of the paper + + // TODO: broadcast left/right weights to a csr_offset-indexed table? local minimums + // usually occur on a rarer basis (around 50 iteratiosn to 1 local minimum) + // likely unreasonable and overkill however + f_t cstr_weight = + bound_idx == 0 ? fj.cstr_left_weights[cstr_idx] : fj.cstr_right_weights[cstr_idx]; + f_t sign = bound_idx == 0 ? -1 : 1; + f_t rhs = bounds[bound_idx] * sign; + f_t old_lhs = current_lhs * sign; + f_t new_lhs = (current_lhs + cstr_coeff * delta) * sign; + f_t old_slack = rhs - old_lhs; + f_t new_slack = rhs - new_lhs; + + cuopt_assert(isfinite(cstr_weight), "invalid weight"); + cuopt_assert(cstr_weight >= 0, "invalid weight"); + cuopt_assert(isfinite(old_lhs), ""); + cuopt_assert(isfinite(new_lhs), ""); + cuopt_assert(isfinite(old_slack) && isfinite(new_slack), ""); + + f_t cstr_tolerance = fj.get_corrected_tolerance(cstr_idx); + + bool old_viol = fj.excess_score(cstr_idx, current_lhs, c_lb, c_ub) < -cstr_tolerance; + bool new_viol = + fj.excess_score(cstr_idx, current_lhs + cstr_coeff * delta, c_lb, c_ub) < -cstr_tolerance; + + bool old_sat = old_lhs < rhs + cstr_tolerance; + bool new_sat = new_lhs < rhs + cstr_tolerance; + + // equality + if (fj.pb.integer_equal(c_lb, c_ub)) { + if (!old_viol) cuopt_assert(old_sat == !old_viol, ""); + if (!new_viol) cuopt_assert(new_sat == !new_viol, ""); + } + + // if it would feasibilize this constraint + if (!old_sat && new_sat) { + cuopt_assert(old_viol, ""); + base_feas += cstr_weight; + } + // would cause this constraint to be violated + else if (old_sat && !new_sat) { + cuopt_assert(new_viol, ""); + base_feas -= cstr_weight; + } + // simple improvement + else if (!old_sat && !new_sat && old_lhs > new_lhs) { + cuopt_assert(old_viol && new_viol, ""); + base_feas += (i_t)(cstr_weight * fj.settings->parameters.excess_improvement_weight); + } + // simple worsening + else if (!old_sat && !new_sat && old_lhs <= new_lhs) { + cuopt_assert(old_viol && new_viol, ""); + base_feas -= (i_t)(cstr_weight * fj.settings->parameters.excess_improvement_weight); + } + + // robustness score bonus if this would leave some strick slack + bool old_stable = old_lhs < rhs - cstr_tolerance; + bool new_stable = new_lhs < rhs - cstr_tolerance; + if (!old_stable && new_stable) { + bonus_robust += cstr_weight; + } else if (old_stable && !new_stable) { + bonus_robust -= cstr_weight; + } + } + + return {base_feas, bonus_robust}; +} + +template +HDI f_t get_breakthrough_move(typename fj_t::climber_data_t::view_t fj, i_t var_idx) +{ + f_t obj_coeff = fj.pb.objective_coefficients[var_idx]; + auto bounds = fj.pb.variable_bounds[var_idx]; + f_t v_lb = get_lower(bounds); + f_t v_ub = get_upper(bounds); + cuopt_assert(isfinite(v_lb) || isfinite(v_ub), "unexpected free variable"); + cuopt_assert(v_lb <= v_ub, "invalid bounds"); + cuopt_assert(fj.pb.check_variable_within_bounds(var_idx, fj.incumbent_assignment[var_idx]), + "invalid incumbent assignment"); + cuopt_assert(isfinite(obj_coeff), "invalid objective coefficient"); + cuopt_assert(obj_coeff != 0, "objective coefficient shouldn't be null"); + + f_t excess = (*fj.best_objective) - *fj.incumbent_objective; + + cuopt_assert(isfinite(excess) && excess < 0, + "breakthru move invoked during invalid solver state"); + + f_t old_val = fj.incumbent_assignment[var_idx]; + f_t new_val = old_val; + + f_t delta_ij = excess / obj_coeff; + + if (fj.pb.is_integer_var(var_idx)) { + new_val = obj_coeff > 0 ? floor(old_val + delta_ij + fj.pb.tolerances.integrality_tolerance) + : ceil(old_val + delta_ij - fj.pb.tolerances.integrality_tolerance); + } else { + new_val = old_val + delta_ij; + } + + // fallback + if (!fj.pb.check_variable_within_bounds(var_idx, new_val)) { + new_val = obj_coeff > 0 ? v_lb : v_ub; + } + + return new_val; +} + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cu b/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cu index 0d96cd4b2..dc63d94cd 100644 --- a/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cu +++ b/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cu @@ -26,6 +26,8 @@ #include +#include "feasibility_jump_impl_common.cuh" + namespace cg = cooperative_groups; #define CONSTRAINT_FLAG_INSERT 0 @@ -76,98 +78,6 @@ DI thrust::pair move_objective_score( return {base_obj, bonus_breakthrough}; } -template -DI std::pair feas_score_constraint( - const typename fj_t::climber_data_t::view_t& fj, - i_t var_idx, - f_t delta, - i_t cstr_idx, - f_t cstr_coeff, - f_t c_lb, - f_t c_ub, - f_t current_lhs) -{ - cuopt_assert(isfinite(delta), "invalid delta"); - cuopt_assert(cstr_coeff != 0 && isfinite(cstr_coeff), "invalid coefficient"); - - f_t base_feas = 0; - f_t bonus_robust = 0; - - f_t bounds[2] = {c_lb, c_ub}; - cuopt_assert(isfinite(c_lb) || isfinite(c_ub), "no range"); - for (i_t bound_idx = 0; bound_idx < 2; ++bound_idx) { - if (!isfinite(bounds[bound_idx])) continue; - - // factor to correct the lhs/rhs to turn a lb <= lhs <= ub constraint into - // two virtual leq constraints "lhs <= ub" and "-lhs <= -lb" in order to match - // the convention of the paper - - // TODO: broadcast left/right weights to a csr_offset-indexed table? local minimums - // usually occur on a rarer basis (around 50 iteratiosn to 1 local minimum) - // likely unreasonable and overkill however - f_t cstr_weight = - bound_idx == 0 ? fj.cstr_left_weights[cstr_idx] : fj.cstr_right_weights[cstr_idx]; - f_t sign = bound_idx == 0 ? -1 : 1; - f_t rhs = bounds[bound_idx] * sign; - f_t old_lhs = current_lhs * sign; - f_t new_lhs = (current_lhs + cstr_coeff * delta) * sign; - f_t old_slack = rhs - old_lhs; - f_t new_slack = rhs - new_lhs; - - cuopt_assert(isfinite(cstr_weight), "invalid weight"); - cuopt_assert(cstr_weight >= 0, "invalid weight"); - cuopt_assert(isfinite(old_lhs), ""); - cuopt_assert(isfinite(new_lhs), ""); - cuopt_assert(isfinite(old_slack) && isfinite(new_slack), ""); - - f_t cstr_tolerance = fj.get_corrected_tolerance(cstr_idx); - - bool old_viol = fj.excess_score(cstr_idx, current_lhs) < -cstr_tolerance; - bool new_viol = fj.excess_score(cstr_idx, current_lhs + cstr_coeff * delta) < -cstr_tolerance; - - bool old_sat = old_lhs < rhs + cstr_tolerance; - bool new_sat = new_lhs < rhs + cstr_tolerance; - - // equality - if (fj.pb.integer_equal(c_lb, c_ub)) { - if (!old_viol) cuopt_assert(old_sat == !old_viol, ""); - if (!new_viol) cuopt_assert(new_sat == !new_viol, ""); - } - - // if it would feasibilize this constraint - if (!old_sat && new_sat) { - cuopt_assert(old_viol, ""); - base_feas += cstr_weight; - } - // would cause this constraint to be violated - else if (old_sat && !new_sat) { - cuopt_assert(new_viol, ""); - base_feas -= cstr_weight; - } - // simple improvement - else if (!old_sat && !new_sat && old_lhs > new_lhs) { - cuopt_assert(old_viol && new_viol, ""); - base_feas += (i_t)(cstr_weight * fj.settings->parameters.excess_improvement_weight); - } - // simple worsening - else if (!old_sat && !new_sat && old_lhs <= new_lhs) { - cuopt_assert(old_viol && new_viol, ""); - base_feas -= (i_t)(cstr_weight * fj.settings->parameters.excess_improvement_weight); - } - - // robustness score bonus if this would leave some strick slack - bool old_stable = old_lhs < rhs - cstr_tolerance; - bool new_stable = new_lhs < rhs - cstr_tolerance; - if (!old_stable && new_stable) { - bonus_robust += cstr_weight; - } else if (old_stable && !new_stable) { - bonus_robust -= cstr_weight; - } - } - - return {base_feas, bonus_robust}; -} - template DI void smooth_weights(typename fj_t::climber_data_t::view_t& fj) { @@ -203,9 +113,11 @@ DI void update_weights(typename fj_t::climber_data_t::view_t& fj) for (i_t i = blockIdx.x; i < fj.violated_constraints.size(); i += gridDim.x) { i_t cstr_idx = fj.violated_constraints.contents[i]; f_t curr_incumbent_lhs = fj.incumbent_lhs[cstr_idx]; - f_t curr_lower_excess = fj.lower_excess_score(cstr_idx, curr_incumbent_lhs); - f_t curr_upper_excess = fj.upper_excess_score(cstr_idx, curr_incumbent_lhs); - f_t curr_excess_score = curr_lower_excess + curr_upper_excess; + f_t curr_lower_excess = + fj.lower_excess_score(cstr_idx, curr_incumbent_lhs, fj.pb.constraint_lower_bounds[cstr_idx]); + f_t curr_upper_excess = + fj.upper_excess_score(cstr_idx, curr_incumbent_lhs, fj.pb.constraint_upper_bounds[cstr_idx]); + f_t curr_excess_score = curr_lower_excess + curr_upper_excess; f_t old_weight; if (curr_lower_excess < 0.) { @@ -362,73 +274,6 @@ DI typename fj_t::move_score_info_t compute_new_score( return score_info; } -// Returns the current slack, and the variable delta that would nullify this slack ("tighten" it) -template -DI thrust::tuple get_mtm_for_bound( - const typename fj_t::climber_data_t::view_t& fj, - i_t var_idx, - i_t cstr_idx, - f_t cstr_coeff, - f_t bound, - f_t sign) -{ - f_t delta_ij = 0; - f_t slack = 0; - f_t old_val = fj.incumbent_assignment[var_idx]; - - f_t lhs = fj.incumbent_lhs[cstr_idx] * sign; - f_t rhs = bound * sign; - slack = rhs - lhs; // bound might be infinite. let the caller handle this case - - delta_ij = slack / (cstr_coeff * sign); - - return {delta_ij, slack}; -} - -template -DI thrust::tuple get_mtm_for_constraint( - const typename fj_t::climber_data_t::view_t& fj, - i_t var_idx, - i_t cstr_idx, - f_t cstr_coeff) -{ - f_t sign = -1; - f_t delta_ij = 0; - f_t slack = 0; - - f_t c_lb = fj.pb.constraint_lower_bounds[cstr_idx]; - f_t c_ub = fj.pb.constraint_upper_bounds[cstr_idx]; - f_t cstr_tolerance = fj.get_corrected_tolerance(cstr_idx); - - f_t old_val = fj.incumbent_assignment[var_idx]; - - // process each bound as two separate constraints - f_t bounds[2] = {c_lb, c_ub}; - cuopt_assert(isfinite(bounds[0]) || isfinite(bounds[1]), ""); - - for (i_t bound_idx = 0; bound_idx < 2; ++bound_idx) { - if (!isfinite(bounds[bound_idx])) continue; - - // factor to correct the lhs/rhs to turn a lb <= lhs <= ub constraint into - // two virtual constraints lhs <= ub and -lhs <= -lb - sign = bound_idx == 0 ? -1 : 1; - f_t lhs = fj.incumbent_lhs[cstr_idx] * sign; - f_t rhs = bounds[bound_idx] * sign; - slack = rhs - lhs; - - // skip constraints that are violated/satisfied based on the MTM move type - bool violated = slack < -cstr_tolerance; - if (move_type == MTMMoveType::FJ_MTM_VIOLATED ? !violated : violated) continue; - - f_t new_val = old_val; - - delta_ij = slack / (cstr_coeff * sign); - break; - } - - return {delta_ij, sign, slack, cstr_tolerance}; -} - // find best mixed tight move template DI std::pair::move_score_info_t> compute_best_mtm( @@ -472,9 +317,11 @@ DI std::pair::move_score_info_t> compute_best_mtm( ; } + f_t c_lb = fj.pb.constraint_lower_bounds[cstr_idx]; + f_t c_ub = fj.pb.constraint_upper_bounds[cstr_idx]; f_t new_val; auto [delta_ij, sign, slack, cstr_tolerance] = - get_mtm_for_constraint(fj, var_idx, cstr_idx, cstr_coeff); + get_mtm_for_constraint(fj, var_idx, cstr_idx, cstr_coeff, c_lb, c_ub); if (fj.pb.is_integer_var(var_idx)) { new_val = cstr_coeff * sign > 0 ? floor(old_val + delta_ij + fj.pb.tolerances.integrality_tolerance) @@ -990,45 +837,6 @@ __global__ void update_lift_moves_kernel(typename fj_t::climber_data_t update_lift_moves(fj); } -template -DI f_t get_breakthrough_move(typename fj_t::climber_data_t::view_t fj, i_t var_idx) -{ - f_t obj_coeff = fj.pb.objective_coefficients[var_idx]; - auto bounds = fj.pb.variable_bounds[var_idx]; - f_t v_lb = get_lower(bounds); - f_t v_ub = get_upper(bounds); - cuopt_assert(isfinite(v_lb) || isfinite(v_ub), "unexpected free variable"); - cuopt_assert(v_lb <= v_ub, "invalid bounds"); - cuopt_assert(fj.pb.check_variable_within_bounds(var_idx, fj.incumbent_assignment[var_idx]), - "invalid incumbent assignment"); - cuopt_assert(isfinite(obj_coeff), "invalid objective coefficient"); - cuopt_assert(obj_coeff != 0, "objective coefficient shouldn't be null"); - - f_t excess = (*fj.best_objective) - *fj.incumbent_objective; - - cuopt_assert(isfinite(excess) && excess < 0, - "breakthru move invoked during invalid solver state"); - - f_t old_val = fj.incumbent_assignment[var_idx]; - f_t new_val = old_val; - - f_t delta_ij = excess / obj_coeff; - - if (fj.pb.is_integer_var(var_idx)) { - new_val = obj_coeff > 0 ? floor(old_val + delta_ij + fj.pb.tolerances.integrality_tolerance) - : ceil(old_val + delta_ij - fj.pb.tolerances.integrality_tolerance); - } else { - new_val = old_val + delta_ij; - } - - // fallback - if (!fj.pb.check_variable_within_bounds(var_idx, new_val)) { - new_val = obj_coeff > 0 ? v_lb : v_ub; - } - - return new_val; -} - template DI void update_breakthrough_moves(typename fj_t::climber_data_t::view_t fj) { diff --git a/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cuh b/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cuh index 2b299ccd2..080a6c210 100644 --- a/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cuh +++ b/cpp/src/mip/feasibility_jump/feasibility_jump_kernels.cuh @@ -96,11 +96,9 @@ __global__ void update_changed_constraints_kernel( template __global__ void update_best_solution_kernel(typename fj_t::climber_data_t::view_t fj); -enum MTMMoveType { FJ_MTM_VIOLATED, FJ_MTM_SATISFIED, FJ_MTM_ALL }; - template __global__ void compute_mtm_moves_kernel(typename fj_t::climber_data_t::view_t fj, bool ForceRefresh = false); diff --git a/cpp/src/mip/feasibility_jump/fj_cpu.cu b/cpp/src/mip/feasibility_jump/fj_cpu.cu new file mode 100644 index 000000000..b617f5a1d --- /dev/null +++ b/cpp/src/mip/feasibility_jump/fj_cpu.cu @@ -0,0 +1,1004 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "feasibility_jump.cuh" +#include "feasibility_jump_impl_common.cuh" +#include "fj_cpu.cuh" + +#include + +#include +#include +#include +#include + +#define CPUFJ_TIMING_TRACE 0 + +namespace cuopt::linear_programming::detail { + +template +class timing_raii_t { + public: + timing_raii_t(std::vector& times_vec) + : times_vec_(times_vec), start_time_(std::chrono::high_resolution_clock::now()) + { + } + + ~timing_raii_t() + { + auto end_time = std::chrono::high_resolution_clock::now(); + auto duration = + std::chrono::duration_cast>(end_time - start_time_); + times_vec_.push_back(duration.count()); + } + + private: + std::vector& times_vec_; + std::chrono::high_resolution_clock::time_point start_time_; +}; + +template +static void print_timing_stats(fj_cpu_climber_t& fj_cpu) +{ + auto compute_avg_and_total = [](const std::vector& times) -> std::pair { + if (times.empty()) return {0.0, 0.0}; + double sum = 0.0; + for (double time : times) + sum += time; + return {sum / times.size(), sum}; + }; + + auto [lift_avg, lift_total] = compute_avg_and_total(fj_cpu.find_lift_move_times); + auto [viol_avg, viol_total] = compute_avg_and_total(fj_cpu.find_mtm_move_viol_times); + auto [sat_avg, sat_total] = compute_avg_and_total(fj_cpu.find_mtm_move_sat_times); + auto [apply_avg, apply_total] = compute_avg_and_total(fj_cpu.apply_move_times); + auto [weights_avg, weights_total] = compute_avg_and_total(fj_cpu.update_weights_times); + auto [compute_score_avg, compute_score_total] = compute_avg_and_total(fj_cpu.compute_score_times); + CUOPT_LOG_TRACE("=== Timing Statistics (Iteration %d) ===\n", fj_cpu.iterations); + CUOPT_LOG_TRACE("find_lift_move: avg=%.6f ms, total=%.6f ms, calls=%zu\n", + lift_avg * 1000.0, + lift_total * 1000.0, + fj_cpu.find_lift_move_times.size()); + CUOPT_LOG_TRACE("find_mtm_move_viol: avg=%.6f ms, total=%.6f ms, calls=%zu\n", + viol_avg * 1000.0, + viol_total * 1000.0, + fj_cpu.find_mtm_move_viol_times.size()); + CUOPT_LOG_TRACE("find_mtm_move_sat: avg=%.6f ms, total=%.6f ms, calls=%zu\n", + sat_avg * 1000.0, + sat_total * 1000.0, + fj_cpu.find_mtm_move_sat_times.size()); + CUOPT_LOG_TRACE("apply_move: avg=%.6f ms, total=%.6f ms, calls=%zu\n", + apply_avg * 1000.0, + apply_total * 1000.0, + fj_cpu.apply_move_times.size()); + CUOPT_LOG_TRACE("update_weights: avg=%.6f ms, total=%.6f ms, calls=%zu\n", + weights_avg * 1000.0, + weights_total * 1000.0, + fj_cpu.update_weights_times.size()); + CUOPT_LOG_TRACE("compute_score: avg=%.6f ms, total=%.6f ms, calls=%zu\n", + compute_score_avg * 1000.0, + compute_score_total * 1000.0, + fj_cpu.compute_score_times.size()); + CUOPT_LOG_TRACE("cache hit percentage: %.2f%%\n", + (double)fj_cpu.hit_count / (fj_cpu.hit_count + fj_cpu.miss_count) * 100.0); + CUOPT_LOG_TRACE("bin candidate move hit percentage: %.2f%%\n", + (double)fj_cpu.candidate_move_hits[0] / + (fj_cpu.candidate_move_hits[0] + fj_cpu.candidate_move_misses[0]) * 100.0); + CUOPT_LOG_TRACE("int candidate move hit percentage: %.2f%%\n", + (double)fj_cpu.candidate_move_hits[1] / + (fj_cpu.candidate_move_hits[1] + fj_cpu.candidate_move_misses[1]) * 100.0); + CUOPT_LOG_TRACE("cont candidate move hit percentage: %.2f%%\n", + (double)fj_cpu.candidate_move_hits[2] / + (fj_cpu.candidate_move_hits[2] + fj_cpu.candidate_move_misses[2]) * 100.0); + CUOPT_LOG_TRACE("========================================\n"); +} + +template +static inline bool tabu_check(fj_cpu_climber_t& fj_cpu, + i_t var_idx, + f_t delta, + bool localmin = false) +{ + if (localmin) { + return (delta < 0 && fj_cpu.iterations == fj_cpu.h_tabu_lastinc[var_idx] + 1) || + (delta >= 0 && fj_cpu.iterations == fj_cpu.h_tabu_lastdec[var_idx] + 1); + } else { + return (delta < 0 && fj_cpu.iterations < fj_cpu.h_tabu_nodec_until[var_idx]) || + (delta >= 0 && fj_cpu.iterations < fj_cpu.h_tabu_noinc_until[var_idx]); + } +} + +template +static inline fj_staged_score_t compute_score(fj_cpu_climber_t& fj_cpu, + i_t var_idx, + f_t delta) +{ + // timing_raii_t timer(fj_cpu.compute_score_times); + + f_t obj_diff = fj_cpu.h_obj_coeffs[var_idx] * delta; + + cuopt_assert(isfinite(delta), ""); + + cuopt_assert(var_idx < fj_cpu.view.pb.n_variables, "variable index out of bounds"); + + f_t base_feas_sum = 0; + f_t bonus_robust_sum = 0; + + auto [offset_begin, offset_end] = fj_cpu.view.pb.reverse_range_for_var(var_idx); + for (i_t i = offset_begin; i < offset_end; i++) { + auto cstr_idx = fj_cpu.h_reverse_constraints[i]; + auto cstr_coeff = fj_cpu.h_reverse_coefficients[i]; + auto [c_lb, c_ub] = fj_cpu.cached_cstr_bounds[i]; + + cuopt_assert(c_lb <= c_ub, "invalid bounds"); + + auto [cstr_base_feas, cstr_bonus_robust] = feas_score_constraint( + fj_cpu.view, var_idx, delta, cstr_idx, cstr_coeff, c_lb, c_ub, fj_cpu.h_lhs[cstr_idx]); + + base_feas_sum += cstr_base_feas; + bonus_robust_sum += cstr_bonus_robust; + } + + f_t base_obj = 0; + if (obj_diff < 0) // improving move wrt objective + base_obj = fj_cpu.h_objective_weight; + else if (obj_diff > 0) + base_obj = -fj_cpu.h_objective_weight; + + f_t bonus_breakthrough = 0; + + bool old_obj_better = fj_cpu.h_incumbent_objective < fj_cpu.h_best_objective; + bool new_obj_better = fj_cpu.h_incumbent_objective + obj_diff < fj_cpu.h_best_objective; + if (!old_obj_better && new_obj_better) + bonus_breakthrough += fj_cpu.h_objective_weight; + else if (old_obj_better && !new_obj_better) { + bonus_breakthrough -= fj_cpu.h_objective_weight; + } + + fj_staged_score_t score; + score.base = round(base_obj + base_feas_sum); + score.bonus = round(bonus_breakthrough + bonus_robust_sum); + return score; +} + +template +static void smooth_weights(fj_cpu_climber_t& fj_cpu) +{ + for (i_t cstr_idx = 0; cstr_idx < fj_cpu.view.pb.n_constraints; cstr_idx++) { + // consider only satisfied constraints + if (fj_cpu.violated_constraints.count(cstr_idx)) continue; + + f_t weight_l = max((f_t)0, fj_cpu.h_cstr_left_weights[cstr_idx] - 1); + f_t weight_r = max((f_t)0, fj_cpu.h_cstr_right_weights[cstr_idx] - 1); + + fj_cpu.h_cstr_left_weights[cstr_idx] = weight_l; + fj_cpu.h_cstr_right_weights[cstr_idx] = weight_r; + } + + if (fj_cpu.h_objective_weight > 0 && fj_cpu.h_incumbent_objective >= fj_cpu.h_best_objective) { + fj_cpu.h_objective_weight = max((f_t)0, fj_cpu.h_objective_weight - 1); + } +} + +template +static void update_weights(fj_cpu_climber_t& fj_cpu) +{ + timing_raii_t timer(fj_cpu.update_weights_times); + + raft::random::PCGenerator rng(fj_cpu.settings.seed + fj_cpu.iterations, 0, 0); + bool smoothing = rng.next_float() <= fj_cpu.settings.parameters.weight_smoothing_probability; + + if (smoothing) { + smooth_weights(fj_cpu); + return; + } + + for (auto cstr_idx : fj_cpu.violated_constraints) { + f_t curr_incumbent_lhs = fj_cpu.h_lhs[cstr_idx]; + f_t curr_lower_excess = + fj_cpu.view.lower_excess_score(cstr_idx, curr_incumbent_lhs, fj_cpu.h_cstr_lb[cstr_idx]); + f_t curr_upper_excess = + fj_cpu.view.upper_excess_score(cstr_idx, curr_incumbent_lhs, fj_cpu.h_cstr_ub[cstr_idx]); + f_t curr_excess_score = curr_lower_excess + curr_upper_excess; + + f_t old_weight; + if (curr_lower_excess < 0.) { + old_weight = fj_cpu.h_cstr_left_weights[cstr_idx]; + } else { + old_weight = fj_cpu.h_cstr_right_weights[cstr_idx]; + } + + cuopt_assert(curr_excess_score < 0, "constraint not violated"); + + i_t int_delta = 1.0; + f_t delta = int_delta; + + f_t new_weight = old_weight + delta; + new_weight = round(new_weight); + + if (curr_lower_excess < 0.) { + fj_cpu.h_cstr_left_weights[cstr_idx] = new_weight; + fj_cpu.max_weight = max(fj_cpu.max_weight, new_weight); + } else { + fj_cpu.h_cstr_right_weights[cstr_idx] = new_weight; + fj_cpu.max_weight = max(fj_cpu.max_weight, new_weight); + } + + // Invalidate related cached move scores + auto [relvar_offset_begin, relvar_offset_end] = fj_cpu.view.pb.range_for_constraint(cstr_idx); + for (auto i = relvar_offset_begin; i < relvar_offset_end; i++) { + fj_cpu.cached_mtm_moves[i].first = 0; + } + } + + if (fj_cpu.violated_constraints.empty()) { fj_cpu.h_objective_weight += 1; } +} + +template +static void apply_move(fj_cpu_climber_t& fj_cpu, + i_t var_idx, + f_t delta, + bool localmin = false) +{ + timing_raii_t timer(fj_cpu.apply_move_times); + + raft::random::PCGenerator rng(fj_cpu.settings.seed + fj_cpu.iterations, 0, 0); + + cuopt_assert(var_idx < fj_cpu.view.pb.n_variables, "variable index out of bounds"); + // Update the LHSs of all involved constraints. + auto [offset_begin, offset_end] = fj_cpu.view.pb.reverse_range_for_var(var_idx); + + i_t previous_viol = fj_cpu.violated_constraints.size(); + + for (auto i = offset_begin; i < offset_end; i++) { + cuopt_assert(i < (i_t)fj_cpu.h_reverse_constraints.size(), ""); + auto [c_lb, c_ub] = fj_cpu.cached_cstr_bounds[i]; + + auto cstr_idx = fj_cpu.h_reverse_constraints[i]; + auto cstr_coeff = fj_cpu.h_reverse_coefficients[i]; + + f_t old_lhs = fj_cpu.h_lhs[cstr_idx]; + f_t new_lhs = old_lhs + cstr_coeff * delta; + f_t old_cost = fj_cpu.view.excess_score(cstr_idx, old_lhs, c_lb, c_ub); + f_t new_cost = fj_cpu.view.excess_score(cstr_idx, new_lhs, c_lb, c_ub); + f_t cstr_tolerance = fj_cpu.view.get_corrected_tolerance(cstr_idx, c_lb, c_ub); + + if (new_cost < -cstr_tolerance && !fj_cpu.violated_constraints.count(cstr_idx)) { + fj_cpu.violated_constraints.insert(cstr_idx); + fj_cpu.satisfied_constraints.erase(cstr_idx); + } else if (!(new_cost < -cstr_tolerance) && fj_cpu.violated_constraints.count(cstr_idx)) { + fj_cpu.violated_constraints.erase(cstr_idx); + fj_cpu.satisfied_constraints.insert(cstr_idx); + } + + cuopt_assert(isfinite(delta), "delta should be finite"); + // Kahan compensated summation + f_t y = cstr_coeff * delta - fj_cpu.h_lhs_sumcomp[cstr_idx]; + f_t t = old_lhs + y; + fj_cpu.h_lhs_sumcomp[cstr_idx] = (t - old_lhs) - y; + fj_cpu.h_lhs[cstr_idx] = t; + cuopt_assert(isfinite(fj_cpu.h_lhs[cstr_idx]), "assignment should be finite"); + + // Invalidate related cached move scores + auto [relvar_offset_begin, relvar_offset_end] = fj_cpu.view.pb.range_for_constraint(cstr_idx); + for (auto i = relvar_offset_begin; i < relvar_offset_end; i++) { + fj_cpu.cached_mtm_moves[i].first = 0; + } + } + + if (previous_viol > 0 && fj_cpu.violated_constraints.empty()) { + fj_cpu.last_feasible_entrance_iter = fj_cpu.iterations; + } + + // update the assignment and objective proper + f_t new_val = fj_cpu.h_assignment[var_idx] + delta; + if (fj_cpu.view.pb.is_integer_var(var_idx)) { + cuopt_assert(fj_cpu.view.pb.integer_equal(new_val, round(new_val)), "new_val is not integer"); + new_val = round(new_val); + } + fj_cpu.h_assignment[var_idx] = new_val; + + cuopt_assert(fj_cpu.view.pb.check_variable_within_bounds(var_idx, new_val), + "assignment not within bounds"); + cuopt_assert(isfinite(new_val), "assignment is not finite"); + + fj_cpu.h_incumbent_objective += fj_cpu.h_obj_coeffs[var_idx] * delta; + if (fj_cpu.h_incumbent_objective < fj_cpu.h_best_objective && + fj_cpu.violated_constraints.empty()) { + cuopt_assert(fj_cpu.satisfied_constraints.size() == fj_cpu.view.pb.n_constraints, ""); + fj_cpu.h_best_objective = + fj_cpu.h_incumbent_objective - fj_cpu.settings.parameters.breakthrough_move_epsilon; + fj_cpu.h_best_assignment = fj_cpu.h_assignment; + CUOPT_LOG_TRACE("%sCPUFJ: new best objective: %g\n", + fj_cpu.log_prefix.c_str(), + fj_cpu.pb_ptr->get_user_obj_from_solver_obj(fj_cpu.h_best_objective)); + if (fj_cpu.improvement_callback) { + fj_cpu.improvement_callback(fj_cpu.h_best_objective, fj_cpu.h_assignment); + } + fj_cpu.feasible_found = true; + } + + i_t tabu_tenure = fj_cpu.settings.parameters.tabu_tenure_min + + rng.next_u32() % (fj_cpu.settings.parameters.tabu_tenure_max - + fj_cpu.settings.parameters.tabu_tenure_min); + if (delta > 0) { + fj_cpu.h_tabu_lastinc[var_idx] = fj_cpu.iterations; + fj_cpu.h_tabu_nodec_until[var_idx] = fj_cpu.iterations + tabu_tenure; + fj_cpu.h_tabu_noinc_until[var_idx] = fj_cpu.iterations + tabu_tenure / 2; + // CUOPT_LOG_TRACE("CPU: tabu nodec_until: %d\n", fj_cpu.h_tabu_nodec_until[var_idx]); + } else { + fj_cpu.h_tabu_lastdec[var_idx] = fj_cpu.iterations; + fj_cpu.h_tabu_noinc_until[var_idx] = fj_cpu.iterations + tabu_tenure; + fj_cpu.h_tabu_nodec_until[var_idx] = fj_cpu.iterations + tabu_tenure / 2; + // CUOPT_LOG_TRACE("CPU: tabu noinc_until: %d\n", fj_cpu.h_tabu_noinc_until[var_idx]); + } + + std::fill(fj_cpu.flip_move_computed.begin(), fj_cpu.flip_move_computed.end(), false); + std::fill(fj_cpu.var_bitmap.begin(), fj_cpu.var_bitmap.end(), false); + fj_cpu.iter_mtm_vars.clear(); +} + +template +static thrust::tuple find_mtm_move( + fj_cpu_climber_t& fj_cpu, const std::vector& target_cstrs, bool localmin = false) +{ + auto& problem = *fj_cpu.pb_ptr; + + raft::random::PCGenerator rng(fj_cpu.settings.seed + fj_cpu.iterations, 0, 0); + + fj_move_t best_move = fj_move_t{-1, 0}; + fj_staged_score_t best_score = fj_staged_score_t::invalid(); + + // collect all the variables that are involved in the target constraints + for (size_t cstr_idx : target_cstrs) { + auto [offset_begin, offset_end] = fj_cpu.view.pb.range_for_constraint(cstr_idx); + for (auto i = offset_begin; i < offset_end; i++) { + i_t var_idx = fj_cpu.h_variables[i]; + if (fj_cpu.var_bitmap[var_idx]) continue; + fj_cpu.iter_mtm_vars.push_back(var_idx); + fj_cpu.var_bitmap[var_idx] = true; + } + } + // estimate the amount of nnzs to consider + i_t nnz_sum = 0; + for (auto var_idx : fj_cpu.iter_mtm_vars) { + auto [offset_begin, offset_end] = fj_cpu.view.pb.reverse_range_for_var(var_idx); + nnz_sum += offset_end - offset_begin; + } + + f_t nnz_pick_probability = 1; + if (nnz_sum > fj_cpu.nnz_samples) nnz_pick_probability = (f_t)fj_cpu.nnz_samples / nnz_sum; + + for (size_t cstr_idx : target_cstrs) { + f_t cstr_tol = fj_cpu.view.get_corrected_tolerance(cstr_idx); + + cuopt_assert(cstr_idx < fj_cpu.h_cstr_lb.size(), "cstr_idx is out of bounds"); + auto [offset_begin, offset_end] = fj_cpu.view.pb.range_for_constraint(cstr_idx); + for (auto i = offset_begin; i < offset_end; i++) { + // early cached check + if (auto& cached_move = fj_cpu.cached_mtm_moves[i]; cached_move.first != 0) { + if (best_score < cached_move.second) { + auto var_idx = fj_cpu.h_variables[i]; + if (fj_cpu.view.pb.check_variable_within_bounds( + var_idx, fj_cpu.h_assignment[var_idx] + cached_move.first)) { + best_score = cached_move.second; + best_move = fj_move_t{var_idx, cached_move.first}; + } + // cuopt_assert(fj_cpu.view.pb.check_variable_within_bounds(var_idx, + // fj_cpu.h_assignment[var_idx] + cached_move.first), "best move not within bounds"); + } + fj_cpu.hit_count++; + continue; + } + + // random chance to skip this nnz if there are many to consider + if (nnz_pick_probability < 1) + if (rng.next_float() > nnz_pick_probability) continue; + + auto var_idx = fj_cpu.h_variables[i]; + + f_t val = fj_cpu.h_assignment[var_idx]; + f_t new_val = val; + f_t delta = 0; + + // Special case for binary variables + if (fj_cpu.h_is_binary_variable[var_idx]) { + if (fj_cpu.flip_move_computed[var_idx]) continue; + fj_cpu.flip_move_computed[var_idx] = true; + new_val = 1 - val; + } else { + auto cstr_coeff = fj_cpu.h_coefficients[i]; + + f_t c_lb = fj_cpu.h_cstr_lb[cstr_idx]; + f_t c_ub = fj_cpu.h_cstr_ub[cstr_idx]; + auto [delta, sign, slack, cstr_tolerance] = get_mtm_for_constraint( + fj_cpu.view, var_idx, cstr_idx, cstr_coeff, c_lb, c_ub); + if (fj_cpu.view.pb.is_integer_var(var_idx)) { + new_val = cstr_coeff * sign > 0 + ? floor(val + delta + fj_cpu.view.pb.tolerances.integrality_tolerance) + : ceil(val + delta - fj_cpu.view.pb.tolerances.integrality_tolerance); + } else { + new_val = val + delta; + } + // fallback + if (new_val < get_lower(fj_cpu.h_var_bounds[var_idx]) || + new_val > get_upper(fj_cpu.h_var_bounds[var_idx])) { + new_val = cstr_coeff * sign > 0 ? get_lower(fj_cpu.h_var_bounds[var_idx]) + : get_upper(fj_cpu.h_var_bounds[var_idx]); + } + } + if (!isfinite(new_val)) continue; + cuopt_assert(fj_cpu.view.pb.check_variable_within_bounds(var_idx, new_val), + "new_val is not within bounds"); + delta = new_val - val; + // more permissive tabu in the case of local minima + if (tabu_check(fj_cpu, var_idx, delta, localmin)) continue; + if (fabs(delta) < cstr_tol) continue; + + auto move = fj_move_t{var_idx, delta}; + cuopt_assert(move.var_idx < fj_cpu.h_assignment.size(), "move.var_idx is out of bounds"); + cuopt_assert(move.var_idx >= 0, "move.var_idx is not positive"); + + // candidate_moves.insert(move); + fj_staged_score_t score; + score = compute_score(fj_cpu, var_idx, delta); + fj_cpu.cached_mtm_moves[i] = std::make_pair(delta, score); + fj_cpu.miss_count++; + if (best_score < score) { + best_score = score; + best_move = move; + } + } + } + + // also consider BM moves if we have found a feasible solution at least once + if (move_type == MTMMoveType::FJ_MTM_VIOLATED && + fj_cpu.h_best_objective < std::numeric_limits::infinity() && + fj_cpu.h_incumbent_objective >= + fj_cpu.h_best_objective + fj_cpu.settings.parameters.breakthrough_move_epsilon) { + for (auto var_idx : fj_cpu.h_objective_vars) { + f_t old_val = fj_cpu.h_assignment[var_idx]; + f_t new_val = get_breakthrough_move(fj_cpu.view, var_idx); + + if (fj_cpu.view.pb.integer_equal(new_val, old_val) || !isfinite(new_val)) continue; + + f_t delta = new_val - old_val; + + // Check if we already have a move for this variable + auto move = fj_move_t{var_idx, delta}; + cuopt_assert(move.var_idx < fj_cpu.h_assignment.size(), "move.var_idx is out of bounds"); + cuopt_assert(move.var_idx >= 0, "move.var_idx is not positive"); + + if (tabu_check(fj_cpu, var_idx, delta)) continue; + + fj_staged_score_t score = compute_score(fj_cpu, var_idx, delta); + + cuopt_assert(fj_cpu.view.pb.check_variable_within_bounds(var_idx, new_val), ""); + cuopt_assert(isfinite(delta), ""); + + if (best_score < score) { + best_score = score; + best_move = move; + } + } + } + + return thrust::make_tuple(best_move, best_score); +} + +template +static thrust::tuple find_mtm_move_viol( + fj_cpu_climber_t& fj_cpu, i_t sample_size = 100, bool localmin = false) +{ + timing_raii_t timer(fj_cpu.find_mtm_move_viol_times); + + std::vector sampled_cstrs; + sampled_cstrs.reserve(sample_size); + std::sample(fj_cpu.violated_constraints.begin(), + fj_cpu.violated_constraints.end(), + std::back_inserter(sampled_cstrs), + sample_size, + std::mt19937(fj_cpu.settings.seed + fj_cpu.iterations)); + + return find_mtm_move(fj_cpu, sampled_cstrs, localmin); +} + +template +static thrust::tuple find_mtm_move_sat( + fj_cpu_climber_t& fj_cpu, i_t sample_size = 100) +{ + timing_raii_t timer(fj_cpu.find_mtm_move_sat_times); + + std::vector sampled_cstrs; + sampled_cstrs.reserve(sample_size); + std::sample(fj_cpu.satisfied_constraints.begin(), + fj_cpu.satisfied_constraints.end(), + std::back_inserter(sampled_cstrs), + sample_size, + std::mt19937(fj_cpu.settings.seed + fj_cpu.iterations)); + + return find_mtm_move(fj_cpu, sampled_cstrs); +} + +template +static void init_lhs(fj_cpu_climber_t& fj_cpu) +{ + cuopt_assert(fj_cpu.h_lhs.size() == fj_cpu.view.pb.n_constraints, "h_lhs size mismatch"); + + fj_cpu.violated_constraints.clear(); + fj_cpu.satisfied_constraints.clear(); + for (i_t cstr_idx = 0; cstr_idx < fj_cpu.view.pb.n_constraints; ++cstr_idx) { + auto [offset_begin, offset_end] = fj_cpu.view.pb.range_for_constraint(cstr_idx); + f_t lhs = 0; + for (i_t i = offset_begin; i < offset_end; ++i) { + lhs += fj_cpu.h_coefficients[i] * fj_cpu.h_assignment[fj_cpu.h_variables[i]]; + } + + fj_cpu.h_lhs[cstr_idx] = lhs; + + f_t cstr_tolerance = fj_cpu.view.get_corrected_tolerance(cstr_idx); + f_t new_cost = fj_cpu.view.excess_score(cstr_idx, lhs); + if (new_cost < -cstr_tolerance) { + fj_cpu.violated_constraints.insert(cstr_idx); + } else { + fj_cpu.satisfied_constraints.insert(cstr_idx); + } + } + + // compute incumbent objective + fj_cpu.h_incumbent_objective = thrust::inner_product( + fj_cpu.h_assignment.begin(), fj_cpu.h_assignment.end(), fj_cpu.h_obj_coeffs.begin(), 0.); +} + +template +static thrust::tuple find_lift_move( + fj_cpu_climber_t& fj_cpu) +{ + timing_raii_t timer(fj_cpu.find_lift_move_times); + + fj_move_t best_move = fj_move_t{-1, 0}; + fj_staged_score_t best_score = fj_staged_score_t::zero(); + + for (auto var_idx : fj_cpu.h_objective_vars) { + cuopt_assert(var_idx < fj_cpu.h_obj_coeffs.size(), "var_idx is out of bounds"); + cuopt_assert(var_idx >= 0, "var_idx is out of bounds"); + + f_t obj_coeff = fj_cpu.h_obj_coeffs[var_idx]; + f_t delta = -std::numeric_limits::infinity(); + f_t val = fj_cpu.h_assignment[var_idx]; + + // special path for binary variables + if (fj_cpu.h_is_binary_variable[var_idx]) { + cuopt_assert(fj_cpu.view.pb.is_integer(val), "binary variable is not integer"); + cuopt_assert(fj_cpu.view.pb.integer_equal(val, 0) || fj_cpu.view.pb.integer_equal(val, 1), + "Current assignment is not binary!"); + delta = round(1.0 - 2 * val); + // flip move wouldn't improve + if (delta * obj_coeff >= 0) continue; + } else { + f_t lfd_lb = get_lower(fj_cpu.h_var_bounds[var_idx]) - val; + f_t lfd_ub = get_upper(fj_cpu.h_var_bounds[var_idx]) - val; + auto [offset_begin, offset_end] = fj_cpu.view.pb.reverse_range_for_var(var_idx); + for (i_t j = offset_begin; j < offset_end; j += 1) { + auto cstr_idx = fj_cpu.view.pb.reverse_constraints[j]; + auto cstr_coeff = fj_cpu.view.pb.reverse_coefficients[j]; + f_t c_lb = fj_cpu.view.pb.constraint_lower_bounds[cstr_idx]; + f_t c_ub = fj_cpu.view.pb.constraint_upper_bounds[cstr_idx]; + f_t cstr_tolerance = fj_cpu.view.get_corrected_tolerance(cstr_idx, c_lb, c_ub); + cuopt_assert(c_lb <= c_ub, "invalid bounds"); + cuopt_assert(fj_cpu.view.cstr_satisfied(cstr_idx, fj_cpu.h_lhs[cstr_idx]), + "cstr should be satisfied"); + + // Process each bound separately, as both are satified and may both be finite + // otherwise range constraints aren't correctly handled + for (auto [bound, sign] : {std::make_tuple(c_lb, -1), std::make_tuple(c_ub, 1)}) { + auto [delta, slack] = + get_mtm_for_bound(fj_cpu.view, var_idx, cstr_idx, cstr_coeff, bound, sign); + + if (cstr_coeff * sign < 0) { + if (fj_cpu.view.pb.is_integer_var(var_idx)) delta = ceil(delta); + } else { + if (fj_cpu.view.pb.is_integer_var(var_idx)) delta = floor(delta); + } + + // skip this variable if there is no slack + if (fabs(slack) <= cstr_tolerance) { + if (cstr_coeff * sign > 0) { + lfd_ub = 0; + } else { + lfd_lb = 0; + } + } else if (!fj_cpu.view.pb.check_variable_within_bounds(var_idx, val + delta)) { + continue; + } else { + if (cstr_coeff * sign < 0) { + lfd_lb = max(lfd_lb, delta); + } else { + lfd_ub = min(lfd_ub, delta); + } + } + } + if (lfd_lb >= lfd_ub) break; + } + + // invalid crossing bounds + if (lfd_lb >= lfd_ub) { lfd_lb = lfd_ub = 0; } + + if (!fj_cpu.view.pb.check_variable_within_bounds(var_idx, val + lfd_lb)) { lfd_lb = 0; } + if (!fj_cpu.view.pb.check_variable_within_bounds(var_idx, val + lfd_ub)) { lfd_ub = 0; } + + // Now that the life move domain is computed, compute the correct lift move + cuopt_assert(isfinite(val), "invalid assignment value"); + delta = obj_coeff < 0 ? lfd_ub : lfd_lb; + } + + if (!isfinite(delta)) delta = 0; + if (fj_cpu.view.pb.integer_equal(delta, (f_t)0)) continue; + if (tabu_check(fj_cpu, var_idx, delta)) continue; + + cuopt_assert(delta * obj_coeff < 0, "lift move doesn't improve the objective!"); + + // get the score + auto move = fj_move_t{var_idx, delta}; + fj_staged_score_t score = fj_staged_score_t::zero(); + f_t obj_score = -1 * obj_coeff * delta; // negated to turn this into a positive score + score.base = round(obj_score); + + if (best_score < score) { + best_score = score; + best_move = move; + } + } + + return thrust::make_tuple(best_move, best_score); +} + +template +static void perturb(fj_cpu_climber_t& fj_cpu) +{ + // select N variables, assign them a random value between their bounds + std::vector sampled_vars; + std::sample(fj_cpu.h_objective_vars.begin(), + fj_cpu.h_objective_vars.end(), + std::back_inserter(sampled_vars), + 2, + std::mt19937(fj_cpu.settings.seed + fj_cpu.iterations)); + raft::random::PCGenerator rng(fj_cpu.settings.seed + fj_cpu.iterations, 0, 0); + + for (auto var_idx : sampled_vars) { + f_t lb = ceil(std::max(get_lower(fj_cpu.h_var_bounds[var_idx]), -1e7)); + f_t ub = floor(std::min(get_upper(fj_cpu.h_var_bounds[var_idx]), 1e7)); + f_t val = lb + (ub - lb) * rng.next_double(); + if (fj_cpu.view.pb.is_integer_var(var_idx)) { + val = std::round(val); + val = std::min(std::max(val, lb), ub); + } + + cuopt_assert(fj_cpu.view.pb.check_variable_within_bounds(var_idx, val), + "value is out of bounds"); + fj_cpu.h_assignment[var_idx] = val; + } + + init_lhs(fj_cpu); +} + +template +static void init_fj_cpu(fj_cpu_climber_t& fj_cpu, + solution_t& solution, + const std::vector& left_weights, + const std::vector& right_weights, + f_t objective_weight) +{ + auto& problem = *solution.problem_ptr; + auto handle_ptr = solution.handle_ptr; + + // build a cpu-based fj_view_t + fj_cpu.view = typename fj_t::climber_data_t::view_t{}; + fj_cpu.view.pb = problem.view(); + fj_cpu.pb_ptr = &problem; + // Get host copies of device data + fj_cpu.h_reverse_coefficients = + cuopt::host_copy(problem.reverse_coefficients, handle_ptr->get_stream()); + fj_cpu.h_reverse_constraints = + cuopt::host_copy(problem.reverse_constraints, handle_ptr->get_stream()); + fj_cpu.h_reverse_offsets = cuopt::host_copy(problem.reverse_offsets, handle_ptr->get_stream()); + fj_cpu.h_coefficients = cuopt::host_copy(problem.coefficients, handle_ptr->get_stream()); + fj_cpu.h_offsets = cuopt::host_copy(problem.offsets, handle_ptr->get_stream()); + fj_cpu.h_variables = cuopt::host_copy(problem.variables, handle_ptr->get_stream()); + fj_cpu.h_obj_coeffs = cuopt::host_copy(problem.objective_coefficients, handle_ptr->get_stream()); + fj_cpu.h_var_bounds = cuopt::host_copy(problem.variable_bounds, handle_ptr->get_stream()); + fj_cpu.h_cstr_lb = cuopt::host_copy(problem.constraint_lower_bounds, handle_ptr->get_stream()); + fj_cpu.h_cstr_ub = cuopt::host_copy(problem.constraint_upper_bounds, handle_ptr->get_stream()); + fj_cpu.h_var_types = cuopt::host_copy(problem.variable_types, handle_ptr->get_stream()); + fj_cpu.h_is_binary_variable = + cuopt::host_copy(problem.is_binary_variable, handle_ptr->get_stream()); + fj_cpu.h_binary_indices = cuopt::host_copy(problem.binary_indices, handle_ptr->get_stream()); + + fj_cpu.h_cstr_left_weights = left_weights; + fj_cpu.h_cstr_right_weights = right_weights; + fj_cpu.max_weight = 1.0; + fj_cpu.h_objective_weight = objective_weight; + fj_cpu.h_assignment = solution.get_host_assignment(); + fj_cpu.h_best_assignment = solution.get_host_assignment(); + fj_cpu.h_lhs.resize(fj_cpu.pb_ptr->n_constraints); + fj_cpu.h_lhs_sumcomp.resize(fj_cpu.pb_ptr->n_constraints, 0); + fj_cpu.h_tabu_nodec_until.resize(fj_cpu.pb_ptr->n_variables, 0); + fj_cpu.h_tabu_noinc_until.resize(fj_cpu.pb_ptr->n_variables, 0); + fj_cpu.h_tabu_lastdec.resize(fj_cpu.pb_ptr->n_variables, 0); + fj_cpu.h_tabu_lastinc.resize(fj_cpu.pb_ptr->n_variables, 0); + fj_cpu.iterations = 0; + + // set pointers to host copies + // technically not 'device_span's but raft doesn't have a universal span. + // cuda::std::span? + fj_cpu.view.cstr_left_weights = + raft::device_span(fj_cpu.h_cstr_left_weights.data(), fj_cpu.h_cstr_left_weights.size()); + fj_cpu.view.cstr_right_weights = + raft::device_span(fj_cpu.h_cstr_right_weights.data(), fj_cpu.h_cstr_right_weights.size()); + fj_cpu.view.objective_weight = &fj_cpu.h_objective_weight; + fj_cpu.view.incumbent_assignment = + raft::device_span(fj_cpu.h_assignment.data(), fj_cpu.h_assignment.size()); + fj_cpu.view.incumbent_lhs = raft::device_span(fj_cpu.h_lhs.data(), fj_cpu.h_lhs.size()); + fj_cpu.view.tabu_nodec_until = + raft::device_span(fj_cpu.h_tabu_nodec_until.data(), fj_cpu.h_tabu_nodec_until.size()); + fj_cpu.view.tabu_noinc_until = + raft::device_span(fj_cpu.h_tabu_noinc_until.data(), fj_cpu.h_tabu_noinc_until.size()); + fj_cpu.view.tabu_lastdec = + raft::device_span(fj_cpu.h_tabu_lastdec.data(), fj_cpu.h_tabu_lastdec.size()); + fj_cpu.view.tabu_lastinc = + raft::device_span(fj_cpu.h_tabu_lastinc.data(), fj_cpu.h_tabu_lastinc.size()); + fj_cpu.view.objective_vars = + raft::device_span(fj_cpu.h_objective_vars.data(), fj_cpu.h_objective_vars.size()); + fj_cpu.view.incumbent_objective = &fj_cpu.h_incumbent_objective; + fj_cpu.view.best_objective = &fj_cpu.h_best_objective; + + fj_cpu.view.settings = &fj_cpu.settings; + fj_cpu.view.pb.constraint_lower_bounds = + raft::device_span(fj_cpu.h_cstr_lb.data(), fj_cpu.h_cstr_lb.size()); + fj_cpu.view.pb.constraint_upper_bounds = + raft::device_span(fj_cpu.h_cstr_ub.data(), fj_cpu.h_cstr_ub.size()); + fj_cpu.view.pb.variable_bounds = raft::device_span::type>( + fj_cpu.h_var_bounds.data(), fj_cpu.h_var_bounds.size()); + fj_cpu.view.pb.variable_types = + raft::device_span(fj_cpu.h_var_types.data(), fj_cpu.h_var_types.size()); + fj_cpu.view.pb.is_binary_variable = + raft::device_span(fj_cpu.h_is_binary_variable.data(), fj_cpu.h_is_binary_variable.size()); + fj_cpu.view.pb.coefficients = + raft::device_span(fj_cpu.h_coefficients.data(), fj_cpu.h_coefficients.size()); + fj_cpu.view.pb.offsets = raft::device_span(fj_cpu.h_offsets.data(), fj_cpu.h_offsets.size()); + fj_cpu.view.pb.variables = + raft::device_span(fj_cpu.h_variables.data(), fj_cpu.h_variables.size()); + fj_cpu.view.pb.reverse_coefficients = raft::device_span( + fj_cpu.h_reverse_coefficients.data(), fj_cpu.h_reverse_coefficients.size()); + fj_cpu.view.pb.reverse_constraints = raft::device_span(fj_cpu.h_reverse_constraints.data(), + fj_cpu.h_reverse_constraints.size()); + fj_cpu.view.pb.reverse_offsets = + raft::device_span(fj_cpu.h_reverse_offsets.data(), fj_cpu.h_reverse_offsets.size()); + fj_cpu.view.pb.objective_coefficients = + raft::device_span(fj_cpu.h_obj_coeffs.data(), fj_cpu.h_obj_coeffs.size()); + fj_cpu.h_objective_vars.resize(problem.n_variables); + auto end = std::copy_if( + thrust::counting_iterator(0), + thrust::counting_iterator(problem.n_variables), + fj_cpu.h_objective_vars.begin(), + [&fj_cpu](i_t idx) { return !fj_cpu.view.pb.integer_equal(fj_cpu.h_obj_coeffs[idx], (f_t)0); }); + fj_cpu.h_objective_vars.resize(end - fj_cpu.h_objective_vars.begin()); + + fj_cpu.h_best_objective = +std::numeric_limits::infinity(); + + // nnz count + fj_cpu.cached_mtm_moves.resize(fj_cpu.h_coefficients.size(), + std::make_pair(0, fj_staged_score_t::zero())); + + fj_cpu.cached_cstr_bounds.resize(fj_cpu.h_reverse_coefficients.size()); + for (i_t var_idx = 0; var_idx < (i_t)fj_cpu.view.pb.n_variables; ++var_idx) { + auto [offset_begin, offset_end] = fj_cpu.view.pb.reverse_range_for_var(var_idx); + for (i_t i = offset_begin; i < offset_end; ++i) { + fj_cpu.cached_cstr_bounds[i] = + std::make_pair(fj_cpu.h_cstr_lb[fj_cpu.h_reverse_constraints[i]], + fj_cpu.h_cstr_ub[fj_cpu.h_reverse_constraints[i]]); + } + } + + fj_cpu.flip_move_computed.resize(fj_cpu.view.pb.n_variables, false); + fj_cpu.var_bitmap.resize(fj_cpu.view.pb.n_variables, false); + fj_cpu.iter_mtm_vars.reserve(fj_cpu.view.pb.n_variables); + + init_lhs(fj_cpu); +} + +template +static void sanity_checks(fj_cpu_climber_t& fj_cpu) +{ + // Check that each violated constraint is actually violated and not present in + // satisfied_constraints + for (const auto& cstr_idx : fj_cpu.violated_constraints) { + cuopt_assert(fj_cpu.satisfied_constraints.count(cstr_idx) == 0, + "Violated constraint also in satisfied_constraints"); + f_t lhs = fj_cpu.h_lhs[cstr_idx]; + f_t tol = fj_cpu.view.get_corrected_tolerance(cstr_idx); + f_t excess = fj_cpu.view.excess_score(cstr_idx, lhs); + cuopt_assert(excess < -tol, "Constraint in violated_constraints is not actually violated"); + } + + // Check that each satisfied constraint is actually satisfied and not present in + // violated_constraints + for (const auto& cstr_idx : fj_cpu.satisfied_constraints) { + cuopt_assert(fj_cpu.violated_constraints.count(cstr_idx) == 0, + "Satisfied constraint also in violated_constraints"); + f_t lhs = fj_cpu.h_lhs[cstr_idx]; + f_t tol = fj_cpu.view.get_corrected_tolerance(cstr_idx); + f_t excess = fj_cpu.view.excess_score(cstr_idx, lhs); + cuopt_assert(!(excess < -tol), "Constraint in satisfied_constraints is actually violated"); + } + + // Check that each constraint is in exactly one of violated_constraints or satisfied_constraints + for (i_t cstr_idx = 0; cstr_idx < fj_cpu.view.pb.n_constraints; ++cstr_idx) { + bool in_viol = fj_cpu.violated_constraints.count(cstr_idx) > 0; + bool in_sat = fj_cpu.satisfied_constraints.count(cstr_idx) > 0; + cuopt_assert( + in_viol != in_sat, + "Constraint must be in exactly one of violated_constraints or satisfied_constraints"); + + cuopt_assert(fj_cpu.h_cstr_left_weights[cstr_idx] >= 0, "Weights should be positive or zero"); + cuopt_assert(fj_cpu.h_cstr_right_weights[cstr_idx] >= 0, "Weights should be positive or zero"); + } + cuopt_assert(fj_cpu.h_objective_weight >= 0, "Objective weight should be positive or zero"); +} + +template +std::unique_ptr> fj_t::create_cpu_climber( + solution_t& solution, + const std::vector& left_weights, + const std::vector& right_weights, + f_t objective_weight, + fj_settings_t settings, + bool randomize_params) +{ + raft::common::nvtx::range scope("fj_cpu_init"); + + auto fj_cpu = std::make_unique>(); + + // Initialize fj_cpu with all the data + init_fj_cpu(*fj_cpu, solution, left_weights, right_weights, objective_weight); + fj_cpu->settings = settings; + if (randomize_params) { + auto rng = std::mt19937(cuopt::seed_generator::get_seed()); + fj_cpu->mtm_viol_samples = std::uniform_int_distribution(15, 50)(rng); + fj_cpu->mtm_sat_samples = std::uniform_int_distribution(10, 30)(rng); + fj_cpu->nnz_samples = std::uniform_int_distribution(2000, 15000)(rng); + fj_cpu->perturb_interval = std::uniform_int_distribution(50, 500)(rng); + } + fj_cpu->settings.seed = cuopt::seed_generator::get_seed(); + return fj_cpu; // move +} + +template +bool fj_t::cpu_solve(fj_cpu_climber_t& fj_cpu, f_t in_time_limit) +{ + raft::common::nvtx::range scope("fj_cpu"); + + i_t local_mins = 0; + auto loop_start = std::chrono::high_resolution_clock::now(); + auto time_limit = std::chrono::milliseconds((int)(in_time_limit * 1000)); + auto loop_time_start = std::chrono::high_resolution_clock::now(); + while (!fj_cpu.halted) { + // Check if 5 seconds have passed + auto now = std::chrono::high_resolution_clock::now(); + if (in_time_limit < std::numeric_limits::infinity() && + now - loop_time_start > time_limit) { + CUOPT_LOG_TRACE("%sTime limit of %.4f seconds reached, breaking loop at iteration %d\n", + fj_cpu.log_prefix.c_str(), + time_limit.count() / 1000.f, + fj_cpu.iterations); + break; + } + + fj_move_t move = fj_move_t{-1, 0}; + fj_staged_score_t score = fj_staged_score_t::invalid(); + // Perform lift moves + if (fj_cpu.violated_constraints.empty()) { thrust::tie(move, score) = find_lift_move(fj_cpu); } + // Regular MTM + if (!(score > fj_staged_score_t::zero())) { + thrust::tie(move, score) = find_mtm_move_viol(fj_cpu, fj_cpu.mtm_viol_samples); + } + // try with MTM in satisfied constraints + if (fj_cpu.feasible_found && !(score > fj_staged_score_t::zero())) { + thrust::tie(move, score) = find_mtm_move_sat(fj_cpu, fj_cpu.mtm_sat_samples); + } + // if we're in the feasible region but haven't found improvements in the last n iterations, + // perturb + bool should_perturb = false; + if (fj_cpu.violated_constraints.empty() && + fj_cpu.iterations - fj_cpu.last_feasible_entrance_iter > fj_cpu.perturb_interval) { + should_perturb = true; + fj_cpu.last_feasible_entrance_iter = fj_cpu.iterations; + } + + if (score > fj_staged_score_t::zero() && !should_perturb) { + apply_move(fj_cpu, move.var_idx, move.value, false); + } else { + // Local Min + update_weights(fj_cpu); + if (should_perturb) { + perturb(fj_cpu); + for (auto& cached_move : fj_cpu.cached_mtm_moves) + cached_move.first = 0; + } + thrust::tie(move, score) = + find_mtm_move_viol(fj_cpu, 1, true); // pick a single random violated constraint + i_t var_idx = move.var_idx >= 0 ? move.var_idx : 0; + f_t delta = move.var_idx >= 0 ? move.value : 0; + apply_move(fj_cpu, var_idx, delta, true); + ++local_mins; + } + if (fj_cpu.iterations % fj_cpu.log_interval == 0) { + CUOPT_LOG_TRACE( + "%sCPUFJ iteration: %d, local mins: %d, best_objective: %g, viol: %zu, obj weight %g, maxw " + "%g\n", + fj_cpu.log_prefix.c_str(), + fj_cpu.iterations, + local_mins, + fj_cpu.pb_ptr->get_user_obj_from_solver_obj(fj_cpu.h_best_objective), + fj_cpu.violated_constraints.size(), + fj_cpu.h_objective_weight, + fj_cpu.max_weight); + } + // send current solution to callback every 3000 steps for diversity + if (fj_cpu.iterations % fj_cpu.diversity_callback_interval == 0) { + if (fj_cpu.diversity_callback) { + fj_cpu.diversity_callback(fj_cpu.h_incumbent_objective, fj_cpu.h_assignment); + } + } + + // Print timing statistics every N iterations +#if CPUFJ_TIMING_TRACE + if (fj_cpu.iterations % fj_cpu.timing_stats_interval == 0 && fj_cpu.iterations > 0) { + print_timing_stats(fj_cpu); + } +#endif + cuopt_func_call(sanity_checks(fj_cpu)); + fj_cpu.iterations++; + } + auto loop_end = std::chrono::high_resolution_clock::now(); + double total_time = + std::chrono::duration_cast>(loop_end - loop_start).count(); + double avg_time_per_iter = total_time / fj_cpu.iterations; + CUOPT_LOG_TRACE("%sCPUFJ Average time per iteration: %.8fms\n", + fj_cpu.log_prefix.c_str(), + avg_time_per_iter * 1000.0); + +#if CPUFJ_TIMING_TRACE + // Print final timing statistics + CUOPT_LOG_TRACE("\n=== Final Timing Statistics ===\n"); + print_timing_stats(fj_cpu); +#endif + + return fj_cpu.feasible_found; +} + +#if MIP_INSTANTIATE_FLOAT +template class fj_t; +#endif + +#if MIP_INSTANTIATE_DOUBLE +template class fj_t; +#endif + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip/feasibility_jump/fj_cpu.cuh b/cpp/src/mip/feasibility_jump/fj_cpu.cuh new file mode 100644 index 000000000..41993da02 --- /dev/null +++ b/cpp/src/mip/feasibility_jump/fj_cpu.cuh @@ -0,0 +1,122 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights + * reserved. SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace cuopt::linear_programming::detail { + +// NOTE: this seems an easy pick for reflection/xmacros once this is available (C++26?) +// Maintaining a single source of truth for all members would be nice +template +struct fj_cpu_climber_t { + fj_cpu_climber_t() = default; + fj_cpu_climber_t(const fj_cpu_climber_t& other) = delete; + fj_cpu_climber_t& operator=(const fj_cpu_climber_t& other) = delete; + + fj_cpu_climber_t(fj_cpu_climber_t&& other) = default; + fj_cpu_climber_t& operator=(fj_cpu_climber_t&& other) = default; + + problem_t* pb_ptr; + fj_settings_t settings; + typename fj_t::climber_data_t::view_t view; + // Host copies of device data as struct members + std::vector h_reverse_coefficients; + std::vector h_reverse_constraints; + std::vector h_reverse_offsets; + std::vector h_coefficients; + std::vector h_offsets; + std::vector h_variables; + std::vector h_obj_coeffs; + std::vector::type> h_var_bounds; + std::vector h_cstr_lb; + std::vector h_cstr_ub; + std::vector h_var_types; + std::vector h_is_binary_variable; + std::vector h_objective_vars; + std::vector h_binary_indices; + + std::vector h_tabu_nodec_until; + std::vector h_tabu_noinc_until; + std::vector h_tabu_lastdec; + std::vector h_tabu_lastinc; + + std::vector h_lhs; + std::vector h_lhs_sumcomp; + std::vector h_cstr_left_weights; + std::vector h_cstr_right_weights; + f_t max_weight; + std::vector h_assignment; + std::vector h_best_assignment; + f_t h_objective_weight; + f_t h_incumbent_objective; + f_t h_best_objective; + i_t last_feasible_entrance_iter{0}; + i_t iterations; + std::unordered_set violated_constraints; + std::unordered_set satisfied_constraints; + bool feasible_found{false}; + + // Timing data structures + std::vector find_lift_move_times; + std::vector find_mtm_move_viol_times; + std::vector find_mtm_move_sat_times; + std::vector apply_move_times; + std::vector update_weights_times; + std::vector compute_score_times; + + i_t hit_count{0}; + i_t miss_count{0}; + + i_t candidate_move_hits[3] = {0}; + i_t candidate_move_misses[3] = {0}; + + // vector is actually likely beneficial here since we're memory bound + std::vector flip_move_computed; + ; + // CSR nnz offset -> (delta, score) + std::vector> cached_mtm_moves; + + // CSC (transposed!) nnz-offset-indexed constraint bounds (lb, ub) + // std::pair better compile down to 16 bytes!! GCC do your job! + std::vector> cached_cstr_bounds; + + std::vector var_bitmap; + std::vector iter_mtm_vars; + + i_t mtm_viol_samples{25}; + i_t mtm_sat_samples{15}; + i_t nnz_samples{50000}; + i_t perturb_interval{100}; + + i_t log_interval{1000}; + i_t diversity_callback_interval{3000}; + i_t timing_stats_interval{5000}; + + std::function&)> improvement_callback{nullptr}; + std::function&)> diversity_callback{nullptr}; + std::string log_prefix{""}; + + std::atomic halted{false}; +}; + +} // namespace cuopt::linear_programming::detail diff --git a/cpp/src/mip/feasibility_jump/load_balancing.cuh b/cpp/src/mip/feasibility_jump/load_balancing.cuh index 67af9c06a..d8c43df16 100644 --- a/cpp/src/mip/feasibility_jump/load_balancing.cuh +++ b/cpp/src/mip/feasibility_jump/load_balancing.cuh @@ -139,7 +139,7 @@ __global__ void load_balancing_prepare_iteration(const __grid_constant__ for (i_t i = blockIdx.x + range.first; i < range.second; i += gridDim.x) { i_t var_idx = fj.pb.related_variables[i]; - update_jump_value(fj, var_idx); + update_jump_value(fj, var_idx); } if (FIRST_THREAD) *fj.load_balancing_skip = true; diff --git a/cpp/src/mip/local_search/local_search.cu b/cpp/src/mip/local_search/local_search.cu index f0497d382..196c6bdac 100644 --- a/cpp/src/mip/local_search/local_search.cu +++ b/cpp/src/mip/local_search/local_search.cu @@ -26,10 +26,85 @@ #include #include +#include + #include +#include + namespace cuopt::linear_programming::detail { +template +cpu_fj_thread_t::cpu_fj_thread_t() +{ + cpu_worker = std::thread(&cpu_fj_thread_t::cpu_worker_thread, this); +} + +template +cpu_fj_thread_t::~cpu_fj_thread_t() +{ + if (!cpu_thread_terminate) { kill_cpu_solver(); } +} + +template +void cpu_fj_thread_t::cpu_worker_thread() +{ + while (!cpu_thread_terminate) { + // Wait for start signal + { + std::unique_lock lock(cpu_mutex); + cpu_cv.wait(lock, [this] { return cpu_thread_should_start || cpu_thread_terminate; }); + } + + if (cpu_thread_terminate) break; + + // Run CPU solver + { + raft::common::nvtx::range fun_scope("Running CPU FJ"); + cpu_fj_solution_found = fj_ptr->cpu_solve(*fj_cpu); + } + + cpu_thread_should_start = false; + cpu_thread_done = true; + } +} + +template +void cpu_fj_thread_t::kill_cpu_solver() +{ + cpu_thread_terminate = true; + if (fj_cpu) fj_cpu->halted = true; + cpu_cv.notify_one(); + cpu_worker.join(); +} + +template +void cpu_fj_thread_t::start_cpu_solver() +{ + cuopt_assert(fj_cpu != nullptr, "fj_cpu must not be null"); + // Reset flags + cpu_thread_done = false; + cpu_thread_should_start = true; + fj_cpu->halted = false; + cpu_cv.notify_one(); +} + +template +void cpu_fj_thread_t::stop_cpu_solver() +{ + fj_cpu->halted = true; +} + +template +bool cpu_fj_thread_t::wait_for_cpu_solver() +{ + while (!cpu_thread_done && !cpu_thread_terminate) { + std::this_thread::sleep_for(std::chrono::milliseconds(1)); + } + + return cpu_fj_solution_found; +} + template local_search_t::local_search_t(mip_solver_context_t& context_, rmm::device_uvector& lp_optimal_solution_) @@ -50,6 +125,174 @@ local_search_t::local_search_t(mip_solver_context_t& context rng(cuopt::seed_generator::get_seed()), problem_with_objective_cut(*context.problem_ptr, context.problem_ptr->handle_ptr) { + for (auto& cpu_fj : ls_cpu_fj) { + cpu_fj.fj_ptr = &fj; + } + for (auto& cpu_fj : scratch_cpu_fj) { + cpu_fj.fj_ptr = &fj; + } + scratch_cpu_fj_on_lp_opt.fj_ptr = &fj; +} + +static double local_search_best_obj = std::numeric_limits::max(); +static population_t* pop_ptr = nullptr; + +template +void local_search_t::start_cpufj_scratch_threads(population_t& population) +{ + pop_ptr = &population; + + std::vector default_weights(context.problem_ptr->n_constraints, 1.); + + solution_t solution(*context.problem_ptr); + thrust::fill(solution.handle_ptr->get_thrust_policy(), + solution.assignment.begin(), + solution.assignment.end(), + 0.0); + solution.clamp_within_bounds(); + i_t counter = 0; + for (auto& cpu_fj : scratch_cpu_fj) { + if (counter > 0) solution.assign_random_within_bounds(0.4); + cpu_fj.fj_cpu = cpu_fj.fj_ptr->create_cpu_climber(solution, + default_weights, + default_weights, + 0., + fj_settings_t{}, + /*randomize=*/counter > 0); + + cpu_fj.fj_cpu->log_prefix = "******* scratch " + std::to_string(counter) + ": "; + cpu_fj.fj_cpu->improvement_callback = [this, &population, &cpu_fj]( + f_t obj, const std::vector& h_vec) { + population.add_external_solution(h_vec, obj, solution_origin_t::CPUFJ); + if (obj < local_search_best_obj) { + CUOPT_LOG_DEBUG("******* New local search best obj %g, best overall %g", + context.problem_ptr->get_user_obj_from_solver_obj(obj), + context.problem_ptr->get_user_obj_from_solver_obj( + population.is_feasible() ? population.best_feasible().get_objective() + : std::numeric_limits::max())); + local_search_best_obj = obj; + } + }; + counter++; + }; + + // default weights + cudaDeviceSynchronize(); + + for (auto& cpu_fj : scratch_cpu_fj) { + cpu_fj.start_cpu_solver(); + } + + solution_t solution_lp(*context.problem_ptr); + solution_lp.copy_new_assignment(host_copy(lp_optimal_solution)); + solution_lp.round_random_nearest(500); + scratch_cpu_fj_on_lp_opt.fj_cpu = + fj.create_cpu_climber(solution_lp, default_weights, default_weights, 0.); + scratch_cpu_fj_on_lp_opt.fj_cpu->log_prefix = "******* scratch on LP optimal: "; + scratch_cpu_fj_on_lp_opt.fj_cpu->improvement_callback = + [this, &population](f_t obj, const std::vector& h_vec) { + population.add_external_solution(h_vec, obj, solution_origin_t::CPUFJ); + if (obj < local_search_best_obj) { + CUOPT_LOG_DEBUG("******* New local search best obj %g, best overall %g", + context.problem_ptr->get_user_obj_from_solver_obj(obj), + context.problem_ptr->get_user_obj_from_solver_obj( + population.is_feasible() ? population.best_feasible().get_objective() + : std::numeric_limits::max())); + local_search_best_obj = obj; + } + }; + + // default weights + cudaDeviceSynchronize(); + scratch_cpu_fj_on_lp_opt.start_cpu_solver(); +} + +template +void local_search_t::stop_cpufj_scratch_threads() +{ + for (auto& cpu_fj : scratch_cpu_fj) { + cpu_fj.kill_cpu_solver(); + } + scratch_cpu_fj_on_lp_opt.kill_cpu_solver(); +} + +template +bool local_search_t::do_fj_solve(solution_t& solution, + fj_t& in_fj, + const std::string& source) +{ + auto h_weights = cuopt::host_copy(in_fj.cstr_weights, solution.handle_ptr->get_stream()); + auto h_objective_weight = in_fj.objective_weight.value(solution.handle_ptr->get_stream()); + for (auto& cpu_fj : ls_cpu_fj) { + cpu_fj.fj_cpu = cpu_fj.fj_ptr->create_cpu_climber( + solution, h_weights, h_weights, h_objective_weight, fj_settings_t{}, true); + } + + auto solution_copy = solution; + + // Start CPU solver in background thread + for (auto& cpu_fj : ls_cpu_fj) { + cpu_fj.start_cpu_solver(); + } + + // Run GPU solver and measure execution time + auto gpu_fj_start = std::chrono::high_resolution_clock::now(); + in_fj.solve(solution); + + // Stop CPU solver + for (auto& cpu_fj : ls_cpu_fj) { + cpu_fj.stop_cpu_solver(); + } + + auto gpu_fj_end = std::chrono::high_resolution_clock::now(); + double gpu_fj_duration = std::chrono::duration(gpu_fj_end - gpu_fj_start).count(); + + solution_t solution_cpu(*solution.problem_ptr); + + f_t best_cpu_obj = std::numeric_limits::max(); + // // Wait for CPU solver to finish + for (auto& cpu_fj : ls_cpu_fj) { + bool cpu_sol_found = cpu_fj.wait_for_cpu_solver(); + if (cpu_sol_found) { + f_t cpu_obj = cpu_fj.fj_cpu->h_best_objective; + if (cpu_obj < best_cpu_obj) { + best_cpu_obj = cpu_obj; + solution_cpu.copy_new_assignment(cpu_fj.fj_cpu->h_best_assignment); + solution_cpu.compute_feasibility(); + } + } + } + bool cpu_sol_found = best_cpu_obj < std::numeric_limits::max(); + + bool gpu_feasible = solution.get_feasible(); + bool cpu_feasible = cpu_sol_found && solution_cpu.get_feasible(); + + static std::unordered_map total_calls; + static std::unordered_map cpu_better; + + CUOPT_LOG_DEBUG("GPU FJ returns feas %d, obj %g", gpu_feasible, solution.get_user_objective()); + CUOPT_LOG_DEBUG("CPU FJ returns feas %d, obj %g, stats %d/%d", + cpu_feasible, + solution_cpu.get_user_objective(), + total_calls[source], + cpu_better[source]); + + total_calls[source]++; + if (cpu_feasible && !gpu_feasible || + (cpu_feasible && solution_cpu.get_objective() < solution.get_objective())) { + CUOPT_LOG_DEBUG( + "CPU FJ returns better solution! cpu_obj %g, gpu_obj %g, stats %d/%d, source %s", + solution_cpu.get_user_objective(), + solution.get_user_objective(), + total_calls[source], + cpu_better[source], + source.c_str()); + solution.copy_from(solution_cpu); + cpu_better[source]++; + } + solution.compute_feasibility(); + + return cpu_feasible; } template @@ -73,7 +316,7 @@ void local_search_t::generate_fast_solution(solution_t& solu if (timer.check_time_limit()) { return; }; fj.settings.time_limit = std::min(3., timer.remaining_time()); // run fj on the solution - fj.solve(solution); + do_fj_solve(solution, fj, "fast"); // TODO check if FJ returns the same solution // check if the solution is feasible if (solution.compute_feasibility()) { return; } @@ -136,7 +379,7 @@ bool local_search_t::run_fj_until_timer(solution_t& solution fj.settings.update_weights = false; fj.settings.feasibility_run = false; fj.copy_weights(weights, solution.handle_ptr); - fj.solve(solution); + do_fj_solve(solution, fj, "until_timer"); CUOPT_LOG_DEBUG("Initial FJ feasibility done"); is_feasible = solution.compute_feasibility(); if (fj.settings.feasibility_run || timer.check_time_limit()) { return is_feasible; } @@ -150,6 +393,12 @@ bool local_search_t::run_fj_annealing(solution_t& solution, { auto prev_settings = fj.settings; + solution.compute_feasibility(); + CUOPT_LOG_DEBUG("Running FJ Annealing on solution with obj %g/%g, feas? %d", + solution.get_user_objective(), + solution.get_objective(), + solution.get_feasible()); + // run in FEASIBLE_FIRST to priorize feasibility-improving moves fj.settings.n_of_minimums_for_exit = ls_config.n_local_mins; fj.settings.mode = fj_mode_t::EXIT_NON_IMPROVING; @@ -159,7 +408,7 @@ bool local_search_t::run_fj_annealing(solution_t& solution, fj.settings.parameters.allow_infeasibility_iterations = 100; fj.settings.update_weights = 1; fj.settings.baseline_objective_for_longer_run = ls_config.best_objective_of_parents; - fj.solve(solution); + do_fj_solve(solution, fj, "annealing"); bool is_feasible = solution.compute_feasibility(); fj.settings = prev_settings; @@ -225,7 +474,7 @@ bool local_search_t::check_fj_on_lp_optimal(solution_t& solu fj.settings.update_weights = true; fj.settings.feasibility_run = false; fj.settings.time_limit = std::min(30., timer.remaining_time()); - fj.solve(solution); + do_fj_solve(solution, fj, "on_lp_optimal"); return solution.get_feasible(); } @@ -242,7 +491,7 @@ bool local_search_t::run_fj_on_zero(solution_t& solution, ti fj.settings.update_weights = true; fj.settings.feasibility_run = false; fj.settings.time_limit = std::min(30., timer.remaining_time()); - bool is_feasible = fj.solve(solution); + bool is_feasible = do_fj_solve(solution, fj, "on_zero"); return is_feasible; } diff --git a/cpp/src/mip/local_search/local_search.cuh b/cpp/src/mip/local_search/local_search.cuh index d878b4b55..2a30db2b8 100644 --- a/cpp/src/mip/local_search/local_search.cuh +++ b/cpp/src/mip/local_search/local_search.cuh @@ -24,6 +24,12 @@ #include #include +#include +#include +#include +#include +#include + namespace cuopt::linear_programming::detail { // make sure RANDOM is always the last @@ -40,18 +46,44 @@ struct ls_config_t { f_t best_objective_of_parents{std::numeric_limits::lowest()}; i_t n_local_mins_for_line_segment = 50; i_t n_points_to_search_for_line_segment = 5; - i_t n_local_mins = 250; + i_t n_local_mins = 2500; i_t iteration_limit_for_line_segment = 20 * n_local_mins_for_line_segment; i_t iteration_limit = 20 * n_local_mins; ls_method_t ls_method = ls_method_t::RANDOM; }; +template +struct cpu_fj_thread_t { + cpu_fj_thread_t(); + ~cpu_fj_thread_t(); + + void cpu_worker_thread(); + void start_cpu_solver(); + void stop_cpu_solver(); + bool wait_for_cpu_solver(); // return feasibility + void kill_cpu_solver(); + + std::thread cpu_worker; + std::mutex cpu_mutex; + std::condition_variable cpu_cv; + std::atomic should_stop{false}; + std::atomic cpu_thread_should_start{false}; + std::atomic cpu_thread_done{false}; + std::atomic cpu_thread_terminate{false}; + bool cpu_fj_solution_found{false}; + std::unique_ptr> fj_cpu; + fj_t* fj_ptr{nullptr}; +}; + template class local_search_t { public: local_search_t() = delete; local_search_t(mip_solver_context_t& context, rmm::device_uvector& lp_optimal_solution_); + + void start_cpufj_scratch_threads(population_t& population); + void stop_cpufj_scratch_threads(); void generate_fast_solution(solution_t& solution, timer_t timer); bool generate_solution(solution_t& solution, bool perturb, @@ -79,6 +111,10 @@ class local_search_t { timer_t timer, population_t* population_ptr = nullptr); void resize_vectors(problem_t& problem, const raft::handle_t* handle_ptr); + + bool do_fj_solve(solution_t& solution, fj_t& fj, const std::string& source); + + i_t ls_threads() const { return ls_cpu_fj.size() + scratch_cpu_fj.size(); } void save_solution_and_add_cutting_plane(solution_t& solution, rmm::device_uvector& best_solution, f_t& best_objective); @@ -97,11 +133,14 @@ class local_search_t { bool lp_optimal_exists{false}; rmm::device_uvector fj_sol_on_lp_opt; fj_t fj; - // fj_tree_t fj_tree; constraint_prop_t constraint_prop; line_segment_search_t line_segment_search; feasibility_pump_t fp; std::mt19937 rng; + + std::array, 8> ls_cpu_fj; + std::array, 1> scratch_cpu_fj; + cpu_fj_thread_t scratch_cpu_fj_on_lp_opt; problem_t problem_with_objective_cut; bool cutting_plane_added_for_active_run{false}; }; diff --git a/cpp/src/mip/local_search/rounding/simple_rounding.cu b/cpp/src/mip/local_search/rounding/simple_rounding.cu index 17c8793e3..bc7f9c58f 100644 --- a/cpp/src/mip/local_search/rounding/simple_rounding.cu +++ b/cpp/src/mip/local_search/rounding/simple_rounding.cu @@ -82,12 +82,44 @@ bool check_brute_force_rounding(solution_t& solution) return false; } +template +bool invoke_simple_rounding(solution_t& solution) +{ + solution.compute_feasibility(); + + solution_t sol_copy(*solution.problem_ptr); + sol_copy.copy_from(solution); + + rmm::device_scalar successful(true, solution.handle_ptr->get_stream()); + i_t TPB = 128; + simple_rounding_kernel + <<<2048, TPB, 0, solution.handle_ptr->get_stream()>>>(solution.view(), successful.data()); + if (!successful.value(solution.handle_ptr->get_stream())) { + CUOPT_LOG_DEBUG("Simple rounding failed"); + solution.copy_from(sol_copy); + return false; + } else { + solution.compute_feasibility(); + CUOPT_LOG_DEBUG("Simple rounding successful"); + CUOPT_LOG_DEBUG("objective %g, feas %d, excess %g, int violation %g", + solution.get_user_objective(), + solution.get_feasible(), + solution.get_total_excess(), + solution.compute_max_int_violation()); + return true; + } +} + template void invoke_round_nearest(solution_t& solution) { i_t TPB = 128; bool brute_force_found_feas = check_brute_force_rounding(solution); if (brute_force_found_feas) { return; } + + bool simple_round = invoke_simple_rounding(solution); + if (simple_round) { return; } + i_t n_blocks = (solution.problem_ptr->n_integer_vars + TPB - 1) / TPB; nearest_rounding_kernel<<get_stream()>>>( solution.view(), cuopt::seed_generator::get_seed()); @@ -153,6 +185,7 @@ void invoke_correct_integers(solution_t& solution, f_t tol) template void invoke_random_round_nearest(solution_t & solution, \ int n_target_random_rounds); \ template void invoke_round_nearest(solution_t & solution); \ + template bool invoke_simple_rounding(solution_t & solution); \ template void invoke_correct_integers(solution_t & solution, \ F_TYPE tol); diff --git a/cpp/src/mip/local_search/rounding/simple_rounding.cuh b/cpp/src/mip/local_search/rounding/simple_rounding.cuh index 5e1edd1fe..47416c3ff 100644 --- a/cpp/src/mip/local_search/rounding/simple_rounding.cuh +++ b/cpp/src/mip/local_search/rounding/simple_rounding.cuh @@ -24,6 +24,9 @@ namespace cuopt::linear_programming::detail { template void invoke_round_nearest(solution_t& solution); +template +bool invoke_simple_rounding(solution_t& solution); + template void invoke_random_round_nearest(solution_t& solution, i_t n_target_random_rounds); diff --git a/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh b/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh index 1c0103110..bc0e2157d 100644 --- a/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh +++ b/cpp/src/mip/local_search/rounding/simple_rounding_kernels.cuh @@ -25,6 +25,58 @@ namespace cuopt::linear_programming::detail { +template +__global__ void simple_rounding_kernel(typename solution_t::view_t solution, + bool* successful) +{ + if (TH_ID_X >= solution.problem.n_integer_vars) { return; } + i_t var_id = solution.problem.integer_indices[TH_ID_X]; + f_t curr_val = solution.assignment[var_id]; + if (solution.problem.is_integer(curr_val)) { return; } + + i_t up_locks = 0; + i_t down_locks = 0; + + auto [offset_begin, offset_end] = solution.problem.reverse_range_for_var(var_id); + for (i_t i = offset_begin; i < offset_end; i += 1) { + auto cstr_idx = solution.problem.reverse_constraints[i]; + auto cstr_coeff = solution.problem.reverse_coefficients[i]; + + // boxed constraint. can't be rounded safely + if (std::isfinite(solution.problem.constraint_lower_bounds[cstr_idx]) && + std::isfinite(solution.problem.constraint_upper_bounds[cstr_idx])) { + up_locks += 1; + down_locks += 1; + continue; + } + + f_t sign = std::isfinite(solution.problem.constraint_upper_bounds[cstr_idx]) ? 1 : -1; + + if (cstr_coeff * sign > 0) { + up_locks += 1; + } else { + down_locks += 1; + } + } + + bool can_round_up = up_locks == 0; + bool can_round_down = down_locks == 0; + + if (can_round_up && can_round_down) { + if (solution.problem.objective_coefficients[var_id] > 0) { + solution.assignment[var_id] = floor(curr_val); + } else { + solution.assignment[var_id] = ceil(curr_val); + } + } else if (can_round_up) { + solution.assignment[var_id] = ceil(curr_val); + } else if (can_round_down) { + solution.assignment[var_id] = floor(curr_val); + } else { + *successful = false; + } +} + // rounds each integer variable to the nearest integer value that doesn't violate the bounds template __global__ void nearest_rounding_kernel(typename solution_t::view_t solution, diff --git a/cpp/src/mip/problem/problem.cu b/cpp/src/mip/problem/problem.cu index 598534c61..aa9407452 100644 --- a/cpp/src/mip/problem/problem.cu +++ b/cpp/src/mip/problem/problem.cu @@ -85,7 +85,7 @@ void problem_t::op_problem_cstr_body(const optimization_problem_t::op_problem_cstr_body(const optimization_problem_t(*this, combined_bounds); } @@ -712,7 +712,7 @@ void problem_t::recompute_auxilliary_data(bool check_representation) // TODO: speedup compute related variables const double time_limit = 30.; compute_related_variables(time_limit); - if (check_representation) check_problem_representation(true); + if (check_representation) cuopt_func_call(check_problem_representation(true)); } template @@ -1251,7 +1251,7 @@ void problem_t::remove_given_variables(problem_t& original_p combine_constraint_bounds(*this, combined_bounds); handle_ptr->sync_stream(); recompute_auxilliary_data(); - check_problem_representation(true); + cuopt_func_call(check_problem_representation(true)); } template @@ -1281,7 +1281,7 @@ void problem_t::compute_integer_fixed_problem() thrust::fill(handle_ptr->get_thrust_policy(), assignment.begin(), assignment.end(), 0.); integer_fixed_problem = std::make_shared>(get_problem_after_fixing_vars( assignment, integer_indices, integer_fixed_variable_map, handle_ptr)); - integer_fixed_problem->check_problem_representation(true); + cuopt_func_call(integer_fixed_problem->check_problem_representation(true)); integer_fixed_problem->lp_state.resize(*integer_fixed_problem, handle_ptr->get_stream()); } @@ -1306,7 +1306,7 @@ void problem_t::fill_integer_fixed_problem(rmm::device_uvector& a integer_fixed_problem->fix_given_variables(*this, assignment, integer_indices, handle_ptr); combine_constraint_bounds(*integer_fixed_problem, integer_fixed_problem->combined_bounds); - integer_fixed_problem->check_problem_representation(true); + cuopt_func_call(integer_fixed_problem->check_problem_representation(true)); } template @@ -1445,7 +1445,7 @@ void problem_t::preprocess_problem() standardize_bounds(variable_constraint_map, *this); compute_csr(variable_constraint_map, *this); compute_transpose_of_problem(); - check_problem_representation(true, false); + cuopt_func_call(check_problem_representation(true, false)); presolve_data.initialize_var_mapping(*this, handle_ptr); integer_indices.resize(n_variables, handle_ptr->get_stream()); is_binary_variable.resize(n_variables, handle_ptr->get_stream()); @@ -1455,7 +1455,7 @@ void problem_t::preprocess_problem() std::iota(reverse_original_ids.begin(), reverse_original_ids.end(), 0); compute_n_integer_vars(); compute_binary_var_table(); - check_problem_representation(true); + cuopt_func_call(check_problem_representation(true)); preprocess_called = true; } @@ -1556,7 +1556,7 @@ void problem_t::get_host_user_problem( } } template -f_t problem_t::get_user_obj_from_solver_obj(f_t solver_obj) +f_t problem_t::get_user_obj_from_solver_obj(f_t solver_obj) const { return presolve_data.objective_scaling_factor * (solver_obj + presolve_data.objective_offset); } diff --git a/cpp/src/mip/problem/problem.cuh b/cpp/src/mip/problem/problem.cuh index f9c147b89..c210c3cfa 100644 --- a/cpp/src/mip/problem/problem.cuh +++ b/cpp/src/mip/problem/problem.cuh @@ -99,7 +99,7 @@ class problem_t { bool resize_to_original_problem = true); void post_process_solution(solution_t& solution); void compute_transpose_of_problem(); - f_t get_user_obj_from_solver_obj(f_t solver_obj); + f_t get_user_obj_from_solver_obj(f_t solver_obj) const; void compute_integer_fixed_problem(); void fill_integer_fixed_problem(rmm::device_uvector& assignment, const raft::handle_t* handle_ptr); @@ -116,23 +116,23 @@ class problem_t { void compute_vars_with_objective_coeffs(); struct view_t { - DI std::pair reverse_range_for_var(i_t v) const + HDI std::pair reverse_range_for_var(i_t v) const { cuopt_assert(v >= 0 && v < n_variables, "Variable should be within the range"); return std::make_pair(reverse_offsets[v], reverse_offsets[v + 1]); } - DI std::pair range_for_constraint(i_t c) const + HDI std::pair range_for_constraint(i_t c) const { return std::make_pair(offsets[c], offsets[c + 1]); } - DI std::pair range_for_related_vars(i_t v) const + HDI std::pair range_for_related_vars(i_t v) const { return std::make_pair(related_variables_offsets[v], related_variables_offsets[v + 1]); } - DI bool check_variable_within_bounds(i_t v, f_t val) const + HDI bool check_variable_within_bounds(i_t v, f_t val) const { const f_t int_tol = tolerances.integrality_tolerance; auto bounds = variable_bounds[v]; @@ -141,20 +141,20 @@ class problem_t { return within_bounds; } - DI bool is_integer_var(i_t v) const { return var_t::INTEGER == variable_types[v]; } + HDI bool is_integer_var(i_t v) const { return var_t::INTEGER == variable_types[v]; } // check if the variable is integer according to the tolerances // specified for this problem - DI bool is_integer(f_t val) const + HDI bool is_integer(f_t val) const { return raft::abs(round(val) - (val)) <= tolerances.integrality_tolerance; } - DI bool integer_equal(f_t val1, f_t val2) const + HDI bool integer_equal(f_t val1, f_t val2) const { return raft::abs(val1 - val2) <= tolerances.integrality_tolerance; } - DI f_t get_random_for_var(i_t v, raft::random::PCGenerator& rng) const + HDI f_t get_random_for_var(i_t v, raft::random::PCGenerator& rng) const { cuopt_assert(var_t::INTEGER != variable_types[v], "Random value can only be called on continuous values"); diff --git a/cpp/src/mip/solution/solution.cu b/cpp/src/mip/solution/solution.cu index 7c95aaeed..cdffa37b0 100644 --- a/cpp/src/mip/solution/solution.cu +++ b/cpp/src/mip/solution/solution.cu @@ -287,6 +287,13 @@ void solution_t::compute_constraints() { if (problem_ptr->n_constraints == 0) { return; } + cuopt_assert(problem_ptr->n_constraints == problem_ptr->constraint_upper_bounds.size(), + "invalid assignment size"); + // TODO: investigate why sometimes the sizes are incorrectly set before the kernel call + constraint_value.resize(problem_ptr->n_constraints, handle_ptr->get_stream()); + lower_excess.resize(problem_ptr->n_constraints, handle_ptr->get_stream()); + upper_excess.resize(problem_ptr->n_constraints, handle_ptr->get_stream()); + i_t TPB = 64; compute_constraint_values <<n_constraints, TPB, 0, handle_ptr->get_stream()>>>(view()); @@ -372,6 +379,13 @@ bool solution_t::round_random_nearest(i_t n_target_random_rounds) return compute_feasibility(); } +template +bool solution_t::round_simple() +{ + invoke_simple_rounding(*this); + return compute_feasibility(); +} + template void solution_t::correct_integer_precision() { diff --git a/cpp/src/mip/solution/solution.cuh b/cpp/src/mip/solution/solution.cuh index 0774bdef2..956434c90 100644 --- a/cpp/src/mip/solution/solution.cuh +++ b/cpp/src/mip/solution/solution.cuh @@ -62,6 +62,7 @@ class solution_t { bool round_nearest(); // rounds integers to random if fractionality is between 0.25 and 0.75. otherwise, to nearest bool round_random_nearest(i_t n_target_random_rounds); + bool round_simple(); // makes the approximate integer values up to INTEGRALITY TOLERANCE whole integers void correct_integer_precision(); // does a reduction and returns if the current solution is feasible diff --git a/cpp/src/mip/solve.cu b/cpp/src/mip/solve.cu index 7941f41dc..22638c4f9 100644 --- a/cpp/src/mip/solve.cu +++ b/cpp/src/mip/solve.cu @@ -90,6 +90,8 @@ mip_solution_t run_mip(detail::problem_t& problem, solution.compute_objective(); // just to ensure h_user_obj is set auto stats = solver_stats_t{}; stats.solution_bound = solution.get_user_objective(); + // log the objective for scripts which need it + CUOPT_LOG_INFO("Best feasible: %f", solution.get_user_objective()); return solution.get_solution(true, stats, false); } // problem contains unpreprocessed data diff --git a/cpp/src/mip/solver.cu b/cpp/src/mip/solver.cu index ea3272419..a9b7615ed 100644 --- a/cpp/src/mip/solver.cu +++ b/cpp/src/mip/solver.cu @@ -73,7 +73,7 @@ struct branch_and_bound_solution_helper_t { void solution_callback(std::vector& solution, f_t objective) { - dm->population.add_external_solution(solution, objective); + dm->population.add_external_solution(solution, objective, solution_origin_t::BRANCH_AND_BOUND); } void set_simplex_solution(std::vector& solution, @@ -175,6 +175,7 @@ solution_t mip_solver_t::run_solver() if (context.settings.num_cpu_threads != -1) { branch_and_bound_settings.num_threads = std::max(1, context.settings.num_cpu_threads); } + CUOPT_LOG_INFO("Using %d CPU threads for B&B", branch_and_bound_settings.num_threads); // Set the branch and bound -> primal heuristics callback branch_and_bound_settings.solution_callback = diff --git a/cpp/tests/mip/termination_test.cu b/cpp/tests/mip/termination_test.cu index ce2003c40..90221d3f1 100644 --- a/cpp/tests/mip/termination_test.cu +++ b/cpp/tests/mip/termination_test.cu @@ -104,9 +104,9 @@ TEST(termination_status, optimality_test) // Ensure the lower bound on maximization problems when BB times out has the right sign TEST(termination_status, lower_bound_bb_timeout) { - auto [termination_status, obj_val, lb] = test_mps_file("mip/cod105_max.mps", 0.5, false); + auto [termination_status, obj_val, lb] = test_mps_file("mip/cod105_max.mps", 5.0, false); EXPECT_EQ(termination_status, mip_termination_status_t::FeasibleFound); - EXPECT_EQ(obj_val, 12); + EXPECT_GE(obj_val, 6); EXPECT_GE(lb, obj_val); } diff --git a/dependencies.yaml b/dependencies.yaml index d153a0291..ee479e520 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -393,7 +393,6 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - fastapi - - httpx - *jsonref - *msgpack_numpy - *pandas diff --git a/docs/cuopt/source/resources.rst b/docs/cuopt/source/resources.rst index 9121800ee..b20ef4a24 100644 --- a/docs/cuopt/source/resources.rst +++ b/docs/cuopt/source/resources.rst @@ -13,6 +13,9 @@ cuOpt Examples and Tutorials Videos .. dropdown:: Tutorial List - `Quick Start to GPU-Accelerated Large-Scale Logistics and Supply Chain Optimization with NVIDIA cuOpt `_ + - `Accelerated MILP for Supply Chain, Logistics & Planning Optimization — Quick Start with NVIDIA cuOpt `_ + - `Solving Vehicle Routing Problems — Hands-On with Open Source NVIDIA cuOpt `_ + `Test cuOpt with NVIDIA Launchable `_ ------------------------------------------------------------------------------------------------------------------------------ diff --git a/python/cuopt_server/pyproject.toml b/python/cuopt_server/pyproject.toml index 83e764f23..f84297589 100644 --- a/python/cuopt_server/pyproject.toml +++ b/python/cuopt_server/pyproject.toml @@ -35,7 +35,6 @@ dependencies = [ "cuopt==25.10.*,>=0.0.0a0", "cupy-cuda13x>=13.6.0", "fastapi", - "httpx", "jsonref==1.1.0", "msgpack-numpy==0.4.8", "msgpack==1.1.0",