diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp index a4f52cb4e..4a6121f8e 100644 --- a/benchmarks/linear_programming/cuopt/run_mip.cpp +++ b/benchmarks/linear_programming/cuopt/run_mip.cpp @@ -80,10 +80,11 @@ void merge_result_files(const std::string& out_dir, void write_to_output_file(const std::string& out_dir, const std::string& base_filename, int gpu_id, + int n_gpus, int batch_id, const std::string& data) { - int output_id = batch_id * 8 + gpu_id; + int output_id = batch_id * n_gpus + gpu_id; std::string filename = out_dir + "/result_" + std::to_string(output_id) + ".txt"; std::ofstream outfile(filename, std::ios_base::app); if (outfile.is_open()) { @@ -149,6 +150,7 @@ std::vector> read_solution_from_dir(const std::string file_p int run_single_file(std::string file_path, int device, int batch_id, + int n_gpus, std::string out_dir, std::optional initial_solution_dir, bool heuristics_only, @@ -243,7 +245,7 @@ int run_single_file(std::string file_path, << obj_val << "," << benchmark_info.objective_of_initial_population << "," << benchmark_info.last_improvement_of_best_feasible << "," << benchmark_info.last_improvement_after_recombination << "\n"; - write_to_output_file(out_dir, base_filename, device, batch_id, ss.str()); + write_to_output_file(out_dir, base_filename, device, n_gpus, batch_id, ss.str()); CUOPT_LOG_INFO("Results written to the file %s", base_filename.c_str()); return sol_found; } @@ -251,6 +253,7 @@ int run_single_file(std::string file_path, void run_single_file_mp(std::string file_path, int device, int batch_id, + int n_gpus, std::string out_dir, std::optional input_file_dir, bool heuristics_only, @@ -265,6 +268,7 @@ void run_single_file_mp(std::string file_path, int sol_found = run_single_file(file_path, device, batch_id, + n_gpus, out_dir, input_file_dir, heuristics_only, @@ -462,6 +466,7 @@ int main(int argc, char* argv[]) run_single_file_mp(file_name, gpu_id, batch_num, + n_gpus, out_dir, initial_solution_file, heuristics_only, @@ -501,6 +506,7 @@ int main(int argc, char* argv[]) run_single_file(path, 0, 0, + n_gpus, out_dir, initial_solution_file, heuristics_only, diff --git a/ci/test_python.sh b/ci/test_python.sh index 0d3d1e596..7d504f473 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -60,6 +60,9 @@ EXITCODE=0 trap "EXITCODE=1" ERR set +e +# Due to race condition in certain cases UCX might not be able to cleanup properly, so we set the number of threads to 1 +export OMP_NUM_THREADS=1 + rapids-logger "Test cuopt_cli" timeout 10m bash ./python/libcuopt/libcuopt/tests/test_cli.sh diff --git a/ci/test_wheel_cuopt_server.sh b/ci/test_wheel_cuopt_server.sh index 5f0b874ba..de4a52f47 100755 --- a/ci/test_wheel_cuopt_server.sh +++ b/ci/test_wheel_cuopt_server.sh @@ -37,4 +37,7 @@ rapids-pip-retry install \ ./datasets/linear_programming/download_pdlp_test_dataset.sh ./datasets/mip/download_miplib_test_dataset.sh +# Due to race condition in certain cases UCX might not be able to cleanup properly, so we set the number of threads to 1 +export OMP_NUM_THREADS=1 + RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest --verbose --capture=no ./python/cuopt_server/cuopt_server/tests/ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index cb17f0c4a..90bb1c57f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -190,9 +190,9 @@ FetchContent_Declare( # does not have some of the presolvers and settings that we need # Mainly, probing and clique merging. # This is the reason we are using the development branch - # commit from Oct 8, 2025. Once these changes are merged into the main branch, + # commit from cliquemergebug branch. Once these changes are merged into the main branch, #we can switch to the main branch. - GIT_TAG "24ccf5752656df0f15dd9aabe5b97feae829b9ec" + GIT_TAG "8f710e33d352bf319d30b9c57e70516222f3f5ca" GIT_PROGRESS TRUE SYSTEM ) @@ -201,8 +201,6 @@ find_package(TBB REQUIRED) set(BUILD_TESTING OFF CACHE BOOL "Disable test build for papilo") set(PAPILO_NO_BINARIES ON) option(LUSOL "Disable LUSOL" OFF) -# Disable TBB because of a bug in CliqueMerging parallel version -set(TBB OFF CACHE BOOL "Disable TBB for papilo") FetchContent_MakeAvailable(papilo) diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index cf6fd6979..78acd0742 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -566,20 +566,13 @@ node_status_t branch_and_bound_t::solve_node(search_tree_t& lp_solution_t leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols); assert(leaf_vstatus.size() == leaf_problem.num_cols); - // Set the correct bounds for the leaf problem - leaf_problem.lower = original_lp_.lower; - leaf_problem.upper = original_lp_.upper; - std::vector bounds_changed(leaf_problem.num_cols, false); // Technically, we can get the already strengthened bounds from the node/parent instead of // getting it from the original problem and re-strengthening. But this requires storing // two vectors at each node and potentially cause memory issues node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed); - i_t node_iter = 0; - f_t lp_start_time = tic(); - std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; - + std::vector leaf_edge_norms = edge_norms_; // = node.steepest_edge_norms; simplex_solver_settings_t lp_settings = settings_; lp_settings.set_log(false); lp_settings.cut_off = upper_bound + settings_.dual_tol; @@ -594,6 +587,9 @@ node_status_t branch_and_bound_t::solve_node(search_tree_t& dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED; if (feasible) { + i_t node_iter = 0; + f_t lp_start_time = tic(); + lp_status = dual_phase2(2, 0, lp_start_time, @@ -610,10 +606,10 @@ node_status_t branch_and_bound_t::solve_node(search_tree_t& leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); lp_status = convert_lp_status_to_dual_status(second_status); } - } - stats_.total_lp_solve_time += toc(lp_start_time); - stats_.total_lp_iters += node_iter; + stats_.total_lp_solve_time += toc(lp_start_time); + stats_.total_lp_iters += node_iter; + } if (lp_status == dual::status_t::DUAL_UNBOUNDED) { // Node was infeasible. Do not branch @@ -695,17 +691,18 @@ void branch_and_bound_t::exploration_ramp_up(search_tree_t* i_t initial_heap_size) { if (status_ != mip_exploration_status_t::RUNNING) { return; } - if (omp_get_thread_num() == 0) { repair_heuristic_solutions(); } + + // Note that we do not know which thread will execute the + // `exploration_ramp_up` task, so we allow to any thread + // to repair the heuristic solution. + repair_heuristic_solutions(); f_t lower_bound = node->lower_bound; f_t upper_bound = get_upper_bound(); f_t rel_gap = user_relative_gap(original_lp_, upper_bound, lower_bound); f_t abs_gap = upper_bound - lower_bound; - i_t nodes_explored = 0; - i_t nodes_unexplored = 0; - - nodes_explored = (stats_.nodes_explored++); - nodes_unexplored = (stats_.nodes_unexplored--); + i_t nodes_explored = (++stats_.nodes_explored); + i_t nodes_unexplored = (--stats_.nodes_unexplored); stats_.nodes_since_last_log++; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { @@ -714,14 +711,18 @@ void branch_and_bound_t::exploration_ramp_up(search_tree_t* return; } - f_t now = toc(stats_.start_time); + f_t now = toc(stats_.start_time); + f_t time_since_last_log = stats_.last_log == 0 ? 1.0 : toc(stats_.last_log); - if (omp_get_thread_num() == 0) { - f_t time_since_last_log = stats_.last_log == 0 ? 1.0 : toc(stats_.last_log); + if (((stats_.nodes_since_last_log >= 10 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && + (time_since_last_log >= 1)) || + (time_since_last_log > 30) || now > settings_.time_limit) { + // Check if no new node was explored until now. If this is the case, + // only the last thread should report the progress + if (stats_.nodes_explored.load() == nodes_explored) { + stats_.nodes_since_last_log = 0; + stats_.last_log = tic(); - if (((stats_.nodes_since_last_log >= 10 || abs_gap < 10 * settings_.absolute_mip_gap_tol) && - (time_since_last_log >= 1)) || - (time_since_last_log > 30) || now > settings_.time_limit) { f_t obj = compute_user_objective(original_lp_, upper_bound); f_t user_lower = compute_user_objective(original_lp_, root_objective_); std::string gap_user = user_mip_gap(obj, user_lower); @@ -735,8 +736,6 @@ void branch_and_bound_t::exploration_ramp_up(search_tree_t* nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0, gap_user.c_str(), now); - - stats_.nodes_since_last_log = 0; } } @@ -744,6 +743,11 @@ void branch_and_bound_t::exploration_ramp_up(search_tree_t* status_ = mip_exploration_status_t::TIME_LIMIT; return; } + + // Set the correct bounds for the leaf problem + leaf_problem.lower = original_lp_.lower; + leaf_problem.upper = original_lp_.upper; + node_status_t node_status = solve_node(*search_tree, node, leaf_problem, Arow, upper_bound, settings_.log, 'B'); @@ -784,7 +788,7 @@ void branch_and_bound_t::explore_subtree(i_t id, stack.push_front(start_node); while (stack.size() > 0 && status_ == mip_exploration_status_t::RUNNING) { - if (omp_get_thread_num() == 0) { repair_heuristic_solutions(); } + if (id == 0) { repair_heuristic_solutions(); } mip_node_t* node_ptr = stack.front(); stack.pop_front(); @@ -802,8 +806,8 @@ void branch_and_bound_t::explore_subtree(i_t id, // - The lower bound of the parent is lower or equal to its children assert(id < local_lower_bounds_.size()); local_lower_bounds_[id] = lower_bound; - i_t nodes_explored = stats_.nodes_explored++; - i_t nodes_unexplored = stats_.nodes_unexplored--; + i_t nodes_explored = (++stats_.nodes_explored); + i_t nodes_unexplored = (--stats_.nodes_unexplored); stats_.nodes_since_last_log++; if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) { @@ -842,6 +846,10 @@ void branch_and_bound_t::explore_subtree(i_t id, return; } + // Set the correct bounds for the leaf problem + leaf_problem.lower = original_lp_.lower; + leaf_problem.upper = original_lp_.upper; + node_status_t node_status = solve_node(search_tree, node_ptr, leaf_problem, Arow, upper_bound, settings_.log, 'B'); @@ -866,7 +874,7 @@ void branch_and_bound_t::explore_subtree(i_t id, // would be better if we discard the node instead. if (get_heap_size() > settings_.num_bfs_threads) { mutex_dive_queue_.lock(); - dive_queue_.push(node->detach_copy()); + dive_queue_.emplace(node->detach_copy(), leaf_problem.lower, leaf_problem.upper); mutex_dive_queue_.unlock(); } @@ -950,16 +958,16 @@ void branch_and_bound_t::diving_thread(lp_problem_t& leaf_pr while (status_ == mip_exploration_status_t::RUNNING && (active_subtrees_ > 0 || get_heap_size() > 0)) { - std::optional> start_node; + std::optional> start_node; mutex_dive_queue_.lock(); if (dive_queue_.size() > 0) { start_node = dive_queue_.pop(); } mutex_dive_queue_.unlock(); if (start_node.has_value()) { - if (get_upper_bound() < start_node->lower_bound) { continue; } + if (get_upper_bound() < start_node->node.lower_bound) { continue; } - search_tree_t subtree(std::move(start_node.value())); + search_tree_t subtree(std::move(start_node->node)); std::deque*> stack; stack.push_front(&subtree.root); @@ -975,6 +983,10 @@ void branch_and_bound_t::diving_thread(lp_problem_t& leaf_pr if (toc(stats_.start_time) > settings_.time_limit) { return; } + // Set the correct bounds for the leaf problem + leaf_problem.lower = start_node->lp_lower; + leaf_problem.upper = start_node->lp_upper; + node_status_t node_status = solve_node(subtree, node_ptr, leaf_problem, Arow, upper_bound, log, 'D'); @@ -985,7 +997,9 @@ void branch_and_bound_t::diving_thread(lp_problem_t& leaf_pr auto [first, second] = child_selection(node_ptr); stack.push_front(second); stack.push_front(first); + } + if (stack.size() > 1) { // If the diving thread is consuming the nodes faster than the // best first search, then we split the current subtree at the // lowest possible point and move to the queue, so it can @@ -994,7 +1008,7 @@ void branch_and_bound_t::diving_thread(lp_problem_t& leaf_pr mutex_dive_queue_.lock(); mip_node_t* new_node = stack.back(); stack.pop_back(); - dive_queue_.push(new_node->detach_copy()); + dive_queue_.emplace(new_node->detach_copy(), leaf_problem.lower, leaf_problem.upper); mutex_dive_queue_.unlock(); } } diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp index 7b80f88fa..5453e8b42 100644 --- a/cpp/src/dual_simplex/branch_and_bound.hpp +++ b/cpp/src/dual_simplex/branch_and_bound.hpp @@ -55,36 +55,64 @@ enum class mip_exploration_status_t { template void upper_bound_callback(f_t upper_bound); +template +struct diving_root_t { + mip_node_t node; + std::vector lp_lower; + std::vector lp_upper; + + diving_root_t(mip_node_t&& node, + const std::vector& lower, + const std::vector& upper) + : node(std::move(node)), lp_upper(upper), lp_lower(lower) + { + } + + friend bool operator>(const diving_root_t& a, const diving_root_t& b) + { + return a.node.lower_bound > b.node.lower_bound; + } +}; + // A min-heap for storing the starting nodes for the dives. -// This has a maximum size of 8192, such that the container +// This has a maximum size of 256, such that the container // will discard the least promising node if the queue is full. template class dive_queue_t { private: - std::vector> buffer; - static constexpr i_t max_size_ = 2048; + std::vector> buffer; + static constexpr i_t max_size_ = 256; public: dive_queue_t() { buffer.reserve(max_size_); } - void push(mip_node_t&& node) + void push(diving_root_t&& node) { buffer.push_back(std::move(node)); - std::push_heap(buffer.begin(), buffer.end(), node_compare_t()); + std::push_heap(buffer.begin(), buffer.end(), std::greater<>()); + if (buffer.size() > max_size()) { buffer.pop_back(); } + } + + void emplace(mip_node_t&& node, + const std::vector& lower, + const std::vector& upper) + { + buffer.emplace_back(std::move(node), lower, upper); + std::push_heap(buffer.begin(), buffer.end(), std::greater<>()); if (buffer.size() > max_size()) { buffer.pop_back(); } } - mip_node_t pop() + diving_root_t pop() { - std::pop_heap(buffer.begin(), buffer.end(), node_compare_t()); - mip_node_t node = std::move(buffer.back()); + std::pop_heap(buffer.begin(), buffer.end(), std::greater<>()); + diving_root_t node = std::move(buffer.back()); buffer.pop_back(); return node; } i_t size() const { return buffer.size(); } constexpr i_t max_size() const { return max_size_; } - const mip_node_t& top() const { return buffer.front(); } + const diving_root_t& top() const { return buffer.front(); } void clear() { buffer.clear(); } }; @@ -188,7 +216,7 @@ class branch_and_bound_t { // Set the final solution. mip_status_t set_final_solution(mip_solution_t& solution, f_t lower_bound); - // Update the incumbent solution with the new feasible solution. + // Update the incumbent solution with the new feasible solution // found during branch and bound. void add_feasible_solution(f_t leaf_objective, const std::vector& leaf_solution, diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py index c7ef8b99b..42059bf3d 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py @@ -406,6 +406,7 @@ def test_warm_start(): settings = SolverSettings() settings.set_parameter(CUOPT_PDLP_SOLVER_MODE, PDLPSolverMode.Stable2) + settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) settings.set_optimality_tolerance(1e-3) settings.set_parameter(CUOPT_INFEASIBILITY_DETECTION, False) diff --git a/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py b/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py index be67894be..cfc30fa1c 100644 --- a/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py +++ b/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py @@ -22,6 +22,7 @@ from cuopt.linear_programming.solver.solver_parameters import ( CUOPT_INFEASIBILITY_DETECTION, CUOPT_PDLP_SOLVER_MODE, + CUOPT_METHOD, ) from cuopt.linear_programming.solver_settings import PDLPSolverMode @@ -45,6 +46,7 @@ def test_warmstart(cuoptproc): # noqa settings.set_optimality_tolerance(1e-4) settings.set_parameter(CUOPT_INFEASIBILITY_DETECTION, False) settings.set_parameter(CUOPT_PDLP_SOLVER_MODE, PDLPSolverMode.Stable2) + settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) data["solver_config"] = settings.toDict() headers = {"CLIENT-VERSION": "custom"}