diff --git a/benchmarks/linear_programming/cuopt/benchmark_helper.hpp b/benchmarks/linear_programming/cuopt/benchmark_helper.hpp index 6c58c8b5d..1a461f409 100644 --- a/benchmarks/linear_programming/cuopt/benchmark_helper.hpp +++ b/benchmarks/linear_programming/cuopt/benchmark_helper.hpp @@ -134,6 +134,8 @@ void fill_pdlp_hyper_params(const std::string& pdlp_hyper_params_path) handle_some_primal_gradients_on_finite_bounds_as_residuals}, {"project_initial_primal", &cuopt::linear_programming::pdlp_hyper_params::project_initial_primal}, + {"use_adaptive_step_size_strategy", + &cuopt::linear_programming::pdlp_hyper_params::use_adaptive_step_size_strategy}, }; while (std::getline(file, line)) { diff --git a/benchmarks/linear_programming/cuopt/run_pdlp.cu b/benchmarks/linear_programming/cuopt/run_pdlp.cu index 396ff6fbd..d25eaff27 100644 --- a/benchmarks/linear_programming/cuopt/run_pdlp.cu +++ b/benchmarks/linear_programming/cuopt/run_pdlp.cu @@ -54,10 +54,11 @@ static void parse_arguments(argparse::ArgumentParser& program) .default_value(1e-4) .scan<'g', double>(); + // TODO replace all comments with Stable2 with Stable3 program.add_argument("--pdlp-solver-mode") - .help("Solver mode for PDLP. Possible values: Stable2 (default), Methodical1, Fast1") - .default_value("Stable2") - .choices("Stable2", "Methodical1", "Fast1"); + .help("Solver mode for PDLP. Possible values: Stable3 (default), Methodical1, Fast1") + .default_value("Stable3") + .choices("Stable3", "Methodical1", "Fast1"); program.add_argument("--method") .help( @@ -90,7 +91,9 @@ static cuopt::linear_programming::pdlp_solver_mode_t string_to_pdlp_solver_mode( return cuopt::linear_programming::pdlp_solver_mode_t::Methodical1; else if (mode == "Fast1") return cuopt::linear_programming::pdlp_solver_mode_t::Fast1; - return cuopt::linear_programming::pdlp_solver_mode_t::Stable2; + else if (mode == "Stable3") + return cuopt::linear_programming::pdlp_solver_mode_t::Stable3; + return cuopt::linear_programming::pdlp_solver_mode_t::Stable3; } static cuopt::linear_programming::pdlp_solver_settings_t create_solver_settings( diff --git a/cpp/cuopt_cli.cpp b/cpp/cuopt_cli.cpp index ac22cf9fe..12afda195 100644 --- a/cpp/cuopt_cli.cpp +++ b/cpp/cuopt_cli.cpp @@ -122,13 +122,13 @@ int run_single_file(const std::string& file_path, (op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::MIP || op_problem.get_problem_category() == cuopt::linear_programming::problem_category_t::IP); - auto initial_solution = - initial_solution_file.empty() - ? std::vector() - : cuopt::linear_programming::solution_reader_t::get_variable_values_from_sol_file( - initial_solution_file, mps_data_model.get_variable_names()); - try { + auto initial_solution = + initial_solution_file.empty() + ? std::vector() + : cuopt::linear_programming::solution_reader_t::get_variable_values_from_sol_file( + initial_solution_file, mps_data_model.get_variable_names()); + if (is_mip && !solve_relaxation) { auto& mip_settings = settings.get_mip_settings(); if (initial_solution.size() > 0) { diff --git a/cpp/include/cuopt/linear_programming/constants.h b/cpp/include/cuopt/linear_programming/constants.h index 72d05a6ad..092e427fc 100644 --- a/cpp/include/cuopt/linear_programming/constants.h +++ b/cpp/include/cuopt/linear_programming/constants.h @@ -101,6 +101,7 @@ #define CUOPT_PDLP_SOLVER_MODE_STABLE2 1 #define CUOPT_PDLP_SOLVER_MODE_METHODICAL1 2 #define CUOPT_PDLP_SOLVER_MODE_FAST1 3 +#define CUOPT_PDLP_SOLVER_MODE_STABLE3 4 #define CUOPT_METHOD_CONCURRENT 0 #define CUOPT_METHOD_PDLP 1 diff --git a/cpp/include/cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh b/cpp/include/cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh index b33efcd53..fd9179928 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh +++ b/cpp/include/cuopt/linear_programming/pdlp/pdlp_hyper_params.cuh @@ -56,5 +56,17 @@ extern bool update_primal_weight_on_initial_solution; extern bool update_step_size_on_initial_solution; extern bool handle_some_primal_gradients_on_finite_bounds_as_residuals; extern bool project_initial_primal; +extern bool use_adaptive_step_size_strategy; +extern bool initial_step_size_max_singular_value; +extern bool initial_primal_weight_combined_bounds; +extern bool bound_objective_rescaling; +extern bool use_reflected_primal_dual; +extern bool use_fixed_point_error; +extern double reflection_coefficient; +extern double restart_k_p; +extern double restart_k_i; +extern double restart_k_d; +extern double restart_i_smooth; +extern bool use_conditional_major; } // namespace cuopt::linear_programming::pdlp_hyper_params diff --git a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp index f4e515288..0660ede6e 100644 --- a/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp +++ b/cpp/include/cuopt/linear_programming/pdlp/solver_settings.hpp @@ -35,13 +35,13 @@ class solver_settings_t; * @brief Enum representing the different solver modes under which PDLP can * operate. * - * Stable2: Best overall mode from experiments; balances speed and convergence - * success. If you want to use the legacy version, use Stable1. + * Stable3: Best overall mode from experiments; balances speed and convergence + * success. If you want to use the legacy version, use Stable2. * Methodical1: Usually leads to slower individual steps but fewer are needed to * converge. It uses from 1.3x up to 1.7x times more memory. * Fast1: Less convergence success but usually yields the highest speed * - * @note Default mode is Stable2. + * @note Default mode is Stable3. */ // Forced to use an enum instead of an enum class for compatibility with the // Cython layer @@ -49,7 +49,8 @@ enum pdlp_solver_mode_t : int { Stable1 = CUOPT_PDLP_SOLVER_MODE_STABLE1, Stable2 = CUOPT_PDLP_SOLVER_MODE_STABLE2, Methodical1 = CUOPT_PDLP_SOLVER_MODE_METHODICAL1, - Fast1 = CUOPT_PDLP_SOLVER_MODE_FAST1 + Fast1 = CUOPT_PDLP_SOLVER_MODE_FAST1, + Stable3 = CUOPT_PDLP_SOLVER_MODE_STABLE3 }; /** @@ -200,7 +201,7 @@ class pdlp_solver_settings_t { bool strict_infeasibility{false}; i_t iteration_limit{std::numeric_limits::max()}; double time_limit{std::numeric_limits::infinity()}; - pdlp_solver_mode_t pdlp_solver_mode{pdlp_solver_mode_t::Stable2}; + pdlp_solver_mode_t pdlp_solver_mode{pdlp_solver_mode_t::Stable3}; bool log_to_console{true}; std::string log_file{""}; std::string sol_file{""}; diff --git a/cpp/src/linear_programming/cusparse_view.cu b/cpp/src/linear_programming/cusparse_view.cu index 475353078..57c90c6ce 100644 --- a/cpp/src/linear_programming/cusparse_view.cu +++ b/cpp/src/linear_programming/cusparse_view.cu @@ -131,7 +131,8 @@ cusparse_view_t::cusparse_view_t( saddle_point_state_t& current_saddle_point_state, rmm::device_uvector& _tmp_primal, rmm::device_uvector& _tmp_dual, - rmm::device_uvector& _potential_next_dual_solution) + rmm::device_uvector& _potential_next_dual_solution, + rmm::device_uvector& _reflected_primal_solution) : handle_ptr_(handle_ptr), A{}, A_T{}, @@ -218,6 +219,12 @@ cusparse_view_t::cusparse_view_t( &tmp_primal, op_problem_scaled.n_variables, _tmp_primal.data())); RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( &tmp_dual, op_problem_scaled.n_constraints, _tmp_dual.data())); + if (pdlp_hyper_params::use_reflected_primal_dual) { + cuopt_assert(_reflected_primal_solution.size() > 0, "Reflected primal solution empty"); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec(&reflected_primal_solution, + op_problem_scaled.n_variables, + _reflected_primal_solution.data())); + } const rmm::device_scalar alpha{1, handle_ptr->get_stream()}; const rmm::device_scalar beta{1, handle_ptr->get_stream()}; @@ -284,6 +291,8 @@ cusparse_view_t::cusparse_view_t(raft::handle_t const* handle_ptr, rmm::device_uvector& _dual_solution, rmm::device_uvector& _tmp_primal, rmm::device_uvector& _tmp_dual, + rmm::device_uvector& _potential_next_primal, + rmm::device_uvector& _potential_next_dual, const rmm::device_uvector& _A_T, const rmm::device_uvector& _A_T_offsets, const rmm::device_uvector& _A_T_indices) @@ -335,10 +344,17 @@ cusparse_view_t::cusparse_view_t(raft::handle_t const* handle_ptr, RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( &c, op_problem.n_variables, const_cast(op_problem.objective_coefficients.data()))); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( - &primal_solution, op_problem.n_variables, _primal_solution.data())); - RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( - &dual_solution, op_problem.n_constraints, _dual_solution.data())); + if (!pdlp_hyper_params::use_adaptive_step_size_strategy) { + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( + &primal_solution, op_problem.n_variables, _potential_next_primal.data())); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( + &dual_solution, op_problem.n_constraints, _potential_next_dual.data())); + } else { + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( + &primal_solution, op_problem.n_variables, _primal_solution.data())); + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( + &dual_solution, op_problem.n_constraints, _dual_solution.data())); + } RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( &tmp_primal, op_problem.n_variables, _tmp_primal.data())); diff --git a/cpp/src/linear_programming/cusparse_view.hpp b/cpp/src/linear_programming/cusparse_view.hpp index fc82725b5..e73a22dee 100644 --- a/cpp/src/linear_programming/cusparse_view.hpp +++ b/cpp/src/linear_programming/cusparse_view.hpp @@ -34,7 +34,8 @@ class cusparse_view_t { saddle_point_state_t& current_saddle_point_state, rmm::device_uvector& _tmp_primal, rmm::device_uvector& _tmp_dual, - rmm::device_uvector& _potential_next_dual_solution); + rmm::device_uvector& _potential_next_dual_solution, + rmm::device_uvector& _reflected_primal_solution); cusparse_view_t(raft::handle_t const* handle_ptr, const problem_t& op_problem, @@ -42,6 +43,8 @@ class cusparse_view_t { rmm::device_uvector& _dual_solution, rmm::device_uvector& _tmp_primal, rmm::device_uvector& _tmp_dual, + rmm::device_uvector& _potential_next_primal, + rmm::device_uvector& _potential_next_dual, const rmm::device_uvector& _A_T, const rmm::device_uvector& _A_T_offsets, const rmm::device_uvector& _A_T_indices); @@ -89,6 +92,9 @@ class cusparse_view_t { rmm::device_uvector buffer_non_transpose; rmm::device_uvector buffer_transpose; + // Only when using reflection + cusparseDnVecDescr_t reflected_primal_solution; + // Ref to the A_T found in either // Initial problem, we use it to have an unscaled A_T // PDLP copy of the problem which holds the scaled version diff --git a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu index 3eae93690..6140dbd3e 100644 --- a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu +++ b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cu @@ -17,6 +17,8 @@ #include +#include + #include #include #include @@ -56,6 +58,8 @@ pdlp_initial_scaling_strategy_t::pdlp_initial_scaling_strategy_t( running_mip_(running_mip), iteration_constraint_matrix_scaling_{static_cast(dual_size_h_), stream_view_}, iteration_variable_scaling_{static_cast(primal_size_h_), stream_view_}, + bound_rescaling_(f_t(1), stream_view_), + objective_rescaling_(f_t(1), stream_view_), cummulative_constraint_matrix_scaling_{static_cast(dual_size_h_), stream_view_}, cummulative_variable_scaling_{static_cast(primal_size_h_), stream_view_} { @@ -64,6 +68,9 @@ pdlp_initial_scaling_strategy_t::pdlp_initial_scaling_strategy_t( RAFT_CUDA_TRY(cudaDeviceSynchronize()); std::cout << "Initializing initial_scaling_strategy" << std::endl; #endif + + if (!running_mip_) cuopt_assert(pdhg_solver_ptr_ != nullptr, "PDHG solver pointer is null"); + // start with all one for scaling vectors RAFT_CUDA_TRY(cudaMemsetAsync( iteration_constraint_matrix_scaling_.data(), 0.0, sizeof(f_t) * dual_size_h_, stream_view_)); @@ -91,6 +98,63 @@ void pdlp_initial_scaling_strategy_t::compute_scaling_vectors( if (pdlp_hyper_params::do_pock_chambolle_scaling) { pock_chambolle_scaling(alpha); } } +template +void pdlp_initial_scaling_strategy_t::bound_objective_rescaling() +{ + // TODO: test bound obj scaling w/ MIP + rmm::device_buffer d_temp_storage; + size_t bytes; + + auto main_op = [] HD(const thrust::tuple t) { + const f_t lower = thrust::get<0>(t); + const f_t upper = thrust::get<1>(t); + f_t sum = 0; + if (isfinite(lower) && (lower != upper)) sum += lower * lower; + if (isfinite(upper)) sum += upper * upper; + return sum; + }; + cub::DeviceReduce::TransformReduce( + nullptr, + bytes, + thrust::make_zip_iterator(op_problem_scaled_.constraint_lower_bounds.data(), + op_problem_scaled_.constraint_upper_bounds.data()), + bound_rescaling_.data(), + op_problem_scaled_.constraint_lower_bounds.size(), + cuda::std::plus<>{}, + main_op, + f_t(0), + stream_view_); + + d_temp_storage.resize(bytes, stream_view_); + + cub::DeviceReduce::TransformReduce( + d_temp_storage.data(), + bytes, + thrust::make_zip_iterator(op_problem_scaled_.constraint_lower_bounds.data(), + op_problem_scaled_.constraint_upper_bounds.data()), + bound_rescaling_.data(), + op_problem_scaled_.constraint_lower_bounds.size(), + cuda::std::plus<>{}, + main_op, + f_t(0), + stream_view_); + + h_bound_rescaling = f_t(1.0) / (std::sqrt(bound_rescaling_.value(stream_view_)) + f_t(1.0)); + bound_rescaling_.set_value_async(h_bound_rescaling, stream_view_); + + detail::my_l2_weighted_norm(op_problem_scaled_.objective_coefficients, + pdlp_hyper_params::initial_primal_weight_c_scaling, + objective_rescaling_, + stream_view_); + + // sqrt already applied + h_objective_rescaling = f_t(1.0) / (objective_rescaling_.value(stream_view_) + f_t(1.0)); + objective_rescaling_.set_value_async(h_objective_rescaling, stream_view_); + + // Sync since we are using local variable + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); +} + template __global__ void inf_norm_row_and_col_kernel( const typename problem_t::view_t op_problem, @@ -396,6 +460,52 @@ void pdlp_initial_scaling_strategy_t::scale_problem() dual_size_h_, stream_view_); + if (pdlp_hyper_params::bound_objective_rescaling && !running_mip_) { + // Coefficients are computed on the already scaled values + bound_objective_rescaling(); + +#ifdef CUPDLP_DEBUG_MODE + printf("Bound rescaling %lf %lf\n", + bound_rescaling_.value(stream_view_), + objective_rescaling_.value(stream_view_)); +#endif + + cub::DeviceTransform::Transform( + cuda::std::make_tuple(op_problem_scaled_.constraint_lower_bounds.data(), + op_problem_scaled_.constraint_upper_bounds.data()), + thrust::make_zip_iterator(op_problem_scaled_.constraint_lower_bounds.data(), + op_problem_scaled_.constraint_upper_bounds.data()), + op_problem_scaled_.constraint_upper_bounds.size(), + [bound_rescaling = bound_rescaling_.data()] __device__( + f_t constraint_lower_bound, f_t constraint_upper_bound) -> thrust::tuple { + return {constraint_lower_bound * *bound_rescaling, + constraint_upper_bound * *bound_rescaling}; + }, + stream_view_); + + cub::DeviceTransform::Transform( + cuda::std::make_tuple(op_problem_scaled_.variable_bounds.data(), + op_problem_scaled_.objective_coefficients.data()), + thrust::make_zip_iterator(op_problem_scaled_.variable_bounds.data(), + op_problem_scaled_.objective_coefficients.data()), + op_problem_scaled_.variable_bounds.size(), + [bound_rescaling = bound_rescaling_.data(), + objective_rescaling = objective_rescaling_.data()] __device__(f_t2 variable_bounds, + f_t objective_coefficient) + -> thrust::tuple { + return {{variable_bounds.x * *bound_rescaling, variable_bounds.y * *bound_rescaling}, + objective_coefficient * *objective_rescaling}; + }, + stream_view_); + } + +#ifdef CUPDLP_DEBUG_MODE + print("constraint_lower_bound", op_problem_scaled_.constraint_lower_bounds); + print("constraint_upper_bound", op_problem_scaled_.constraint_upper_bounds); + // print("variable_lower_bound", op_problem_scaled_.variable_lower_bounds); + // print("variable_upper_bound", op_problem_scaled_.variable_upper_bounds); + print("objective_vector", op_problem_scaled_.objective_coefficients); +#endif op_problem_scaled_.is_scaled_ = true; if (!running_mip_) { scale_solutions(pdhg_solver_ptr_->get_primal_solution(), pdhg_solver_ptr_->get_dual_solution()); @@ -404,77 +514,161 @@ void pdlp_initial_scaling_strategy_t::scale_problem() template void pdlp_initial_scaling_strategy_t::scale_solutions( - rmm::device_uvector& primal_solution, rmm::device_uvector& dual_solution) const + rmm::device_uvector& primal_solution, + rmm::device_uvector& dual_solution, + rmm::device_uvector& dual_slack) const { - // scale solutions - raft::linalg::eltwiseDivideCheckZero(primal_solution.data(), - primal_solution.data(), - cummulative_variable_scaling_.data(), - primal_size_h_, - stream_view_); + if (primal_solution.size()) { + cuopt_expects(primal_solution.size() == static_cast(primal_size_h_), + error_type_t::RuntimeError, + "Scale primal didn't get a vector of size primal"); + raft::linalg::eltwiseDivideCheckZero(primal_solution.data(), + primal_solution.data(), + cummulative_variable_scaling_.data(), + primal_size_h_, + stream_view_); + + if (pdlp_hyper_params::bound_objective_rescaling && !running_mip_) { + raft::linalg::scalarMultiply(primal_solution.data(), + primal_solution.data(), + h_bound_rescaling, + primal_size_h_, + stream_view_); + } + } + if (dual_solution.size()) { + cuopt_expects(dual_solution.size() == static_cast(dual_size_h_), + error_type_t::RuntimeError, + "Unscale dual didn't get a vector of size dual"); raft::linalg::eltwiseDivideCheckZero(dual_solution.data(), dual_solution.data(), cummulative_constraint_matrix_scaling_.data(), dual_size_h_, stream_view_); + if (pdlp_hyper_params::bound_objective_rescaling && !running_mip_) { + raft::linalg::scalarMultiply(dual_solution.data(), + dual_solution.data(), + h_objective_rescaling, + dual_size_h_, + stream_view_); + } } + + if (dual_slack.size()) { + cuopt_expects(dual_slack.size() == static_cast(primal_size_h_), + error_type_t::RuntimeError, + "Unscale dual didn't get a vector of size dual"); + raft::linalg::eltwiseMultiply(dual_slack.data(), + dual_slack.data(), + cummulative_variable_scaling_.data(), + primal_size_h_, + stream_view_); + if (pdlp_hyper_params::bound_objective_rescaling && !running_mip_) { + raft::linalg::scalarMultiply( + dual_slack.data(), dual_slack.data(), h_objective_rescaling, primal_size_h_, stream_view_); + } + } +} + +template +void pdlp_initial_scaling_strategy_t::scale_solutions( + rmm::device_uvector& primal_solution, rmm::device_uvector& dual_solution) const +{ + rmm::device_uvector dummy(0, dual_solution.stream()); + scale_solutions(primal_solution, dual_solution, dummy); +} + +template +void pdlp_initial_scaling_strategy_t::scale_solutions( + rmm::device_uvector& primal_solution) const +{ + rmm::device_uvector dummy(0, primal_solution.stream()); + scale_solutions(primal_solution, dummy); } template void pdlp_initial_scaling_strategy_t::scale_primal( rmm::device_uvector& primal_solution) const { - cuopt_expects(primal_solution.size() == static_cast(primal_size_h_), - error_type_t::RuntimeError, - "Scale primal didn't get a vector of size primal"); - // scale solutions - raft::linalg::eltwiseDivideCheckZero(primal_solution.data(), - primal_solution.data(), - cummulative_variable_scaling_.data(), - primal_size_h_, - stream_view_); + scale_solutions(primal_solution); } template void pdlp_initial_scaling_strategy_t::scale_dual( rmm::device_uvector& dual_solution) const { - cuopt_expects(dual_solution.size() == static_cast(dual_size_h_), - error_type_t::RuntimeError, - "Scale dual didn't get a vector of size dual"); - // scale solutions - raft::linalg::eltwiseDivideCheckZero(dual_solution.data(), - dual_solution.data(), - cummulative_constraint_matrix_scaling_.data(), - dual_size_h_, - stream_view_); + rmm::device_uvector dummy(0, dual_solution.stream()); + scale_solutions(dummy, dual_solution); } template void pdlp_initial_scaling_strategy_t::unscale_solutions( - rmm::device_uvector& primal_solution, rmm::device_uvector& dual_solution) const + rmm::device_uvector& primal_solution, + rmm::device_uvector& dual_solution, + rmm::device_uvector& dual_slack) const { - // if there are some tails in the solution, don't scale that - cuopt_expects(primal_solution.size() == static_cast(primal_size_h_), - error_type_t::RuntimeError, - "Unscale primal didn't get a vector of size primal"); - // unscale avg solutions - raft::linalg::eltwiseMultiply(primal_solution.data(), - primal_solution.data(), - cummulative_variable_scaling_.data(), - primal_size_h_, - stream_view_); + raft::common::nvtx::range fun_scope("unscale_solutions"); + + if (primal_solution.size()) { + cuopt_expects(primal_solution.size() == static_cast(primal_size_h_), + error_type_t::RuntimeError, + "Unscale primal didn't get a vector of size primal"); + cuopt_assert(cummulative_variable_scaling_.size() == static_cast(primal_size_h_), ""); + + raft::linalg::eltwiseMultiply(primal_solution.data(), + primal_solution.data(), + cummulative_variable_scaling_.data(), + primal_size_h_, + stream_view_); + + if (pdlp_hyper_params::bound_objective_rescaling && !running_mip_) { + cuopt_assert(h_bound_rescaling != f_t(0), + "Numerical error: bound_rescaling_ should never equal 0"); + raft::linalg::scalarMultiply(primal_solution.data(), + primal_solution.data(), + f_t(1.0) / h_bound_rescaling, + primal_size_h_, + stream_view_); + } + } if (dual_solution.size()) { cuopt_expects(dual_solution.size() == static_cast(dual_size_h_), error_type_t::RuntimeError, "Unscale dual didn't get a vector of size dual"); + cuopt_assert(cummulative_constraint_matrix_scaling_.size() == static_cast(dual_size_h_), + ""); raft::linalg::eltwiseMultiply(dual_solution.data(), dual_solution.data(), cummulative_constraint_matrix_scaling_.data(), dual_size_h_, stream_view_); + if (pdlp_hyper_params::bound_objective_rescaling && !running_mip_) { + raft::linalg::scalarMultiply(dual_solution.data(), + dual_solution.data(), + f_t(1.0) / (h_objective_rescaling), + dual_size_h_, + stream_view_); + } + } + + if (dual_slack.size()) { + cuopt_expects(dual_slack.size() == static_cast(primal_size_h_), + error_type_t::RuntimeError, + "Unscale dual didn't get a vector of size dual"); + raft::linalg::eltwiseDivideCheckZero(dual_slack.data(), + dual_slack.data(), + cummulative_variable_scaling_.data(), + primal_size_h_, + stream_view_); + if (pdlp_hyper_params::bound_objective_rescaling && !running_mip_) { + raft::linalg::scalarMultiply(dual_slack.data(), + dual_slack.data(), + f_t(1.0) / h_objective_rescaling, + primal_size_h_, + stream_view_); + } } } @@ -488,6 +682,14 @@ void pdlp_initial_scaling_strategy_t::unscale_solutions( unscale_solutions(primal_solution, dummy); } +template +void pdlp_initial_scaling_strategy_t::unscale_solutions( + rmm::device_uvector& solution, rmm::device_uvector& s) const +{ + rmm::device_uvector dummy(0, solution.stream()); + unscale_solutions(solution, s, dummy); +} + template const problem_t& pdlp_initial_scaling_strategy_t::get_scaled_op_problem() { diff --git a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cuh b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cuh index 0cd01aa9a..db4b7d5d2 100644 --- a/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cuh +++ b/cpp/src/linear_programming/initial_scaling_strategy/initial_scaling.cuh @@ -63,17 +63,26 @@ class pdlp_initial_scaling_strategy_t { void scale_problem(); + void scale_solutions(rmm::device_uvector& primal_solution) const; void scale_solutions(rmm::device_uvector& primal_solution, rmm::device_uvector& dual_solution) const; + void scale_solutions(rmm::device_uvector& primal_solution, + rmm::device_uvector& dual_solution, + rmm::device_uvector& dual_slack) const; void scale_primal(rmm::device_uvector& primal_solution) const; void scale_dual(rmm::device_uvector& dual_solution) const; void unscale_solutions(rmm::device_uvector& primal_solution, rmm::device_uvector& dual_solution) const; + void unscale_solutions(rmm::device_uvector& primal_solution, + rmm::device_uvector& dual_solution, + rmm::device_uvector& dual_slack) const; void unscale_solutions(solution_t& solution) const; rmm::device_uvector& get_constraint_matrix_scaling_vector(); rmm::device_uvector& get_variable_scaling_vector(); const problem_t& get_scaled_op_problem(); + void bound_objective_rescaling(); + /** * @brief Gets the device-side view (with raw pointers), for ease of access * inside cuda kernels @@ -96,6 +105,12 @@ class pdlp_initial_scaling_strategy_t { rmm::device_uvector iteration_constraint_matrix_scaling_; rmm::device_uvector iteration_variable_scaling_; + rmm::device_scalar bound_rescaling_; + rmm::device_scalar objective_rescaling_; + // Since we need it on the host + f_t h_bound_rescaling = std::numeric_limits::signaling_NaN(); + f_t h_objective_rescaling = std::numeric_limits::signaling_NaN(); + rmm::device_uvector cummulative_constraint_matrix_scaling_; rmm::device_uvector cummulative_variable_scaling_; pdhg_solver_t* pdhg_solver_ptr_; diff --git a/cpp/src/linear_programming/pdhg.cu b/cpp/src/linear_programming/pdhg.cu index 38373391a..a6f0f4e4a 100644 --- a/cpp/src/linear_programming/pdhg.cu +++ b/cpp/src/linear_programming/pdhg.cu @@ -20,6 +20,10 @@ #include #include +#ifdef CUPDLP_DEBUG_MODE +#include +#endif + #include #include #include @@ -48,12 +52,24 @@ pdhg_solver_t::pdhg_solver_t(raft::handle_t const* handle_ptr, potential_next_primal_solution_{static_cast(problem_ptr->n_variables), stream_view_}, potential_next_dual_solution_{static_cast(problem_ptr->n_constraints), stream_view_}, total_pdhg_iterations_{0}, + dual_slack_{static_cast( + (pdlp_hyper_params::use_reflected_primal_dual) ? problem_ptr->n_variables : 0), + stream_view_}, + reflected_primal_{ + static_cast((pdlp_hyper_params::use_reflected_primal_dual) ? problem_ptr->n_variables + : 0), + stream_view_}, + reflected_dual_{static_cast((pdlp_hyper_params::use_reflected_primal_dual) + ? problem_ptr->n_constraints + : 0), + stream_view_}, cusparse_view_{handle_ptr_, op_problem_scaled, current_saddle_point_state_, tmp_primal_, tmp_dual_, - potential_next_dual_solution_}, + potential_next_dual_solution_, + reflected_primal_}, reusable_device_scalar_value_1_{1.0, stream_view_}, reusable_device_scalar_value_0_{0.0, stream_view_}, reusable_device_scalar_value_neg_1_{f_t(-1.0), stream_view_}, @@ -62,6 +78,21 @@ pdhg_solver_t::pdhg_solver_t(raft::handle_t const* handle_ptr, graph_prim_proj_gradient_dual{stream_view_, is_batch_mode}, d_total_pdhg_iterations_{0, stream_view_} { + thrust::fill(handle_ptr->get_thrust_policy(), tmp_primal_.data(), tmp_primal_.end(), f_t(0)); + thrust::fill(handle_ptr->get_thrust_policy(), tmp_dual_.data(), tmp_dual_.end(), f_t(0)); + thrust::fill(handle_ptr->get_thrust_policy(), + potential_next_primal_solution_.data(), + potential_next_primal_solution_.end(), + f_t(0)); + thrust::fill(handle_ptr->get_thrust_policy(), + potential_next_dual_solution_.data(), + potential_next_dual_solution_.end(), + f_t(0)); + thrust::fill( + handle_ptr->get_thrust_policy(), reflected_primal_.data(), reflected_primal_.end(), f_t(0)); + thrust::fill( + handle_ptr->get_thrust_policy(), reflected_dual_.data(), reflected_dual_.end(), f_t(0)); + thrust::fill(handle_ptr->get_thrust_policy(), dual_slack_.data(), dual_slack_.end(), f_t(0)); } template @@ -134,6 +165,24 @@ void pdhg_solver_t::compute_At_y() stream_view_)); } +template +void pdhg_solver_t::compute_A_x() +{ + // A @ x + + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A, + cusparse_view_.reflected_primal_solution, + reusable_device_scalar_value_0_.data(), + cusparse_view_.dual_gradient, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); +} + template void pdhg_solver_t::compute_primal_projection_with_gradient( rmm::device_scalar& primal_step_size) @@ -216,22 +265,188 @@ void pdhg_solver_t::compute_next_primal_dual_solution( } } +template +struct primal_reflected_major_projection { + using f_t2 = typename type_2::type; + primal_reflected_major_projection(const f_t* scalar) : scalar_{scalar} {} + HDI thrust::tuple operator()(f_t current_primal, + f_t objective, + f_t Aty, + f_t2 bounds) + { + cuopt_assert(*scalar_ != f_t(0.0), "Scalar can't be 0"); + const f_t next = current_primal - *scalar_ * (objective - Aty); + const f_t next_clamped = raft::max(raft::min(next, bounds.y), bounds.x); + return { + next_clamped, (next_clamped - next) / *scalar_, f_t(2.0) * next_clamped - current_primal}; + } + const f_t* scalar_; +}; + +template +struct primal_reflected_projection { + using f_t2 = typename type_2::type; + primal_reflected_projection(const f_t* scalar) : scalar_{scalar} {} + HDI f_t operator()(f_t current_primal, f_t objective, f_t Aty, f_t2 bounds) + { + const f_t next = current_primal - *scalar_ * (objective - Aty); + const f_t next_clamped = raft::max(raft::min(next, bounds.y), bounds.x); + return f_t(2.0) * next_clamped - current_primal; + } + const f_t* scalar_; +}; + +template +struct dual_reflected_major_projection { + dual_reflected_major_projection(const f_t* scalar) : scalar_{scalar} {} + HDI thrust::tuple operator()(f_t current_dual, + f_t Ax, + f_t lower_bound, + f_t upper_bounds) + { + cuopt_assert(*scalar_ != f_t(0.0), "Scalar can't be 0"); + const f_t tmp = current_dual / *scalar_ - Ax; + const f_t tmp_proj = raft::max(-upper_bounds, raft::min(tmp, -lower_bound)); + const f_t next_dual = (tmp - tmp_proj) * *scalar_; + return {next_dual, f_t(2.0) * next_dual - current_dual}; + } + + const f_t* scalar_; +}; + +template +struct dual_reflected_projection { + dual_reflected_projection(const f_t* scalar) : scalar_{scalar} {} + HDI f_t operator()(f_t current_dual, f_t Ax, f_t lower_bound, f_t upper_bounds) + { + cuopt_assert(*scalar_ != f_t(0.0), "Scalar can't be 0"); + const f_t tmp = current_dual / *scalar_ - Ax; + const f_t tmp_proj = raft::max(-upper_bounds, raft::min(tmp, -lower_bound)); + const f_t next_dual = (tmp - tmp_proj) * *scalar_; + return f_t(2.0) * next_dual - current_dual; + } + + const f_t* scalar_; +}; + +template +void pdhg_solver_t::compute_next_primal_dual_solution_reflected( + rmm::device_scalar& primal_step_size, + rmm::device_scalar& dual_step_size, + bool should_major) +{ + raft::common::nvtx::range fun_scope("compute_next_primal_dual_solution_reflected"); + + // Compute next primal solution reflected + + if (should_major) { + if (!graph_all.is_initialized(should_major)) { + graph_all.start_capture(should_major); + + compute_At_y(); + cub::DeviceTransform::Transform( + cuda::std::make_tuple(current_saddle_point_state_.get_primal_solution().data(), + problem_ptr->objective_coefficients.data(), + current_saddle_point_state_.get_current_AtY().data(), + problem_ptr->variable_bounds.data()), + thrust::make_zip_iterator( + potential_next_primal_solution_.data(), dual_slack_.data(), reflected_primal_.data()), + primal_size_h_, + primal_reflected_major_projection(primal_step_size.data()), + stream_view_); +#ifdef CUPDLP_DEBUG_MODE + print("potential_next_primal_solution_", potential_next_primal_solution_); + print("reflected_primal_", reflected_primal_); + print("dual_slack_", dual_slack_); +#endif + + // Compute next dual + compute_A_x(); + + cub::DeviceTransform::Transform( + cuda::std::make_tuple(current_saddle_point_state_.get_dual_solution().data(), + current_saddle_point_state_.get_dual_gradient().data(), + problem_ptr->constraint_lower_bounds.data(), + problem_ptr->constraint_upper_bounds.data()), + thrust::make_zip_iterator(potential_next_dual_solution_.data(), reflected_dual_.data()), + dual_size_h_, + dual_reflected_major_projection(dual_step_size.data()), + stream_view_); + +#ifdef CUPDLP_DEBUG_MODE + print("potential_next_dual_solution_", potential_next_dual_solution_); + print("reflected_dual_", reflected_dual_); +#endif + graph_all.end_capture(should_major); + } + graph_all.launch(should_major); + + } else { + if (!graph_all.is_initialized(should_major)) { + graph_all.start_capture(should_major); + + // Compute next primal + compute_At_y(); + + cub::DeviceTransform::Transform( + cuda::std::make_tuple(current_saddle_point_state_.get_primal_solution().data(), + problem_ptr->objective_coefficients.data(), + current_saddle_point_state_.get_current_AtY().data(), + problem_ptr->variable_bounds.data()), + reflected_primal_.data(), + primal_size_h_, + primal_reflected_projection(primal_step_size.data()), + stream_view_); +#ifdef CUPDLP_DEBUG_MODE + print("reflected_primal_", reflected_primal_); +#endif + + // Compute next dual + compute_A_x(); + + cub::DeviceTransform::Transform( + cuda::std::make_tuple(current_saddle_point_state_.get_dual_solution().data(), + current_saddle_point_state_.get_dual_gradient().data(), + problem_ptr->constraint_lower_bounds.data(), + problem_ptr->constraint_upper_bounds.data()), + reflected_dual_.data(), + dual_size_h_, + dual_reflected_projection(dual_step_size.data()), + stream_view_); +#ifdef CUPDLP_DEBUG_MODE + print("reflected_dual_", reflected_dual_); +#endif + graph_all.end_capture(should_major); + } + graph_all.launch(should_major); + } +} + template void pdhg_solver_t::take_step(rmm::device_scalar& primal_step_size, rmm::device_scalar& dual_step_size, i_t iterations_since_last_restart, bool last_restart_was_average, - i_t total_pdlp_iterations) + i_t total_pdlp_iterations, + bool is_major_iteration) { #ifdef PDLP_DEBUG_MODE std::cout << "Take Step:" << std::endl; #endif - compute_next_primal_dual_solution(primal_step_size, - iterations_since_last_restart, - last_restart_was_average, - dual_step_size, - total_pdlp_iterations); + if (!pdlp_hyper_params::use_reflected_primal_dual) { + compute_next_primal_dual_solution(primal_step_size, + iterations_since_last_restart, + last_restart_was_average, + dual_step_size, + total_pdlp_iterations); + } else { + compute_next_primal_dual_solution_reflected( + primal_step_size, + dual_step_size, + is_major_iteration || + ((total_pdlp_iterations + 2) % conditional_major(total_pdlp_iterations + 2)) == 0); + } total_pdhg_iterations_ += 1; } @@ -305,6 +520,18 @@ rmm::device_uvector& pdhg_solver_t::get_dual_tmp_resource() return tmp_dual_; } +template +rmm::device_uvector& pdhg_solver_t::get_potential_next_primal_solution() +{ + return potential_next_primal_solution_; +} + +template +rmm::device_uvector& pdhg_solver_t::get_dual_slack() +{ + return dual_slack_; +} + template const rmm::device_uvector& pdhg_solver_t::get_potential_next_primal_solution() const { @@ -317,6 +544,18 @@ const rmm::device_uvector& pdhg_solver_t::get_potential_next_dual return potential_next_dual_solution_; } +template +const rmm::device_uvector& pdhg_solver_t::get_reflected_dual() const +{ + return reflected_dual_; +} + +template +const rmm::device_uvector& pdhg_solver_t::get_reflected_primal() const +{ + return reflected_primal_; +} + template rmm::device_uvector& pdhg_solver_t::get_potential_next_dual_solution() { diff --git a/cpp/src/linear_programming/pdhg.hpp b/cpp/src/linear_programming/pdhg.hpp index 3e4f7565e..90766d4fe 100644 --- a/cpp/src/linear_programming/pdhg.hpp +++ b/cpp/src/linear_programming/pdhg.hpp @@ -39,9 +39,13 @@ class pdhg_solver_t { cusparse_view_t& get_cusparse_view(); rmm::device_uvector& get_primal_tmp_resource(); rmm::device_uvector& get_dual_tmp_resource(); + rmm::device_uvector& get_potential_next_primal_solution(); + rmm::device_uvector& get_dual_slack(); const rmm::device_uvector& get_potential_next_primal_solution() const; rmm::device_uvector& get_potential_next_dual_solution(); const rmm::device_uvector& get_potential_next_dual_solution() const; + const rmm::device_uvector& get_reflected_dual() const; + const rmm::device_uvector& get_reflected_primal() const; i_t get_total_pdhg_iterations(); rmm::device_scalar& get_d_total_pdhg_iterations(); rmm::device_uvector& get_primal_solution(); @@ -51,7 +55,8 @@ class pdhg_solver_t { rmm::device_scalar& dual_step_size, i_t iterations_since_last_restart, bool last_restart_was_average, - i_t total_pdlp_iterations); + i_t total_pdlp_iterations, + bool is_major_iteration); void update_solution(cusparse_view_t& current_op_problem_evaluation_cusparse_view_); i_t total_pdhg_iterations_; @@ -63,10 +68,14 @@ class pdhg_solver_t { rmm::device_scalar& dual_step_size, i_t total_pdlp_iterations); void compute_next_dual_solution(rmm::device_scalar& dual_step_size); + void compute_next_primal_dual_solution_reflected(rmm::device_scalar& primal_step_size, + rmm::device_scalar& dual_step_size, + bool should_major); void compute_primal_projection_with_gradient(rmm::device_scalar& primal_step_size); void compute_primal_projection(rmm::device_scalar& primal_step_size); void compute_At_y(); + void compute_A_x(); raft::handle_t const* handle_ptr_{nullptr}; rmm::cuda_stream_view stream_view_; @@ -84,6 +93,11 @@ class pdhg_solver_t { rmm::device_uvector potential_next_primal_solution_; rmm::device_uvector potential_next_dual_solution_; + rmm::device_uvector dual_slack_; + rmm::device_uvector reflected_primal_; + rmm::device_uvector reflected_dual_; + + // Important that vectors passed down to the cusparse_view are allocated before cusparse_view_t cusparse_view_; const rmm::device_scalar reusable_device_scalar_value_1_; diff --git a/cpp/src/linear_programming/pdlp.cu b/cpp/src/linear_programming/pdlp.cu index 45d0b8b9c..87d9f5856 100644 --- a/cpp/src/linear_programming/pdlp.cu +++ b/cpp/src/linear_programming/pdlp.cu @@ -24,6 +24,10 @@ #include #include "cuopt/linear_programming/pdlp/solver_solution.hpp" +#include + +#include +#include #include #include #include @@ -67,6 +71,7 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, primal_step_size_{stream_view_}, dual_step_size_{stream_view_}, primal_weight_{stream_view_}, + best_primal_weight_{stream_view_}, step_size_{(f_t)pdlp_hyper_params::initial_step_size_scaling, stream_view_}, step_size_strategy_{handle_ptr_, &primal_weight_, &step_size_, is_batch_mode}, pdhg_solver_{handle_ptr_, op_problem_scaled_, is_batch_mode}, @@ -85,6 +90,8 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, unscaled_dual_avg_solution_, pdhg_solver_.get_primal_tmp_resource(), pdhg_solver_.get_dual_tmp_resource(), + pdhg_solver_.get_potential_next_primal_solution(), + pdhg_solver_.get_potential_next_dual_solution(), op_problem.reverse_coefficients, op_problem.reverse_offsets, op_problem.reverse_constraints}, @@ -94,6 +101,8 @@ pdlp_solver_t::pdlp_solver_t(problem_t& op_problem, pdhg_solver_.get_dual_solution(), pdhg_solver_.get_primal_tmp_resource(), pdhg_solver_.get_dual_tmp_resource(), + pdhg_solver_.get_potential_next_primal_solution(), + pdhg_solver_.get_potential_next_dual_solution(), op_problem.reverse_coefficients, op_problem.reverse_offsets, op_problem.reverse_constraints}, @@ -275,8 +284,12 @@ std::optional> pdlp_solver_t return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_primal_solution() + : pdhg_solver_.get_potential_next_primal_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_dual_solution() + : pdhg_solver_.get_potential_next_dual_solution(), get_filled_warmed_start_data(), pdlp_termination_status_t::TimeLimit); } @@ -299,8 +312,12 @@ std::optional> pdlp_solver_t return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_primal_solution() + : pdhg_solver_.get_potential_next_primal_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_dual_solution() + : pdhg_solver_.get_potential_next_dual_solution(), get_filled_warmed_start_data(), pdlp_termination_status_t::IterationLimit); } @@ -315,8 +332,12 @@ std::optional> pdlp_solver_t return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_primal_solution() + : pdhg_solver_.get_potential_next_primal_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_dual_solution() + : pdhg_solver_.get_potential_next_dual_solution(), get_filled_warmed_start_data(), pdlp_termination_status_t::ConcurrentLimit); } @@ -535,32 +556,39 @@ std::optional> pdlp_solver_t #ifdef PDLP_VERBOSE_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); printf("Termination criteria current\n"); - current_termination_strategy_.print_termination_criteria(); + print_termination_criteria(timer, false); RAFT_CUDA_TRY(cudaDeviceSynchronize()); #endif pdlp_termination_status_t termination_current = current_termination_strategy_.evaluate_termination_criteria( pdhg_solver_, - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_primal_solution() + : pdhg_solver_.get_potential_next_primal_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_dual_solution() + : pdhg_solver_.get_potential_next_dual_solution(), + pdhg_solver_.get_dual_slack(), problem_ptr->combined_bounds, problem_ptr->objective_coefficients); #ifdef PDLP_VERBOSE_MODE RAFT_CUDA_TRY(cudaDeviceSynchronize()); std::cout << "Termination criteria average:" << std::endl; - average_termination_strategy_.print_termination_criteria(); + print_termination_criteria(timer, true); RAFT_CUDA_TRY(cudaDeviceSynchronize()); #endif - // Check both average and current solution pdlp_termination_status_t termination_average = - average_termination_strategy_.evaluate_termination_criteria( - pdhg_solver_, - unscaled_primal_avg_solution_, - unscaled_dual_avg_solution_, - problem_ptr->combined_bounds, - problem_ptr->objective_coefficients); + (pdlp_hyper_params::never_restart_to_average) + ? pdlp_termination_status_t::NoTermination + : average_termination_strategy_.evaluate_termination_criteria( + pdhg_solver_, + unscaled_primal_avg_solution_, + unscaled_dual_avg_solution_, + pdhg_solver_.get_dual_slack(), + problem_ptr->combined_bounds, + problem_ptr->objective_coefficients); // We exit directly without checking the termination criteria as some problem can have a low // initial redidual + there is by definition 0 gap at first @@ -588,8 +616,12 @@ std::optional> pdlp_solver_t return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_primal_solution() + : pdhg_solver_.get_potential_next_primal_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_dual_solution() + : pdhg_solver_.get_potential_next_dual_solution(), get_filled_warmed_start_data(), termination_current); } else // Average has better overall residual @@ -606,8 +638,12 @@ std::optional> pdlp_solver_t return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_primal_solution() + : pdhg_solver_.get_potential_next_primal_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_dual_solution() + : pdhg_solver_.get_potential_next_dual_solution(), get_filled_warmed_start_data(), termination_current); } else if (termination_average == pdlp_termination_status_t::PrimalFeasible) { @@ -647,8 +683,12 @@ std::optional> pdlp_solver_t return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_primal_solution() + : pdhg_solver_.get_potential_next_primal_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_dual_solution() + : pdhg_solver_.get_potential_next_dual_solution(), get_filled_warmed_start_data(), termination_current); } else { @@ -698,8 +738,12 @@ std::optional> pdlp_solver_t return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_primal_solution() + : pdhg_solver_.get_potential_next_primal_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_dual_solution() + : pdhg_solver_.get_potential_next_dual_solution(), get_filled_warmed_start_data(), termination_current); } @@ -721,8 +765,12 @@ std::optional> pdlp_solver_t return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_primal_solution() + : pdhg_solver_.get_potential_next_primal_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_dual_solution() + : pdhg_solver_.get_potential_next_dual_solution(), termination_current); } if (termination_average == pdlp_termination_status_t::PrimalInfeasible || @@ -756,8 +804,12 @@ std::optional> pdlp_solver_t return current_termination_strategy_.fill_return_problem_solution( internal_solver_iterations_, pdhg_solver_, - pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_primal_solution() + : pdhg_solver_.get_potential_next_primal_solution(), + (pdlp_hyper_params::use_adaptive_step_size_strategy) + ? pdhg_solver_.get_dual_solution() + : pdhg_solver_.get_potential_next_dual_solution(), termination_current); } } @@ -967,6 +1019,75 @@ void pdlp_solver_t::update_primal_dual_solutions( } } +template +void pdlp_solver_t::compute_fixed_error(bool& has_restarted) +{ +#ifdef CUPDLP_DEBUG_MODE + printf("Computing compute_fixed_point_error \n"); +#endif + cuopt_assert(pdhg_solver_.get_reflected_primal().size() == primal_size_h_, + "reflected_primal_ size mismatch"); + cuopt_assert(pdhg_solver_.get_reflected_dual().size() == dual_size_h_, + "reflected_dual_ size mismatch"); + cuopt_assert(pdhg_solver_.get_primal_solution().size() == primal_size_h_, + "primal_solution_ size mismatch"); + cuopt_assert(pdhg_solver_.get_dual_solution().size() == dual_size_h_, + "dual_solution_ size mismatch"); + cuopt_assert(pdhg_solver_.get_saddle_point_state().get_delta_primal().size() == primal_size_h_, + "delta_primal_ size mismatch"); + cuopt_assert(pdhg_solver_.get_saddle_point_state().get_delta_dual().size() == dual_size_h_, + "delta_dual_ size mismatch"); + + // Computing the deltas + cub::DeviceTransform::Transform(cuda::std::make_tuple(pdhg_solver_.get_reflected_primal().data(), + pdhg_solver_.get_primal_solution().data()), + pdhg_solver_.get_saddle_point_state().get_delta_primal().data(), + primal_size_h_, + cuda::std::minus{}, + stream_view_); + cub::DeviceTransform::Transform(cuda::std::make_tuple(pdhg_solver_.get_reflected_dual().data(), + pdhg_solver_.get_dual_solution().data()), + pdhg_solver_.get_saddle_point_state().get_delta_dual().data(), + dual_size_h_, + cuda::std::minus{}, + stream_view_); + + auto& cusparse_view = pdhg_solver_.get_cusparse_view(); + // Make potential_next_dual_solution point towards reflected dual solution to reuse the code + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( + &cusparse_view.potential_next_dual_solution, + op_problem_scaled_.n_constraints, + const_cast(pdhg_solver_.get_reflected_dual().data()))); + + step_size_strategy_.compute_interaction_and_movement( + pdhg_solver_.get_primal_tmp_resource(), cusparse_view, pdhg_solver_.get_saddle_point_state()); + + const f_t movement = + step_size_strategy_.get_norm_squared_delta_primal() * primal_weight_.value(stream_view_) + + step_size_strategy_.get_norm_squared_delta_dual() / primal_weight_.value(stream_view_); + const f_t interaction = + f_t(2.0) * step_size_strategy_.get_interaction() * step_size_.value(stream_view_); + + restart_strategy_.fixed_point_error_ = std::sqrt(movement + interaction); + +#ifdef CUPDLP_DEBUG_MODE + printf("movement %lf\n", movement); + printf("interaction %lf\n", interaction); + printf("state->fixed_point_error %lf\n", restart_strategy_.fixed_point_error_); +#endif + + // Put back + RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsecreatednvec( + &cusparse_view.potential_next_dual_solution, + op_problem_scaled_.n_constraints, + const_cast(pdhg_solver_.get_potential_next_dual_solution().data()))); + + if (has_restarted) { + restart_strategy_.initial_fixed_point_error_ = restart_strategy_.fixed_point_error_; + has_restarted = false; + } +} + template optimization_problem_solution_t pdlp_solver_t::run_solver(const timer_t& timer) { @@ -1057,6 +1178,11 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co raft::print_device_vector("Initial primal_step_size", primal_step_size_.data(), 1, std::cout); raft::print_device_vector("Initial dual_step_size", dual_step_size_.data(), 1, std::cout); } +#ifdef CUPDLP_DEBUG_MODE + printf("Initial primal weight %lf, step size %lf\n", + primal_weight_.value(stream_view_), + step_size_.value(stream_view_)); +#endif bool warm_start_was_given = settings_.get_pdlp_warm_start_data().last_restart_duality_gap_dual_solution_.size() != 0; @@ -1066,15 +1192,25 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co " Iter Primal Obj. Dual Obj. Gap Primal Res. Dual Res. Time"); } while (true) { - bool is_major_iteration = ((total_pdlp_iterations_ % pdlp_hyper_params::major_iteration == 0) && - (total_pdlp_iterations_ > 0)) || - (total_pdlp_iterations_ <= pdlp_hyper_params::min_iteration_restart); +#ifdef CUPDLP_DEBUG_MODE + printf("Step: %d\n", total_pdlp_iterations_); +#endif + bool is_major_iteration = + (((total_pdlp_iterations_) % pdlp_hyper_params::major_iteration == 0) && + (total_pdlp_iterations_ > 0)) || + (total_pdlp_iterations_ <= pdlp_hyper_params::min_iteration_restart); bool error_occured = (step_size_strategy_.get_valid_step_size() == -1); bool artificial_restart_check_main_loop = false; + bool has_restarted = false; + bool is_conditional_major = + (pdlp_hyper_params::use_conditional_major) + ? (total_pdlp_iterations_ % conditional_major(total_pdlp_iterations_)) == 0 + : false; if (pdlp_hyper_params::artificial_restart_in_main_loop) artificial_restart_check_main_loop = restart_strategy_.should_do_artificial_restart(total_pdlp_iterations_); - if (is_major_iteration || artificial_restart_check_main_loop || error_occured) { + if (is_major_iteration || artificial_restart_check_main_loop || error_occured || + is_conditional_major) { if (verbose) { std::cout << "-------------------------------" << std::endl; std::cout << internal_solver_iterations_ << std::endl; @@ -1119,8 +1255,15 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co initial_scaling_strategy_.unscale_solutions(unscaled_primal_avg_solution_, unscaled_dual_avg_solution_); } - initial_scaling_strategy_.unscale_solutions(pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution()); + if (pdlp_hyper_params::use_adaptive_step_size_strategy) { + initial_scaling_strategy_.unscale_solutions(pdhg_solver_.get_primal_solution(), + pdhg_solver_.get_dual_solution()); + } else { + initial_scaling_strategy_.unscale_solutions( + pdhg_solver_.get_potential_next_primal_solution(), + pdhg_solver_.get_potential_next_dual_solution(), + pdhg_solver_.get_dual_slack()); + } // Check for termination std::optional> solution = check_termination(timer); @@ -1130,14 +1273,22 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co if (pdlp_hyper_params::rescale_for_restart) { initial_scaling_strategy_.scale_solutions(unscaled_primal_avg_solution_, unscaled_dual_avg_solution_); - initial_scaling_strategy_.scale_solutions(pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution()); + if (pdlp_hyper_params::use_adaptive_step_size_strategy) { + initial_scaling_strategy_.scale_solutions(pdhg_solver_.get_primal_solution(), + pdhg_solver_.get_dual_solution()); + } else { + initial_scaling_strategy_.scale_solutions( + pdhg_solver_.get_potential_next_primal_solution(), + pdhg_solver_.get_potential_next_dual_solution(), + pdhg_solver_.get_dual_slack()); + } } if (pdlp_hyper_params::restart_strategy != - static_cast( - detail::pdlp_restart_strategy_t::restart_strategy_t::NO_RESTART)) { - restart_strategy_.compute_restart( + static_cast( + detail::pdlp_restart_strategy_t::restart_strategy_t::NO_RESTART) && + (is_major_iteration || artificial_restart_check_main_loop)) { + has_restarted = restart_strategy_.compute_restart( pdhg_solver_, unscaled_primal_avg_solution_, unscaled_dual_avg_solution_, @@ -1147,7 +1298,8 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co primal_weight_, step_size_, current_termination_strategy_.get_convergence_information(), // Needed for KKT restart - average_termination_strategy_.get_convergence_information() // Needed for KKT restart + average_termination_strategy_.get_convergence_information(), // Needed for KKT restart + best_primal_weight_ // Needed for cuPDLP+ restart ); } @@ -1156,22 +1308,44 @@ optimization_problem_solution_t pdlp_solver_t::run_solver(co // getting the scaled accumulation // During the next iteration, unscaled_avg_solution will be overwritten again through // get_average_solutions - initial_scaling_strategy_.scale_solutions(pdhg_solver_.get_primal_solution(), - pdhg_solver_.get_dual_solution()); + if (pdlp_hyper_params::use_adaptive_step_size_strategy) { + initial_scaling_strategy_.scale_solutions(pdhg_solver_.get_primal_solution(), + pdhg_solver_.get_dual_solution()); + } else { + initial_scaling_strategy_.scale_solutions( + pdhg_solver_.get_potential_next_primal_solution(), + pdhg_solver_.get_potential_next_dual_solution(), + pdhg_solver_.get_dual_slack()); + } } } - take_step(total_pdlp_iterations_); +#ifdef CUPDLP_DEBUG_MODE + printf("Is Major %d\n", (total_pdlp_iterations_ + 1) % pdlp_hyper_params::major_iteration == 0); +#endif + take_step(total_pdlp_iterations_, + (total_pdlp_iterations_ + 1) % pdlp_hyper_params::major_iteration == 0); + + if (pdlp_hyper_params::use_reflected_primal_dual) { + if (pdlp_hyper_params::use_fixed_point_error && + (total_pdlp_iterations_ + 1) % pdlp_hyper_params::major_iteration == 0 || + has_restarted) + compute_fixed_error(has_restarted); // May set has_restarted to false + + halpern_update(); + } ++total_pdlp_iterations_; ++internal_solver_iterations_; + if (pdlp_hyper_params::never_restart_to_average) + restart_strategy_.increment_iteration_since_last_restart(); } return optimization_problem_solution_t{pdlp_termination_status_t::NumericalError, stream_view_}; } template -void pdlp_solver_t::take_step(i_t total_pdlp_iterations) +void pdlp_solver_t::take_adaptive_step(i_t total_pdlp_iterations, bool is_major_iteration) { // continue testing stepsize until we find a valid one or encounter a numerical error step_size_strategy_.set_valid_step_size(0); @@ -1188,7 +1362,8 @@ void pdlp_solver_t::take_step(i_t total_pdlp_iterations) dual_step_size_, restart_strategy_.get_iterations_since_last_restart(), restart_strategy_.get_last_restart_was_average(), - total_pdlp_iterations); + total_pdlp_iterations, + is_major_iteration); step_size_strategy_.compute_step_sizes( pdhg_solver_, primal_step_size_, dual_step_size_, total_pdlp_iterations); @@ -1207,46 +1382,232 @@ void pdlp_solver_t::take_step(i_t total_pdlp_iterations) pdhg_solver_.update_solution(current_op_problem_evaluation_cusparse_view_); } +template +void pdlp_solver_t::take_constant_step(bool is_major_iteration) +{ + pdhg_solver_.take_step( + primal_step_size_, dual_step_size_, 0, false, total_pdlp_iterations_, is_major_iteration); +} + +template +void pdlp_solver_t::halpern_update() +{ + raft::common::nvtx::range fun_scope("halpern_update"); + + const f_t weight = + f_t(restart_strategy_.weighted_average_solution_.get_iterations_since_last_restart() + 1) / + f_t(restart_strategy_.weighted_average_solution_.get_iterations_since_last_restart() + 2); + +#ifdef CUPDLP_DEBUG_MODE + printf("halper_update weight %lf\n", weight); +#endif + + // Update primal + cub::DeviceTransform::Transform( + cuda::std::make_tuple(pdhg_solver_.get_reflected_primal().data(), + pdhg_solver_.get_saddle_point_state().get_primal_solution().data(), + restart_strategy_.last_restart_duality_gap_.primal_solution_.data()), + pdhg_solver_.get_saddle_point_state().get_primal_solution().data(), + primal_size_h_, + [weight, reflection_coefficient = pdlp_hyper_params::reflection_coefficient] __device__( + f_t reflected_primal, f_t current_primal, f_t initial_primal) { + const f_t reflected = reflection_coefficient * reflected_primal + + (f_t(1.0) - reflection_coefficient) * current_primal; + return weight * reflected + (f_t(1.0) - weight) * initial_primal; + }, + stream_view_); + + // Update dual + cub::DeviceTransform::Transform( + cuda::std::make_tuple(pdhg_solver_.get_reflected_dual().data(), + pdhg_solver_.get_saddle_point_state().get_dual_solution().data(), + restart_strategy_.last_restart_duality_gap_.dual_solution_.data()), + pdhg_solver_.get_saddle_point_state().get_dual_solution().data(), + dual_size_h_, + [weight, reflection_coefficient = pdlp_hyper_params::reflection_coefficient] __device__( + f_t reflected_dual, f_t current_dual, f_t initial_dual) { + const f_t reflected = reflection_coefficient * reflected_dual + + (f_t(1.0) - reflection_coefficient) * current_dual; + return weight * reflected + (f_t(1.0) - weight) * initial_dual; + }, + stream_view_); + +#ifdef CUPDLP_DEBUG_MODE + print("halpen_update current primal", + pdhg_solver_.get_saddle_point_state().get_primal_solution()); + print("halpen_update current dual", pdhg_solver_.get_saddle_point_state().get_dual_solution()); +#endif +} + +template +void pdlp_solver_t::take_step([[maybe_unused]] i_t total_pdlp_iterations, + [[maybe_unused]] bool is_major_iteration) +{ + if (pdlp_hyper_params::use_adaptive_step_size_strategy) { + take_adaptive_step(total_pdlp_iterations, is_major_iteration); + } else { + cuopt_assert(total_pdlp_iterations == pdhg_solver_.get_total_pdhg_iterations(), + "In non adaptive step size mode, both pdlp and pdhg step should always be equal"); + take_constant_step(is_major_iteration); + } +} + template void pdlp_solver_t::compute_initial_step_size() { raft::common::nvtx::range fun_scope("compute_initial_step_size"); - // set stepsize relative to maximum absolute value of A - rmm::device_scalar abs_max_element{0.0, stream_view_}; - void* d_temp_storage = NULL; - size_t temp_storage_bytes = 0; - - detail::max_abs_value red_op; - cub::DeviceReduce::Reduce(d_temp_storage, - temp_storage_bytes, - op_problem_scaled_.coefficients.data(), - abs_max_element.data(), - op_problem_scaled_.nnz, - red_op, - 0.0, - stream_view_); - // Allocate temporary storage - rmm::device_buffer cub_tmp{temp_storage_bytes, stream_view_}; - // Run max-reduction - cub::DeviceReduce::Reduce(cub_tmp.data(), - temp_storage_bytes, - op_problem_scaled_.coefficients.data(), - abs_max_element.data(), - op_problem_scaled_.nnz, - red_op, - 0.0, - stream_view_); - raft::linalg::eltwiseDivideCheckZero( - step_size_.data(), step_size_.data(), abs_max_element.data(), 1, stream_view_); + if (!pdlp_hyper_params::initial_step_size_max_singular_value) { + // set stepsize relative to maximum absolute value of A + rmm::device_scalar abs_max_element{0.0, stream_view_}; + void* d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + + detail::max_abs_value red_op; + cub::DeviceReduce::Reduce(d_temp_storage, + temp_storage_bytes, + op_problem_scaled_.coefficients.data(), + abs_max_element.data(), + op_problem_scaled_.nnz, + red_op, + 0.0, + stream_view_); + // Allocate temporary storage + rmm::device_buffer cub_tmp{temp_storage_bytes, stream_view_}; + // Run max-reduction + cub::DeviceReduce::Reduce(cub_tmp.data(), + temp_storage_bytes, + op_problem_scaled_.coefficients.data(), + abs_max_element.data(), + op_problem_scaled_.nnz, + red_op, + 0.0, + stream_view_); + raft::linalg::eltwiseDivideCheckZero( + step_size_.data(), step_size_.data(), abs_max_element.data(), 1, stream_view_); + + // Sync since we are using local variable + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); + } else { + constexpr i_t max_iterations = 5000; + constexpr f_t tolerance = 1e-4; + + i_t m = op_problem_scaled_.n_constraints; + i_t n = op_problem_scaled_.n_variables; + + std::vector z(m); + rmm::device_uvector d_z(m, stream_view_); + rmm::device_uvector d_q(m, stream_view_); + rmm::device_uvector d_atq(n, stream_view_); + + std::mt19937 gen(1); + std::normal_distribution dist(0.0, 1.0); + + for (int i = 0; i < m; ++i) + z[i] = dist(gen); + + device_copy(d_z, z, stream_view_); + + rmm::device_scalar norm_q(stream_view_); + rmm::device_scalar sigma_max_sq(stream_view_); + rmm::device_scalar residual_norm(stream_view_); + rmm::device_scalar reusable_device_scalar_value_1_(1, stream_view_); + rmm::device_scalar reusable_device_scalar_value_0_(0, stream_view_); + + cusparseDnVecDescr_t vecZ, vecQ, vecATQ; + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsecreatednvec(&vecZ, m, const_cast(d_z.data()))); + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsecreatednvec(&vecQ, m, const_cast(d_q.data()))); + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsecreatednvec(&vecATQ, n, const_cast(d_atq.data()))); + + const auto& cusparse_view_ = pdhg_solver_.get_cusparse_view(); + + int sing_iters = 0; + for (int i = 0; i < max_iterations; ++i) { + ++sing_iters; + // d_q = d_z + raft::copy(d_q.data(), d_z.data(), m, stream_view_); + // norm_q = l2_norm(d_q) + my_l2_norm(d_q, norm_q, handle_ptr_); + + cuopt_assert(norm_q.value(stream_view_) != f_t(0), "norm q can't be 0"); + + // d_q *= 1 / norm_q + cub::DeviceTransform::Transform( + d_q.data(), + d_q.data(), + d_q.size(), + [norm_q = norm_q.data()] __device__(f_t d_q) { return d_q / *norm_q; }, + stream_view_); + + // A_t_q = A_t @ d_q + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), + cusparse_view_.A_T, + vecQ, + reusable_device_scalar_value_0_.data(), + vecATQ, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_transpose.data(), + stream_view_)); + + // z = A @ A_t_q + RAFT_CUSPARSE_TRY( + raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, + reusable_device_scalar_value_1_.data(), // 1 + cusparse_view_.A, + vecATQ, + reusable_device_scalar_value_0_.data(), // 1 + vecZ, + CUSPARSE_SPMV_CSR_ALG2, + (f_t*)cusparse_view_.buffer_non_transpose.data(), + stream_view_)); + // sigma_max_sq = dot(q, z) + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(handle_ptr_->get_cublas_handle(), + m, + d_q.data(), + primal_stride, + d_z.data(), + primal_stride, + sigma_max_sq.data(), + stream_view_)); + + cub::DeviceTransform::Transform( + cuda::std::make_tuple(d_q.data(), d_z.data()), + d_q.data(), + d_q.size(), + [sigma_max_sq = sigma_max_sq.data()] __device__(f_t d_q, f_t d_z) { + return d_q * -(*sigma_max_sq) + d_z; + }, + stream_view_); + + my_l2_norm(d_q, residual_norm, handle_ptr_); + + if (residual_norm.value(stream_view_) < tolerance) break; + } +#ifdef CUPDLP_DEBUG_MODE + printf("iter_count %d\n", sing_iters); +#endif - RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); + constexpr f_t scaling_factor = 0.998; + const f_t step_size = scaling_factor / std::sqrt(sigma_max_sq.value(stream_view_)); + step_size_.set_value_async(step_size, stream_view_); + + // Sync since we are using local variable + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); + } } template __global__ void compute_weights_initial_primal_weight_from_squared_norms(const f_t* b_vec_norm, const f_t* c_vec_norm, - f_t* primal_weight) + f_t* primal_weight, + f_t* best_primal_weight) { if (threadIdx.x + blockIdx.x * blockDim.x > 0) { return; } f_t c_vec_norm_ = *c_vec_norm; @@ -1259,38 +1620,60 @@ __global__ void compute_weights_initial_primal_weight_from_squared_norms(const f c_vec_norm_, pdlp_hyper_params::primal_importance); #endif - *primal_weight = pdlp_hyper_params::primal_importance * (c_vec_norm_ / b_vec_norm_); + *primal_weight = pdlp_hyper_params::primal_importance * (c_vec_norm_ / b_vec_norm_); + *best_primal_weight = *primal_weight; } else { - *primal_weight = pdlp_hyper_params::primal_importance; + *primal_weight = pdlp_hyper_params::primal_importance; + *best_primal_weight = *primal_weight; } } template void pdlp_solver_t::compute_initial_primal_weight() { + raft::common::nvtx::range fun_scope("compute_initial_primal_weight"); + // Here we use the combined bounds of the op_problem_scaled which may or may not be scaled yet // based on pdlp config detail::combine_constraint_bounds(op_problem_scaled_, op_problem_scaled_.combined_bounds); - - // => same as sqrt(dot(b,b)) - rmm::device_scalar b_vec_norm{0.0, stream_view_}; rmm::device_scalar c_vec_norm{0.0, stream_view_}; - - detail::my_l2_weighted_norm(op_problem_scaled_.combined_bounds, - pdlp_hyper_params::initial_primal_weight_b_scaling, - b_vec_norm, - stream_view_); - detail::my_l2_weighted_norm(op_problem_scaled_.objective_coefficients, pdlp_hyper_params::initial_primal_weight_c_scaling, c_vec_norm, stream_view_); + rmm::device_scalar b_vec_norm{0.0, stream_view_}; + if (pdlp_hyper_params::initial_primal_weight_combined_bounds) { + // => same as sqrt(dot(b,b)) + detail::my_l2_weighted_norm(op_problem_scaled_.combined_bounds, + pdlp_hyper_params::initial_primal_weight_b_scaling, + b_vec_norm, + stream_view_); + + } else { + if (pdlp_hyper_params::bound_objective_rescaling) { + const f_t one = 1; + primal_weight_.set_value_async(one, stream_view_); + best_primal_weight_.set_value_async(one, stream_view_); + return; + } else { + cuopt_expects(pdlp_hyper_params::initial_primal_weight_b_scaling == 1, + error_type_t::ValidationError, + "Passing a scaling is not supported for now"); + + compute_sum_bounds(op_problem_scaled_.constraint_lower_bounds, + op_problem_scaled_.constraint_upper_bounds, + b_vec_norm, + stream_view_); + } + } + compute_weights_initial_primal_weight_from_squared_norms<<<1, 1, 0, stream_view_>>>( - b_vec_norm.data(), c_vec_norm.data(), primal_weight_.data()); + b_vec_norm.data(), c_vec_norm.data(), primal_weight_.data(), best_primal_weight_.data()); RAFT_CUDA_TRY(cudaPeekAtLastError()); + // Sync since we are using local variable RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_)); } @@ -1323,14 +1706,20 @@ pdlp_solver_t::get_current_termination_strategy() template class pdlp_solver_t; template __global__ void compute_weights_initial_primal_weight_from_squared_norms( - const float* b_vec_norm, const float* c_vec_norm, float* primal_weight); + const float* b_vec_norm, + const float* c_vec_norm, + float* primal_weight, + float* best_primal_weight); #endif #if MIP_INSTANTIATE_DOUBLE template class pdlp_solver_t; template __global__ void compute_weights_initial_primal_weight_from_squared_norms( - const double* b_vec_norm, const double* c_vec_norm, double* primal_weight); + const double* b_vec_norm, + const double* c_vec_norm, + double* primal_weight, + double* best_primal_weight); #endif } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/linear_programming/pdlp.cuh b/cpp/src/linear_programming/pdlp.cuh index ae75505ab..72d6b14d4 100644 --- a/cpp/src/linear_programming/pdlp.cuh +++ b/cpp/src/linear_programming/pdlp.cuh @@ -99,6 +99,9 @@ class pdlp_solver_t { void set_inside_mip(bool inside_mip); + void compute_initial_step_size(); + void compute_initial_primal_weight(); + private: void print_termination_criteria(const timer_t& timer, bool is_average = false); void print_final_termination_criteria( @@ -106,8 +109,6 @@ class pdlp_solver_t { const convergence_information_t& convergence_information, const pdlp_termination_status_t& termination_status, bool is_average = false); - void compute_initial_step_size(); - void compute_initial_primal_weight(); std::optional> check_termination(const timer_t& timer); std::optional> check_limits(const timer_t& timer); void record_best_primal_so_far(const detail::pdlp_termination_strategy_t& current, @@ -115,7 +116,10 @@ class pdlp_solver_t { const pdlp_termination_status_t& termination_current, const pdlp_termination_status_t& termination_average); - void take_step(i_t total_pdlp_iterations); + void take_step([[maybe_unused]] i_t total_pdlp_iterations, + [[maybe_unused]] bool is_major_iteration); + void take_adaptive_step(i_t total_pdlp_iterations, bool is_major_iteration); + void take_constant_step(bool is_major_iteration); /** * @brief Update current primal & dual solution by setting new solutions and triggering a @@ -157,6 +161,7 @@ class pdlp_solver_t { primal and dual distances traveled since the last restart. */ rmm::device_scalar primal_weight_; + rmm::device_scalar best_primal_weight_; rmm::device_scalar step_size_; // Step size strategy @@ -165,11 +170,14 @@ class pdlp_solver_t { public: // Inner solver detail::pdhg_solver_t pdhg_solver_; + void halpern_update(); private: // Intentionnaly take a copy to avoid an unintentional modification in the calling context const pdlp_solver_settings_t settings_; + void compute_fixed_error(bool& has_restarted); + pdlp_warm_start_data_t get_filled_warmed_start_data(); // Initial scaling strategy diff --git a/cpp/src/linear_programming/pdlp_constants.hpp b/cpp/src/linear_programming/pdlp_constants.hpp index 8bf9b6c2b..199eae8e0 100644 --- a/cpp/src/linear_programming/pdlp_constants.hpp +++ b/cpp/src/linear_programming/pdlp_constants.hpp @@ -31,6 +31,8 @@ inline constexpr int dual_stride = 1; // #define PDLP_DEBUG_MODE +// #define CUPDLP_DEBUG_MODE + // Value used to determine what we see as too small (the value) or too large (1/value) values when // computing the new primal weight during the restart. template diff --git a/cpp/src/linear_programming/pdlp_hyper_params.cu b/cpp/src/linear_programming/pdlp_hyper_params.cu index 5386c50d9..ed6242e0a 100644 --- a/cpp/src/linear_programming/pdlp_hyper_params.cu +++ b/cpp/src/linear_programming/pdlp_hyper_params.cu @@ -78,5 +78,19 @@ bool update_step_size_on_initial_solution = false; bool handle_some_primal_gradients_on_finite_bounds_as_residuals = false; // Whether to project initial primal values using variable bounds bool project_initial_primal = true; +// Whether to use adaptive step size strategy +bool use_adaptive_step_size_strategy = true; +// All hyperparameters needed to have the same heuristics cuPDLP+ +bool initial_step_size_max_singular_value = false; +bool initial_primal_weight_combined_bounds = true; +bool bound_objective_rescaling = false; +bool use_reflected_primal_dual = false; +bool use_fixed_point_error = false; +double reflection_coefficient = 1.0; +double restart_k_p = 0.99; +double restart_k_i = 0.01; +double restart_k_d = 0.0; +double restart_i_smooth = 0.3; +bool use_conditional_major = false; } // namespace cuopt::linear_programming::pdlp_hyper_params diff --git a/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu b/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu index 7e214b7b5..2571523aa 100644 --- a/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu +++ b/cpp/src/linear_programming/restart_strategy/localized_duality_gap_container.cu @@ -47,6 +47,14 @@ localized_duality_gap_container_t::localized_duality_gap_container_t( dual_solution_tr_{is_KKT_restart() ? 0 : static_cast(dual_size), handle_ptr->get_stream()} { + RAFT_CUDA_TRY(cudaMemsetAsync(primal_solution_.data(), + f_t(0.0), + sizeof(f_t) * primal_solution_.size(), + handle_ptr->get_stream())); + RAFT_CUDA_TRY(cudaMemsetAsync(dual_solution_.data(), + f_t(0.0), + sizeof(f_t) * dual_solution_.size(), + handle_ptr->get_stream())); } template diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu index cf23c8a1d..5e3b14f1e 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cu @@ -22,6 +22,9 @@ #include #include #include +#ifdef CUPDLP_DEBUG_MODE +#include +#endif #include #include @@ -276,7 +279,7 @@ void pdlp_restart_strategy_t::get_average_solutions(rmm::device_uvecto } template -void pdlp_restart_strategy_t::run_trust_region_restart( +bool pdlp_restart_strategy_t::run_trust_region_restart( pdhg_solver_t& pdhg_solver, rmm::device_uvector& primal_solution_avg, rmm::device_uvector& dual_solution_avg, @@ -295,7 +298,7 @@ void pdlp_restart_strategy_t::run_trust_region_restart( #ifdef PDLP_VERBOSE_MODE std::cout << " No internal iteration, can't restart yet, returning:" << std::endl; #endif - return; + return false; } // make the step sizes into the norm weights that will be used throughout the computations @@ -362,6 +365,8 @@ void pdlp_restart_strategy_t::run_trust_region_restart( reset_internal(); weighted_average_solution_.reset_weighted_average_solution(); } + + return restart; } template @@ -642,7 +647,202 @@ bool pdlp_restart_strategy_t::run_kkt_restart( } template -void pdlp_restart_strategy_t::compute_restart( +bool pdlp_restart_strategy_t::should_cupdlpx_restart(i_t total_number_of_iterations) +{ + bool should_restart = false; + + if (total_number_of_iterations == pdlp_hyper_params::major_iteration) { +#ifdef CUPDLP_DEBUG_MODE + printf("forced restart at first major\n"); +#endif + should_restart = true; + } else if (total_number_of_iterations > pdlp_hyper_params::major_iteration) { + cuopt_assert(initial_fixed_point_error_ != std::numeric_limits::signaling_NaN(), + "Numerical error: initial_fixed_point_error_ should not be at nan at this stage"); + cuopt_assert(fixed_point_error_ != std::numeric_limits::signaling_NaN(), + "Numerical error: fixed_point_error_ should not be at nan at this stage"); + if (fixed_point_error_ <= pdlp_hyper_params::host_default_sufficient_reduction_for_restart * + initial_fixed_point_error_) { +#ifdef CUPDLP_DEBUG_MODE + + printf("sufficient restart\n"); +#endif + should_restart = true; + } else if (fixed_point_error_ <= + pdlp_hyper_params::host_default_necessary_reduction_for_restart * + initial_fixed_point_error_ && + fixed_point_error_ > last_trial_fixed_point_error_) { +#ifdef CUPDLP_DEBUG_MODE + printf("neseccary restart\n"); +#endif + should_restart = true; + } else if (should_do_artificial_restart(total_number_of_iterations)) { +#ifdef CUPDLP_DEBUG_MODE + printf("artificial restart\n"); +#endif + should_restart = true; + } + } + last_trial_fixed_point_error_ = fixed_point_error_; + return should_restart; +} + +template +void pdlp_restart_strategy_t::cupdlpx_restart( + const convergence_information_t& current_convergence_information, + pdhg_solver_t& pdhg_solver, + rmm::device_scalar& primal_weight, + const rmm::device_scalar& step_size, + rmm::device_scalar& primal_step_size, + rmm::device_scalar& dual_step_size, + rmm::device_scalar& best_primal_weight) +{ + // Computing the deltas + distance_squared_moved_from_last_restart_period( + pdhg_solver.get_potential_next_primal_solution(), + last_restart_duality_gap_.primal_solution_, + pdhg_solver.get_primal_tmp_resource(), + primal_size_h_, + 1, + last_restart_duality_gap_.primal_distance_traveled_); + distance_squared_moved_from_last_restart_period( + pdhg_solver.get_potential_next_dual_solution(), + last_restart_duality_gap_.dual_solution_, + pdhg_solver.get_dual_tmp_resource(), + dual_size_h_, + 1, + last_restart_duality_gap_.dual_distance_traveled_); + + const f_t l2_primal_distance = + std::sqrt(last_restart_duality_gap_.primal_distance_traveled_.value(stream_view_)); + const f_t l2_dual_distance = + std::sqrt(last_restart_duality_gap_.dual_distance_traveled_.value(stream_view_)); + +#ifdef CUPDLP_DEBUG_MODE + printf("l2 primal distance: %lf l2 dual distance %lf\n", l2_primal_distance, l2_dual_distance); + printf("relative L2 primal residual %lf relative L2 dual residual %lf\n", + current_convergence_information.get_relative_l2_primal_residual_value(), + current_convergence_information.get_relative_l2_dual_residual_value()); +#endif + + const f_t relative_l2_dual_residual_value = + current_convergence_information.get_relative_l2_dual_residual_value(); + const f_t relative_l2_primal_residual_value = + current_convergence_information.get_relative_l2_primal_residual_value(); + const f_t ratio_infeas = (relative_l2_primal_residual_value == f_t(0.0)) + ? std::numeric_limits::infinity() + : relative_l2_dual_residual_value / relative_l2_primal_residual_value; + + if (l2_primal_distance > f_t(1e-16) && l2_dual_distance > f_t(1e-16) && + l2_primal_distance < f_t(1e12) && l2_dual_distance < f_t(1e12) && ratio_infeas > f_t(1e-8) && + ratio_infeas < f_t(1e8)) { +#ifdef CUPDLP_DEBUG_MODE + printf("Compute new primal weight\n"); +#endif + const f_t current_primal_weight = primal_weight.value(stream_view_); + const f_t error = + std::log(l2_dual_distance) - std::log(l2_primal_distance) - std::log(current_primal_weight); + primal_weight_error_sum_ *= pdlp_hyper_params::restart_i_smooth; + primal_weight_error_sum_ += error; + const f_t delta_error = error - primal_weight_last_error_; + const f_t new_primal_weight = + std::exp(pdlp_hyper_params::restart_k_p * error + + pdlp_hyper_params::restart_k_i * primal_weight_error_sum_ + + pdlp_hyper_params::restart_k_d * delta_error) * + current_primal_weight; + primal_weight.set_value_async(new_primal_weight, stream_view_); + primal_weight_last_error_ = error; + const f_t h_step_size = step_size.value(stream_view_); + const f_t new_primal_step_size = h_step_size / new_primal_weight; + const f_t new_dual_step_size = h_step_size * new_primal_weight; + primal_step_size.set_value_async(new_primal_step_size, stream_view_); + dual_step_size.set_value_async(new_dual_step_size, stream_view_); + } else { +#ifdef CUPDLP_DEBUG_MODE + printf("Setting new primal weight to best primal weight\n"); +#endif + const f_t best_primal_weight_value = best_primal_weight.value(stream_view_); + primal_weight.set_value_async(best_primal_weight_value, stream_view_); + primal_weight_error_sum_ = f_t(0.0); + primal_weight_last_error_ = f_t(0.0); + const f_t h_step_size = step_size.value(stream_view_); + const f_t new_primal_step_size = h_step_size / best_primal_weight_value; + const f_t new_dual_step_size = h_step_size * best_primal_weight_value; + primal_step_size.set_value_async(new_primal_step_size, stream_view_); + dual_step_size.set_value_async(new_dual_step_size, stream_view_); + } + + // TODO possible error if relative_l2_primal_residual_value == 0 + const f_t primal_dual_residual_gap = + std::abs(std::log10(relative_l2_dual_residual_value / relative_l2_primal_residual_value)); + if (primal_dual_residual_gap < best_primal_dual_residual_gap_) { + best_primal_dual_residual_gap_ = primal_dual_residual_gap; + const f_t new_primal_weight = primal_weight.value(stream_view_); + best_primal_weight.set_value_async(new_primal_weight, stream_view_); + } + +#ifdef CUPDLP_DEBUG_MODE + printf("New primal weight %lf\n", primal_weight.value(stream_view_)); + printf("New best_primal_weight %lf\n", best_primal_weight.value(stream_view_)); + printf("New primal_weight_error_sum_ %lf\n", primal_weight_error_sum_); + printf("New primal_weight_last_error_ %lf\n", primal_weight_last_error_); + printf("New best_primal_dual_residual_gap_ %lf\n", best_primal_dual_residual_gap_); +#endif + + raft::copy(last_restart_duality_gap_.primal_solution_.data(), + pdhg_solver.get_potential_next_primal_solution().data(), + primal_size_h_, + stream_view_); + raft::copy(pdhg_solver.get_primal_solution().data(), + pdhg_solver.get_potential_next_primal_solution().data(), + primal_size_h_, + stream_view_); + raft::copy(last_restart_duality_gap_.dual_solution_.data(), + pdhg_solver.get_potential_next_dual_solution().data(), + dual_size_h_, + stream_view_); + raft::copy(pdhg_solver.get_dual_solution().data(), + pdhg_solver.get_potential_next_dual_solution().data(), + dual_size_h_, + stream_view_); + +#ifdef CUPDLP_DEBUG_MODE + print("New last_restart_duality_gap_.primal_solution_", + last_restart_duality_gap_.primal_solution_); + print("New pdhg_solver.get_primal_solution", pdhg_solver.get_primal_solution()); + print("New last_restart_duality_gap_.dual_solution_", last_restart_duality_gap_.dual_solution_); + print("New pdhg_solver.get_dual_solution", pdhg_solver.get_dual_solution()); +#endif + + weighted_average_solution_.iterations_since_last_restart_ = 0; + last_trial_fixed_point_error_ = std::numeric_limits::infinity(); +} + +template +bool pdlp_restart_strategy_t::run_cupdlpx_restart( + const convergence_information_t& current_convergence_information, + pdhg_solver_t& pdhg_solver, + i_t total_number_of_iterations, + rmm::device_scalar& primal_weight, + const rmm::device_scalar& step_size, + rmm::device_scalar& primal_step_size, + rmm::device_scalar& dual_step_size, + rmm::device_scalar& best_primal_weight) +{ + bool should_restart = should_cupdlpx_restart(total_number_of_iterations); + if (should_restart) + cupdlpx_restart(current_convergence_information, + pdhg_solver, + primal_weight, + step_size, + primal_step_size, + dual_step_size, + best_primal_weight); + return should_restart; +} + +template +bool pdlp_restart_strategy_t::compute_restart( pdhg_solver_t& pdhg_solver, rmm::device_uvector& primal_solution_avg, rmm::device_uvector& dual_solution_avg, @@ -652,36 +852,53 @@ void pdlp_restart_strategy_t::compute_restart( rmm::device_scalar& primal_weight, const rmm::device_scalar& step_size, const convergence_information_t& current_convergence_information, - const convergence_information_t& average_convergence_information) + const convergence_information_t& average_convergence_information, + [[maybe_unused]] rmm::device_scalar& best_primal_weight) { raft::common::nvtx::range fun_scope("compute_restart"); if (is_KKT_restart()) { - run_kkt_restart(pdhg_solver, - primal_solution_avg, - dual_solution_avg, - current_convergence_information, - average_convergence_information, - primal_step_size, - dual_step_size, - primal_weight, - step_size, - total_number_of_iterations); + return run_kkt_restart(pdhg_solver, + primal_solution_avg, + dual_solution_avg, + current_convergence_information, + average_convergence_information, + primal_step_size, + dual_step_size, + primal_weight, + step_size, + total_number_of_iterations); } else if (pdlp_hyper_params::restart_strategy == static_cast(restart_strategy_t::TRUST_REGION_RESTART)) { - run_trust_region_restart(pdhg_solver, - primal_solution_avg, - dual_solution_avg, - total_number_of_iterations, - primal_step_size, - dual_step_size, - primal_weight, - step_size); + return run_trust_region_restart(pdhg_solver, + primal_solution_avg, + dual_solution_avg, + total_number_of_iterations, + primal_step_size, + dual_step_size, + primal_weight, + step_size); + } else if (pdlp_hyper_params::restart_strategy == + static_cast(restart_strategy_t::CUPDLPX_RESTART)) { + return run_cupdlpx_restart(current_convergence_information, + pdhg_solver, + total_number_of_iterations, + primal_weight, + step_size, + primal_step_size, + dual_step_size, + best_primal_weight); } else { EXE_CUOPT_FAIL("Bad restart value"); } } +template +void pdlp_restart_strategy_t::increment_iteration_since_last_restart() +{ + ++weighted_average_solution_.iterations_since_last_restart_; +} + template __global__ void compute_new_primal_weight_kernel( const typename localized_duality_gap_container_t::view_t duality_gap_view, diff --git a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh index 8400977c1..53b1bc53e 100644 --- a/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh +++ b/cpp/src/linear_programming/restart_strategy/pdlp_restart_strategy.cuh @@ -95,6 +95,7 @@ class pdlp_restart_strategy_t { NO_RESTART = 0, KKT_RESTART = 1, TRUST_REGION_RESTART = 2, + CUPDLPX_RESTART = 3, }; pdlp_restart_strategy_t(raft::handle_t const* handle_ptr, @@ -110,6 +111,8 @@ class pdlp_restart_strategy_t { const rmm::device_scalar& gap, const rmm::device_scalar& primal_weight); + void increment_iteration_since_last_restart(); + void update_distance(pdhg_solver_t& pdhg_solver, rmm::device_scalar& primal_weight, rmm::device_scalar& primal_step_size, @@ -124,7 +127,7 @@ class pdlp_restart_strategy_t { void get_average_solutions(rmm::device_uvector& avg_primal, rmm::device_uvector& avg_dual); - void compute_restart(pdhg_solver_t& pdhg_solver, + bool compute_restart(pdhg_solver_t& pdhg_solver, rmm::device_uvector& primal_solution_avg, rmm::device_uvector& dual_solution_avg, const i_t total_number_of_iterations, @@ -133,7 +136,8 @@ class pdlp_restart_strategy_t { rmm::device_scalar& primal_weight, const rmm::device_scalar& step_size, // To update primal/dual step size const convergence_information_t& current_convergence_information, - const convergence_information_t& average_convergence_information); + const convergence_information_t& average_convergence_information, + [[maybe_unused]] rmm::device_scalar& best_primal_weight); /** * @brief Gets the device-side view (with raw pointers), for ease of access @@ -149,7 +153,25 @@ class pdlp_restart_strategy_t { i_t should_do_artificial_restart(i_t total_number_of_iterations) const; private: - void run_trust_region_restart(pdhg_solver_t& pdhg_solver, + bool run_cupdlpx_restart( + const convergence_information_t& current_convergence_information, + pdhg_solver_t& pdhg_solver, + i_t total_number_of_iterations, + rmm::device_scalar& primal_weight, + const rmm::device_scalar& step_size, + rmm::device_scalar& primal_step_size, + rmm::device_scalar& dual_step_size, + rmm::device_scalar& best_primal_weight); + bool should_cupdlpx_restart(i_t total_number_of_iterations); + void cupdlpx_restart(const convergence_information_t& current_convergence_information, + pdhg_solver_t& pdhg_solver, + rmm::device_scalar& primal_weight, + const rmm::device_scalar& step_size, + rmm::device_scalar& primal_step_size, + rmm::device_scalar& dual_step_size, + rmm::device_scalar& best_primal_weight); + + bool run_trust_region_restart(pdhg_solver_t& pdhg_solver, rmm::device_uvector& primal_solution_avg, rmm::device_uvector& dual_solution_avg, const i_t total_number_of_iterations, @@ -323,6 +345,14 @@ class pdlp_restart_strategy_t { f_t last_restart_kkt_score = f_t(0.0); bool last_restart_was_average_ = false; + + // Needed for cuPDLP+ restart + f_t fixed_point_error_ = std::numeric_limits::signaling_NaN(); + f_t initial_fixed_point_error_ = std::numeric_limits::signaling_NaN(); + f_t last_trial_fixed_point_error_ = std::numeric_limits::infinity(); + f_t primal_weight_error_sum_ = f_t(0.0); + f_t primal_weight_last_error_ = f_t(0.0); + f_t best_primal_dual_residual_gap_ = std::numeric_limits::infinity(); }; template diff --git a/cpp/src/linear_programming/solve.cu b/cpp/src/linear_programming/solve.cu index ef7528751..730820222 100644 --- a/cpp/src/linear_programming/solve.cu +++ b/cpp/src/linear_programming/solve.cu @@ -94,6 +94,14 @@ static void set_Stable1() pdlp_hyper_params::update_step_size_on_initial_solution = false; pdlp_hyper_params::handle_some_primal_gradients_on_finite_bounds_as_residuals = true; pdlp_hyper_params::project_initial_primal = false; + pdlp_hyper_params::use_adaptive_step_size_strategy = true; + pdlp_hyper_params::initial_step_size_max_singular_value = false; + pdlp_hyper_params::initial_primal_weight_combined_bounds = true; + pdlp_hyper_params::bound_objective_rescaling = false; + pdlp_hyper_params::use_reflected_primal_dual = false; + pdlp_hyper_params::use_fixed_point_error = false; + pdlp_hyper_params::reflection_coefficient = 1.0; + pdlp_hyper_params::use_conditional_major = false; } // Even better general setting due to proper primal gradient handling for KKT restart and initial @@ -129,6 +137,72 @@ static void set_Stable2() pdlp_hyper_params::update_step_size_on_initial_solution = false; pdlp_hyper_params::handle_some_primal_gradients_on_finite_bounds_as_residuals = false; pdlp_hyper_params::project_initial_primal = true; + pdlp_hyper_params::use_adaptive_step_size_strategy = true; + pdlp_hyper_params::initial_step_size_max_singular_value = false; + pdlp_hyper_params::initial_primal_weight_combined_bounds = true; + pdlp_hyper_params::bound_objective_rescaling = false; + pdlp_hyper_params::use_reflected_primal_dual = false; + pdlp_hyper_params::use_fixed_point_error = false; + pdlp_hyper_params::reflection_coefficient = 1.0; + pdlp_hyper_params::use_conditional_major = false; +} + +/* 1 - 1 mapping of cuPDLPx(+) function from Haihao and al. + * For more information please read: + * @article{lu2025cupdlpx, + * title={cuPDLPx: A Further Enhanced GPU-Based First-Order Solver for Linear Programming}, + * author={Lu, Haihao and Peng, Zedong and Yang, Jinwen}, + * journal={arXiv preprint arXiv:2507.14051}, + * year={2025} + * } + * + * @article{lu2024restarted, + * title={Restarted Halpern PDHG for linear programming}, + * author={Lu, Haihao and Yang, Jinwen}, + * journal={arXiv preprint arXiv:2407.16144}, + * year={2024} + * } + */ +static void set_Stable3() +{ + pdlp_hyper_params::initial_step_size_scaling = 1.0; + pdlp_hyper_params::default_l_inf_ruiz_iterations = 10; + pdlp_hyper_params::do_pock_chambolle_scaling = true; + pdlp_hyper_params::do_ruiz_scaling = true; + pdlp_hyper_params::default_alpha_pock_chambolle_rescaling = 1.0; + pdlp_hyper_params::default_artificial_restart_threshold = 0.36; + pdlp_hyper_params::compute_initial_step_size_before_scaling = false; + pdlp_hyper_params::compute_initial_primal_weight_before_scaling = + true; // TODO this is maybe why he disabled primal weight when bound rescaling is on, because + // TODO try with false + pdlp_hyper_params::initial_primal_weight_c_scaling = 1.0; + pdlp_hyper_params::initial_primal_weight_b_scaling = 1.0; + pdlp_hyper_params::major_iteration = 200; // TODO Try with something smaller + pdlp_hyper_params::min_iteration_restart = 0; + pdlp_hyper_params::restart_strategy = 3; + pdlp_hyper_params::never_restart_to_average = true; + pdlp_hyper_params::host_default_reduction_exponent = 0.3; + pdlp_hyper_params::host_default_growth_exponent = 0.6; + pdlp_hyper_params::host_default_primal_weight_update_smoothing = 0.5; + pdlp_hyper_params::host_default_sufficient_reduction_for_restart = 0.2; + pdlp_hyper_params::host_default_necessary_reduction_for_restart = 0.8; + pdlp_hyper_params::host_primal_importance = 1.0; + pdlp_hyper_params::host_primal_distance_smoothing = 0.5; + pdlp_hyper_params::host_dual_distance_smoothing = 0.5; + pdlp_hyper_params::compute_last_restart_before_new_primal_weight = true; + pdlp_hyper_params::artificial_restart_in_main_loop = false; + pdlp_hyper_params::rescale_for_restart = true; + pdlp_hyper_params::update_primal_weight_on_initial_solution = false; + pdlp_hyper_params::update_step_size_on_initial_solution = false; + pdlp_hyper_params::handle_some_primal_gradients_on_finite_bounds_as_residuals = false; + pdlp_hyper_params::project_initial_primal = true; + pdlp_hyper_params::use_adaptive_step_size_strategy = false; + pdlp_hyper_params::initial_step_size_max_singular_value = true; + pdlp_hyper_params::initial_primal_weight_combined_bounds = false; + pdlp_hyper_params::bound_objective_rescaling = true; + pdlp_hyper_params::use_reflected_primal_dual = true; + pdlp_hyper_params::use_fixed_point_error = true; + pdlp_hyper_params::use_conditional_major = true; } // Legacy/Original/Initial PDLP settings @@ -163,6 +237,14 @@ static void set_Methodical1() pdlp_hyper_params::update_step_size_on_initial_solution = false; pdlp_hyper_params::handle_some_primal_gradients_on_finite_bounds_as_residuals = true; pdlp_hyper_params::project_initial_primal = false; + pdlp_hyper_params::use_adaptive_step_size_strategy = true; + pdlp_hyper_params::initial_step_size_max_singular_value = false; + pdlp_hyper_params::initial_primal_weight_combined_bounds = true; + pdlp_hyper_params::bound_objective_rescaling = false; + pdlp_hyper_params::use_reflected_primal_dual = false; + pdlp_hyper_params::use_fixed_point_error = false; + pdlp_hyper_params::reflection_coefficient = 1.0; + pdlp_hyper_params::use_conditional_major = false; } // Can be extremly faster but usually leads to more divergence @@ -198,6 +280,14 @@ static void set_Fast1() pdlp_hyper_params::update_step_size_on_initial_solution = false; pdlp_hyper_params::handle_some_primal_gradients_on_finite_bounds_as_residuals = true; pdlp_hyper_params::project_initial_primal = false; + pdlp_hyper_params::use_adaptive_step_size_strategy = true; + pdlp_hyper_params::initial_step_size_max_singular_value = false; + pdlp_hyper_params::initial_primal_weight_combined_bounds = true; + pdlp_hyper_params::bound_objective_rescaling = false; + pdlp_hyper_params::use_reflected_primal_dual = false; + pdlp_hyper_params::use_fixed_point_error = false; + pdlp_hyper_params::reflection_coefficient = 1.0; + pdlp_hyper_params::use_conditional_major = false; } template @@ -211,6 +301,8 @@ void set_pdlp_solver_mode(pdlp_solver_settings_t const& settings) set_Methodical1(); else if (settings.pdlp_solver_mode == pdlp_solver_mode_t::Fast1) set_Fast1(); + else if (settings.pdlp_solver_mode == pdlp_solver_mode_t::Stable3) + set_Stable3(); } void setup_device_symbols(rmm::cuda_stream_view stream_view) @@ -809,7 +901,8 @@ optimization_problem_solution_t solve_lp( \ template optimization_problem_t mps_data_model_to_optimization_problem( \ raft::handle_t const* handle_ptr, \ - const cuopt::mps_parser::mps_data_model_t& data_model); + const cuopt::mps_parser::mps_data_model_t& data_model); \ + template void set_pdlp_solver_mode(pdlp_solver_settings_t const& settings); #if MIP_INSTANTIATE_FLOAT INSTANTIATE(float) diff --git a/cpp/src/linear_programming/solve.cuh b/cpp/src/linear_programming/solve.cuh index 3098fd1b9..bc95bc268 100644 --- a/cpp/src/linear_programming/solve.cuh +++ b/cpp/src/linear_programming/solve.cuh @@ -38,4 +38,7 @@ cuopt::linear_programming::optimization_problem_solution_t solve_lp_wi const timer_t& timer, bool is_batch_mode = false); +template +void set_pdlp_solver_mode(pdlp_solver_settings_t const& settings); + } // namespace cuopt::linear_programming diff --git a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu index 156986918..e888709a1 100644 --- a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu +++ b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.cu @@ -58,7 +58,8 @@ adaptive_step_size_strategy_t::adaptive_step_size_strategy_t( reusable_device_scalar_value_0_{f_t(0.0), stream_view_}, graph(stream_view_, is_batch_mode) { - valid_step_size_ = make_unique_cuda_host_pinned(); + valid_step_size_ = make_unique_cuda_host_pinned(); + *valid_step_size_ = 0; } void set_adaptive_step_size_hyper_parameters(rmm::cuda_stream_view stream_view) @@ -194,6 +195,24 @@ i_t adaptive_step_size_strategy_t::get_valid_step_size() const return *valid_step_size_; } +template +f_t adaptive_step_size_strategy_t::get_interaction() const +{ + return interaction_.value(stream_view_); +} + +template +f_t adaptive_step_size_strategy_t::get_norm_squared_delta_primal() const +{ + return norm_squared_delta_primal_.value(stream_view_); +} + +template +f_t adaptive_step_size_strategy_t::get_norm_squared_delta_dual() const +{ + return norm_squared_delta_dual_.value(stream_view_); +} + template void adaptive_step_size_strategy_t::set_valid_step_size(i_t valid) { diff --git a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp index 4649caaf1..348308cb6 100644 --- a/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp +++ b/cpp/src/linear_programming/step_size_strategy/adaptive_step_size_strategy.hpp @@ -74,12 +74,15 @@ class adaptive_step_size_strategy_t { i_t get_valid_step_size() const; void set_valid_step_size(i_t); + f_t get_interaction() const; + f_t get_norm_squared_delta_primal() const; + f_t get_norm_squared_delta_dual() const; - private: void compute_interaction_and_movement(rmm::device_uvector& tmp_primal, cusparse_view_t& cusparse_view, saddle_point_state_t& current_saddle_point_state); + private: // Stream pool to run different step size computation in parallel // Because we already have the main stream, we just need 2 extra streams from this rmm::cuda_stream_pool stream_pool_; diff --git a/cpp/src/linear_programming/termination_strategy/convergence_information.cu b/cpp/src/linear_programming/termination_strategy/convergence_information.cu index ce579e310..9b8eff7f6 100644 --- a/cpp/src/linear_programming/termination_strategy/convergence_information.cu +++ b/cpp/src/linear_programming/termination_strategy/convergence_information.cu @@ -18,6 +18,7 @@ #include #include #include + #include #include @@ -61,15 +62,18 @@ convergence_information_t::convergence_information_t( nb_violated_constraints_{0, stream_view_}, gap_{0.0, stream_view_}, abs_objective_{0.0, stream_view_}, - l2_primal_variable_{0.0, stream_view_}, - l2_dual_variable_{0.0, stream_view_}, primal_residual_{static_cast(dual_size_h_), stream_view_}, dual_residual_{static_cast(primal_size_h_), stream_view_}, reduced_cost_{static_cast(primal_size_h_), stream_view_}, bound_value_{static_cast(std::max(primal_size_h_, dual_size_h_)), stream_view_}, + primal_slack_{ + (pdlp_hyper_params::use_reflected_primal_dual) ? static_cast(dual_size_h_) : 0, + stream_view_}, reusable_device_scalar_value_1_{1.0, stream_view_}, reusable_device_scalar_value_0_{0.0, stream_view_}, - reusable_device_scalar_value_neg_1_{-1.0, stream_view_} + reusable_device_scalar_value_neg_1_{-1.0, stream_view_}, + dual_dot_{stream_view_}, + sum_primal_slack_{stream_view_} { combine_constraint_bounds( *problem_ptr, @@ -79,7 +83,15 @@ convergence_information_t::convergence_information_t( // constant throughout solving, so precompute my_l2_norm( problem_ptr->objective_coefficients, l2_norm_primal_linear_objective_, handle_ptr_); - my_l2_norm(primal_residual_, l2_norm_primal_right_hand_side_, handle_ptr_); + + if (pdlp_hyper_params::initial_primal_weight_combined_bounds) + my_l2_norm(primal_residual_, l2_norm_primal_right_hand_side_, handle_ptr_); + else { + compute_sum_bounds(problem_ptr->constraint_lower_bounds, + problem_ptr->constraint_upper_bounds, + l2_norm_primal_right_hand_side_, + handle_ptr_->get_stream()); + } void* d_temp_storage = NULL; size_t temp_storage_bytes_1 = 0; @@ -151,15 +163,20 @@ void convergence_information_t::compute_convergence_information( pdhg_solver_t& current_pdhg_solver, rmm::device_uvector& primal_iterate, rmm::device_uvector& dual_iterate, + [[maybe_unused]] const rmm::device_uvector& dual_slack, const rmm::device_uvector& combined_bounds, const rmm::device_uvector& objective_coefficients, const pdlp_solver_settings_t& settings) { raft::common::nvtx::range fun_scope("compute_convergence_information"); - compute_primal_residual(op_problem_cusparse_view_, current_pdhg_solver.get_dual_tmp_resource()); + compute_primal_residual( + op_problem_cusparse_view_, current_pdhg_solver.get_dual_tmp_resource(), dual_iterate); compute_primal_objective(primal_iterate); my_l2_norm(primal_residual_, l2_primal_residual_, handle_ptr_); +#ifdef CUPDLP_DEBUG_MODE + printf("Absolute Primal Residual: %lf\n", l2_primal_residual_.value(stream_view_)); +#endif // If per_constraint_residual is false we still need to perform the l2 since it's used in kkt if (settings.per_constraint_residual) { // Compute the linf of (residual_i - rel * b_i) @@ -186,12 +203,16 @@ void convergence_information_t::compute_convergence_information( thrust::maximum()); } } - my_l2_norm(primal_iterate, l2_primal_variable_, handle_ptr_); - compute_dual_residual( - op_problem_cusparse_view_, current_pdhg_solver.get_primal_tmp_resource(), primal_iterate); - compute_dual_objective(dual_iterate); + compute_dual_residual(op_problem_cusparse_view_, + current_pdhg_solver.get_primal_tmp_resource(), + primal_iterate, + dual_slack); + compute_dual_objective(dual_iterate, primal_iterate, dual_slack); my_l2_norm(dual_residual_, l2_dual_residual_, handle_ptr_); +#ifdef CUPDLP_DEBUG_MODE + printf("Absolute Dual Residual: %lf\n", l2_dual_residual_.value(stream_view_)); +#endif // If per_constraint_residual is false we still need to perform the l2 since it's used in kkt if (settings.per_constraint_residual) { // Compute the linf of (residual_i - rel * c_i) @@ -206,7 +227,6 @@ void convergence_information_t::compute_convergence_information( neutral, thrust::maximum()); } - my_l2_norm(dual_iterate, l2_dual_variable_, handle_ptr_); compute_remaining_stats_kernel<<<1, 1, 0, stream_view_>>>(this->view()); RAFT_CUDA_TRY(cudaPeekAtLastError()); @@ -218,9 +238,17 @@ void convergence_information_t::compute_convergence_information( cudaMemsetAsync(dual_residual_.data(), 0.0, sizeof(f_t) * dual_residual_.size(), stream_view_)); } +template +DI f_t finite_or_zero(f_t in) +{ + return isfinite(in) ? in : f_t(0.0); +} + template void convergence_information_t::compute_primal_residual( - cusparse_view_t& cusparse_view, rmm::device_uvector& tmp_dual) + cusparse_view_t& cusparse_view, + rmm::device_uvector& tmp_dual, + [[maybe_unused]] const rmm::device_uvector& dual_iterate) { raft::common::nvtx::range fun_scope("compute_primal_residual"); @@ -237,14 +265,31 @@ void convergence_information_t::compute_primal_residual( (f_t*)cusparse_view.buffer_non_transpose.data(), stream_view_)); - // The constraint bound violations for the first part of the residual - raft::linalg::ternaryOp>(primal_residual_.data(), - tmp_dual.data(), - problem_ptr->constraint_lower_bounds.data(), - problem_ptr->constraint_upper_bounds.data(), - dual_size_h_, - violation(), - stream_view_); + if (!pdlp_hyper_params::use_reflected_primal_dual) { + // The constraint bound violations for the first part of the residual + raft::linalg::ternaryOp>(primal_residual_.data(), + tmp_dual.data(), + problem_ptr->constraint_lower_bounds.data(), + problem_ptr->constraint_upper_bounds.data(), + dual_size_h_, + violation(), + stream_view_); + } else { + cub::DeviceTransform::Transform( + cuda::std::make_tuple(tmp_dual.data(), + problem_ptr->constraint_lower_bounds.data(), + problem_ptr->constraint_upper_bounds.data(), + dual_iterate.data()), + thrust::make_zip_iterator(primal_residual_.data(), primal_slack_.data()), + primal_residual_.size(), + [] __device__(f_t Ax, f_t lower, f_t upper, f_t dual) -> thrust::tuple { + const f_t clamped_Ax = raft::max(lower, raft::min(Ax, upper)); + return {Ax - clamped_Ax, + raft::max(dual, f_t(0.0)) * finite_or_zero(lower) + + raft::min(dual, f_t(0.0)) * finite_or_zero(upper)}; + }, + stream_view_); + } } template @@ -281,13 +326,18 @@ void convergence_information_t::compute_primal_objective( problem_ptr->presolve_data.objective_offset); RAFT_CUDA_TRY(cudaPeekAtLastError()); } + +#ifdef CUPDLP_DEBUG_MODE + printf("Primal objective %lf\n", primal_objective_.value(stream_view_)); +#endif } template void convergence_information_t::compute_dual_residual( cusparse_view_t& cusparse_view, rmm::device_uvector& tmp_primal, - rmm::device_uvector& primal_solution) + rmm::device_uvector& primal_solution, + [[maybe_unused]] const rmm::device_uvector& dual_slack) { raft::common::nvtx::range fun_scope("compute_dual_residual"); // compute objective product (Q*x) if QP @@ -295,33 +345,50 @@ void convergence_information_t::compute_dual_residual( // gradient is recomputed with the dual solution that has been computed since the gradient was // last computed // c-K^Ty -> copy c to gradient first - raft::copy( - tmp_primal.data(), problem_ptr->objective_coefficients.data(), primal_size_h_, stream_view_); + thrust::fill(handle_ptr_->get_thrust_policy(), tmp_primal.begin(), tmp_primal.end(), f_t(0)); RAFT_CUSPARSE_TRY(raft::sparse::detail::cusparsespmv(handle_ptr_->get_cusparse_handle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - reusable_device_scalar_value_neg_1_.data(), + reusable_device_scalar_value_1_.data(), cusparse_view.A_T, cusparse_view.dual_solution, - reusable_device_scalar_value_1_.data(), + reusable_device_scalar_value_0_.data(), cusparse_view.tmp_primal, CUSPARSE_SPMV_CSR_ALG2, (f_t*)cusparse_view.buffer_transpose.data(), stream_view_)); - compute_reduced_cost_from_primal_gradient(tmp_primal, primal_solution); - - // primal_gradient - reduced_costs - raft::linalg::eltwiseSub(dual_residual_.data(), - tmp_primal.data(), // primal_gradient - reduced_cost_.data(), + // Substract with the objective vector manually to avoid possible cusparse bug w/ nonzero beta and + // len(X)=1 + raft::linalg::eltwiseSub(tmp_primal.data(), + problem_ptr->objective_coefficients.data(), + tmp_primal.data(), primal_size_h_, stream_view_); + + if (pdlp_hyper_params::use_reflected_primal_dual) { + cub::DeviceTransform::Transform(cuda::std::make_tuple(tmp_primal.data(), dual_slack.data()), + dual_residual_.data(), + dual_residual_.size(), + cuda::std::minus<>{}, + stream_view_); + } else { + compute_reduced_cost_from_primal_gradient(tmp_primal, primal_solution); + + // primal_gradient - reduced_costs + raft::linalg::eltwiseSub(dual_residual_.data(), + tmp_primal.data(), // primal_gradient + reduced_cost_.data(), + primal_size_h_, + stream_view_); + } } template void convergence_information_t::compute_dual_objective( - rmm::device_uvector& dual_solution) + rmm::device_uvector& dual_solution, + [[maybe_unused]] const rmm::device_uvector& primal_solution, + [[maybe_unused]] const rmm::device_uvector& dual_slack) { raft::common::nvtx::range fun_scope("compute_dual_objective"); @@ -331,29 +398,50 @@ void convergence_information_t::compute_dual_objective( // the value of y term in the objective of the dual problem, see[] // (l^c)^T[y]_+ − (u^c)^T[y]_− in the dual objective - raft::linalg::ternaryOp(bound_value_.data(), - dual_solution.data(), - problem_ptr->constraint_lower_bounds.data(), - problem_ptr->constraint_upper_bounds.data(), - dual_size_h_, - constraint_bound_value_reduced_cost_product(), - stream_view_); + if (!pdlp_hyper_params::use_reflected_primal_dual) { + raft::linalg::ternaryOp(bound_value_.data(), + dual_solution.data(), + problem_ptr->constraint_lower_bounds.data(), + problem_ptr->constraint_upper_bounds.data(), + dual_size_h_, + constraint_bound_value_reduced_cost_product(), + stream_view_); - cub::DeviceReduce::Sum(rmm_tmp_buffer_.data(), - size_of_buffer_, - bound_value_.begin(), - dual_objective_.data(), - dual_size_h_, - stream_view_); + cub::DeviceReduce::Sum(rmm_tmp_buffer_.data(), + size_of_buffer_, + bound_value_.begin(), + dual_objective_.data(), + dual_size_h_, + stream_view_); - compute_reduced_costs_dual_objective_contribution(); + compute_reduced_costs_dual_objective_contribution(); - raft::linalg::eltwiseAdd(dual_objective_.data(), - dual_objective_.data(), - reduced_cost_dual_objective_.data(), - 1, + raft::linalg::eltwiseAdd(dual_objective_.data(), + dual_objective_.data(), + reduced_cost_dual_objective_.data(), + 1, + stream_view_); + } else { + RAFT_CUBLAS_TRY(raft::linalg::detail::cublasdot(handle_ptr_->get_cublas_handle(), + primal_size_h_, + dual_slack.data(), + primal_stride, + primal_solution.data(), + primal_stride, + dual_dot_.data(), + stream_view_)); + + cub::DeviceReduce::Sum(rmm_tmp_buffer_.data(), + size_of_buffer_, + primal_slack_.begin(), + sum_primal_slack_.data(), + dual_size_h_, stream_view_); + const f_t sum = dual_dot_.value(stream_view_) + sum_primal_slack_.value(stream_view_); + dual_objective_.set_value_async(sum, stream_view_); + } + // dual_objective = 1 * (dual_objective + 0) = dual_objective if (problem_ptr->presolve_data.objective_scaling_factor != 1 || problem_ptr->presolve_data.objective_offset != 0) { @@ -363,6 +451,10 @@ void convergence_information_t::compute_dual_objective( problem_ptr->presolve_data.objective_offset); RAFT_CUDA_TRY(cudaPeekAtLastError()); } + +#ifdef CUPDLP_DEBUG_MODE + printf("Dual objective %lf\n", dual_objective_.value(stream_view_)); +#endif } template @@ -511,9 +603,6 @@ typename convergence_information_t::view_t convergence_information_t& current_pdhg_solver, rmm::device_uvector& primal_iterate, rmm::device_uvector& dual_iterate, + [[maybe_unused]] const rmm::device_uvector& dual_slack, const rmm::device_uvector& combined_bounds, // Only useful if per_constraint_residual const rmm::device_uvector& objective_coefficients, // Only useful if per_constraint_residual @@ -91,9 +92,6 @@ class convergence_information_t { f_t* gap; f_t* abs_objective; - f_t* l2_primal_variable; - f_t* l2_dual_variable; - f_t* primal_residual; f_t* dual_residual; f_t* reduced_cost; @@ -125,17 +123,21 @@ class convergence_information_t { primal_quality_adapter_t to_primal_quality_adapter(bool is_primal_feasible) const noexcept; - private: void compute_primal_residual(cusparse_view_t& cusparse_view, - rmm::device_uvector& tmp_dual); + rmm::device_uvector& tmp_dual, + [[maybe_unused]] const rmm::device_uvector& dual_iterate); + private: void compute_primal_objective(rmm::device_uvector& primal_solution); void compute_dual_residual(cusparse_view_t& cusparse_view, rmm::device_uvector& tmp_primal, - rmm::device_uvector& primal_solution); + rmm::device_uvector& primal_solution, + [[maybe_unused]] const rmm::device_uvector& dual_slack); - void compute_dual_objective(rmm::device_uvector& dual_solution); + void compute_dual_objective(rmm::device_uvector& dual_solution, + [[maybe_unused]] const rmm::device_uvector& primal_solution, + [[maybe_unused]] const rmm::device_uvector& dual_slack); void compute_reduced_cost_from_primal_gradient(const rmm::device_uvector& primal_gradient, const rmm::device_uvector& primal_solution); @@ -172,20 +174,23 @@ class convergence_information_t { rmm::device_scalar gap_; rmm::device_scalar abs_objective_; - rmm::device_scalar l2_primal_variable_; - rmm::device_scalar l2_dual_variable_; - // used for computations and can be reused rmm::device_uvector primal_residual_; rmm::device_uvector dual_residual_; rmm::device_uvector reduced_cost_; rmm::device_uvector bound_value_; + // used for reflected + rmm::device_uvector primal_slack_; + rmm::device_buffer rmm_tmp_buffer_; size_t size_of_buffer_; const rmm::device_scalar reusable_device_scalar_value_1_; const rmm::device_scalar reusable_device_scalar_value_0_; const rmm::device_scalar reusable_device_scalar_value_neg_1_; + + rmm::device_scalar dual_dot_; + rmm::device_scalar sum_primal_slack_; }; } // namespace cuopt::linear_programming::detail diff --git a/cpp/src/linear_programming/termination_strategy/termination_strategy.cu b/cpp/src/linear_programming/termination_strategy/termination_strategy.cu index fcb66cdd0..61e461a2e 100644 --- a/cpp/src/linear_programming/termination_strategy/termination_strategy.cu +++ b/cpp/src/linear_programming/termination_strategy/termination_strategy.cu @@ -81,6 +81,7 @@ pdlp_termination_status_t pdlp_termination_strategy_t::evaluate_termin pdhg_solver_t& current_pdhg_solver, rmm::device_uvector& primal_iterate, rmm::device_uvector& dual_iterate, + [[maybe_unused]] const rmm::device_uvector& dual_slack, const rmm::device_uvector& combined_bounds, const rmm::device_uvector& objective_coefficients) { @@ -89,6 +90,7 @@ pdlp_termination_status_t pdlp_termination_strategy_t::evaluate_termin convergence_information_.compute_convergence_information(current_pdhg_solver, primal_iterate, dual_iterate, + dual_slack, combined_bounds, objective_coefficients, settings_); diff --git a/cpp/src/linear_programming/termination_strategy/termination_strategy.hpp b/cpp/src/linear_programming/termination_strategy/termination_strategy.hpp index 4a7948a84..ec8d17df2 100644 --- a/cpp/src/linear_programming/termination_strategy/termination_strategy.hpp +++ b/cpp/src/linear_programming/termination_strategy/termination_strategy.hpp @@ -46,6 +46,7 @@ class pdlp_termination_strategy_t { pdhg_solver_t& current_pdhg_solver, rmm::device_uvector& primal_iterate, rmm::device_uvector& dual_iterate, + [[maybe_unused]] const rmm::device_uvector& dual_slack, const rmm::device_uvector& combined_bounds, // Only useful if per_constraint_residual const rmm::device_uvector& objective_coefficients // Only useful if per_constraint_residual diff --git a/cpp/src/linear_programming/utils.cuh b/cpp/src/linear_programming/utils.cuh index de107a66f..ca77146f6 100644 --- a/cpp/src/linear_programming/utils.cuh +++ b/cpp/src/linear_programming/utils.cuh @@ -69,6 +69,27 @@ struct max_abs_value { } }; +template +i_t conditional_major(uint64_t total_pdlp_iterations) +{ + uint64_t step = 10; + uint64_t threshold = 1000; + uint64_t iteration = 0; + + [[maybe_unused]] constexpr uint64_t max_u64 = std::numeric_limits::max(); + + while (total_pdlp_iterations >= threshold) { + ++iteration; + + cuopt_assert(step <= max_u64 / 10, "Overflow risk in step during conditional_major"); + cuopt_assert(threshold <= max_u64 / 10, "Overflow risk in threshold during conditional_major"); + + step *= 10; + threshold *= 10; + } + return step; +} + template struct a_sub_scalar_times_b { a_sub_scalar_times_b(const f_t* scalar) : scalar_{scalar} {} @@ -174,6 +195,53 @@ void inline combine_constraint_bounds(const problem_t& op_problem, } } +template +void inline compute_sum_bounds(const rmm::device_uvector& constraint_lower_bounds, + const rmm::device_uvector& constraint_upper_bounds, + rmm::device_scalar& out, + rmm::cuda_stream_view stream_view) +{ + rmm::device_buffer d_temp_storage; + size_t bytes; + auto main_op = [] HD(const thrust::tuple t) { + const f_t lower = thrust::get<0>(t); + const f_t upper = thrust::get<1>(t); + f_t sum = 0; + if (isfinite(lower) && (lower != upper)) sum += lower * lower; + if (isfinite(upper)) sum += upper * upper; + return sum; + }; + cub::DeviceReduce::TransformReduce( + nullptr, + bytes, + thrust::make_zip_iterator(constraint_lower_bounds.data(), constraint_upper_bounds.data()), + out.data(), + constraint_lower_bounds.size(), + cuda::std::plus<>{}, + main_op, + f_t(0), + stream_view); + + d_temp_storage.resize(bytes, stream_view); + + cub::DeviceReduce::TransformReduce( + d_temp_storage.data(), + bytes, + thrust::make_zip_iterator(constraint_lower_bounds.data(), constraint_upper_bounds.data()), + out.data(), + constraint_lower_bounds.size(), + cuda::std::plus<>{}, + main_op, + f_t(0), + stream_view); + + const f_t res = std::sqrt(out.value(stream_view)); + out.set_value_async(res, stream_view); + + // Sync since we are using local variable + RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view)); +} + template struct violation { violation() {} diff --git a/cpp/src/math_optimization/solver_settings.cu b/cpp/src/math_optimization/solver_settings.cu index a049d0d09..9e1381cbb 100644 --- a/cpp/src/math_optimization/solver_settings.cu +++ b/cpp/src/math_optimization/solver_settings.cu @@ -86,9 +86,10 @@ solver_settings_t::solver_settings_t() : pdlp_settings(), mip_settings }; // Int parameters + // TODO should we have Stable2 and Methodolical1 here? int_parameters = { {CUOPT_ITERATION_LIMIT, &pdlp_settings.iteration_limit, 0, std::numeric_limits::max(), std::numeric_limits::max()}, - {CUOPT_PDLP_SOLVER_MODE, reinterpret_cast(&pdlp_settings.pdlp_solver_mode), CUOPT_PDLP_SOLVER_MODE_STABLE1, CUOPT_PDLP_SOLVER_MODE_FAST1, CUOPT_PDLP_SOLVER_MODE_STABLE2}, + {CUOPT_PDLP_SOLVER_MODE, reinterpret_cast(&pdlp_settings.pdlp_solver_mode), CUOPT_PDLP_SOLVER_MODE_STABLE1, CUOPT_PDLP_SOLVER_MODE_STABLE3, CUOPT_PDLP_SOLVER_MODE_STABLE3}, {CUOPT_METHOD, reinterpret_cast(&pdlp_settings.method), CUOPT_METHOD_CONCURRENT, CUOPT_METHOD_DUAL_SIMPLEX, CUOPT_METHOD_CONCURRENT}, {CUOPT_NUM_CPU_THREADS, &mip_settings.num_cpu_threads, -1, std::numeric_limits::max(), -1} }; diff --git a/cpp/src/mip/relaxed_lp/relaxed_lp.cu b/cpp/src/mip/relaxed_lp/relaxed_lp.cu index d28ad7fbb..a32b3cf82 100644 --- a/cpp/src/mip/relaxed_lp/relaxed_lp.cu +++ b/cpp/src/mip/relaxed_lp/relaxed_lp.cu @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -58,6 +59,9 @@ optimization_problem_solution_t get_relaxed_lp_solution( pdlp_settings.concurrent_halt = settings.concurrent_halt; pdlp_settings.per_constraint_residual = settings.per_constraint_residual; pdlp_settings.first_primal_feasible = settings.return_first_feasible; + pdlp_settings.pdlp_solver_mode = pdlp_solver_mode_t::Stable2; + set_pdlp_solver_mode(pdlp_settings); + // TODO: set Stable3 here? pdlp_solver_t lp_solver(op_problem, pdlp_settings); if (settings.has_initial_primal) { i_t prev_size = lp_state.prev_dual.size(); diff --git a/cpp/tests/linear_programming/pdlp_test.cu b/cpp/tests/linear_programming/pdlp_test.cu index de40d8d32..2edf0d76c 100644 --- a/cpp/tests/linear_programming/pdlp_test.cu +++ b/cpp/tests/linear_programming/pdlp_test.cu @@ -166,8 +166,7 @@ TEST(pdlp_class, run_time_limit) cuopt::linear_programming::pdlp_solver_settings_t settings = cuopt::linear_programming::pdlp_solver_settings_t{}; - // 200 ms - constexpr double time_limit_seconds = 0.2; + constexpr double time_limit_seconds = 2; settings.time_limit = time_limit_seconds; // To make sure it doesn't return before the time limit settings.set_optimality_tolerance(0); @@ -212,7 +211,9 @@ TEST(pdlp_class, run_sub_mittleman) // Testing for each solver_mode is ok as it's parsing that is the bottleneck here, not // solving auto solver_mode_list = { + cuopt::linear_programming::pdlp_solver_mode_t::Stable3, cuopt::linear_programming::pdlp_solver_mode_t::Stable2, + cuopt::linear_programming::pdlp_solver_mode_t::Stable1, cuopt::linear_programming::pdlp_solver_mode_t::Methodical1, cuopt::linear_programming::pdlp_solver_mode_t::Fast1, }; @@ -224,6 +225,7 @@ TEST(pdlp_class, run_sub_mittleman) const raft::handle_t handle_{}; optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, settings); + printf("running %s mode %d presolve? %d\n", name.c_str(), (int)solver_mode, presolve); EXPECT_EQ((int)solution.get_termination_status(), CUOPT_TERIMINATION_STATUS_OPTIMAL); EXPECT_FALSE(is_incorrect_objective( expected_objective_value, @@ -687,11 +689,13 @@ TEST(pdlp_class, per_constraint_test) auto& current_termination_strategy = solver.get_current_termination_strategy(); pdlp_termination_status_t termination_average = - current_termination_strategy.evaluate_termination_criteria(solver.pdhg_solver_, - d_initial_primal, - d_initial_primal, - problem.combined_bounds, - problem.objective_coefficients); + current_termination_strategy.evaluate_termination_criteria( + solver.pdhg_solver_, + d_initial_primal, + d_initial_primal, + solver.pdhg_solver_.get_dual_slack(), + problem.combined_bounds, + problem.objective_coefficients); EXPECT_TRUE(termination_average != pdlp_termination_status_t::Optimal); } @@ -706,11 +710,13 @@ TEST(pdlp_class, per_constraint_test) auto& current_termination_strategy = solver.get_current_termination_strategy(); pdlp_termination_status_t termination_average = - current_termination_strategy.evaluate_termination_criteria(solver.pdhg_solver_, - d_initial_primal, - d_initial_primal, - problem.combined_bounds, - problem.objective_coefficients); + current_termination_strategy.evaluate_termination_criteria( + solver.pdhg_solver_, + d_initial_primal, + d_initial_primal, + solver.pdhg_solver_.get_dual_slack(), + problem.combined_bounds, + problem.objective_coefficients); EXPECT_EQ(current_termination_strategy.get_convergence_information() .get_relative_linf_primal_residual() .value(handle.get_stream()), @@ -921,8 +927,9 @@ TEST(pdlp_class, test_max) cuopt::mps_parser::mps_data_model_t op_problem = cuopt::mps_parser::parse_mps(path); - auto solver_settings = pdlp_solver_settings_t{}; - solver_settings.method = cuopt::linear_programming::method_t::PDLP; + auto solver_settings = pdlp_solver_settings_t{}; + solver_settings.method = cuopt::linear_programming::method_t::PDLP; + solver_settings.pdlp_solver_mode = cuopt::linear_programming::pdlp_solver_mode_t::Stable2; optimization_problem_solution_t solution = solve_lp(&handle_, op_problem, solver_settings); diff --git a/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu b/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu index b573bea03..ece05832f 100644 --- a/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu +++ b/cpp/tests/linear_programming/unit_tests/solver_settings_test.cu @@ -64,9 +64,9 @@ TEST(SolverSettingsTest, TestSetGet) EXPECT_TRUE(solver_settings.detect_infeasibility); // To avoid the "," inside the macros which are interpreted as extra parameters - auto Stable2 = cuopt::linear_programming::pdlp_solver_mode_t::Stable2; + auto Stable3 = cuopt::linear_programming::pdlp_solver_mode_t::Stable3; auto Fast1 = cuopt::linear_programming::pdlp_solver_mode_t::Fast1; - EXPECT_EQ(solver_settings.pdlp_solver_mode, Stable2); + EXPECT_EQ(solver_settings.pdlp_solver_mode, Stable3); solver_settings.pdlp_solver_mode = Fast1; EXPECT_EQ(solver_settings.pdlp_solver_mode, Fast1); diff --git a/cpp/tests/mip/unit_test.cu b/cpp/tests/mip/unit_test.cu index 1114c7aba..9897b2feb 100644 --- a/cpp/tests/mip/unit_test.cu +++ b/cpp/tests/mip/unit_test.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -132,6 +133,70 @@ mps_parser::mps_data_model_t create_single_var_milp_problem(bool ma return problem; } +TEST(LPTest, TestSampleLP2) +{ + raft::handle_t handle; + + // Construct a simple LP problem: + // Minimize: x + // Subject to: x <= 1 + // x <= 1 + // x >= 0 + + // One variable, two constraints (both x <= 1) + std::vector A_values = {1.0, 1.0}; + std::vector A_indices = {0, 0}; + std::vector A_offsets = {0, 1, 2}; // CSR: 2 constraints, 1 variable + + std::vector b = {1.0, 1.0}; // RHS for both constraints + std::vector b_lower = {-std::numeric_limits::infinity(), + -std::numeric_limits::infinity()}; + + std::vector c = {1.0}; // Objective: Minimize x + + std::vector row_types = {'L', 'L'}; // Both constraints are <= + + // Build the problem + mps_parser::mps_data_model_t problem; + problem.set_csr_constraint_matrix(A_values.data(), + A_values.size(), + A_indices.data(), + A_indices.size(), + A_offsets.data(), + A_offsets.size()); + problem.set_constraint_upper_bounds(b.data(), b.size()); + problem.set_constraint_lower_bounds(b_lower.data(), b_lower.size()); + + // Set variable bounds (x >= 0) + std::vector var_lower = {0.0}; + std::vector var_upper = {std::numeric_limits::infinity()}; + problem.set_variable_lower_bounds(var_lower.data(), var_lower.size()); + problem.set_variable_upper_bounds(var_upper.data(), var_upper.size()); + + problem.set_objective_coefficients(c.data(), c.size()); + problem.set_maximize(false); + // Set up solver settings + cuopt::linear_programming::pdlp_solver_settings_t settings{}; + settings.set_optimality_tolerance(1e-2); + settings.method = cuopt::linear_programming::method_t::PDLP; + settings.time_limit = 5; + + // Solve + auto result = cuopt::linear_programming::solve_lp(&handle, problem, settings); + + // Check results + EXPECT_EQ(result.get_termination_status(), + cuopt::linear_programming::pdlp_termination_status_t::Optimal); + ASSERT_EQ(result.get_primal_solution().size(), 1); + + // Copy solution to host to access values + auto primal_host = cuopt::host_copy(result.get_primal_solution()); + EXPECT_NEAR(primal_host[0], 0.0, 1e-6); + + EXPECT_NEAR(result.get_additional_termination_information().primal_objective, 0.0, 1e-6); + EXPECT_NEAR(result.get_additional_termination_information().dual_objective, 0.0, 1e-6); +} + TEST(LPTest, TestSampleLP) { raft::handle_t handle; diff --git a/datasets/linear_programming/good-mps-free-ranges.mps b/datasets/linear_programming/good-mps-free-ranges.mps index aee11d883..76551acb0 100644 --- a/datasets/linear_programming/good-mps-free-ranges.mps +++ b/datasets/linear_programming/good-mps-free-ranges.mps @@ -24,7 +24,6 @@ COLUMNS VAR2 COST 0.1 VAR2 ROW1 4 ROW2 10.1 VAR2 ROW3 0.2 ROW4 0.4 - VAR2 ROW3 0.2 ROW4 0.4 VAR2 ROW5 1.4 ROW6 2.4 RHS RHS1 ROW1 5.4 ROW2 1.5 diff --git a/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst b/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst index 4743584d9..6d942bde6 100644 --- a/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst +++ b/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst @@ -172,6 +172,7 @@ These constants are used to configure `CUOPT_PDLP_SOLVER_MODE` via :c:func:`cuOp .. doxygendefine:: CUOPT_PDLP_SOLVER_MODE_STABLE1 .. doxygendefine:: CUOPT_PDLP_SOLVER_MODE_STABLE2 +.. doxygendefine:: CUOPT_PDLP_SOLVER_MODE_STABLE3 .. doxygendefine:: CUOPT_PDLP_SOLVER_MODE_METHODICAL1 .. doxygendefine:: CUOPT_PDLP_SOLVER_MODE_FAST1 diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst index b97f31f16..c06a73968 100644 --- a/docs/cuopt/source/lp-milp-settings.rst +++ b/docs/cuopt/source/lp-milp-settings.rst @@ -93,8 +93,8 @@ PDLP Solver Mode ``CUOPT_PDLP_MODE`` controls the mode under which PDLP should operate. The mode will change the way the PDLP internally optimizes the problem. The mode choice can drastically impact how fast a specific problem will be solved. Users are encouraged to test different modes to see which one -fits the best their problem. By default, the solver uses ``Stable2``, the best -overall mode from our experiments. For now, only three modes are available: ``Stable2``, +fits the best their problem. By default, the solver uses ``Stable3``, the best +overall mode from our experiments. For now, only three modes are available: ``Stable3``, ``Methodical1``, and ``Fast1``. For now, we do not offer a mechanism to know upfront which solver mode will be the best diff --git a/python/cuopt/cuopt/linear_programming/solver/solver.pxd b/python/cuopt/cuopt/linear_programming/solver/solver.pxd index 255fcd764..d6adf14f0 100644 --- a/python/cuopt/cuopt/linear_programming/solver/solver.pxd +++ b/python/cuopt/cuopt/linear_programming/solver/solver.pxd @@ -39,6 +39,7 @@ cdef extern from "cuopt/linear_programming/pdlp/solver_settings.hpp" namespace " Stable2 "cuopt::linear_programming::pdlp_solver_mode_t::Stable2" # noqa Methodical1 "cuopt::linear_programming::pdlp_solver_mode_t::Methodical1" # noqa Fast1 "cuopt::linear_programming::pdlp_solver_mode_t::Fast1" # noqa + Stable3 "cuopt::linear_programming::pdlp_solver_mode_t::Stable3" # noqa cdef extern from "cuopt/linear_programming/solver_settings.hpp" namespace "cuopt::linear_programming": # noqa diff --git a/python/cuopt/cuopt/linear_programming/solver_settings/solver_settings.py b/python/cuopt/cuopt/linear_programming/solver_settings/solver_settings.py index 9f429e655..78f37b489 100644 --- a/python/cuopt/cuopt/linear_programming/solver_settings/solver_settings.py +++ b/python/cuopt/cuopt/linear_programming/solver_settings/solver_settings.py @@ -76,7 +76,7 @@ class PDLPSolverMode(IntEnum): Attributes ---------- - Stable2 + Stable3 Best overall mode from experiments; balances speed and convergence success. If you want to use the legacy version, use Stable1. Methodical1 @@ -86,13 +86,14 @@ class PDLPSolverMode(IntEnum): Notes ----- - Default mode is Stable2. + Default mode is Stable3. """ Stable1 = 0 Stable2 = auto() Methodical1 = auto() Fast1 = auto() + Stable3 = auto() def __str__(self): """Convert the solver mode to a string. diff --git a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py index ed0d04ec1..dc2b9997c 100644 --- a/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py +++ b/python/cuopt/cuopt/tests/linear_programming/test_lp_solver.py @@ -75,6 +75,8 @@ def test_solver(): settings = solver_settings.SolverSettings() settings.set_optimality_tolerance(1e-2) settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) + # FIXME: Stable3 infinite-loops on this sample trivial problem + settings.set_parameter(CUOPT_PDLP_SOLVER_MODE, PDLPSolverMode.Stable2) solution = solver.Solve(data_model_obj, settings) assert solution.get_termination_reason() == "Optimal" @@ -121,6 +123,7 @@ def test_very_low_tolerance(): assert solution.get_solve_time() <= expected_time * 5 +# TODO: should test all LP solver modes? def test_iteration_limit_solver(): file_path = ( @@ -129,7 +132,7 @@ def test_iteration_limit_solver(): data_model_obj = cuopt_mps_parser.ParseMps(file_path) settings = solver_settings.SolverSettings() - settings.set_optimality_tolerance(0) + settings.set_optimality_tolerance(1e-12) settings.set_parameter(CUOPT_ITERATION_LIMIT, 1) # Setting both to make sure the lowest one is picked settings.set_parameter(CUOPT_TIME_LIMIT, 99999999) @@ -151,8 +154,7 @@ def test_time_limit_solver(): data_model_obj = cuopt_mps_parser.ParseMps(file_path) settings = solver_settings.SolverSettings() - settings.set_optimality_tolerance(0) - # 200 ms + settings.set_optimality_tolerance(1e-12) time_limit_seconds = 0.2 settings.set_parameter(CUOPT_TIME_LIMIT, time_limit_seconds) # Setting both to make sure the lowest one is picked @@ -302,7 +304,7 @@ def test_solver_settings(): assert not settings.get_parameter(CUOPT_INFEASIBILITY_DETECTION) assert settings.get_parameter(CUOPT_PDLP_SOLVER_MODE) == int( - PDLPSolverMode.Stable2 + PDLPSolverMode.Stable3 ) with pytest.raises(ValueError): @@ -429,6 +431,7 @@ def test_parse_var_names(): settings = solver_settings.SolverSettings() settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP) + settings.set_parameter(CUOPT_PDLP_SOLVER_MODE, PDLPSolverMode.Stable2) solution = solver.Solve(data_model_obj, settings) expected_dict = { diff --git a/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solver_settings.py b/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solver_settings.py index 63703b3e4..7f78906c9 100644 --- a/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solver_settings.py +++ b/python/cuopt_self_hosted/cuopt_sh_client/thin_client_solver_settings.py @@ -48,7 +48,7 @@ class PDLPSolverMode(IntEnum): Attributes ---------- - Stable2 + Stable3 Best overall mode from experiments; balances speed and convergence success. If you want to use the legacy version, use Stable1. Methodical1 @@ -58,13 +58,14 @@ class PDLPSolverMode(IntEnum): Notes ----- - Default mode is Stable2. + Default mode is Stable3. """ Stable1 = 0 Stable2 = auto() Methodical1 = auto() Fast1 = auto() + Stable3 = auto() def __str__(self): """Convert the solver mode to a string. diff --git a/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py b/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py index 577bf2fca..be67894be 100644 --- a/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py +++ b/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py @@ -21,7 +21,9 @@ from cuopt.linear_programming import solver_settings from cuopt.linear_programming.solver.solver_parameters import ( CUOPT_INFEASIBILITY_DETECTION, + CUOPT_PDLP_SOLVER_MODE, ) +from cuopt.linear_programming.solver_settings import PDLPSolverMode from cuopt_server.tests.utils.utils import cuoptproc # noqa from cuopt_server.tests.utils.utils import ( @@ -42,6 +44,7 @@ def test_warmstart(cuoptproc): # noqa settings = solver_settings.SolverSettings() settings.set_optimality_tolerance(1e-4) settings.set_parameter(CUOPT_INFEASIBILITY_DETECTION, False) + settings.set_parameter(CUOPT_PDLP_SOLVER_MODE, PDLPSolverMode.Stable2) data["solver_config"] = settings.toDict() headers = {"CLIENT-VERSION": "custom"} diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py index 30dcf8695..160e1e930 100644 --- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py +++ b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py @@ -493,19 +493,21 @@ class SolverConfig(StrictModel): "Note: Not supported for MILP. ", ) pdlp_solver_mode: Optional[int] = Field( - default=1, + default=4, description="Solver mode to use for PDLP:" "
" "- Stable1: 0, Legacy stable mode" "
" - "- Stable2: 1, Best overall mode from experiments; " - "balances speed and convergence success" + "- Stable2: 1, Legacy stable mode" "
" "- Methodical1: 2, Takes slower individual steps, " "but fewer are needed to converge" "
" "- Fast1: 3, Fastest mode, but with less success in convergence" "
" + "- Stable3: 4, Best overall mode from experiments; " + "balances speed and convergence success" + "
" "Note: Not supported for MILP. ", ) method: Optional[int] = Field(