diff --git a/cpp/src/dual_simplex/CMakeLists.txt b/cpp/src/dual_simplex/CMakeLists.txt index 16ee502f8..d85471f9b 100644 --- a/cpp/src/dual_simplex/CMakeLists.txt +++ b/cpp/src/dual_simplex/CMakeLists.txt @@ -16,6 +16,7 @@ set(DUAL_SIMPLEX_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/basis_solves.cpp ${CMAKE_CURRENT_SOURCE_DIR}/basis_updates.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/bound_flipping_ratio_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/branch_and_bound.cpp ${CMAKE_CURRENT_SOURCE_DIR}/crossover.cpp ${CMAKE_CURRENT_SOURCE_DIR}/initial_basis.cpp @@ -30,6 +31,7 @@ set(DUAL_SIMPLEX_SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/singletons.cpp ${CMAKE_CURRENT_SOURCE_DIR}/solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sparse_matrix.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/sparse_vector.cpp ${CMAKE_CURRENT_SOURCE_DIR}/tic_toc.cpp ${CMAKE_CURRENT_SOURCE_DIR}/triangle_solve.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vector_math.cpp) diff --git a/cpp/src/dual_simplex/basis_solves.cpp b/cpp/src/dual_simplex/basis_solves.cpp index d363c3ee8..53464fc66 100644 --- a/cpp/src/dual_simplex/basis_solves.cpp +++ b/cpp/src/dual_simplex/basis_solves.cpp @@ -364,7 +364,8 @@ i_t factorize_basis(const csc_matrix_t& A, for (i_t h = 0; h < Sdim; ++h) { identity[h] = h; } - Srank = right_looking_lu(S, medium_tol, identity, S_col_perm, SL, SU, S_perm_inv); + Srank = right_looking_lu( + S, settings.threshold_partial_pivoting_tol, identity, S_col_perm, SL, SU, S_perm_inv); if (Srank != Sdim) { // Get the rank deficient columns deficient.resize(Sdim - Srank); diff --git a/cpp/src/dual_simplex/basis_updates.cpp b/cpp/src/dual_simplex/basis_updates.cpp index 78737c167..a7654ce12 100644 --- a/cpp/src/dual_simplex/basis_updates.cpp +++ b/cpp/src/dual_simplex/basis_updates.cpp @@ -30,6 +30,14 @@ i_t basis_update_t::b_solve(const std::vector& rhs, std::vector +i_t basis_update_t::b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const +{ + sparse_vector_t Lsol(rhs.n, 0); + return b_solve(rhs, solution, Lsol); +} + template i_t basis_update_t::b_solve(const std::vector& rhs, std::vector& solution, @@ -55,6 +63,33 @@ i_t basis_update_t::b_solve(const std::vector& rhs, return 0; } +template +i_t basis_update_t::b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& Lsol) const +{ + const i_t m = L0_.m; + assert(row_permutation_.size() == m); + assert(rhs.n == m); + assert(solution.n == m); + assert(Lsol.n == m); + + // P*B = L*U + // B*x = b + // P*B*x = P*b = b' + solution = rhs; + solution.inverse_permute_vector(inverse_row_permutation_); + + // L*U*x = b' + // Solve for v such that L*v = b' + l_solve(solution); + Lsol = solution; + + // Solve for x such that U*x = v + u_solve(solution); + return 0; +} + template i_t basis_update_t::b_transpose_solve(const std::vector& rhs, std::vector& solution) const @@ -87,6 +122,112 @@ i_t basis_update_t::b_transpose_solve(const std::vector& rhs, return 0; } +template +i_t basis_update_t::b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const +{ + // Observe that + // P*B = L*U + // B'*P' = U'*L' + // We want to solve + // B'*y = c + // Let y = P'*w + // B'*y = B'*P'*w = U'*L'*w = c + // 1. Solve U'*r = c for r + // 2. Solve L'*w = r for w + // 3. Compute y = P'*w + + const i_t m = L0_.m; + assert(rhs.n == m); + assert(solution.n == m); + + // Solve for r such that U'*r = c + // Actually Q*U'*Q'*r = c + sparse_vector_t r = rhs; + u_transpose_solve(r); + +#ifdef CHECK_U_TRANSPOSE_SOLVE + std::vector residual; + rhs.to_dense(residual); + std::vector r_dense; + r.to_dense(r_dense); + std::vector product(m); + // Q * U' * Q' * r_dense - c + + std::vector r_dense_permuted(m); + inverse_permute_vector(col_permutation_, r_dense, r_dense_permuted); + + // product = U' * Q' * r_dense + matrix_transpose_vector_multiply(U_, 1.0, r_dense_permuted, 0.0, product); + std::vector product_permuted(m); + permute_vector(col_permutation_, product, product_permuted); + // residual = product_permuted - c + for (i_t k = 0; k < m; ++k) { + residual[k] -= product_permuted[k]; + } + + const f_t Ut_error = vector_norm_inf(residual); + if (Ut_error > 1e-6) { + printf("|| U' * r - c || %e\n", Ut_error); + for (i_t k = 0; k < m; ++k) { + if (std::abs(residual[k]) > 1e-6) { printf("%d residual %e\n", k, residual[k]); } + } + printf("rhs nz %d\n", rhs.i.size()); + } +#endif + + // Solve for w such that L'*w = r + l_transpose_solve(r); + + // y = P'*w + r.inverse_permute_vector(row_permutation_, solution); + +#ifdef CHECK_PERMUTATION + std::vector r_dense2; + r.to_dense(r_dense2); + std::vector solution_dense_permuted(m); + permute_vector(inverse_row_permutation_, r_dense2, solution_dense_permuted); + std::vector solution_dense; + solution.to_dense(solution_dense); + bool found_error = false; + for (i_t k = 0; k < m; ++k) { + if (std::abs(solution_dense[k] - solution_dense_permuted[k]) > 1e-6) { + printf("B transpose inverse permutation error %d %e %e\n", + k, + solution_dense[k], + solution_dense_permuted[k]); + found_error = true; + } + } + if (found_error) { + for (i_t k = 0; k < m; ++k) { + printf("%d (sparse -> permuted -> dense) %e (sparse -> dense -> permuted)%e\n", + k, + solution_dense[k], + solution_dense_permuted[k]); + } + for (i_t k = 0; k < solution.i.size(); ++k) { + printf("%d solution sparse %d %e\n", k, solution.i[k], solution.x[k]); + } + for (i_t k = 0; k < m; ++k) { + if (solution_dense[k] != 0.0) { printf("%d solution dense %e\n", k, solution_dense[k]); } + } + for (i_t k = 0; k < m; ++k) { + printf("inv permutation %d %d\n", k, inverse_row_permutation_[k]); + } + for (i_t k = 0; k < m; ++k) { + if (r_dense2[k] != 0.0) { printf("%d r dense %e\n", k, r_dense2[k]); } + } + for (i_t k = 0; k < m; ++k) { + if (solution_dense_permuted[k] != 0.0) { + printf("%d solution dense permuted %e\n", k, solution_dense_permuted[k]); + } + } + } +#endif + return 0; +} + // Solve for x such that L*x = b template i_t basis_update_t::l_solve(std::vector& rhs) const @@ -101,7 +242,6 @@ i_t basis_update_t::l_solve(std::vector& rhs) const #endif // First solve // L0*x0 = b - // TODO: Handle a sparse rhs dual_simplex::lower_triangular_solve(L0_, rhs); #ifdef CHECK_LOWER_SOLVE { @@ -129,6 +269,92 @@ i_t basis_update_t::l_solve(std::vector& rhs) const return 0; } +template +i_t basis_update_t::l_solve(sparse_vector_t& rhs) const +{ + // L = L0 * R1^{-1} * R2^{-1} * ... * Rk^{-1} + // + // where Ri = I + e_r d^T + + // First solve + // L0*x0 = b + const i_t m = L0_.m; + + i_t top = sparse_triangle_solve( + rhs, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + +#ifdef CHECK_L_SOLVE + std::vector residual(m, 0.0); + const i_t col_start = B.col_start[0]; + const i_t col_end = B.col_start[1]; + for (i_t p = col_start; p < col_end; ++p) { + residual[B.i[p]] = B.x[p]; + } + + std::vector x0; + rhs.to_dense(x0); + matrix_vector_multiply(L0_, 1.0, x0, -1.0, residual); + const f_t L0_solve_error = vector_norm_inf(residual); + if (L0_solve_error > 1e-10) { printf("L0 solve error %e\n", L0_solve_error); } +#endif + + // then solve R1^{-1}*x1 = x0 -> x1 = R1*x0 + // then solve R2^{-1}*x2 = x1 -> x2 = R2*x1 + // until we get to + // Rk^{-1}*x = xk-1 -> x = Rk*xk-1 + // Rk = (I + e_rk dk^T) + // x = Rk*xk-1 = xk-1 + erk (dk^T xk-1) + +#ifdef CHECK_MULTIPLY + std::vector multiply; + rhs.to_dense(multiply); +#endif + + i_t nz = scatter_into_workspace(rhs); + + for (i_t k = 0; k < num_updates_; ++k) { + const i_t r = pivot_indices_[k]; + f_t dot = 0.0; + const i_t col_start = S_.col_start[k]; + const i_t col_end = S_.col_start[k + 1]; + for (i_t p = col_start; p < col_end; ++p) { + if (xi_workspace_[S_.i[p]]) { dot += S_.x[p] * x_workspace_[S_.i[p]]; } + } + if (!xi_workspace_[r]) { + xi_workspace_[r] = 1; + xi_workspace_[m + nz] = r; + nz++; + } + x_workspace_[r] += dot; + +#ifdef CHECK_MULTIPLY + f_t dot2 = 0.0; + for (i_t p = col_start; p < col_end; ++p) { + dot2 += S_.x[p] * multiply[S_.i[p]]; + } + multiply[r] += dot2; +#endif + } + + // Gather the solution into rhs + gather_into_sparse_vector(nz, rhs); + + rhs.sort(); + +#ifdef CHECK_MULTIPLY + std::vector rhs_dense; + rhs.to_dense(rhs_dense); + for (i_t k = 0; k < m; ++k) { + if (std::abs(rhs_dense[k] - multiply[k]) > 1e-10) { + printf("l_solve rhs dense/multiply error %d %e %e\n", k, rhs_dense[k], multiply[k]); + } + } +#endif + + return 0; +} + // Solve for y such that L'*y = c template i_t basis_update_t::l_transpose_solve(std::vector& rhs) const @@ -153,6 +379,149 @@ i_t basis_update_t::l_transpose_solve(std::vector& rhs) const return 0; } +template +i_t basis_update_t::scatter_into_workspace(const sparse_vector_t& in) const +{ + const i_t m = L0_.m; + // scatter pattern into xi_workspace_ + i_t nz = in.i.size(); + for (i_t k = 0; k < nz; ++k) { + const i_t i = in.i[k]; + xi_workspace_[i] = 1; + xi_workspace_[m + k] = i; + } + // scatter values into x_workspace_ + for (i_t k = 0; k < nz; ++k) { + x_workspace_[in.i[k]] = in.x[k]; + } + return nz; +} + +template +void basis_update_t::gather_into_sparse_vector(i_t nz, + sparse_vector_t& out) const +{ + const i_t m = L0_.m; + out.i.clear(); + out.x.clear(); + out.i.resize(nz); + out.x.resize(nz); + for (i_t k = 0; k < nz; ++k) { + const i_t i = xi_workspace_[m + k]; + out.i[k] = i; + out.x[k] = x_workspace_[i]; + xi_workspace_[m + k] = 0; + xi_workspace_[i] = 0; + x_workspace_[i] = 0.0; + } +} + +template +void basis_update_t::solve_to_sparse_vector(i_t top, sparse_vector_t& out) const +{ + const i_t m = L0_.m; + out.n = m; + out.i.clear(); + out.x.clear(); + const i_t nz = m - top; + out.x.resize(nz); + out.i.resize(nz); + i_t k = 0; + for (i_t p = top; p < m; ++p) { + const i_t i = xi_workspace_[p]; + out.i[k] = i; + out.x[k] = x_workspace_[i]; + x_workspace_[i] = 0.0; + xi_workspace_[p] = 0; + k++; + } +} + +template +i_t basis_update_t::l_transpose_solve(sparse_vector_t& rhs) const +{ + // L = L0*R1^{-1}* ... * Rk^{-1} + // L' = Rk^{-T} * Rk-1^{-T} * ... * R2^{-T} * R1^{-T} * L0^T + // L'*y = c + // Rk^{-T} * Rk-1^{-T} * ... * R2^{-T} * R1^{-T} * L0^T * y = c + // L0^T * y = cprime = R1^1 * ... * Rk^T * c + const i_t m = L0_.m; + + i_t nz = 0; + +#ifdef CHECK_UPDATES + std::vector multiply; + rhs.to_dense(multiply); + for (i_t k = 0; k < 2 * m; ++k) { + if (xi_workspace_[k]) { printf("xi workspace %d %d\n", k, xi_workspace_[k]); } + } +#endif + + if (num_updates_ > 0) { nz = scatter_into_workspace(rhs); } + + for (i_t k = num_updates_ - 1; k >= 0; --k) { + const i_t r = pivot_indices_[k]; + assert(r < m); + const i_t col_start = S_.col_start[k]; + const i_t col_end = S_.col_start[k + 1]; + if (xi_workspace_[r]) { + for (i_t p = col_start; p < col_end; ++p) { + // rhs.x[S_.i[p]] += rhs.x[r] * S_.x[p]; + if (!xi_workspace_[S_.i[p]]) { + xi_workspace_[S_.i[p]] = 1; + xi_workspace_[m + nz] = S_.i[p]; + nz++; + } + x_workspace_[S_.i[p]] += x_workspace_[r] * S_.x[p]; + } + } +#ifdef CHECK_UPDATES + for (i_t p = col_start; p < col_end; ++p) { + multiply[S_.i[p]] += multiply[r] * S_.x[p]; + } +#endif + } + + // Gather into rhs + if (num_updates_ > 0) { + gather_into_sparse_vector(nz, rhs); + + rhs.sort(); + +#ifdef CHECK_UPDATES + std::vector rhs_dense; + rhs.to_dense(rhs_dense); + for (i_t k = 0; k < m; ++k) { + if (std::abs(rhs_dense[k] - multiply[k]) > 1e-6) { + printf("rhs dense/multiply error %d %e %e\n", k, rhs_dense[k], multiply[k]); + } + } +#endif + } + + // L0^T * y = cprime +#ifdef CHECK_LOWER_TRANSPOSE_SOLVE + std::vector cprime_dense; + rhs.to_dense(cprime_dense); +#endif + + i_t top = sparse_triangle_solve( + rhs, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + +#ifdef CHECK_LOWER_TRANSPOSE_SOLVE + std::vector y_dense; + rhs.to_dense(y_dense); + + std::vector residual = cprime_dense; + matrix_transpose_vector_multiply(L0_, 1.0, y_dense, -1.0, residual); + const f_t L0_solve_error = vector_norm_inf(residual); + if (L0_solve_error > 1e-6) { printf("L0 solve error %e\n", L0_solve_error); } + +#endif + return 0; +} + template f_t basis_update_t::update_lower(const std::vector& sind, const std::vector& sval, @@ -205,6 +574,28 @@ i_t basis_update_t::u_solve(std::vector& x) const return 0; } +template +i_t basis_update_t::u_solve(sparse_vector_t& rhs) const +{ + // Solve Q*U*Q'*x = b + // Multiplying by Q' we have U*Q'*x = Q'*b = bprime + // Let y = Q'*x so U*y = bprime + // 1. Compute bprime = Q'*b + // 2. Solve for y such that U*y = bprime + // 3. Compute Q*y = x + const i_t m = U_.m; + sparse_vector_t bprime(m, 0); + rhs.inverse_permute_vector(col_permutation_, bprime); + + i_t top = sparse_triangle_solve( + bprime, std::nullopt, xi_workspace_, U_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + + rhs.inverse_permute_vector(inverse_col_permutation_); + + return 0; +} + // x = U'(q,q)\b template i_t basis_update_t::u_transpose_solve(std::vector& x) const @@ -223,6 +614,113 @@ i_t basis_update_t::u_transpose_solve(std::vector& x) const return 0; } +template +i_t basis_update_t::u_transpose_solve(sparse_vector_t& rhs) const +{ + // Solve Q*U'*Q'*x = b + // Multiplying by Q' we have U'*Q'*x = Q'*b = bprime + // Let y = Q'*x so U'*y = bprime + // 1. Compute bprime = Q'*b + // 2. Solve for y such that U'*y = bprime + // 3. Compute Q*y = x + const i_t m = U_.m; + sparse_vector_t bprime(1, 0); +#ifdef CHECK_PERMUTATION + std::vector rhs_dense(m); + rhs.to_dense(rhs_dense); +#endif + rhs.inverse_permute_vector(col_permutation_, bprime); +#ifdef CHECK_PERMUTATION + std::vector bprime_dense; + bprime.to_dense(bprime_dense); + std::vector rhs_dense_permuted(m); + inverse_permute_vector(col_permutation_, rhs_dense, rhs_dense_permuted); + for (i_t k = 0; k < m; ++k) { + if (std::abs(bprime_dense[k] - rhs_dense_permuted[k]) > 1e-6) { + printf("u_transpose inverse permutation error %d %e %e\n", + k, + bprime_dense[k], + rhs_dense_permuted[k]); + } + } +#endif + +#ifdef CHECK_WORKSPACE + for (i_t k = 0; k < 2 * m; ++k) { + if (xi_workspace_[k]) { + printf("before Utranspose m %d solve xi workspace %d %d\n", m, k, xi_workspace_[k]); + } + } +#endif + + // U'*y = bprime + i_t top = sparse_triangle_solve( + bprime, std::nullopt, xi_workspace_, U_transpose_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); // Uses xi_workspace_ and x_workspace_ to fill rhs + +#ifdef CHECK_WORKSPACE + for (i_t k = 0; k < 2 * m; ++k) { + if (xi_workspace_[k]) { + printf( + "after Utranspose m %d top %d solve xi workspace %d %d\n", m, top, k, xi_workspace_[k]); + } + } +#endif + +#ifdef CHECK_PERMUTATION + std::vector rhs_dense2; + rhs.to_dense(rhs_dense2); +#endif + + // Q*y = x + rhs.inverse_permute_vector(inverse_col_permutation_); +#ifdef CHECK_PERMUTATION + rhs.to_dense(rhs_dense_permuted); + std::vector rhs_dense_permuted2(m); + permute_vector(col_permutation_, rhs_dense2, rhs_dense_permuted2); + bool found_error = false; + for (i_t k = 0; k < m; ++k) { + if (std::abs(rhs_dense_permuted[k] - rhs_dense_permuted2[k]) > 1e-6) { + printf("u_transpose2 permutation error %d %e %e\n", + k, + rhs_dense_permuted[k], + rhs_dense_permuted2[k]); + found_error = true; + } + } + if (found_error) { + for (i_t k = 0; k < m; ++k) { + printf("%d (sparse -> permuted -> dense) %e (sparse -> dense -> permuted)%e\n", + k, + rhs_dense_permuted[k], + rhs_dense_permuted2[k]); + } + for (i_t k = 0; k < rhs.i.size(); ++k) { + printf("%d rhs sparse %d %e\n", k, rhs.i[k], rhs.x[k]); + } + for (i_t k = 0; k < m; ++k) { + if (rhs_dense_permuted[k] != 0.0) { + printf("%d rhs dense permuted %e\n", k, rhs_dense_permuted[k]); + } + } + for (i_t k = 0; k < m; ++k) { + if (rhs_dense2[k] != 0.0) { printf("%d rhs dense2 %e\n", k, rhs_dense2[k]); } + } + printf("col permutation %d rhs dense 2 %d rhs dense permuted %d\n", + col_permutation_.size(), + rhs_dense2.size(), + rhs_dense_permuted.size()); + for (i_t k = 0; k < col_permutation_.size(); ++k) { + printf("%d col permutation %d\n", k, col_permutation_[k]); + } + for (i_t k = 0; k < m; ++k) { + printf("%d col permutation inverse %d\n", k, inverse_col_permutation_[k]); + } + } +#endif + return 0; +} + template i_t basis_update_t::index_map(i_t r) const { @@ -334,6 +832,7 @@ i_t basis_update_t::update_upper(const std::vector& ind, U_.col_start[n] = new_nz; // Check to ensure that U remains upper triangular +#ifdef CHECK_UPPER_TRIANGULAR for (i_t k = 0; k < n; ++k) { const i_t col_start = U_.col_start[k]; const i_t col_end = U_.col_start[k + 1]; @@ -341,6 +840,10 @@ i_t basis_update_t::update_upper(const std::vector& ind, assert(U_.i[p] <= k); } } +#endif + + // Update U transpose + U_.transpose(U_transpose_); return 0; } @@ -436,7 +939,7 @@ i_t basis_update_t::update(std::vector& utilde, i_t leaving_index norm_s = update_lower(sind, sval, leaving_index); } -#ifdef PARANOID +#ifdef CHECK_ABAR { sparse_matrix_t abar_test(m, 1, 1); const Int nz = lower_triangular_multiply(U_, m - 1, abar_test, 1); @@ -473,21 +976,19 @@ i_t basis_update_t::multiply_lu(csc_matrix_t& out) out.col_start.resize(m + 1); assert(out.m == m); const i_t nz_estimate = L0_.col_start[m] + U_.col_start[m]; -#if 0 - printf("Nz estimate %d m %d num updates %d\n", nz_estimate, m, num_updates_); - printf("q = ["); - for (Int k = 0; k < m; ++k) - { - printf("%d ", col_permutation_[k]); - } - printf("];\n"); - //PrintMatrix(L0_); - printf("p = ["); - for (Int k = 0; k < m; ++k) - { - printf("%d ", row_permutation_[k]); - } - printf("];\n"); +#ifdef PRINT_PERMUTATIONS + printf("Nz estimate %d m %d num updates %d\n", nz_estimate, m, num_updates_); + printf("q = ["); + for (i_t k = 0; k < m; ++k) { + printf("%d ", col_permutation_[k]); + } + printf("];\n"); + // PrintMatrix(L0_); + printf("p = ["); + for (i_t k = 0; k < m; ++k) { + printf("%d ", row_permutation_[k]); + } + printf("];\n"); #endif out.reallocate(nz_estimate); @@ -547,16 +1048,14 @@ i_t basis_update_t::lower_triangular_multiply(const csc_matrix_t::lower_triangular_multiply(const csc_matrix_t +void basis_update_mpf_t::gather_into_sparse_vector(i_t nz, + sparse_vector_t& out) const +{ + const i_t m = L0_.m; + out.i.clear(); + out.x.clear(); + out.i.reserve(nz); + out.x.reserve(nz); + const f_t zero_tol = 1e-13; + for (i_t k = 0; k < nz; ++k) { + const i_t i = xi_workspace_[m + k]; + if (std::abs(x_workspace_[i]) > zero_tol) { + out.i.push_back(i); + out.x.push_back(x_workspace_[i]); + } + xi_workspace_[m + k] = 0; + xi_workspace_[i] = 0; + x_workspace_[i] = 0.0; + } +} + +template +void basis_update_mpf_t::solve_to_workspace(i_t top) const +{ + const i_t m = L0_.m; + i_t nz = 0; + for (i_t p = top; p < m; ++p) { + const i_t i = xi_workspace_[p]; + xi_workspace_[m + nz] = i; + xi_workspace_[p] = 0; + nz++; + } + for (i_t k = 0; k < nz; ++k) { + const i_t i = xi_workspace_[m + k]; + xi_workspace_[i] = 1; + } +} + +template +void basis_update_mpf_t::solve_to_sparse_vector(i_t top, + sparse_vector_t& out) const +{ + const i_t m = L0_.m; + out.n = m; + const i_t nz = m - top; + out.x.clear(); + out.i.clear(); + out.x.reserve(nz); + out.i.reserve(nz); + i_t k = 0; + const f_t zero_tol = 1e-13; + for (i_t p = top; p < m; ++p) { + const i_t i = xi_workspace_[p]; + if (std::abs(x_workspace_[i]) > zero_tol) { + out.i.push_back(i); + out.x.push_back(x_workspace_[i]); + } + x_workspace_[i] = 0.0; + xi_workspace_[p] = 0; + k++; + } +} + +template +i_t basis_update_mpf_t::scatter_into_workspace(const sparse_vector_t& in) const +{ + const i_t m = L0_.m; + // scatter pattern into xi_workspace_ + i_t nz = in.i.size(); + for (i_t k = 0; k < nz; ++k) { + const i_t i = in.i[k]; + xi_workspace_[i] = 1; + xi_workspace_[m + k] = i; + } + // scatter values into x_workspace_ + for (i_t k = 0; k < nz; ++k) { + x_workspace_[in.i[k]] = in.x[k]; + } + return nz; +} + +template +void basis_update_mpf_t::grow_storage(i_t nz, i_t& S_start, i_t& S_nz) +{ + const i_t last_S_col = num_updates_ * 2; + const i_t new_last_S_col = last_S_col + 2; + if (new_last_S_col >= S_.col_start.size()) { + S_.col_start.resize(new_last_S_col + refactor_frequency_); + } + S_nz = S_.col_start[last_S_col]; + if (S_nz + nz > S_.i.size()) { + S_.i.resize(std::max(2 * S_nz, S_nz + nz)); + S_.x.resize(std::max(2 * S_nz, S_nz + nz)); + } + S_start = last_S_col; +} + +template +i_t basis_update_mpf_t::nonzeros(const std::vector& x) const +{ + i_t nz = 0; + const i_t xsz = x.size(); + for (i_t i = 0; i < xsz; ++i) { + if (x[i] != 0.0) { nz++; } + } + return nz; +} + +// dot = S(:, col)' * x +template +f_t basis_update_mpf_t::dot_product(i_t col, const std::vector& x) const +{ + f_t dot = 0.0; + const i_t col_start = S_.col_start[col]; + const i_t col_end = S_.col_start[col + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = S_.i[p]; + dot += S_.x[p] * x[i]; + } + return dot; +} + +// dot = S(:, col)' * x +template +f_t basis_update_mpf_t::dot_product(i_t col, + const std::vector& mark, + const std::vector& x) const +{ + f_t dot = 0.0; + const i_t col_start = S_.col_start[col]; + const i_t col_end = S_.col_start[col + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = S_.i[p]; + if (mark[i]) { dot += S_.x[p] * x[i]; } + } + return dot; +} + +// x <- x + theta * S(:, col) +template +void basis_update_mpf_t::add_sparse_column(const csc_matrix_t& S, + i_t col, + f_t theta, + std::vector& x) const +{ + const i_t col_start = S.col_start[col]; + const i_t col_end = S.col_start[col + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = S.i[p]; + x[i] += theta * S.x[p]; + } +} + +template +void basis_update_mpf_t::add_sparse_column(const csc_matrix_t& S, + i_t col, + f_t theta, + std::vector& mark, + i_t& nz, + std::vector& x) const +{ + const i_t m = L0_.m; + const i_t col_start = S.col_start[col]; + const i_t col_end = S.col_start[col + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = S.i[p]; + if (!mark[i]) { + // Fill occured + mark[i] = 1; + mark[m + nz] = i; + nz++; + } + x[i] += theta * S.x[p]; + } +} + +template +i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, + std::vector& solution) const +{ + std::vector UTsol; + return b_transpose_solve(rhs, solution, UTsol); +} + +template +i_t basis_update_mpf_t::b_transpose_solve(const std::vector& rhs, + std::vector& solution, + std::vector& UTsol) const +{ + const i_t m = L0_.m; + // P*B = L*U + // B'*P' = U'*L' + // We want to solve + // B'*y = c + // Let y = P'*w + // B'*y = B'*P'*w = U'*L'*w = c + // 1. Solve U'*r = c for r + // 2. Solve L'*w = r for w + // 3. Compute y = P'*w + + // Solve for r such that U'*r = c + std::vector r = rhs; + u_transpose_solve(r); + UTsol = r; + + // Solve for w such that L'*w = r + l_transpose_solve(r); + + // Compute y = P'*w + inverse_permute_vector(row_permutation_, r, solution); + + return 0; +} + +template +i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const +{ + sparse_vector_t UTsol(1, 0); + return b_transpose_solve(rhs, solution, UTsol); +} + +template +i_t basis_update_mpf_t::b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& UTsol) const +{ + // Solve for r such that U'*r = c + + bool use_hypersparse = false; + const f_t input_size = static_cast(rhs.i.size()); + estimate_solution_density(input_size, sum_U_transpose_, num_calls_U_transpose_, use_hypersparse); + if (use_hypersparse) { + solution = rhs; + u_transpose_solve(solution); + } else { + std::vector solution_dense; + rhs.to_dense(solution_dense); + u_transpose_solve(solution_dense); + solution.from_dense(solution_dense); + } + UTsol = solution; + sum_U_transpose_ += static_cast(solution.i.size()) / input_size; + +#ifdef CHECK_U_TRANSPOSE_SOLVE + std::vector UTsol_dense; + UTsol.to_dense(UTsol_dense); + std::vector rhs_dense; + rhs.to_dense(rhs_dense); + + matrix_transpose_vector_multiply(U0_, 1.0, UTsol_dense, -1.0, rhs_dense); + if (vector_norm_inf(rhs_dense) > 1e-10) { + printf("B transpose solve U transpose residual %e\n", vector_norm_inf(rhs_dense)); + } +#endif + + // Solve for w such that L'*w = r +#ifdef CHECK_L_TRANSPOSE_SOLVE + std::vector r_dense; + solution.to_dense(r_dense); +#endif + const f_t rhs_size = static_cast(solution.i.size()); + estimate_solution_density(rhs_size, sum_L_transpose_, num_calls_L_transpose_, use_hypersparse); + if (use_hypersparse) { + l_transpose_solve(solution); + } else { + std::vector solution_dense; + solution.to_dense(solution_dense); + l_transpose_solve(solution_dense); + solution.from_dense(solution_dense); + } + sum_L_transpose_ += static_cast(solution.i.size()) / rhs_size; + +#ifdef CHECK_L_TRANSPOSE_SOLVE + std::vector solution_dense; + solution.to_dense(solution_dense); + l_transpose_multiply(solution_dense); + f_t max_error = 0.0; + for (i_t k = 0; k < L0_.m; ++k) { + if (std::abs(solution_dense[k] - r_dense[k]) > 1e-4) { + printf( + "B transpose solve L transpose solve error %e: index %d multiply %e rhs %e. update %d. use " + "hypersparse %d\n", + std::abs(solution_dense[k] - r_dense[k]), + k, + solution_dense[k], + r_dense[k], + num_updates_, + use_hypersparse); + } + + max_error = std::max(max_error, std::abs(solution_dense[k] - r_dense[k])); + } + if (max_error > 1e-4) { printf("B transpose solve L transpose solve residual %e\n", max_error); } +#endif + // Compute y = P'*w + solution.inverse_permute_vector(row_permutation_); + return 0; +} + +template +i_t basis_update_mpf_t::u_transpose_solve(std::vector& rhs) const +{ + total_dense_U_transpose_++; + dual_simplex::upper_triangular_transpose_solve(U0_, rhs); + return 0; +} + +template +i_t basis_update_mpf_t::u_transpose_solve(sparse_vector_t& rhs) const +{ + total_sparse_U_transpose_++; + // U0'*x = y + // Solve U0'*x0 = y + i_t top = dual_simplex::sparse_triangle_solve( + rhs, std::nullopt, xi_workspace_, U0_transpose_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); + return 0; +} + +template +i_t basis_update_mpf_t::l_transpose_solve(std::vector& rhs) const +{ + total_dense_L_transpose_++; + // L = L0 * T0 * T1 * ... * T_{num_updates_ - 1} + // L' = T_{num_updates_ - 1}^T * T_{num_updates_ - 2}^T * ... * T0^T * L0^T + // L'*x = b + // L0^T *x = T_0^-T * T_1^-T * ... * T_{num_updates_ - 1}^-T * b = b' + + const f_t zero_tol = 1e-13; + // Compute b' + for (i_t k = num_updates_ - 1; k >= 0; --k) { + // T_k^{-T} = ( I - v u^T/(1 + u^T v)) + // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu + + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + const f_t mu = mu_values_[k]; + + // dot = u^T * b + f_t dot = dot_product(u_col, rhs); + const f_t theta = dot / mu; + + if (std::abs(theta) > zero_tol) { add_sparse_column(S_, v_col, -theta, rhs); } + } + + // Solve for x such that L0^T * x = b' + dual_simplex::lower_triangular_transpose_solve(L0_, rhs); + + return 0; +} + +template +i_t basis_update_mpf_t::l_transpose_solve(sparse_vector_t& rhs) const +{ + total_sparse_L_transpose_++; + const i_t m = L0_.m; + // L'*x = b + // L0^T * x = T_0^-T * T_1^-T * ... * T_{num_updates_ - 1}^-T * b = b' + + scatter_into_workspace(rhs); + i_t nz = rhs.i.size(); + +#ifdef CHECK_MULTIPLY + std::vector rhs_dense_0; + rhs.to_dense(rhs_dense_0); +#endif + const f_t zero_tol = 1e-13; + // Compute b' + for (i_t k = num_updates_ - 1; k >= 0; --k) { + // T_k^{-T} = ( I - v u^T/(1 + u^T v)) + // T_k^{-T} * b = b - v * (u^T * b) / (1 + u^T * v) = b - theta * v, theta = u^T b / mu + + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + const f_t mu = mu_values_[k]; + + // dot = u^T * b + f_t dot = dot_product(u_col, xi_workspace_, x_workspace_); + +#ifdef CHECK_MULTIPLY + f_t dot_check = 0.0; + for (i_t p = S_.col_start[u_col]; p < S_.col_start[u_col + 1]; ++p) { + const i_t i = S_.i[p]; + dot_check += S_.x[p] * rhs_dense_0[i]; + } + if (std::abs(dot - dot_check) > 1e-10) { + printf("L transpose solve dot erorr: index %d dot %e dot check %e\n", k, dot, dot_check); + } +#endif + + const f_t theta = dot / mu; + if (std::abs(theta) > zero_tol) { + add_sparse_column(S_, v_col, -theta, xi_workspace_, nz, x_workspace_); + } + +#ifdef CHECK_MULTIPLY + for (i_t p = S_.col_start[v_col]; p < S_.col_start[v_col + 1]; ++p) { + const i_t i = S_.i[p]; + rhs_dense_0[i] -= theta * S_.x[p]; + } +#endif + } + +#ifdef CHECK_MULTIPLY + for (i_t i = 0; i < m; ++i) { + if (std::abs(rhs_dense_0[i] - x_workspace_[i]) > 1e-9) { + printf("L transpose solve multiply error %e index %d sparse %e dense %e\n", + std::abs(rhs_dense_0[i] - x_workspace_[i]), + i, + x_workspace_[i], + rhs_dense_0[i]); + } + } +#endif + + sparse_vector_t b(m, nz); + gather_into_sparse_vector(nz, b); + i_t top = dual_simplex::sparse_triangle_solve( + b, std::nullopt, xi_workspace_, L0_transpose_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); + +#ifdef CHECK_SPARSE_SOLVE + std::vector rhs_dense; + rhs.to_dense(rhs_dense); + + std::vector b_dense(m, 0.0); + for (i_t p = 0; p < nz; ++p) { + const i_t i = b.i[p]; + b_dense[i] = b.x[p]; + } + matrix_vector_multiply(L0_transpose_, 1.0, rhs_dense, -1.0, b_dense); + if (vector_norm_inf(b_dense) > 1e-9) { + printf("L0 transpose solve residual %e\n", vector_norm_inf(b_dense)); + } +#endif + + return 0; +} + +template +i_t basis_update_mpf_t::b_solve(const std::vector& rhs, + std::vector& solution) const +{ + const i_t m = L0_.m; + std::vector Lsol(m); + return b_solve(rhs, solution, Lsol); +} + +// Solve for x such that B*x = y +template +i_t basis_update_mpf_t::b_solve(const std::vector& rhs, + std::vector& solution, + std::vector& Lsol, + bool need_Lsol) const +{ + const i_t m = L0_.m; + // P*B = L*U + // B*x = b + // P*B*x = P*b + + permute_vector(row_permutation_, rhs, solution); + + // L*U*x = b' + // Solve for v such that L*v = b' +#ifdef CHECK_L_SOLVE + std::vector rhs_permuted = solution; +#endif + l_solve(solution); + if (need_Lsol) { Lsol = solution; } + +#ifdef CHECK_L_SOLVE + std::vector Lsol_check = Lsol; + l_multiply(Lsol_check); + f_t max_lsol_err = 0.0; + for (i_t k = 0; k < m; ++k) { + const f_t err = std::abs(Lsol_check[k] - rhs_permuted[k]); + max_lsol_err = std::max(max_lsol_err, err); + } + printf("B solve L multiply error %e\n", max_lsol_err); +#endif + + // Solve for x such that U*x = v + u_solve(solution); + +#ifdef CHECK_U_SOLVE + std::vector residual = Lsol; + matrix_vector_multiply(U0_, 1.0, solution, -1.0, residual); + f_t max_err = vector_norm_inf(residual); + printf("B solve U solve residual %e\n", max_err); +#endif + return 0; +} + +template +i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const +{ + sparse_vector_t Lsol(1, 0); + return b_solve(rhs, solution, Lsol, false); +} + +template +i_t basis_update_mpf_t::b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& Lsol, + bool need_Lsol) const +{ + const i_t m = L0_.m; + solution = rhs; + solution.inverse_permute_vector(inverse_row_permutation_); + +#ifdef CHECK_PERMUTATION + std::vector permuation_rhs; + rhs.to_dense(permuation_rhs); + std::vector finish_perm(m); + permute_vector(row_permutation_, permuation_rhs, finish_perm); + + std::vector solution_dense2; + solution.to_dense(solution_dense2); + for (i_t k = 0; k < m; ++k) { + if (finish_perm[k] != solution_dense2[k]) { + printf("B solve permutation error %e %e %d\n", finish_perm[k], solution_dense2[k], k); + } + } +#endif + +#ifdef CHECK_L_SOLVE + std::vector l_solve_rhs; + solution.to_dense(l_solve_rhs); +#endif + + bool use_hypersparse; + const f_t input_size = static_cast(rhs.i.size()); + estimate_solution_density(input_size, sum_L_, num_calls_L_, use_hypersparse); + if (use_hypersparse) { + l_solve(solution); + } else { + std::vector solution_dense; + solution.to_dense(solution_dense); + l_solve(solution_dense); + solution.from_dense(solution_dense); + } + if (need_Lsol) { Lsol = solution; } + sum_L_ += static_cast(solution.i.size()) / input_size; + +#ifdef CHECK_L_SOLVE + std::vector l_solve_dense; + Lsol.to_dense(l_solve_dense); + + l_multiply(l_solve_dense); + f_t max_err_l_solve = 0.0; + for (i_t k = 0; k < m; ++k) { + const f_t err = std::abs(l_solve_dense[k] - l_solve_rhs[k]); + max_err_l_solve = std::max(max_err_l_solve, err); + } + if (max_err_l_solve > 1e-9) { printf("B solve L solve residual %e\n", max_err_l_solve); } +#endif + +#ifdef CHECK_U_SOLVE + std::vector rhs_dense; + solution.to_dense(rhs_dense); +#endif + + const f_t rhs_size = static_cast(solution.i.size()); + estimate_solution_density(rhs_size, sum_U_, num_calls_U_, use_hypersparse); + if (use_hypersparse) { + u_solve(solution); + } else { + std::vector solution_dense; + solution.to_dense(solution_dense); + u_solve(solution_dense); + solution.from_dense(solution_dense); + } + sum_U_ += static_cast(solution.i.size()) / rhs_size; + +#ifdef CHECK_U_SOLVE + std::vector solution_dense; + solution.to_dense(solution_dense); + + matrix_vector_multiply(U0_, 1.0, solution_dense, -1.0, rhs_dense); + + const f_t max_err = vector_norm_inf(rhs_dense); + if (max_err > 1e-9) { printf("B solve U0 solve residual %e\n", max_err); } +#endif + return 0; +} + +// Solve for x such that U*x = y +template +i_t basis_update_mpf_t::u_solve(std::vector& rhs) const +{ + total_dense_U_++; + const i_t m = L0_.m; + // U*x = y + dual_simplex::upper_triangular_solve(U0_, rhs); + return 0; +} + +template +i_t basis_update_mpf_t::u_solve(sparse_vector_t& rhs) const +{ + total_sparse_U_++; + const i_t m = L0_.m; + // U*x = y + + // Solve U0*x = y + i_t top = dual_simplex::sparse_triangle_solve( + rhs, std::nullopt, xi_workspace_, U0_, x_workspace_.data()); + solve_to_sparse_vector(top, rhs); + + return 0; +} +// Solve for x such that L*x = y +template +i_t basis_update_mpf_t::l_solve(std::vector& rhs) const +{ + total_dense_L_++; + const i_t m = L0_.m; + // L*x = y + // L0 * T0 * T1 * ... * T_{num_updates_ - 1} * x = y + + // First solve L0*x0 = y +#ifdef CHECK_L0_SOLVE + std::vector residual = rhs; +#endif +#ifdef CHECK_L_SOLVE + std::vector rhs_check = rhs; +#endif + dual_simplex::lower_triangular_solve(L0_, rhs); + +#ifdef CHECK_L0_SOLVE + matrix_vector_multiply(L0_, 1.0, rhs, -1.0, residual); + f_t max_err = vector_norm_inf(residual); + printf("L solve: L0 solve residual %e\n", max_err); +#endif + + // Then T0 * T1 * ... * T_{num_updates_ - 1} * x = x0 + // Or x = T_{num_updates}^{-1} * T_1^{-1} * T_0^{-1} x0 + const f_t zero_tol = 1e-16; // Any higher and pilot_ja fails + for (i_t k = 0; k < num_updates_; ++k) { + // T = I + u*v^T + // T^{-1} = I - u*v^T / (1 + v^T*u) + // T^{-1} * x = x - u*v^T * x / (1 + v^T*u) = x - theta * u, theta = v^T * x / (1 + v^T*u) = v^T + // x / mu + const f_t mu = mu_values_[k]; + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + f_t dot = dot_product(v_col, rhs); + const f_t theta = dot / mu; + + if (std::abs(theta) > zero_tol) { add_sparse_column(S_, u_col, -theta, rhs); } + } + +#ifdef CHECK_L_SOLVE + std::vector inout = rhs; + l_multiply(inout); + f_t err_max = 0.0; + for (i_t k = 0; k < m; ++k) { + const f_t err = std::abs(inout[k] - rhs_check[k]); + err_max = std::max(err_max, err); + } + printf("L solve residual %e\n", err_max); +#endif + + return 0; +} + +template +i_t basis_update_mpf_t::l_solve(sparse_vector_t& rhs) const +{ + total_sparse_L_++; + const i_t m = L0_.m; + // L*x = y + // L0 * T0 * T1 * ... * T_{num_updates_ - 1} * x = y + + // First solve L0*x0 = y + i_t top = dual_simplex::sparse_triangle_solve( + rhs, std::nullopt, xi_workspace_, L0_, x_workspace_.data()); + solve_to_workspace(top); // Uses xi_workspace_ and x_workspace_ to fill rhs + i_t nz = m - top; + // Then T0 * T1 * ... * T_{num_updates_ - 1} * x = x0 + // Or x = T_{num_updates}^{-1} * T_1^{-1} * T_0^{-1} x0 + const f_t zero_tol = 1e-13; + for (i_t k = 0; k < num_updates_; ++k) { + // T = I + u*v^T + // T^{-1} = I - u*v^T / (1 + v^T*u) + // T^{-1} * x = x - u*v^T * x / (1 + v^T*u) = x - theta * u, theta = v^T * x / (1 + v^T*u) = v^T + // x / mu + const f_t mu = mu_values_[k]; + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + + // dot = v^T * x + f_t dot = dot_product(v_col, xi_workspace_, x_workspace_); + + const f_t theta = dot / mu; + if (std::abs(theta) > zero_tol) { + add_sparse_column(S_, u_col, -theta, xi_workspace_, nz, x_workspace_); + } + } + + gather_into_sparse_vector(nz, rhs); + + return 0; +} + +// Takes in utilde such that L*utilde = abar, where abar is the column to add to the basis +// and etilde such that U'*etilde = e_leaving +template +i_t basis_update_mpf_t::update(const std::vector& utilde, + const std::vector& etilde, + i_t leaving_index) +{ + const i_t m = L0_.m; +#ifdef PRINT_NUM_UPDATES + printf("Update: num_updates_ %d\n", num_updates_); +#endif + + // We are going to create a new matrix T = I + u*v^T + const i_t col_start = U0_.col_start[leaving_index]; + const i_t col_end = U0_.col_start[leaving_index + 1]; + std::vector u = utilde; + // u = utilde - U0(:, leaving_index) + add_sparse_column(U0_, leaving_index, -1.0, u); + + i_t u_nz = nonzeros(u); + + // v = etilde + i_t v_nz = nonzeros(etilde); + + i_t nz = u_nz + v_nz; + i_t S_start; + i_t S_nz; + grow_storage(nz, S_start, S_nz); +#ifdef PRINT_NZ_INFO + printf("Update: S_start %d S_nz %d num updates %d S.n %d\n", S_start, S_nz, num_updates_, S_.n); +#endif + + i_t S_nz_start = S_nz; + + // Scatter u into S + S_.append_column(u); + + // Scatter v into S + S_.append_column(etilde); + + // Compute mu = 1 + v^T * u + const f_t mu = 1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], + S_.x.data() + S_.col_start[S_start], + S_.col_start[S_start + 1] - S_.col_start[S_start], + S_.i.data() + S_.col_start[S_start + 1], + S_.x.data() + S_.col_start[S_start + 1], + v_nz); + +#ifdef CHECK_MU + const f_t mu_check = 1.0 + dot(etilde, u); + printf("Update: mu %e mu_check %e diff %e\n", mu, mu_check, std::abs(mu - mu_check)); +#endif + mu_values_.push_back(mu); + +#ifdef PRINT_MU_INFO + printf("Update mu %e u nz %d v nz %d\n", + mu_values_.back(), + S_.col_start[S_start + 1] - S_.col_start[S_start], + S_.col_start[S_start + 2] - S_.col_start[S_start + 1]); +#endif + num_updates_++; + + return 0; +} + +// Takes in utilde such that L*utilde = abar, where abar is the column to add to the basis +template +i_t basis_update_mpf_t::update(const sparse_vector_t& utilde, + sparse_vector_t& etilde, + i_t leaving_index) +{ + const i_t m = L0_.m; +#ifdef PRINT_NUM_UPDATES + printf("Update: num_updates_ %d\n", num_updates_); +#endif + + // We are going to create a new matrix T = I + u*v^T + // where u = utilde - U0(:, p) and v = etilde + + // Scatter utilde into the workspace + i_t nz = scatter_into_workspace(utilde); + + // Subtract the column of U0 corresponding to the leaving index + add_sparse_column(U0_, leaving_index, -1.0, xi_workspace_, nz, x_workspace_); + + // Ensure the workspace is sorted. Otherwise, the sparse dot will be incorrect. + std::sort(xi_workspace_.begin() + m, xi_workspace_.begin() + m + nz, std::less()); + + // Gather the workspace into a column of S + i_t S_start; + i_t S_nz; + grow_storage(nz + etilde.i.size(), S_start, S_nz); + + S_.append_column(nz, xi_workspace_.data() + m, x_workspace_.data()); + + // Gather etilde into a column of S + etilde.sort(); // Needs to be sorted for the sparse dot. TODO(CMM): Is etilde sorted on input? + S_.append_column(etilde); + + // Compute mu = 1 + v^T * u + mu_values_.push_back(1.0 + sparse_dot(S_.i.data() + S_.col_start[S_start], + S_.x.data() + S_.col_start[S_start], + S_.col_start[S_start + 1] - S_.col_start[S_start], + S_.i.data() + S_.col_start[S_start + 1], + S_.x.data() + S_.col_start[S_start + 1], + S_.col_start[S_start + 2] - S_.col_start[S_start + 1])); + // Clear the workspace + for (i_t k = 0; k < nz; ++k) { + const i_t i = xi_workspace_[m + k]; + xi_workspace_[i] = 0; + x_workspace_[i] = 0.0; + xi_workspace_[m + k] = 0; + } + +#ifdef PRINT_MU_INFO + printf("Update mu %e u nz %d v nz %d\n", + mu_values_.back(), + S_.col_start[S_start + 1] - S_.col_start[S_start], + S_.col_start[S_start + 2] - S_.col_start[S_start + 1]); +#endif + + num_updates_++; + + return 0; +} + +template +void basis_update_mpf_t::l_multiply(std::vector& inout) const +{ + const i_t m = L0_.m; + // L*x = y + // L0 * T0 * T1 * ... * T_{num_updates_ - 1} * x = y + + for (i_t k = num_updates_ - 1; k >= 0; --k) { + // T_k = ( I + u v^T) + // T_k * b = b + u * (v^T * b) = b + theta * u, theta = v^T b + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + const f_t mu = mu_values_[k]; + + // dot = v^T b + f_t dot = dot_product(v_col, inout); + const f_t theta = dot; + add_sparse_column(S_, u_col, theta, inout); + } + + std::vector out(m, 0.0); + matrix_vector_multiply(L0_, 1.0, inout, 0.0, out); + inout = out; +} + +template +void basis_update_mpf_t::l_transpose_multiply(std::vector& inout) const +{ + const i_t m = L0_.m; + std::vector out(m, 0.0); + matrix_vector_multiply(L0_transpose_, 1.0, inout, 0.0, out); + + inout = out; + + const f_t zero_tol = 1e-13; + for (i_t k = 0; k < num_updates_; ++k) { + const i_t u_col = 2 * k; + const i_t v_col = 2 * k + 1; + const f_t mu = mu_values_[k]; + + // T_k = ( I + u v^T) + // T_k^T = ( I + v u^T) + // T_k^T * b = b + v * (u^T * b) = b + theta * v, theta = u^T * b + f_t dot = dot_product(u_col, inout); + const f_t theta = dot; + if (std::abs(theta) > zero_tol) { add_sparse_column(S_, v_col, theta, inout); } + } +} + +template +void basis_update_mpf_t::multiply_lu(csc_matrix_t& out) const +{ + // P*B = L*U + // B = P'*L*U + const i_t m = L0_.m; + + out.col_start.resize(m + 1); + out.col_start[0] = 0; + out.i.clear(); + out.x.clear(); + + i_t B_nz = 0; + + for (i_t j = 0; j < m; ++j) { + // B(:, j) = L*U(:, j) + out.col_start[j] = B_nz; + + std::vector Uj(m, 0.0); + U0_.load_a_column(j, Uj); + l_multiply(Uj); + for (i_t i = 0; i < m; ++i) { + if (Uj[i] != 0.0) { + out.i.push_back(row_permutation_[i]); + out.x.push_back(Uj[i]); + B_nz++; + } + } + } + out.col_start[m] = B_nz; + + out.m = m; + out.n = m; + out.nz_max = B_nz; +} + #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE template class basis_update_t; +template class basis_update_mpf_t; #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/basis_updates.hpp b/cpp/src/dual_simplex/basis_updates.hpp index 73bec6a5d..73592e180 100644 --- a/cpp/src/dual_simplex/basis_updates.hpp +++ b/cpp/src/dual_simplex/basis_updates.hpp @@ -18,8 +18,11 @@ #pragma once #include +#include #include +#include + namespace cuopt::linear_programming::dual_simplex { // Forrest-Tomlin update to the LU factorization of a basis matrix B @@ -32,11 +35,18 @@ class basis_update_t { : L0_(Linit), U_(Uinit), row_permutation_(p), + inverse_row_permutation_(p.size()), S_(Linit.m, 1, 0), col_permutation_(Linit.m), - inverse_col_permutation_(Linit.m) + inverse_col_permutation_(Linit.m), + xi_workspace_(2 * Linit.m, 0), + x_workspace_(Linit.m, 0.0), + U_transpose_(1, 1, 1), + L0_transpose_(1, 1, 1) { + inverse_permutation(row_permutation_, inverse_row_permutation_); clear(); + compute_transposes(); } i_t reset(const csc_matrix_t& Linit, @@ -47,34 +57,60 @@ class basis_update_t { U_ = Uinit; assert(p.size() == Linit.m); row_permutation_ = p; + inverse_permutation(row_permutation_, inverse_row_permutation_); clear(); + compute_transposes(); return 0; } // Solves for x such that B*x = b, where B is the basis matrix i_t b_solve(const std::vector& rhs, std::vector& solution) const; + // Solves for x such that B*x = b, where B is the basis matrix + i_t b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const; + // Solves for x such that B*x = b, where B is the basis matrix, also returns L*v = P*b // This is useful for avoiding an extra solve with the update i_t b_solve(const std::vector& rhs, std::vector& solution, std::vector& Lsol) const; + // Solves for x such that B*x = b, where B is the basis matrix, also returns L*v = P*b + // This is useful for avoiding an extra solve with the update + i_t b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& Lsol) const; + // Solves for y such that B'*y = c, where B is the basis matrix i_t b_transpose_solve(const std::vector& rhs, std::vector& solution) const; + i_t b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const; + // Solve for x such that L*x = y i_t l_solve(std::vector& rhs) const; + // Solve for x such that L*x = y + i_t l_solve(sparse_vector_t& rhs) const; + // Solve for x such that L'*x = y i_t l_transpose_solve(std::vector& rhs) const; + // Solve for x such that L'*x = y + i_t l_transpose_solve(sparse_vector_t& rhs) const; + // Solve for x such that U*x = y i_t u_solve(std::vector& rhs) const; + // Solve for x such that U*x = y + i_t u_solve(sparse_vector_t& rhs) const; + // Solve for x such that U'*x = y i_t u_transpose_solve(std::vector& rhs) const; + // Solve for x such that U'*x = y + i_t u_transpose_solve(sparse_vector_t& rhs) const; + // Replace the column B(:, leaving_index) with the vector abar. Pass in utilde such that L*utilde // = abar i_t update(std::vector& utilde, i_t leaving_index); @@ -85,6 +121,12 @@ class basis_update_t { const std::vector& row_permutation() const { return row_permutation_; } + void compute_transposes() + { + L0_.transpose(L0_transpose_); + U_.transpose(U_transpose_); + } + private: void clear() { @@ -110,14 +152,271 @@ class basis_update_t { csc_matrix_t& out, i_t out_col) const; - i_t num_updates_; // Number of rank-1 updates to L0 - csc_matrix_t L0_; // Sparse lower triangular matrix from initial factorization - csc_matrix_t U_; // Sparse upper triangular matrix. Is modified by updates - std::vector row_permutation_; // Row permutation from initial factorization L*U = P*B - std::vector pivot_indices_; // indicies for rank-1 updates to L - csc_matrix_t S_; // stores the pivot elements for rank-1 updates to L - std::vector col_permutation_; // symmetric permuation q used in U(q, q) represents Q + void solve_to_sparse_vector(i_t top, sparse_vector_t& out) const; + i_t scatter_into_workspace(const sparse_vector_t& in) const; + void gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const; + + i_t num_updates_; // Number of rank-1 updates to L0 + mutable csc_matrix_t L0_; // Sparse lower triangular matrix from initial factorization + mutable csc_matrix_t U_; // Sparse upper triangular matrix. Is modified by updates + std::vector row_permutation_; // Row permutation from initial factorization L*U = P*B + std::vector + inverse_row_permutation_; // Inverse row permutation from initial factorization L*U = P*B + std::vector pivot_indices_; // indicies for rank-1 updates to L + csc_matrix_t S_; // stores the pivot elements for rank-1 updates to L + std::vector col_permutation_; // symmetric permuation q used in U(q, q) represents Q std::vector inverse_col_permutation_; // inverse permutation represents Q' + mutable std::vector xi_workspace_; + mutable std::vector x_workspace_; + mutable csc_matrix_t U_transpose_; // Needed for sparse solves + mutable csc_matrix_t L0_transpose_; // Needed for sparse solves +}; + +// Middle product form update to the LU factorization of a basis matrix B +template +class basis_update_mpf_t { + public: + basis_update_mpf_t(const csc_matrix_t& Linit, + const csc_matrix_t& Uinit, + const std::vector& p, + const i_t refactor_frequency) + : L0_(Linit), + U0_(Uinit), + row_permutation_(p), + inverse_row_permutation_(p.size()), + S_(Linit.m, 0, 0), + col_permutation_(Linit.m), + inverse_col_permutation_(Linit.m), + xi_workspace_(2 * Linit.m, 0), + x_workspace_(Linit.m, 0.0), + U0_transpose_(1, 1, 1), + L0_transpose_(1, 1, 1), + refactor_frequency_(refactor_frequency), + total_sparse_L_transpose_(0), + total_dense_L_transpose_(0), + total_sparse_L_(0), + total_dense_L_(0), + total_sparse_U_transpose_(0), + total_dense_U_transpose_(0), + total_sparse_U_(0), + total_dense_U_(0), + hypersparse_threshold_(0.05) + { + inverse_permutation(row_permutation_, inverse_row_permutation_); + clear(); + compute_transposes(); + reset_stas(); + } + + void print_stats() const + { + i_t total_L_transpose_calls = total_sparse_L_transpose_ + total_dense_L_transpose_; + i_t total_U_transpose_calls = total_sparse_U_transpose_ + total_dense_U_transpose_; + i_t total_L_calls = total_sparse_L_ + total_dense_L_; + i_t total_U_calls = total_sparse_U_ + total_dense_U_; + // clang-format off + printf("sparse L transpose %8d %8.2f%\n", total_sparse_L_transpose_, 100.0 * total_sparse_L_transpose_ / total_L_transpose_calls); + printf("dense L transpose %8d %8.2f%\n", total_dense_L_transpose_, 100.0 * total_dense_L_transpose_ / total_L_transpose_calls); + printf("sparse U transpose %8d %8.2f%\n", total_sparse_U_transpose_, 100.0 * total_sparse_U_transpose_ / total_U_transpose_calls); + printf("dense U transpose %8d %8.2f%\n", total_dense_U_transpose_, 100.0 * total_dense_U_transpose_ / total_U_transpose_calls); + printf("sparse L %8d %8.2f%\n", total_sparse_L_, 100.0 * total_sparse_L_ / total_L_calls); + printf("dense L %8d %8.2f%\n", total_dense_L_, 100.0 * total_dense_L_ / total_L_calls); + printf("sparse U %8d %8.2f%\n", total_sparse_U_, 100.0 * total_sparse_U_ / total_U_calls); + printf("dense U %8d %8.2f%\n", total_dense_U_, 100.0 * total_dense_U_ / total_U_calls); + // clang-format on + } + + void reset_stas() + { + num_calls_L_ = 0; + num_calls_U_ = 0; + num_calls_L_transpose_ = 0; + num_calls_U_transpose_ = 0; + sum_L_ = 0.0; + sum_U_ = 0.0; + sum_L_transpose_ = 0.0; + sum_U_transpose_ = 0.0; + } + + i_t reset(const csc_matrix_t& Linit, + const csc_matrix_t& Uinit, + const std::vector& p) + { + L0_ = Linit; + U0_ = Uinit; + assert(p.size() == Linit.m); + row_permutation_ = p; + inverse_permutation(row_permutation_, inverse_row_permutation_); + clear(); + compute_transposes(); + reset_stas(); + return 0; + } + + f_t estimate_solution_density(f_t rhs_nz, f_t sum, i_t& num_calls, bool& use_hypersparse) const + { + num_calls++; + const f_t average_growth = std::max(1.0, sum / static_cast(num_calls)); + const f_t predicted_nz = rhs_nz * average_growth; + const f_t predicted_density = predicted_nz / static_cast(L0_.m); + use_hypersparse = predicted_density < hypersparse_threshold_; + return predicted_nz; + } + + // Solves for x such that B*x = b, where B is the basis matrix + i_t b_solve(const std::vector& rhs, std::vector& solution) const; + i_t b_solve(const sparse_vector_t& rhs, sparse_vector_t& solution) const; + i_t b_solve(const std::vector& rhs, + std::vector& solution, + std::vector& Lsol, + bool need_Lsol = true) const; + i_t b_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& Lsol, + bool need_Lsol = true) const; + + // Solves for y such that B'*y = c, where B is the basis matrix + i_t b_transpose_solve(const std::vector& rhs, std::vector& solution) const; + i_t b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution) const; + i_t b_transpose_solve(const std::vector& rhs, + std::vector& solution, + std::vector& UTsol) const; + i_t b_transpose_solve(const sparse_vector_t& rhs, + sparse_vector_t& solution, + sparse_vector_t& UTsol) const; + // Solve for x such that L*x = y + i_t l_solve(std::vector& rhs) const; + + // Solve for x such that L*x = y + i_t l_solve(sparse_vector_t& rhs) const; + + // Solve for x such that L'*x = y + i_t l_transpose_solve(std::vector& rhs) const; + + // Solve for x such that L'*x = y + i_t l_transpose_solve(sparse_vector_t& rhs) const; + + // Solve for x such that U*x = y + i_t u_solve(std::vector& rhs) const; + + // Solve for x such that U*x = y + i_t u_solve(sparse_vector_t& rhs) const; + + // Solve for x such that U'*x = y + i_t u_transpose_solve(std::vector& rhs) const; + + // Solve for x such that U'*x = y + i_t u_transpose_solve(sparse_vector_t& rhs) const; + + // Replace the column B(:, leaving_index) with the vector abar. Pass in utilde such that L*utilde + // = abar + i_t update(const std::vector& utilde, const std::vector& etilde, i_t leaving_index); + + // Replace the column B(:, leaving_index) with the vector abar. Pass in utilde such that L*utilde + // = abar + i_t update(const sparse_vector_t& utilde, + sparse_vector_t& etilde, + i_t leaving_index); + + i_t num_updates() const { return num_updates_; } + + const std::vector& row_permutation() const { return row_permutation_; } + + void compute_transposes() + { + L0_.transpose(L0_transpose_); + U0_.transpose(U0_transpose_); + } + + void multiply_lu(csc_matrix_t& out) const; + + private: + void clear() + { + pivot_indices_.clear(); + pivot_indices_.reserve(L0_.m); + std::iota(col_permutation_.begin(), col_permutation_.end(), 0); + std::iota(inverse_col_permutation_.begin(), inverse_col_permutation_.end(), 0); + S_.col_start.resize(refactor_frequency_ + 1); + S_.col_start[0] = 0; + S_.col_start[1] = 0; + S_.i.clear(); + S_.x.clear(); + S_.n = 0; + mu_values_.clear(); + mu_values_.reserve(refactor_frequency_); + num_updates_ = 0; + } + void grow_storage(i_t nz, i_t& S_start, i_t& S_nz); + i_t index_map(i_t leaving) const; + f_t u_diagonal(i_t j) const; + i_t place_diagonals(); + f_t update_lower(const std::vector& sind, const std::vector& sval, i_t leaving); + i_t update_upper(const std::vector& ind, const std::vector& baru, i_t t); + i_t lower_triangular_multiply(const csc_matrix_t& in, + i_t in_col, + csc_matrix_t& out, + i_t out_col) const; + + void solve_to_workspace(i_t top) const; + void solve_to_sparse_vector(i_t top, sparse_vector_t& out) const; + i_t scatter_into_workspace(const sparse_vector_t& in) const; + void gather_into_sparse_vector(i_t nz, sparse_vector_t& out) const; + i_t nonzeros(const std::vector& x) const; + f_t dot_product(i_t col, const std::vector& x) const; + f_t dot_product(i_t col, const std::vector& mark, const std::vector& x) const; + void add_sparse_column(const csc_matrix_t& S, + i_t col, + f_t theta, + std::vector& x) const; + void add_sparse_column(const csc_matrix_t& S, + i_t col, + f_t theta, + std::vector& mark, + i_t& nz, + std::vector& x) const; + + void l_multiply(std::vector& inout) const; + void l_transpose_multiply(std::vector& inout) const; + + i_t num_updates_; // Number of rank-1 updates to L0 + i_t refactor_frequency_; // Average updates before refactoring + mutable csc_matrix_t L0_; // Sparse lower triangular matrix from initial factorization + mutable csc_matrix_t U0_; // Sparse upper triangular matrix from initial factorization + std::vector row_permutation_; // Row permutation from initial factorization L*U = P*B + std::vector + inverse_row_permutation_; // Inverse row permutation from initial factorization L*U = P*B + std::vector pivot_indices_; // indicies for rank-1 updates to L + csc_matrix_t S_; // stores information about the rank-1 updates to L + std::vector mu_values_; // stores information about the rank-1 updates to L + std::vector col_permutation_; // symmetric permuation q used in U(q, q) represents Q + std::vector inverse_col_permutation_; // inverse permutation represents Q' + mutable std::vector xi_workspace_; + mutable std::vector x_workspace_; + mutable csc_matrix_t U0_transpose_; // Needed for sparse solves + mutable csc_matrix_t L0_transpose_; // Needed for sparse solves + + mutable i_t total_sparse_L_transpose_; + mutable i_t total_dense_L_transpose_; + mutable i_t total_sparse_L_; + mutable i_t total_dense_L_; + mutable i_t total_sparse_U_transpose_; + mutable i_t total_dense_U_transpose_; + mutable i_t total_sparse_U_; + mutable i_t total_dense_U_; + + mutable i_t num_calls_L_; + mutable i_t num_calls_U_; + mutable i_t num_calls_L_transpose_; + mutable i_t num_calls_U_transpose_; + + mutable f_t sum_L_; + mutable f_t sum_U_; + mutable f_t sum_L_transpose_; + mutable f_t sum_U_transpose_; + + f_t hypersparse_threshold_; }; } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp new file mode 100644 index 000000000..11753cbcb --- /dev/null +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.cpp @@ -0,0 +1,346 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include + +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +i_t bound_flipping_ratio_test_t::compute_breakpoints(std::vector& indicies, + std::vector& ratios) +{ + i_t n = n_; + i_t m = m_; + constexpr bool verbose = false; + f_t pivot_tol = settings_.pivot_tol; + const f_t dual_tol = settings_.dual_tol / 10; + + i_t idx = 0; + while (idx == 0 && pivot_tol >= 1e-12) { + // for (i_t k = 0; k < n - m; ++k) { + // const i_t j = nonbasic_list_[k]; + for (i_t h = 0; h < delta_z_indices_.size(); ++h) { + const i_t j = delta_z_indices_[h]; + const i_t k = nonbasic_mark_[j]; + if (vstatus_[j] == variable_status_t::NONBASIC_FIXED) { continue; } + if (vstatus_[j] == variable_status_t::NONBASIC_LOWER && delta_z_[j] < -pivot_tol) { + indicies[idx] = k; + ratios[idx] = std::max((-dual_tol - z_[j]) / delta_z_[j], 0.0); + if constexpr (verbose) { settings_.log.printf("ratios[%d] = %e\n", idx, ratios[idx]); } + idx++; + } + if (vstatus_[j] == variable_status_t::NONBASIC_UPPER && delta_z_[j] > pivot_tol) { + indicies[idx] = k; + ratios[idx] = std::max((dual_tol - z_[j]) / delta_z_[j], 0.0); + if constexpr (verbose) { settings_.log.printf("ratios[%d] = %e\n", idx, ratios[idx]); } + idx++; + } + } + pivot_tol /= 10; + } + return idx; +} + +template +i_t bound_flipping_ratio_test_t::single_pass(i_t start, + i_t end, + const std::vector& indicies, + const std::vector& ratios, + f_t& slope, + f_t& step_length, + i_t& nonbasic_entering, + i_t& entering_index) +{ + // Find the minimum ratio + f_t min_val = inf; + entering_index = -1; + i_t candidate = -1; + f_t zero_tol = settings_.zero_tol; + i_t k_idx = -1; + for (i_t k = start; k < end; ++k) { + if (ratios[k] < min_val) { + min_val = ratios[k]; + candidate = indicies[k]; + k_idx = k; + } else if (ratios[k] < min_val + zero_tol) { + // Use Harris to select variables with larger pivots + const i_t j = nonbasic_list_[indicies[k]]; + if (std::abs(delta_z_[j]) > std::abs(delta_z_[candidate])) { + min_val = ratios[k]; + candidate = indicies[k]; + k_idx = k; + } + } + } + step_length = min_val; + nonbasic_entering = candidate; + const i_t j = entering_index = nonbasic_list_[nonbasic_entering]; + + constexpr bool verbose = false; + if (bounded_variables_[j]) { + const f_t interval = upper_[j] - lower_[j]; + const f_t delta_slope = std::abs(delta_z_[j]) * interval; + if constexpr (verbose) { + settings_.log.printf("single pass delta slope %e slope %e after slope %e step length %e\n", + delta_slope, + slope, + slope - delta_slope, + step_length); + } + slope -= delta_slope; + return k_idx; // we should see if we can continue to increase the step-length + } + return -1; // we are done. do not increase the step-length further +} + +template +i_t bound_flipping_ratio_test_t::compute_step_length(f_t& step_length, + i_t& nonbasic_entering) +{ + const i_t m = m_; + const i_t n = n_; + const i_t nz = delta_z_indices_.size(); + constexpr bool verbose = false; + + // Compute the initial set of breakpoints + std::vector indicies(nz); + std::vector ratios(nz); + i_t num_breakpoints = compute_breakpoints(indicies, ratios); + if constexpr (verbose) { settings_.log.printf("Initial breakpoints %d\n", num_breakpoints); } + if (num_breakpoints == 0) { + nonbasic_entering = -1; + return -1; + } + + f_t slope = slope_; + nonbasic_entering = -1; + i_t entering_index = -1; + + i_t k_idx = single_pass( + 0, num_breakpoints, indicies, ratios, slope, step_length, nonbasic_entering, entering_index); + bool continue_search = k_idx >= 0 && num_breakpoints > 1 && slope > 0.0; + if (!continue_search) { + if constexpr (0) { + settings_.log.printf( + "BFRT stopping. No bound flips. Step length %e Nonbasic entering %d Entering %d pivot %e\n", + step_length, + nonbasic_entering, + entering_index, + std::abs(delta_z_[entering_index])); + } + return entering_index; + } + + if constexpr (verbose) { + settings_.log.printf( + "Continuing past initial step length %e entering index %d nonbasic entering %d slope %e\n", + step_length, + entering_index, + nonbasic_entering, + slope); + } + + // Continue the search using a heap to order the breakpoints + ratios[k_idx] = ratios[num_breakpoints - 1]; + indicies[k_idx] = indicies[num_breakpoints - 1]; + + constexpr bool use_bucket_pass = false; + + if (use_bucket_pass) { + f_t max_ratio = 0.0; + for (i_t k = 0; k < num_breakpoints - 1; ++k) { + if (ratios[k] > max_ratio) { max_ratio = ratios[k]; } + } + settings_.log.printf( + "Starting heap passes. %d breakpoints max ratio %e\n", num_breakpoints - 1, max_ratio); + bucket_pass( + indicies, ratios, num_breakpoints - 1, slope, step_length, nonbasic_entering, entering_index); + } + + heap_passes( + indicies, ratios, num_breakpoints - 1, slope, step_length, nonbasic_entering, entering_index); + + if constexpr (verbose) { + settings_.log.printf("BFRT step length %e entering index %d non basic entering %d pivot %e\n", + step_length, + entering_index, + nonbasic_entering, + std::abs(delta_z_[entering_index])); + } + return entering_index; +} + +template +void bound_flipping_ratio_test_t::heap_passes(const std::vector& current_indicies, + const std::vector& current_ratios, + i_t num_breakpoints, + f_t& slope, + f_t& step_length, + i_t& nonbasic_entering, + i_t& entering_index) +{ + std::vector bare_idx(num_breakpoints); + constexpr bool verbose = false; + const f_t dual_tol = settings_.dual_tol; + const f_t zero_tol = settings_.zero_tol; + const std::vector& delta_z = delta_z_; + const std::vector& nonbasic_list = nonbasic_list_; + const i_t N = num_breakpoints; + for (i_t k = 0; k < N; ++k) { + bare_idx[k] = k; + if constexpr (verbose) { + settings_.log.printf("Adding index %d ratio %e pivot %e to heap\n", + current_indicies[k], + current_ratios[k], + std::abs(delta_z[nonbasic_list[current_indicies[k]]])); + } + } + + auto compare = [zero_tol, ¤t_ratios, ¤t_indicies, &delta_z, &nonbasic_list]( + const i_t& a, const i_t& b) { + return (current_ratios[a] > current_ratios[b]) || + (current_ratios[b] - current_ratios[a] < zero_tol && + std::abs(delta_z[nonbasic_list[current_indicies[a]]]) > + std::abs(delta_z[nonbasic_list[current_indicies[b]]])); + }; + + std::make_heap(bare_idx.begin(), bare_idx.end(), compare); + + while (bare_idx.size() > 0 && slope > 0) { + // Remove minimum ratio from the heap and rebalance + i_t heap_index = bare_idx.front(); + std::pop_heap(bare_idx.begin(), bare_idx.end(), compare); + bare_idx.pop_back(); + + nonbasic_entering = current_indicies[heap_index]; + const i_t j = entering_index = nonbasic_list_[nonbasic_entering]; + step_length = current_ratios[heap_index]; + + if (bounded_variables_[j]) { + // We have a bounded variable + const f_t interval = upper_[j] - lower_[j]; + const f_t delta_slope = std::abs(delta_z_[j]) * interval; + const f_t pivot = std::abs(delta_z[j]); + if constexpr (verbose) { + settings_.log.printf( + "heap %d step-length %.12e pivot %e nonbasic entering %d slope %e delta_slope %e new " + "slope %e\n", + bare_idx.size(), + current_ratios[heap_index], + pivot, + nonbasic_entering, + slope, + delta_slope, + slope - delta_slope); + } + slope -= delta_slope; + } else { + // The variable is not bounded. Stop the search. + break; + } + + if (toc(start_time_) > settings_.time_limit) { + entering_index = -2; + return; + } + if (settings_.concurrent_halt != nullptr && + settings_.concurrent_halt->load(std::memory_order_acquire) == 1) { + entering_index = -3; + return; + } + } +} + +template +void bound_flipping_ratio_test_t::bucket_pass(const std::vector& current_indicies, + const std::vector& current_ratios, + i_t num_breakpoints, + f_t& slope, + f_t& step_length, + i_t& nonbasic_entering, + i_t& entering_index) +{ + const f_t dual_tol = settings_.dual_tol; + const f_t zero_tol = settings_.zero_tol; + const std::vector& delta_z = delta_z_; + const std::vector& nonbasic_list = nonbasic_list_; + const i_t N = num_breakpoints; + + const i_t K = 400; // 0, -16, -15, ...., 0, 1, ...., 400 - 18 = 382 + std::vector buckets(K, 0.0); + std::vector bucket_count(K, 0); + for (i_t k = 0; k < N; ++k) { + const i_t idx = current_indicies[k]; + const f_t ratio = current_ratios[k]; + const f_t min_exponent = -16.0; + const f_t max_exponent = 382.0; + const f_t exponent = std::max(min_exponent, std::min(max_exponent, std::log10(ratio))); + const i_t bucket_idx = ratio == 0.0 ? 0 : static_cast(exponent - min_exponent + 1); + // settings_.log.printf("Ratio %e exponent %e bucket_idx %d\n", ratio, exponent, bucket_idx); + const i_t j = nonbasic_list[idx]; + const f_t interval = upper_[j] - lower_[j]; + const f_t delta_slope = std::abs(delta_z_[j]) * interval; + buckets[bucket_idx] += delta_slope; + bucket_count[bucket_idx]++; + } + + std::vector cumulative_sum(K, 0.0); + cumulative_sum[0] = buckets[0]; + if (cumulative_sum[0] > slope) { + settings_.log.printf( + "Bucket 0. Count in bucket %d. Slope %e. Cumulative sum %e. Bucket value %e\n", + bucket_count[0], + slope, + cumulative_sum[0], + buckets[0]); + return; + } + i_t k; + bool exceeded = false; + for (k = 1; k < K; ++k) { + cumulative_sum[k] = cumulative_sum[k - 1] + buckets[k]; + if (cumulative_sum[k] > slope) { + exceeded = true; + break; + } + } + + if (exceeded) { + settings_.log.printf( + "Value in bucket %d. Count in buckets %d. Slope %e. Cumulative sum %e. Next sum %e Bucket " + "value %e\n", + k, + bucket_count[k], + slope, + cumulative_sum[k - 1], + cumulative_sum[k], + buckets[k - 1]); + } +} + +#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE + +template class bound_flipping_ratio_test_t; + +#endif + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp new file mode 100644 index 000000000..1d741ba28 --- /dev/null +++ b/cpp/src/dual_simplex/bound_flipping_ratio_test.hpp @@ -0,0 +1,107 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +class bound_flipping_ratio_test_t { + public: + bound_flipping_ratio_test_t(const simplex_solver_settings_t& settings, + f_t start_time, + i_t m, + i_t n, + f_t initial_slope, + const std::vector& lower, + const std::vector& upper, + const std::vector& bounded_variables, + const std::vector& vstatus, + const std::vector& nonbasic_list, + const std::vector& z, + const std::vector& delta_z, + const std::vector& delta_z_indices, + const std::vector& nonbasic_mark) + : settings_(settings), + start_time_(start_time), + m_(m), + n_(n), + slope_(initial_slope), + lower_(lower), + upper_(upper), + bounded_variables_(bounded_variables), + vstatus_(vstatus), + nonbasic_list_(nonbasic_list), + z_(z), + delta_z_(delta_z), + delta_z_indices_(delta_z_indices), + nonbasic_mark_(nonbasic_mark) + { + } + + i_t compute_step_length(f_t& step_length, i_t& nonbasic_entering); + + private: + i_t compute_breakpoints(std::vector& indices, std::vector& ratios); + i_t single_pass(i_t start, + i_t end, + const std::vector& indices, + const std::vector& ratios, + f_t& slope, + f_t& step_length, + i_t& nonbasic_entering, + i_t& enetering_index); + void heap_passes(const std::vector& current_indicies, + const std::vector& current_ratios, + i_t num_breakpoints, + f_t& slope, + f_t& step_lenght, + i_t& nonbasic_entering, + i_t& entering_index); + + void bucket_pass(const std::vector& current_indicies, + const std::vector& current_ratios, + i_t num_breakpoints, + f_t& slope, + f_t& step_length, + i_t& nonbasic_entering, + i_t& entering_index); + + const std::vector& lower_; + const std::vector& upper_; + const std::vector& bounded_variables_; + const std::vector& nonbasic_list_; + const std::vector& vstatus_; + const std::vector& z_; + const std::vector& delta_z_; + const std::vector& delta_z_indices_; + const std::vector& nonbasic_mark_; + + const simplex_solver_settings_t& settings_; + + f_t start_time_; + f_t slope_; + + i_t n_; + i_t m_; +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp index e141a71bf..906704012 100644 --- a/cpp/src/dual_simplex/branch_and_bound.cpp +++ b/cpp/src/dual_simplex/branch_and_bound.cpp @@ -205,6 +205,29 @@ void graphviz_edge(const simplex_solver_settings_t& settings, } } +dual::status_t convert_lp_status_to_dual_status(lp_status_t status) +{ + if (status == lp_status_t::OPTIMAL) { + return dual::status_t::OPTIMAL; + } else if (status == lp_status_t::INFEASIBLE) { + return dual::status_t::DUAL_UNBOUNDED; + } else if (status == lp_status_t::ITERATION_LIMIT) { + return dual::status_t::ITERATION_LIMIT; + } else if (status == lp_status_t::TIME_LIMIT) { + return dual::status_t::TIME_LIMIT; + } else if (status == lp_status_t::NUMERICAL_ISSUES) { + return dual::status_t::NUMERICAL; + } else if (status == lp_status_t::CUTOFF) { + return dual::status_t::CUTOFF; + } else if (status == lp_status_t::CONCURRENT_LIMIT) { + return dual::status_t::CONCURRENT_LIMIT; + } else if (status == lp_status_t::UNSET) { + return dual::status_t::UNSET; + } else { + return dual::status_t::NUMERICAL; + } +} + } // namespace template @@ -380,7 +403,7 @@ branch_and_bound_t::branch_and_bound_t( : original_problem(user_problem), settings(solver_settings), original_lp(1, 1, 1) { start_time = tic(); - convert_user_problem(original_problem, original_lp, new_slacks); + convert_user_problem(original_problem, settings, original_lp, new_slacks); full_variable_types(original_problem, original_lp, var_types); global_variables::mutex_upper.lock(); @@ -674,6 +697,12 @@ mip_status_t branch_and_bound_t::solve(mip_solution_t& solut leaf_solution, node_iter, leaf_edge_norms); + if (lp_status == dual::status_t::NUMERICAL) { + settings.log.printf("Numerical issue node %d. Resolving from scratch.\n", nodes_explored); + lp_status_t second_status = solve_linear_program_advanced( + leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms); + lp_status = convert_lp_status_to_dual_status(second_status); + } total_lp_solve_time += toc(lp_start_time); total_lp_iters += node_iter; diff --git a/cpp/src/dual_simplex/phase2.cpp b/cpp/src/dual_simplex/phase2.cpp index cbfc66a22..a83e3bf72 100644 --- a/cpp/src/dual_simplex/phase2.cpp +++ b/cpp/src/dual_simplex/phase2.cpp @@ -17,14 +17,17 @@ #include #include +#include #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -34,17 +37,467 @@ namespace cuopt::linear_programming::dual_simplex { namespace phase2 { +// Computes vectors farkas_y, farkas_zl, farkas_zu that satisfy +// +// A'*farkas_y + farkas_zl - farkas_zu ~= 0 +// farkas_zl, farkas_zu >= 0, +// b'*farkas_y + l'*farkas_zl - u'*farkas_zu = farkas_constant > 0 +// +// This is a Farkas certificate for the infeasibility of the primal problem +// +// A*x = b, l <= x <= u +template +void compute_farkas_certificate(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& vstatus, + const std::vector& x, + const std::vector& y, + const std::vector& z, + const std::vector& delta_y, + const std::vector& delta_z, + i_t direction, + i_t leaving_index, + f_t obj_val, + std::vector& farkas_y, + std::vector& farkas_zl, + std::vector& farkas_zu, + f_t& farkas_constant) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + + std::vector original_residual = z; + matrix_transpose_vector_multiply(lp.A, 1.0, y, 1.0, original_residual); + for (i_t j = 0; j < n; ++j) { + original_residual[j] -= lp.objective[j]; + } + const f_t original_residual_norm = vector_norm2(original_residual); + settings.log.printf("|| A'*y + z - c || = %e\n", original_residual_norm); + + std::vector zl(n); + std::vector zu(n); + for (i_t j = 0; j < n; ++j) { + zl[j] = std::max(0.0, z[j]); + zu[j] = -std::min(0.0, z[j]); + } + + original_residual = zl; + matrix_transpose_vector_multiply(lp.A, 1.0, y, 1.0, original_residual); + for (i_t j = 0; j < n; ++j) { + original_residual[j] -= (zu[j] + lp.objective[j]); + } + const f_t original_residual_2 = vector_norm2(original_residual); + settings.log.printf("|| A'*y + zl - zu - c || = %e\n", original_residual_2); + + std::vector search_dir_residual = delta_z; + matrix_transpose_vector_multiply(lp.A, 1.0, delta_y, 1.0, search_dir_residual); + settings.log.printf("|| A'*delta_y + delta_z || = %e\n", + vector_norm2(search_dir_residual)); + + std::vector y_bar(m); + for (i_t i = 0; i < m; ++i) { + y_bar[i] = y[i] + delta_y[i]; + } + original_residual = z; + matrix_transpose_vector_multiply(lp.A, 1.0, y_bar, 1.0, original_residual); + for (i_t j = 0; j < n; ++j) { + original_residual[j] += (delta_z[j] - lp.objective[j]); + } + const f_t original_residual_3 = vector_norm2(original_residual); + settings.log.printf("|| A'*(y + delta_y) + (z + delta_z) - c || = %e\n", original_residual_3); + + farkas_y.resize(m); + farkas_zl.resize(n); + farkas_zu.resize(n); + + f_t gamma = 0.0; + for (i_t j = 0; j < n; ++j) { + const f_t cj = lp.objective[j]; + const f_t lower = lp.lower[j]; + const f_t upper = lp.upper[j]; + if (lower > -inf) { gamma -= lower * std::min(0.0, cj); } + if (upper < inf) { gamma -= upper * std::max(0.0, cj); } + } + printf("gamma = %e\n", gamma); + + const f_t threshold = 1.0; + const f_t positive_threshold = std::max(-gamma, 0.0) + threshold; + printf("positive_threshold = %e\n", positive_threshold); + + // We need to increase the dual objective to positive threshold + f_t alpha = threshold; + const f_t infeas = (direction == 1) ? (lp.lower[leaving_index] - x[leaving_index]) + : (x[leaving_index] - lp.upper[leaving_index]); + // We need the new objective to be at least positive_threshold + // positive_threshold = obj_val+ alpha * infeas + // infeas > 0, alpha > 0, positive_threshold > 0 + printf("direction = %d\n", direction); + printf( + "lower %e x %e upper %d\n", lp.lower[leaving_index], x[leaving_index], lp.upper[leaving_index]); + printf("infeas = %e\n", infeas); + printf("obj_val = %e\n", obj_val); + alpha = std::max(threshold, (positive_threshold - obj_val) / infeas); + printf("alpha = %e\n", alpha); + + std::vector y_prime(m); + std::vector zl_prime(n); + std::vector zu_prime(n); + + // farkas_y = y + alpha * delta_y + for (i_t i = 0; i < m; ++i) { + farkas_y[i] = y[i] + alpha * delta_y[i]; + y_prime[i] = y[i] + alpha * delta_y[i]; + } + // farkas_zl = z + alpha * delta_z - c- + for (i_t j = 0; j < n; ++j) { + const f_t cj = lp.objective[j]; + const f_t z_j = z[j]; + const f_t delta_z_j = delta_z[j]; + farkas_zl[j] = std::max(0.0, z_j) + alpha * std::max(0.0, delta_z_j) + -std::min(0.0, cj); + zl_prime[j] = zl[j] + alpha * std::max(0.0, delta_z_j); + } + + // farkas_zu = z + alpha * delta_z + c+ + for (i_t j = 0; j < n; ++j) { + const f_t cj = lp.objective[j]; + const f_t z_j = z[j]; + const f_t delta_z_j = delta_z[j]; + farkas_zu[j] = -std::min(0.0, z_j) - alpha * std::min(0.0, delta_z_j) + std::max(0.0, cj); + zu_prime[j] = zu[j] + alpha * (-std::min(0.0, delta_z_j)); + } + + // farkas_constant = b'*farkas_y + l'*farkas_zl - u'*farkas_zu + farkas_constant = 0.0; + f_t test_constant = 0.0; + f_t test_3 = 0.0; + for (i_t i = 0; i < m; ++i) { + farkas_constant += lp.rhs[i] * farkas_y[i]; + test_constant += lp.rhs[i] * y_prime[i]; + test_3 += lp.rhs[i] * delta_y[i]; + } + printf("b'*delta_y = %e\n", test_3); + printf("|| b || %e\n", vector_norm_inf(lp.rhs)); + printf("|| delta y || %e\n", vector_norm_inf(delta_y)); + for (i_t j = 0; j < n; ++j) { + const f_t lower = lp.lower[j]; + const f_t upper = lp.upper[j]; + if (lower > -inf) { + farkas_constant += lower * farkas_zl[j]; + test_constant += lower * zl_prime[j]; + const f_t delta_z_l_j = std::max(delta_z[j], 0.0); + test_3 += lower * delta_z_l_j; + } + if (upper < inf) { + farkas_constant -= upper * farkas_zu[j]; + test_constant -= upper * zu_prime[j]; + const f_t delta_z_u_j = -std::min(delta_z[j], 0.0); + test_3 -= upper * delta_z_u_j; + } + } + + // Verify that the Farkas certificate is valid + std::vector residual = farkas_zl; + matrix_transpose_vector_multiply(lp.A, 1.0, farkas_y, 1.0, residual); + for (i_t j = 0; j < n; ++j) { + residual[j] -= farkas_zu[j]; + } + const f_t residual_norm = vector_norm2(residual); + + f_t zl_min = 0.0; + for (i_t j = 0; j < n; ++j) { + zl_min = std::min(zl_min, farkas_zl[j]); + } + settings.log.printf("farkas_zl_min = %e\n", zl_min); + f_t zu_min = 0.0; + for (i_t j = 0; j < n; ++j) { + zu_min = std::min(zu_min, farkas_zu[j]); + } + settings.log.printf("farkas_zu_min = %e\n", zu_min); + + settings.log.printf("|| A'*farkas_y + farkas_zl - farkas_zu || = %e\n", residual_norm); + settings.log.printf("b'*farkas_y + l'*farkas_zl - u'*farkas_zu = %e\n", farkas_constant); + + if (residual_norm < 1e-6 && farkas_constant > 0.0 && zl_min >= 0.0 && zu_min >= 0.0) { + settings.log.printf("Farkas certificate of infeasibility constructed\n"); + } +} + +template +void initial_perturbation(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& vstatus, + std::vector& objective) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + f_t max_abs_obj_coeff = 0.0; + for (i_t j = 0; j < n; ++j) { + max_abs_obj_coeff = std::max(max_abs_obj_coeff, std::abs(lp.objective[j])); + } + + const f_t dual_tol = settings.dual_tol; + + std::srand(static_cast(std::time(nullptr))); + + objective.resize(n); + f_t sum_perturb = 0.0; + i_t num_perturb = 0; + + random_t random(settings.seed); + for (i_t j = 0; j < n; ++j) { + f_t obj = objective[j] = lp.objective[j]; + + const f_t lower = lp.lower[j]; + const f_t upper = lp.upper[j]; + if (vstatus[j] == variable_status_t::NONBASIC_FIXED || + vstatus[j] == variable_status_t::NONBASIC_FREE || lower == upper || + lower == -inf && upper == inf) { + continue; + } + + const f_t rand_val = random.random(); + const f_t perturb = + (1e-5 * std::abs(obj) + 1e-7 * max_abs_obj_coeff + 10 * dual_tol) * (1.0 + rand_val); + + if (vstatus[j] == variable_status_t::NONBASIC_LOWER || lower > -inf && upper < inf && obj > 0) { + objective[j] = obj + perturb; + sum_perturb += perturb; + num_perturb++; + } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER || + lower > -inf && upper < inf && obj < 0) { + objective[j] = obj - perturb; + sum_perturb += perturb; + num_perturb++; + } + } + + settings.log.printf("Applied initial perturbation of %e to %d/%d objective coefficients\n", + sum_perturb, + num_perturb, + n); +} + +template +void compute_reduced_cost_update(const lp_problem_t& lp, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& delta_y, + i_t leaving_index, + i_t direction, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + + // delta_zB = sigma*ei + for (i_t k = 0; k < m; k++) { + const i_t j = basic_list[k]; + delta_z[j] = 0; + } + delta_z[leaving_index] = direction; + // delta_zN = -N'*delta_y + for (i_t k = 0; k < n - m; k++) { + const i_t j = nonbasic_list[k]; + // z_j <- -A(:, j)'*delta_y + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + f_t dot = 0.0; + for (i_t p = col_start; p < col_end; ++p) { + dot += lp.A.x[p] * delta_y[lp.A.i[p]]; + } + delta_z[j] = -dot; + if (dot != 0.0) { + delta_z_indices.push_back(j); // Note delta_z_indices has n elements reserved + delta_z_mark[j] = 1; + } + } +} + +template +void compute_delta_z(const csc_matrix_t& A_transpose, + const sparse_vector_t& delta_y, + i_t leaving_index, + i_t direction, + std::vector& nonbasic_mark, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z) +{ + // delta_zN = - N'*delta_y + const i_t nz_delta_y = delta_y.i.size(); + for (i_t k = 0; k < nz_delta_y; k++) { + const i_t i = delta_y.i[k]; + const f_t delta_y_i = delta_y.x[k]; + if (std::abs(delta_y_i) < 1e-12) { continue; } + const i_t row_start = A_transpose.col_start[i]; + const i_t row_end = A_transpose.col_start[i + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t j = A_transpose.i[p]; + if (nonbasic_mark[j] >= 0) { + delta_z[j] -= delta_y_i * A_transpose.x[p]; + if (!delta_z_mark[j]) { + delta_z_mark[j] = 1; + delta_z_indices.push_back(j); // Note delta_z_indices has n elements reserved + } + } + } + } + + // delta_zB = sigma*ei + delta_z[leaving_index] = direction; + +#ifdef CHECK_CHANGE_IN_REDUCED_COST + delta_y_sparse.to_dense(delta_y); + std::vector delta_z_check(n); + std::vector delta_z_mark_check(n, 0); + std::vector delta_z_indices_check; + phase2::compute_reduced_cost_update(lp, + basic_list, + nonbasic_list, + delta_y, + leaving_index, + direction, + delta_z_mark_check, + delta_z_indices_check, + delta_z_check); + f_t error_check = 0.0; + for (i_t k = 0; k < n; ++k) { + const f_t diff = std::abs(delta_z[k] - delta_z_check[k]); + if (diff > 1e-6) { + printf("delta_z error %d transpose %e no transpose %e diff %e\n", + k, + delta_z[k], + delta_z_check[k], + diff); + } + error_check = std::max(error_check, diff); + } + if (error_check > 1e-6) { printf("delta_z error %e\n", error_check); } +#endif +} + +template +void compute_reduced_costs(const std::vector& objective, + const csc_matrix_t& A, + const std::vector& y, + const std::vector& basic_list, + const std::vector& nonbasic_list, + std::vector& z) +{ + const i_t m = A.m; + const i_t n = A.n; + // zN = cN - N'*y + for (i_t k = 0; k < n - m; k++) { + const i_t j = nonbasic_list[k]; + // z_j <- c_j + z[j] = objective[j]; + + // z_j <- z_j - A(:, j)'*y + const i_t col_start = A.col_start[j]; + const i_t col_end = A.col_start[j + 1]; + f_t dot = 0.0; + for (i_t p = col_start; p < col_end; ++p) { + dot += A.x[p] * y[A.i[p]]; + } + z[j] -= dot; + } + // zB = 0 + for (i_t k = 0; k < m; ++k) { + z[basic_list[k]] = 0.0; + } +} + +template +void compute_primal_variables(const basis_update_mpf_t& ft, + const std::vector& lp_rhs, + const csc_matrix_t& A, + const std::vector& basic_list, + const std::vector& nonbasic_list, + f_t tight_tol, + std::vector& x) +{ + const i_t m = A.m; + const i_t n = A.n; + std::vector rhs = lp_rhs; + // rhs = b - sum_{j : x_j = l_j} A(:, j) * l(j) + // - sum_{j : x_j = u_j} A(:, j) * u(j) + for (i_t k = 0; k < n - m; ++k) { + const i_t j = nonbasic_list[k]; + const i_t col_start = A.col_start[j]; + const i_t col_end = A.col_start[j + 1]; + const f_t xj = x[j]; + if (std::abs(xj) < tight_tol * 10) continue; + for (i_t p = col_start; p < col_end; ++p) { + rhs[A.i[p]] -= xj * A.x[p]; + } + } + + std::vector xB(m); + ft.b_solve(rhs, xB); + + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + x[j] = xB[k]; + } +} + +template +void clear_delta_z(i_t entering_index, + i_t leaving_index, + std::vector& delta_z_mark, + std::vector& delta_z_indices, + std::vector& delta_z) +{ + for (i_t k = 0; k < delta_z_indices.size(); k++) { + const i_t j = delta_z_indices[k]; + delta_z[j] = 0.0; + delta_z_mark[j] = 0; + } + if (entering_index != -1) { delta_z[entering_index] = 0.0; } + delta_z[leaving_index] = 0.0; + delta_z_indices.clear(); +} + +template +void clear_delta_x(const std::vector& basic_list, + i_t entering_index, + sparse_vector_t& scaled_delta_xB_sparse, + std::vector& delta_x) +{ + const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; + delta_x[j] = 0.0; + } + // Leaving index already included above + delta_x[entering_index] = 0.0; + scaled_delta_xB_sparse.i.clear(); + scaled_delta_xB_sparse.x.clear(); +} + +template +void compute_dual_residual(const csc_matrix_t& A, + const std::vector& objective, + const std::vector& y, + const std::vector& z, + std::vector& dual_residual) +{ + dual_residual = z; + const i_t n = A.n; + // r = A'*y + z - c + for (i_t j = 0; j < n; ++j) { + dual_residual[j] -= objective[j]; + } + matrix_transpose_vector_multiply(A, 1.0, y, 1.0, dual_residual); +} + template f_t l2_dual_residual(const lp_problem_t& lp, const lp_solution_t& solution) { - std::vector dual_residual = solution.z; - const i_t n = lp.num_cols; - // dual_residual <- z - c - for (i_t j = 0; j < n; j++) { - dual_residual[j] -= lp.objective[j]; - } - // dual_residual <- 1.0*A'*y + 1.0*(z - c) - matrix_transpose_vector_multiply(lp.A, 1.0, solution.y, 1.0, dual_residual); + std::vector dual_residual; + compute_dual_residual(lp.A, lp.objective, solution.y, solution.z, dual_residual); return vector_norm2(dual_residual); } @@ -56,9 +509,38 @@ f_t l2_primal_residual(const lp_problem_t& lp, const lp_solution_t(primal_residual); } +template +void vstatus_changes(const std::vector& vstatus, + const std::vector& vstatus_old, + const std::vector& z, + const std::vector& z_old, + i_t& num_vstatus_changes, + i_t& num_z_changes) +{ + num_vstatus_changes = 0; + num_z_changes = 0; + const i_t n = vstatus.size(); + for (i_t j = 0; j < n; ++j) { + if (vstatus[j] != vstatus_old[j]) { num_vstatus_changes++; } + if (std::abs(z[j] - z_old[j]) > 1e-6) { num_z_changes++; } + } +} + +template +void compute_bounded_info(const std::vector& lower, + const std::vector& upper, + std::vector& bounded_variables) +{ + const size_t n = lower.size(); + for (size_t j = 0; j < n; j++) { + const bool bounded = (lower[j] > -inf) && (upper[j] < inf) && (lower[j] != upper[j]); + bounded_variables[j] = static_cast(bounded); + } +} + template void compute_dual_solution_from_basis(const lp_problem_t& lp, - basis_update_t& ft, + basis_update_mpf_t& ft, const std::vector& basic_list, const std::vector& nonbasic_list, std::vector& y, @@ -101,6 +583,217 @@ void compute_dual_solution_from_basis(const lp_problem_t& lp, } } +template +i_t compute_primal_solution_from_basis(const lp_problem_t& lp, + basis_update_mpf_t& ft, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& vstatus, + std::vector& x) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + std::vector rhs = lp.rhs; + + for (i_t k = 0; k < n - m; ++k) { + const i_t j = nonbasic_list[k]; + if (vstatus[j] == variable_status_t::NONBASIC_LOWER || + vstatus[j] == variable_status_t::NONBASIC_FIXED) { + x[j] = lp.lower[j]; + } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER) { + x[j] = lp.upper[j]; + } else if (vstatus[j] == variable_status_t::NONBASIC_FREE) { + x[j] = 0.0; + } + } + + // rhs = b - sum_{j : x_j = l_j} A(:, j) l(j) - sum_{j : x_j = u_j} A(:, j) * + // u(j) + for (i_t k = 0; k < n - m; ++k) { + const i_t j = nonbasic_list[k]; + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + const f_t xj = x[j]; + for (i_t p = col_start; p < col_end; ++p) { + rhs[lp.A.i[p]] -= xj * lp.A.x[p]; + } + } + + std::vector xB(m); + ft.b_solve(rhs, xB); + + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + x[j] = xB[k]; + } + return 0; +} + +template +f_t compute_initial_primal_infeasibilities(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& basic_list, + const std::vector& x, + std::vector& squared_infeasibilities, + std::vector& infeasibility_indices) +{ + const i_t m = lp.num_rows; + const i_t n = lp.num_cols; + squared_infeasibilities.resize(n, 0.0); + infeasibility_indices.reserve(n); + infeasibility_indices.clear(); + f_t primal_inf = 0.0; + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + const f_t lower_infeas = lp.lower[j] - x[j]; + const f_t upper_infeas = x[j] - lp.upper[j]; + const f_t infeas = std::max(lower_infeas, upper_infeas); + if (infeas > settings.primal_tol) { + const f_t square_infeas = infeas * infeas; + squared_infeasibilities[j] = square_infeas; + infeasibility_indices.push_back(j); + primal_inf += square_infeas; + } + } + return primal_inf; +} + +template +void update_single_primal_infeasibility(const std::vector& lower, + const std::vector& upper, + const std::vector& x, + f_t primal_tol, + std::vector& squared_infeasibilities, + std::vector& infeasibility_indices, + i_t j, + f_t& primal_inf) +{ + const f_t old_val = squared_infeasibilities[j]; + // x_j < l_j - epsilon => -x_j + l_j > epsilon + const f_t lower_infeas = lower[j] - x[j]; + // x_j > u_j + epsilon => x_j - u_j > epsilon + const f_t upper_infeas = x[j] - upper[j]; + const f_t infeas = std::max(lower_infeas, upper_infeas); + const f_t new_val = infeas * infeas; + if (infeas > primal_tol) { + primal_inf = std::max(0.0, primal_inf + (new_val - old_val)); + // We are infeasible w.r.t the tolerance + if (old_val == 0.0) { + // This is a new infeasibility + // We need to add it to the list + infeasibility_indices.push_back(j); + } else { + // Already infeasible + } + squared_infeasibilities[j] = new_val; + } else { + // We are feasible w.r.t the tolerance + if (old_val != 0.0) { + // We were previously infeasible, + primal_inf = std::max(0.0, primal_inf - old_val); + squared_infeasibilities[j] = 0.0; + } else { + // Still feasible + } + } +} + +template +void update_primal_infeasibilities(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& basic_list, + const std::vector& x, + i_t entering_index, + i_t leaving_index, + std::vector& basic_change_list, + std::vector& squared_infeasibilities, + std::vector& infeasibility_indices, + f_t& primal_inf) +{ + const f_t primal_tol = settings.primal_tol; + const i_t nz = basic_change_list.size(); + for (i_t k = 0; k < nz; ++k) { + const i_t j = basic_list[basic_change_list[k]]; + // The change list will contain the leaving variable, + // But not the entering variable. + + if (j == leaving_index) { + // Force the leaving variable to be feasible + const f_t old_val = squared_infeasibilities[j]; + squared_infeasibilities[j] = 0.0; + primal_inf = std::max(0.0, primal_inf - old_val); + continue; + } + update_single_primal_infeasibility(lp.lower, + lp.upper, + x, + primal_tol, + squared_infeasibilities, + infeasibility_indices, + j, + primal_inf); + } +} + +template +void clean_up_infeasibilities(std::vector& squared_infeasibilities, + std::vector& infeasibility_indices) +{ + bool needs_clean_up = false; + for (i_t k = 0; k < infeasibility_indices.size(); ++k) { + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; + if (squared_infeas == 0.0) { needs_clean_up = true; } + } + + if (needs_clean_up) { + for (i_t k = 0; k < infeasibility_indices.size(); ++k) { + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; + if (squared_infeas == 0.0) { + // Set to the last element + const i_t sz = infeasibility_indices.size(); + infeasibility_indices[k] = infeasibility_indices[sz - 1]; + infeasibility_indices.pop_back(); + i_t new_j = infeasibility_indices[k]; + if (squared_infeasibilities[new_j] == 0.0) { k--; } + } + } + } +} + +template +i_t steepest_edge_pricing_with_infeasibilities(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& x, + const std::vector& dy_steepest_edge, + const std::vector& basic_mark, + std::vector& squared_infeasibilities, + std::vector& infeasibility_indices, + i_t& direction, + i_t& basic_leaving, + f_t& max_val) +{ + max_val = 0.0; + i_t leaving_index = -1; + const i_t nz = infeasibility_indices.size(); + for (i_t k = 0; k < nz; ++k) { + const i_t j = infeasibility_indices[k]; + const f_t squared_infeas = squared_infeasibilities[j]; + const f_t val = squared_infeas / dy_steepest_edge[j]; + if (val > max_val || val == max_val && j > leaving_index) { + max_val = val; + leaving_index = j; + const f_t lower_infeas = lp.lower[j] - x[j]; + const f_t upper_infeas = x[j] - lp.upper[j]; + direction = lower_infeas >= upper_infeas ? 1 : -1; + } + } + + basic_leaving = leaving_index >= 0 ? basic_mark[leaving_index] : -1; + return leaving_index; +} + template i_t steepest_edge_pricing(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -344,185 +1037,200 @@ i_t phase2_ratio_test(const lp_problem_t& lp, return entering_index; } -template -i_t bound_flipping_ratio_test(const lp_problem_t& lp, - const simplex_solver_settings_t& settings, - f_t start_time, - const std::vector& vstatus, - const std::vector& nonbasic_list, - const std::vector& x, - std::vector& z, - std::vector& delta_z, - i_t direction, - i_t leaving_index, - f_t& step_length, - i_t& nonbasic_entering) -{ - const i_t n = lp.num_cols; - const i_t m = lp.num_rows; - - f_t slope = direction == 1 ? (lp.lower[leaving_index] - x[leaving_index]) - : (x[leaving_index] - lp.upper[leaving_index]); - assert(slope > 0); - - const f_t pivot_tol = settings.pivot_tol; - const f_t relaxed_pivot_tol = settings.pivot_tol; - const f_t zero_tol = settings.zero_tol; - std::list q_pos; - assert(nonbasic_list.size() == n - m); - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - if (vstatus[j] == variable_status_t::NONBASIC_FIXED) { continue; } - if (vstatus[j] == variable_status_t::NONBASIC_LOWER && delta_z[j] < -pivot_tol) { - q_pos.push_back(k); - } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && delta_z[j] > pivot_tol) { - q_pos.push_back(k); - } - } - i_t entering_index = -1; - step_length = inf; - const f_t dual_tol = settings.dual_tol / 10; - while (q_pos.size() > 0 && slope > 0) { - // Find the minimum ratio for nonbasic variables in q_pos - f_t min_val = inf; - typename std::list::iterator q_index; - i_t candidate = -1; - for (typename std::list::iterator it = q_pos.begin(); it != q_pos.end(); ++it) { - const i_t k = *it; - const i_t j = nonbasic_list[k]; - f_t ratio = inf; - if (vstatus[j] == variable_status_t::NONBASIC_LOWER && delta_z[j] < -pivot_tol) { - ratio = (-dual_tol - z[j]) / delta_z[j]; - } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && delta_z[j] > pivot_tol) { - ratio = (dual_tol - z[j]) / delta_z[j]; - } else if (min_val != inf) { - // We've already found something just continue; - } else if (vstatus[j] == variable_status_t::NONBASIC_LOWER) { - ratio = (-dual_tol - z[j]) / delta_z[j]; - } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER) { - ratio = (dual_tol - z[j]) / delta_z[j]; - } else { - assert(1 == 0); - } - - ratio = std::max(ratio, 0.0); - - if (ratio < min_val) { - min_val = ratio; - q_index = it; // Save the iterator so we can remove the element it - // points to from the q_pos list later (if it corresponds - // to a bounded variable) - candidate = j; - } else if (ratio < min_val + zero_tol && - std::abs(delta_z[j]) > std::abs(delta_z[candidate])) { - min_val = ratio; - q_index = it; - candidate = j; - } - } - step_length = min_val; // Save the step length - nonbasic_entering = *q_index; - const i_t j = entering_index = nonbasic_list[nonbasic_entering]; - if (lp.lower[j] > -inf && lp.upper[j] < inf && lp.lower[j] != lp.upper[j]) { - const f_t interval = lp.upper[j] - lp.lower[j]; - const f_t delta_slope = std::abs(delta_z[j]) * interval; -#ifdef BOUND_FLIP_DEBUG - if (slope - delta_slope > 0) { - log.printf( - "Bound flip %d slope change %e prev slope %e slope %e. curr step " - "length %e\n", - j, - delta_slope, - slope, - slope - delta_slope, - step_length); - } -#endif - slope -= delta_slope; - q_pos.erase(q_index); - } else { - // we hit a variable that is not bounded. Exit - break; - } - - if (toc(start_time) > settings.time_limit) { return -2; } - if (settings.concurrent_halt != nullptr && - settings.concurrent_halt->load(std::memory_order_acquire) == 1) { - return -3; - } - } - // step_length, nonbasic_entering, and entering_index are defined after the - // while loop - assert(step_length >= 0); - - return entering_index; -} - template i_t flip_bounds(const lp_problem_t& lp, const simplex_solver_settings_t& settings, + const std::vector& bounded_variables, const std::vector& objective, const std::vector& z, + const std::vector& delta_z_indices, const std::vector& nonbasic_list, i_t entering_index, std::vector& vstatus, std::vector& delta_x, - std::vector& atilde) + std::vector& mark, + std::vector& atilde, + std::vector& atilde_index) { - f_t delta_obj = 0; - for (i_t j : nonbasic_list) { + i_t num_flipped = 0; + for (i_t j : delta_z_indices) { if (j == entering_index) { continue; } - const bool bounded = - (lp.lower[j] > -inf) && (lp.upper[j] < inf) && (lp.lower[j] != lp.upper[j]); - if (!bounded) { continue; } + if (!bounded_variables[j]) { continue; } // x_j is now a nonbasic bounded variable that will not enter the basis this // iteration const f_t dual_tol = settings.dual_tol; // lower to 1e-7 or less will cause 25fv47 and d2q06c to cycle if (vstatus[j] == variable_status_t::NONBASIC_LOWER && z[j] < -dual_tol) { const f_t delta = lp.upper[j] - lp.lower[j]; - scatter_dense(lp.A, j, -delta, atilde); - delta_obj += delta * objective[j]; + scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); delta_x[j] += delta; vstatus[j] = variable_status_t::NONBASIC_UPPER; #ifdef BOUND_FLIP_DEBUG settings.log.printf( "Flipping nonbasic %d from lo %e to up %e. z %e\n", j, lp.lower[j], lp.upper[j], z[j]); #endif + num_flipped++; } else if (vstatus[j] == variable_status_t::NONBASIC_UPPER && z[j] > dual_tol) { const f_t delta = lp.lower[j] - lp.upper[j]; - scatter_dense(lp.A, j, -delta, atilde); - delta_obj += delta * objective[j]; + scatter_dense(lp.A, j, -delta, atilde, mark, atilde_index); delta_x[j] += delta; vstatus[j] = variable_status_t::NONBASIC_LOWER; #ifdef BOUND_FLIP_DEBUG settings.log.printf( "Flipping nonbasic %d from up %e to lo %e. z %e\n", j, lp.upper[j], lp.lower[j], z[j]); #endif + num_flipped++; } } - return 0; + return num_flipped; +} + +template +void initialize_steepest_edge_norms_from_slack_basis(const std::vector& basic_list, + const std::vector& nonbasic_list, + std::vector& delta_y_steepest_edge) +{ + const i_t m = basic_list.size(); + const i_t n = delta_y_steepest_edge.size(); + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + delta_y_steepest_edge[j] = 1.0; + } + const i_t n_minus_m = n - m; + for (i_t k = 0; k < n_minus_m; ++k) { + const i_t j = nonbasic_list[k]; + delta_y_steepest_edge[j] = 1e-4; + } } template -i_t initialize_steepest_edge_norms(const simplex_solver_settings_t& settings, +i_t initialize_steepest_edge_norms(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, const f_t start_time, const std::vector& basic_list, - const basis_update_t& ft, + basis_update_mpf_t& ft, std::vector& delta_y_steepest_edge) { - // TODO: Skip this initialization when starting from a slack basis - // Or skip individual columns corresponding to slack variables - const i_t m = basic_list.size(); + const i_t m = basic_list.size(); + + // We want to compute B^T delta_y_i = -e_i + // If there is a column u of B^T such that B^T(:, u) = alpha * e_i than the + // solve delta_y_i = -1/alpha * e_u + // So we need to find columns of B^T (or rows of B) with only a single non-zero entry + f_t start_singleton_rows = tic(); + std::vector row_degree(m, 0); + std::vector mapping(m, -1); + std::vector coeff(m, 0.0); + + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + const i_t col_start = lp.A.col_start[j]; + const i_t col_end = lp.A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = lp.A.i[p]; + row_degree[i]++; + // column j of A is column k of B + mapping[k] = i; + coeff[k] = lp.A.x[p]; + } + } + +#ifdef CHECK_SINGLETON_ROWS + csc_matrix_t B(m, m, 0); + form_b(lp.A, basic_list, B); + csc_matrix_t B_transpose(m, m, 0); + B.transpose(B_transpose); +#endif + + i_t num_singleton_rows = 0; + for (i_t i = 0; i < m; ++i) { + if (row_degree[i] == 1) { + num_singleton_rows++; +#ifdef CHECK_SINGLETON_ROWS + const i_t col_start = B_transpose.col_start[i]; + const i_t col_end = B_transpose.col_start[i + 1]; + if (col_end - col_start != 1) { + settings.log.printf("Singleton row %d has %d non-zero entries\n", i, col_end - col_start); + } +#endif + } + } + + if (num_singleton_rows > 0) { + settings.log.printf("Found %d singleton rows for steepest edge norms in %.2fs\n", + num_singleton_rows, + toc(start_singleton_rows)); + } + f_t last_log = tic(); for (i_t k = 0; k < m; ++k) { - std::vector ei(m); - std::vector dy(m); - const i_t j = basic_list[k]; - ei[k] = -1.0; - ft.b_transpose_solve(ei, dy); - ei[k] = 0.0; - const f_t init = vector_norm2_squared(dy); + sparse_vector_t sparse_ei(m, 1); + sparse_ei.x[0] = -1.0; + sparse_ei.i[0] = k; + const i_t j = basic_list[k]; + f_t init = -1.0; + if (row_degree[mapping[k]] == 1) { + const i_t u = mapping[k]; + const f_t alpha = coeff[k]; + // dy[u] = -1.0 / alpha; + f_t my_init = 1.0 / (alpha * alpha); + init = my_init; +#ifdef CHECK_HYPERSPARSE + std::vector residual(m); + b_transpose_multiply(lp, basic_list, dy, residual); + float error = 0; + for (i_t h = 0; h < m; ++h) { + const f_t error_component = std::abs(residual[h] - ei[h]); + error += error_component; + if (error_component > 1e-12) { + settings.log.printf("Singleton row %d component %d error %e residual %e ei %e\n", + k, + h, + error_component, + residual[h], + ei[h]); + } + } + if (error > 1e-12) { settings.log.printf("Singleton row %d error %e\n", k, error); } +#endif + +#ifdef CHECK_HYPERSPARSE + dy[u] = 0.0; + ft.b_transpose_solve(ei, dy); + init = vector_norm2_squared(dy); + if (init != my_init) { + settings.log.printf("Singleton row %d error %.16e init %.16e my_init %.16e\n", + k, + std::abs(init - my_init), + init, + my_init); + } +#endif + } else { +#if COMPARE_WITH_DENSE + ft.b_transpose_solve(ei, dy); + init = vector_norm2_squared(dy); +#else + sparse_vector_t sparse_dy(m, 0); + ft.b_transpose_solve(sparse_ei, sparse_dy); + f_t my_init = 0.0; + for (i_t p = 0; p < sparse_dy.x.size(); ++p) { + my_init += sparse_dy.x[p] * sparse_dy.x[p]; + } +#endif +#if COMPARE_WITH_DENSE + if (std::abs(init - my_init) > 1e-12) { + settings.log.printf("Singleton row %d error %.16e init %.16e my_init %.16e\n", + k, + std::abs(init - my_init), + init, + my_init); + } +#endif + init = my_init; + } + // ei[k] = 0.0; + // init = vector_norm2_squared(dy); assert(init > 0); delta_y_steepest_edge[j] = init; @@ -544,26 +1252,25 @@ i_t initialize_steepest_edge_norms(const simplex_solver_settings_t& se template i_t update_steepest_edge_norms(const simplex_solver_settings_t& settings, const std::vector& basic_list, - const basis_update_t& ft, + const basis_update_mpf_t& ft, i_t direction, - const std::vector& delta_y, - const std::vector& scaled_delta_xB, + const sparse_vector_t& delta_y_sparse, + f_t dy_norm_squared, + const sparse_vector_t& scaled_delta_xB, i_t basic_leaving_index, i_t entering_index, + std::vector& v, std::vector& delta_y_steepest_edge) { - i_t m = delta_y.size(); - std::vector v(m); + i_t m = basic_list.size(); + const i_t delta_y_nz = delta_y_sparse.i.size(); + sparse_vector_t v_sparse(m, 0); // B^T delta_y = - direction * e_basic_leaving_index // We want B v = - B^{-T} e_basic_leaving_index - ft.b_solve(delta_y, v); - // if direction = -1 we need to scale v - if (direction == -1) { - for (i_t k = 0; k < m; ++k) { - v[k] *= -1; - } - } - const f_t dy_norm_squared = vector_norm2_squared(delta_y); + ft.b_solve(delta_y_sparse, v_sparse); + if (direction == -1) { v_sparse.negate(); } + v_sparse.scatter(v); + const i_t leaving_index = basic_list[basic_leaving_index]; const f_t prev_dy_norm_squared = delta_y_steepest_edge[leaving_index]; #ifdef STEEPEST_EDGE_DEBUG @@ -580,17 +1287,18 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin // B*w = A(:, leaving_index) // B*scaled_delta_xB = -A(:, leaving_index) so w = -scaled_delta_xB - const f_t wr = -scaled_delta_xB[basic_leaving_index]; + const f_t wr = -scaled_delta_xB.find_coefficient(basic_leaving_index); if (wr == 0) { return -1; } - const f_t omegar = dy_norm_squared / (wr * wr); - - for (i_t k = 0; k < m; ++k) { + const f_t omegar = dy_norm_squared / (wr * wr); + const i_t scaled_delta_xB_nz = scaled_delta_xB.i.size(); + for (i_t h = 0; h < scaled_delta_xB_nz; ++h) { + const i_t k = scaled_delta_xB.i[h]; const i_t j = basic_list[k]; if (k == basic_leaving_index) { - const f_t w_squared = scaled_delta_xB[k] * scaled_delta_xB[k]; + const f_t w_squared = scaled_delta_xB.x[h] * scaled_delta_xB.x[h]; delta_y_steepest_edge[j] = (1.0 / w_squared) * dy_norm_squared; } else { - const f_t wk = -scaled_delta_xB[k]; + const f_t wk = -scaled_delta_xB.x[h]; f_t new_val = delta_y_steepest_edge[j] + wk * (2.0 * v[k] / wr + wk * omegar); new_val = std::max(new_val, 1e-4); #ifdef STEEPEST_EDGE_DEBUG @@ -611,23 +1319,30 @@ i_t update_steepest_edge_norms(const simplex_solver_settings_t& settin } } + const i_t v_nz = v_sparse.i.size(); + for (i_t k = 0; k < v_nz; ++k) { + v[v_sparse.i[k]] = 0.0; + } + return 0; } // Compute steepest edge info for entering variable template -i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t& setttings, +i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t& settings, i_t m, - const basis_update_t& ft, + const basis_update_mpf_t& ft, i_t basic_leaving_index, i_t entering_index, std::vector& steepest_edge_norms) { - std::vector es(m); - es[basic_leaving_index] = -1.0; - std::vector delta_ys(m); - ft.b_transpose_solve(es, delta_ys); - steepest_edge_norms[entering_index] = vector_norm2_squared(delta_ys); + sparse_vector_t es_sparse(m, 1); + es_sparse.i[0] = basic_leaving_index; + es_sparse.x[0] = -1.0; + sparse_vector_t delta_ys_sparse(m, 0); + ft.b_transpose_solve(es_sparse, delta_ys_sparse); + steepest_edge_norms[entering_index] = delta_ys_sparse.norm2_squared(); + #ifdef STEEPEST_EDGE_DEBUG settings.log.printf("Steepest edge norm %e for entering j %d at i %d\n", steepest_edge_norms[entering_index], @@ -640,7 +1355,7 @@ i_t compute_steepest_edge_norm_entering(const simplex_solver_settings_t i_t check_steepest_edge_norms(const simplex_solver_settings_t& settings, const std::vector& basic_list, - const basis_update_t& ft, + const basis_update_mpf_t& ft, const std::vector& delta_y_steepest_edge) { const i_t m = basic_list.size(); @@ -664,6 +1379,7 @@ i_t check_steepest_edge_norms(const simplex_solver_settings_t& setting template i_t compute_perturbation(const lp_problem_t& lp, const simplex_solver_settings_t& settings, + const std::vector& delta_z_indices, std::vector& z, std::vector& objective, f_t& sum_perturb) @@ -673,7 +1389,8 @@ i_t compute_perturbation(const lp_problem_t& lp, const f_t tight_tol = settings.tight_tol; i_t num_perturb = 0; sum_perturb = 0.0; - for (i_t j = 0; j < n; ++j) { + for (i_t k = 0; k < delta_z_indices.size(); ++k) { + const i_t j = delta_z_indices[k]; if (lp.upper[j] == inf && lp.lower[j] > -inf && z[j] < -tight_tol) { const f_t violation = -z[j]; z[j] += violation; // z[j] <- 0 @@ -708,6 +1425,233 @@ i_t compute_perturbation(const lp_problem_t& lp, return 0; } +template +void reset_basis_mark(const std::vector& basic_list, + const std::vector& nonbasic_list, + std::vector& basic_mark, + std::vector& nonbasic_mark) +{ + const i_t m = basic_list.size(); + const i_t n = nonbasic_mark.size(); + const i_t n_minus_m = n - m; + + for (i_t k = 0; k < n; k++) { + basic_mark[k] = -1; + } + + for (i_t k = 0; k < n; k++) { + nonbasic_mark[k] = -1; + } + + for (i_t k = 0; k < n_minus_m; k++) { + nonbasic_mark[nonbasic_list[k]] = k; + } + + for (i_t k = 0; k < m; k++) { + basic_mark[basic_list[k]] = k; + } +} + +template +void compute_delta_y(const basis_update_mpf_t& ft, + i_t basic_leaving_index, + i_t direction, + sparse_vector_t& delta_y_sparse, + sparse_vector_t& UTsol_sparse) +{ + const i_t m = delta_y_sparse.n; + // BT*delta_y = -delta_zB = -sigma*ei + sparse_vector_t ei_sparse(m, 1); + ei_sparse.i[0] = basic_leaving_index; + ei_sparse.x[0] = -direction; + ft.b_transpose_solve(ei_sparse, delta_y_sparse, UTsol_sparse); + + if (direction != -1) { + // We solved BT*delta_y = -sigma*ei, but for the update we need + // UT*etilde = ei. So we need to flip the sign of the solution + // in the case that sigma == 1. + UTsol_sparse.negate(); + } + +#ifdef CHECK_B_TRANSPOSE_SOLVE + std::vector delta_y_sparse_vector_check(m); + delta_y_sparse.to_dense(delta_y_sparse_vector_check); + f_t error_check = 0.0; + for (i_t k = 0; k < m; ++k) { + if (std::abs(delta_y[k] - delta_y_sparse_vector_check[k]) > 1e-6) { + settings.log.printf( + "\tBTranspose error %d %e %e\n", k, delta_y[k], delta_y_sparse_vector_check[k]); + } + error_check += std::abs(delta_y[k] - delta_y_sparse_vector_check[k]); + } + if (error_check > 1e-6) { settings.log.printf("BTranspose error %e\n", error_check); } + std::vector residual(m); + b_transpose_multiply(lp, basic_list, delta_y_sparse_vector_check, residual); + for (i_t k = 0; k < m; ++k) { + if (std::abs(residual[k] - ei[k]) > 1e-6) { + settings.log.printf("\tBTranspose multiply error %d %e %e\n", k, residual[k], ei[k]); + } + } +#endif +} + +template +void update_dual_variables(const sparse_vector_t& delta_y_sparse, + const std::vector& delta_z_indices, + const std::vector& delta_z, + f_t step_length, + i_t leaving_index, + std::vector& y, + std::vector& z) +{ + // Update dual variables + // y <- y + steplength * delta_y + const i_t delta_y_nz = delta_y_sparse.i.size(); + for (i_t k = 0; k < delta_y_nz; ++k) { + const i_t i = delta_y_sparse.i[k]; + y[i] += step_length * delta_y_sparse.x[k]; + } + // z <- z + steplength * delta_z + const i_t delta_z_nz = delta_z_indices.size(); + for (i_t k = 0; k < delta_z_nz; ++k) { + const i_t j = delta_z_indices[k]; + z[j] += step_length * delta_z[j]; + } + z[leaving_index] += step_length * delta_z[leaving_index]; +} + +template +void adjust_for_flips(const basis_update_mpf_t& ft, + const std::vector& basic_list, + const std::vector& delta_z_indices, + std::vector& atilde_index, + std::vector& atilde, + std::vector& atilde_mark, + sparse_vector_t& delta_xB_0_sparse, + std::vector& delta_x_flip, + std::vector& x) +{ + const i_t m = basic_list.size(); + const i_t atilde_nz = atilde_index.size(); + // B*delta_xB_0 = atilde + sparse_vector_t atilde_sparse(m, atilde_nz); + for (i_t k = 0; k < atilde_nz; ++k) { + atilde_sparse.i[k] = atilde_index[k]; + atilde_sparse.x[k] = atilde[atilde_index[k]]; + } + ft.b_solve(atilde_sparse, delta_xB_0_sparse); + const i_t delta_xB_0_nz = delta_xB_0_sparse.i.size(); + for (i_t k = 0; k < delta_xB_0_nz; ++k) { + const i_t j = basic_list[delta_xB_0_sparse.i[k]]; + x[j] += delta_xB_0_sparse.x[k]; + } + + for (i_t j : delta_z_indices) { + x[j] += delta_x_flip[j]; + delta_x_flip[j] = 0.0; + } + + // Clear atilde + for (i_t k = 0; k < atilde_index.size(); ++k) { + atilde[atilde_index[k]] = 0.0; + } + // Clear atilde_mark + for (i_t k = 0; k < atilde_mark.size(); ++k) { + atilde_mark[k] = 0; + } + atilde_index.clear(); +} + +template +i_t compute_delta_x(const lp_problem_t& lp, + const basis_update_mpf_t& ft, + i_t entering_index, + i_t leaving_index, + i_t basic_leaving_index, + i_t direction, + const std::vector& basic_list, + const std::vector& delta_x_flip, + const sparse_vector_t& rhs_sparse, + const std::vector& x, + sparse_vector_t& utilde_sparse, + sparse_vector_t& scaled_delta_xB_sparse, + std::vector& delta_x) +{ + f_t delta_x_leaving = direction == 1 ? lp.lower[leaving_index] - x[leaving_index] + : lp.upper[leaving_index] - x[leaving_index]; + // B*w = -A(:, entering) + ft.b_solve(rhs_sparse, scaled_delta_xB_sparse, utilde_sparse); + scaled_delta_xB_sparse.negate(); + +#ifdef CHECK_B_SOLVE + std::vector scaled_delta_xB(m); + { + std::vector residual_B(m); + b_multiply(lp, basic_list, scaled_delta_xB, residual_B); + f_t err_max = 0; + for (i_t k = 0; k < m; ++k) { + const f_t err = std::abs(rhs[k] + residual_B[k]); + if (err >= 1e-6) { + settings.log.printf( + "Bsolve diff %d %e rhs %e residual %e\n", k, err, rhs[k], residual_B[k]); + } + err_max = std::max(err_max, err); + } + if (err_max > 1e-6) { settings.log.printf("B multiply error %e\n", err_max); } + } +#endif + + f_t scale = scaled_delta_xB_sparse.find_coefficient(basic_leaving_index); + if (scale != scale) { + // We couldn't find a coefficient for the basic leaving index. + // Either this is a bug or the primal step length is inf. + return -1; + } + const f_t primal_step_length = delta_x_leaving / scale; + const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; + delta_x[j] = primal_step_length * scaled_delta_xB_sparse.x[k]; + } + delta_x[leaving_index] = delta_x_leaving; + delta_x[entering_index] = primal_step_length; + return 0; +} + +template +void update_primal_variables(const sparse_vector_t& scaled_delta_xB_sparse, + const std::vector& basic_list, + const std::vector& delta_x, + i_t entering_index, + std::vector& x) +{ + // x <- x + delta_x + const i_t scaled_delta_xB_nz = scaled_delta_xB_sparse.i.size(); + for (i_t k = 0; k < scaled_delta_xB_nz; ++k) { + const i_t j = basic_list[scaled_delta_xB_sparse.i[k]]; + x[j] += delta_x[j]; + } + // Leaving index already included above + x[entering_index] += delta_x[entering_index]; +} + +template +void update_objective(const std::vector& basic_list, + const std::vector& changed_basic_indices, + const std::vector& objective, + const std::vector& delta_x, + i_t entering_index, + f_t& obj) +{ + const i_t changed_basic_nz = changed_basic_indices.size(); + for (i_t k = 0; k < changed_basic_nz; ++k) { + const i_t j = basic_list[changed_basic_indices[k]]; + obj += delta_x[j] * objective[j]; + } + // Leaving index already included above + obj += delta_x[entering_index] * objective[entering_index]; +} + template f_t dual_infeasibility(const lp_problem_t& lp, const simplex_solver_settings_t& settings, @@ -833,6 +1777,103 @@ f_t primal_infeasibility(const lp_problem_t& lp, return primal_inf; } +template +void check_primal_infeasibilities(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const std::vector& basic_list, + const std::vector& x, + const std::vector& squared_infeasibilities, + const std::vector& infeasibility_indices) +{ + const i_t m = basic_list.size(); + for (i_t k = 0; k < m; ++k) { + const i_t j = basic_list[k]; + const f_t lower_infeas = lp.lower[j] - x[j]; + const f_t upper_infeas = x[j] - lp.upper[j]; + const f_t infeas = std::max(lower_infeas, upper_infeas); + if (infeas > settings.primal_tol) { + const f_t square_infeas = infeas * infeas; + if (square_infeas != squared_infeasibilities[j]) { + settings.log.printf("Primal infeasibility mismatch %d %e != %e\n", + j, + square_infeas, + squared_infeasibilities[j]); + } + bool found = false; + for (i_t h = 0; h < infeasibility_indices.size(); ++h) { + if (infeasibility_indices[h] == j) { + found = true; + break; + } + } + if (!found) { settings.log.printf("Infeasibility index not found %d\n", j); } + } + } +} + +template +void check_basic_infeasibilities(const std::vector& basic_list, + const std::vector& basic_mark, + const std::vector& infeasibility_indices, + i_t info) +{ + for (i_t k = 0; k < infeasibility_indices.size(); ++k) { + const i_t j = infeasibility_indices[k]; + if (basic_mark[j] < 0) { printf("%d basic_infeasibilities basic_mark[%d] < 0\n", info, j); } + } +} + +template +void check_update(const lp_problem_t& lp, + const simplex_solver_settings_t& settings, + const basis_update_t& ft, + const std::vector& basic_list, + const std::vector& basic_leaving_index) +{ + const i_t m = basic_list.size(); + csc_matrix_t Btest(m, m, 1); + ft.multiply_lu(Btest); + { + csc_matrix_t B(m, m, 1); + form_b(lp.A, basic_list, B); + csc_matrix_t Diff(m, m, 1); + add(Btest, B, 1.0, -1.0, Diff); + const f_t err = Diff.norm1(); + if (err > settings.primal_tol) { settings.log.printf("|| B - L*U || %e\n", Diff.norm1()); } + if (err > settings.primal_tol) { + for (i_t j = 0; j < m; ++j) { + for (i_t p = Diff.col_start[j]; p < Diff.col_start[j + 1]; ++p) { + const i_t i = Diff.i[p]; + if (Diff.x[p] != 0.0) { settings.log.printf("Diff %d %d %e\n", j, i, Diff.x[p]); } + } + } + } + settings.log.printf("basic leaving index %d\n", basic_leaving_index); + assert(err < settings.primal_tol); + } +} + +template +void check_basis_mark(const simplex_solver_settings_t& settings, + const std::vector& basic_list, + const std::vector& nonbasic_list, + const std::vector& basic_mark, + const std::vector& nonbasic_mark) +{ + const i_t m = basic_list.size(); + const i_t n = basic_mark.size(); + for (i_t k = 0; k < m; k++) { + if (basic_mark[basic_list[k]] != k) { + settings.log.printf("Basic mark %d %d\n", basic_list[k], k); + } + } + for (i_t k = 0; k < n - m; k++) { + if (nonbasic_mark[nonbasic_list[k]] != k) { + settings.log.printf("Nonbasic mark %d %d\n", nonbasic_list[k], k); + } + } +} + template void bound_info(const lp_problem_t& lp, const simplex_solver_settings_t& settings) @@ -953,10 +1994,32 @@ void set_primal_variables_on_bounds(const lp_problem_t& lp, } } +template +f_t compute_perturbed_objective(const std::vector& objective, const std::vector& x) +{ + const size_t n = objective.size(); + f_t obj_val = 0.0; + for (size_t j = 0; j < n; ++j) { + obj_val += objective[j] * x[j]; + } + return obj_val; +} + +template +f_t amount_of_perturbation(const lp_problem_t& lp, const std::vector& objective) +{ + f_t perturbation = 0.0; + const i_t n = lp.num_cols; + for (i_t j = 0; j < n; ++j) { + perturbation += std::abs(lp.objective[j] - objective[j]); + } + return perturbation; +} + template void prepare_optimality(const lp_problem_t& lp, const simplex_solver_settings_t& settings, - basis_update_t& ft, + basis_update_mpf_t& ft, const std::vector& objective, const std::vector& basic_list, const std::vector& nonbasic_list, @@ -975,11 +2038,7 @@ void prepare_optimality(const lp_problem_t& lp, sol.objective = compute_objective(lp, sol.x); sol.user_objective = compute_user_objective(lp, sol.objective); - f_t perturbation = 0.0; - for (i_t j = 0; j < n; ++j) { - perturbation += std::abs(lp.objective[j] - objective[j]); - } - + f_t perturbation = phase2::amount_of_perturbation(lp, objective); if (perturbation > 1e-6 && phase == 2) { // Try to remove perturbation std::vector unperturbed_y(m); @@ -994,6 +2053,8 @@ void prepare_optimality(const lp_problem_t& lp, z = unperturbed_z; y = unperturbed_y; perturbation = 0.0; + } else { + settings.log.printf("Failed to remove perturbation of %.2e.\n", perturbation); } } } @@ -1015,7 +2076,6 @@ void prepare_optimality(const lp_problem_t& lp, settings.log.printf("Primal infeasibility (abs): %.2e\n", primal_infeas); settings.log.printf("Dual infeasibility (abs): %.2e\n", dual_infeas); settings.log.printf("Perturbation: %.2e\n", perturbation); - settings.log.printf("Max steepest edge norm: %.2e\n", max_val); } else { settings.log.printf("\n"); settings.log.printf( @@ -1026,6 +2086,81 @@ void prepare_optimality(const lp_problem_t& lp, } } +template +class phase2_timers_t { + public: + phase2_timers_t(bool should_time) + : record_time(should_time), + bfrt_time(0), + pricing_time(0), + btran_time(0), + ftran_time(0), + flip_time(0), + delta_z_time(0), + se_norms_time(0), + se_entering_time(0), + lu_update_time(0), + perturb_time(0), + vector_time(0), + objective_time(0), + update_infeasibility_time(0) + { + } + + void start_timer() + { + if (!record_time) { return; } + start_time = tic(); + } + + f_t stop_timer() + { + if (!record_time) { return 0.0; } + return toc(start_time); + } + + void print_timers(const simplex_solver_settings_t& settings) const + { + if (!record_time) { return; } + const f_t total_time = bfrt_time + pricing_time + btran_time + ftran_time + flip_time + + delta_z_time + lu_update_time + se_norms_time + se_entering_time + + perturb_time + vector_time + objective_time + update_infeasibility_time; + // clang-format off + settings.log.printf("BFRT time %.2fs %4.1f%\n", bfrt_time, 100.0 * bfrt_time / total_time); + settings.log.printf("Pricing time %.2fs %4.1f%\n", pricing_time, 100.0 * pricing_time / total_time); + settings.log.printf("BTran time %.2fs %4.1f%\n", btran_time, 100.0 * btran_time / total_time); + settings.log.printf("FTran time %.2fs %4.1f%\n", ftran_time, 100.0 * ftran_time / total_time); + settings.log.printf("Flip time %.2fs %4.1f%\n", flip_time, 100.0 * flip_time / total_time); + settings.log.printf("Delta_z time %.2fs %4.1f%\n", delta_z_time, 100.0 * delta_z_time / total_time); + settings.log.printf("LU update time %.2fs %4.1f%\n", lu_update_time, 100.0 * lu_update_time / total_time); + settings.log.printf("SE norms time %.2fs %4.1f%\n", se_norms_time, 100.0 * se_norms_time / total_time); + settings.log.printf("SE enter time %.2fs %4.1f%\n", se_entering_time, 100.0 * se_entering_time / total_time); + settings.log.printf("Perturb time %.2fs %4.1f%\n", perturb_time, 100.0 * perturb_time / total_time); + settings.log.printf("Vector time %.2fs %4.1f%\n", vector_time, 100.0 * vector_time / total_time); + settings.log.printf("Objective time %.2fs %4.1f%\n", objective_time, 100.0 * objective_time / total_time); + settings.log.printf("Inf update time %.2fs %4.1f%\n", update_infeasibility_time, 100.0 * update_infeasibility_time / total_time); + settings.log.printf("Sum %.2fs\n", total_time); + // clang-format on + } + f_t bfrt_time; + f_t pricing_time; + f_t btran_time; + f_t ftran_time; + f_t flip_time; + f_t delta_z_time; + f_t se_norms_time; + f_t se_entering_time; + f_t lu_update_time; + f_t perturb_time; + f_t vector_time; + f_t objective_time; + f_t update_infeasibility_time; + + private: + f_t start_time; + bool record_time; +}; + } // namespace phase2 template @@ -1093,7 +2228,7 @@ dual::status_t dual_phase2(i_t phase, if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } assert(q.size() == m); reorder_basic_list(q, basic_list); - basis_update_t ft(L, U, p); + basis_update_mpf_t ft(L, U, p, settings.refactor_frequency); std::vector c_basic(m); for (i_t k = 0; k < m; ++k) { @@ -1105,41 +2240,19 @@ dual::status_t dual_phase2(i_t phase, ft.b_transpose_solve(c_basic, y); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } constexpr bool print_norms = false; - if (print_norms) { + if constexpr (print_norms) { settings.log.printf( "|| y || %e || cB || %e\n", vector_norm_inf(y), vector_norm_inf(c_basic)); } - // zN = cN - N'*y - for (i_t k = 0; k < n - m; k++) { - const i_t j = nonbasic_list[k]; - // z_j <- c_j - z[j] = objective[j]; - - // z_j <- z_j - A(:, j)'*y - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - f_t dot = 0.0; - for (i_t p = col_start; p < col_end; ++p) { - dot += lp.A.x[p] * y[lp.A.i[p]]; - } - z[j] -= dot; - } - // zB = 0 - for (i_t k = 0; k < m; ++k) { - z[basic_list[k]] = 0.0; - } - if (print_norms) { settings.log.printf("|| z || %e\n", vector_norm_inf(z)); } + phase2::compute_reduced_costs(objective, lp.A, y, basic_list, nonbasic_list, z); + if constexpr (print_norms) { settings.log.printf("|| z || %e\n", vector_norm_inf(z)); } #ifdef COMPUTE_DUAL_RESIDUAL - // || A'*y + z - c||_inf - std::vector dual_res1 = z; - for (i_t j = 0; j < n; ++j) { - dual_res1[j] -= objective[j]; - } - matrix_transpose_vector_multiply(lp.A, 1.0, y, 1.0, dual_res1); + std::vector dual_res1; + compute_dual_residual(lp.A, objective, y, z, dual_res1); f_t dual_res_norm = vector_norm_inf(dual_res1); - if (1 || dual_res_norm > settings.tight_tol) { + if (dual_res_norm > settings.tight_tol) { settings.log.printf("|| A'*y + z - c || %e\n", dual_res_norm); } assert(dual_res_norm < 1e-3); @@ -1148,15 +2261,11 @@ dual::status_t dual_phase2(i_t phase, phase2::set_primal_variables_on_bounds(lp, settings, z, vstatus, x); #ifdef PRINT_VSTATUS_CHANGES - i_t num_vstatus_changes = 0; - i_t num_z_changes = 0; - for (i_t j = 0; j < n; ++j) { - if (vstatus[j] != vstatus_old[j]) { num_vstatus_changes++; } - if (std::abs(z[j] - z_old[j]) > 1e-6) { num_z_changes++; } - } - - printf("Number of vstatus changes %d\n", num_vstatus_changes); - printf("Number of z changes %d\n", num_z_changes); + i_t num_vstatus_changes; + i_t num_z_changes; + phase2::vstatus_changes(vstatus, vstatus_old, z, z_old, num_vstatus_changes, num_z_changes); + settings.log.printf("Number of vstatus changes %d\n", num_vstatus_changes); + settings.log.printf("Number of z changes %d\n", num_z_changes); #endif const f_t init_dual_inf = @@ -1171,28 +2280,10 @@ dual::status_t dual_phase2(i_t phase, } } - std::vector rhs = lp.rhs; - // rhs = b - sum_{j : x_j = l_j} A(:, j) l(j) - sum_{j : x_j = u_j} A(:, j) * - // u(j) - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - const f_t xj = x[j]; - if (std::abs(xj) < settings.tight_tol * 10) continue; - for (i_t p = col_start; p < col_end; ++p) { - rhs[lp.A.i[p]] -= xj * lp.A.x[p]; - } - } + phase2::compute_primal_variables( + ft, lp.rhs, lp.A, basic_list, nonbasic_list, settings.tight_tol, x); - std::vector xB(m); - ft.b_solve(rhs, xB); if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } - - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - x[j] = xB[k]; - } if (print_norms) { settings.log.printf("|| x || %e\n", vector_norm2(x)); } #ifdef COMPUTE_PRIMAL_RESIDUAL @@ -1207,18 +2298,12 @@ dual::status_t dual_phase2(i_t phase, if (delta_y_steepest_edge.size() == 0) { delta_y_steepest_edge.resize(n); if (slack_basis) { - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - delta_y_steepest_edge[j] = 1.0; - } - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - delta_y_steepest_edge[j] = 1e-4; - } + phase2::initialize_steepest_edge_norms_from_slack_basis( + basic_list, nonbasic_list, delta_y_steepest_edge); } else { std::fill(delta_y_steepest_edge.begin(), delta_y_steepest_edge.end(), -1); if (phase2::initialize_steepest_edge_norms( - settings, start_time, basic_list, ft, delta_y_steepest_edge) == -1) { + lp, settings, start_time, basic_list, ft, delta_y_steepest_edge) == -1) { return dual::status_t::TIME_LIMIT; } } @@ -1227,35 +2312,74 @@ dual::status_t dual_phase2(i_t phase, vector_norm2(delta_y_steepest_edge)); } - if (phase == 2) { settings.log.printf(" Iter Objective Primal Infeas Perturb Time\n"); } + if (phase == 2) { + settings.log.printf(" Iter Objective Num Inf. Sum Inf. Perturb Time\n"); + } const i_t iter_limit = settings.iteration_limit; - std::vector delta_y(m); - std::vector delta_z(n); - std::vector delta_x(n); + std::vector delta_y(m, 0.0); + std::vector delta_z(n, 0.0); + std::vector delta_x(n, 0.0); + std::vector delta_x_flip(n, 0.0); + std::vector atilde(m, 0.0); + std::vector atilde_mark(m, 0); + std::vector atilde_index; + std::vector nonbasic_mark(n); + std::vector basic_mark(n); + std::vector delta_z_mark(n, 0); + std::vector delta_z_indices; + std::vector v(m, 0.0); + std::vector squared_infeasibilities; + std::vector infeasibility_indices; + + delta_z_indices.reserve(n); + + phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark); + + std::vector bounded_variables(n, 0); + phase2::compute_bounded_info(lp.lower, lp.upper, bounded_variables); + + f_t primal_infeasibility = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + +#ifdef CHECK_BASIC_INFEASIBILITIES + phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 0); +#endif + + csc_matrix_t A_transpose(1, 1, 0); + lp.A.transpose(A_transpose); + + f_t obj = compute_objective(lp, x); const i_t start_iter = iter; + + i_t sparse_delta_z = 0; + i_t dense_delta_z = 0; + phase2::phase2_timers_t timers(false); + while (iter < iter_limit) { // Pricing - i_t direction; - i_t basic_leaving_index; - f_t primal_infeasibility; - i_t leaving_index = -1; + i_t direction = 0; + i_t basic_leaving_index = -1; + i_t leaving_index = -1; f_t max_val; + timers.start_timer(); if (settings.use_steepest_edge_pricing) { - leaving_index = phase2::steepest_edge_pricing(lp, - settings, - x, - delta_y_steepest_edge, - basic_list, - direction, - basic_leaving_index, - primal_infeasibility, - max_val); + leaving_index = phase2::steepest_edge_pricing_with_infeasibilities(lp, + settings, + x, + delta_y_steepest_edge, + basic_mark, + squared_infeasibilities, + infeasibility_indices, + direction, + basic_leaving_index, + max_val); } else { // Max infeasibility pricing leaving_index = phase2::phase2_pricing( lp, settings, x, basic_list, direction, basic_leaving_index, primal_infeasibility); } + timers.pricing_time += timers.stop_timer(); if (leaving_index == -1) { phase2::prepare_optimality(lp, settings, @@ -1277,18 +2401,18 @@ dual::status_t dual_phase2(i_t phase, } // BTran - // TODO: replace with sparse solve. - std::vector ei(m, 0.0); - std::vector delta_y(m); - ei[basic_leaving_index] = -direction; // BT*delta_y = -delta_zB = -sigma*ei - ft.b_transpose_solve(ei, delta_y); + timers.start_timer(); + sparse_vector_t delta_y_sparse(m, 0); + sparse_vector_t UTsol_sparse(m, 0); + phase2::compute_delta_y(ft, basic_leaving_index, direction, delta_y_sparse, UTsol_sparse); + timers.btran_time += timers.stop_timer(); - const f_t steepest_edge_norm_check = vector_norm2_squared(delta_y); + const f_t steepest_edge_norm_check = delta_y_sparse.norm2_squared(); if (delta_y_steepest_edge[leaving_index] < settings.steepest_edge_ratio * steepest_edge_norm_check) { constexpr bool verbose = false; - if (verbose) { + if constexpr (verbose) { settings.log.printf( "iteration restart due to steepest edge. Leaving %d. Actual %.2e " "from update %.2e\n", @@ -1300,43 +2424,48 @@ dual::status_t dual_phase2(i_t phase, continue; } -#ifdef COMPUTE_BTRANSPOSE_RESIDUAL - { - std::vector res(m); - b_transpose_multiply(lp, basic_list, delta_y, res); - for (Int k = 0; k < m; k++) { - const f_t err = std::abs(res[k] - ei[k]); - if (err > 1e-4) { settings.log.printf("BT err %d %e\n", k, err); } - assert(err < 1e-4); - } + timers.start_timer(); + i_t delta_y_nz0 = 0; + const i_t nz_delta_y = delta_y_sparse.i.size(); + for (i_t k = 0; k < nz_delta_y; k++) { + if (std::abs(delta_y_sparse.x[k]) > 1e-12) { delta_y_nz0++; } } -#endif - - // delta_zB = sigma*ei - for (i_t k = 0; k < m; k++) { - const i_t j = basic_list[k]; - delta_z[j] = 0; - } - delta_z[leaving_index] = direction; - // delta_zN = -N'*delta_y - for (i_t k = 0; k < n - m; k++) { - const i_t j = nonbasic_list[k]; - // z_j <- -A(:, j)'*delta_y - const i_t col_start = lp.A.col_start[j]; - const i_t col_end = lp.A.col_start[j + 1]; - f_t dot = 0.0; - for (i_t p = col_start; p < col_end; ++p) { - dot += lp.A.x[p] * delta_y[lp.A.i[p]]; - } - delta_z[j] = -dot; + const f_t delta_y_nz_percentage = delta_y_nz0 / static_cast(m) * 100.0; + const bool use_transpose = delta_y_nz_percentage <= 30.0; + if (use_transpose) { + sparse_delta_z++; + phase2::compute_delta_z(A_transpose, + delta_y_sparse, + leaving_index, + direction, + nonbasic_mark, + delta_z_mark, + delta_z_indices, + delta_z); + } else { + dense_delta_z++; + // delta_zB = sigma*ei + delta_y_sparse.to_dense(delta_y); + phase2::compute_reduced_cost_update(lp, + basic_list, + nonbasic_list, + delta_y, + leaving_index, + direction, + delta_z_mark, + delta_z_indices, + delta_z); } + timers.delta_z_time += timers.stop_timer(); #ifdef COMPUTE_DUAL_RESIDUAL - std::vector dual_residual = delta_z; + std::vector dual_residual; + std::vector zeros(n, 0.0); + phase2::compute_dual_residual(lp.A, zeros, delta_y, delta_z, dual_residual); // || A'*delta_y + delta_z ||_inf - matrix_transpose_vector_multiply(lp.A, 1.0, delta_y, 1.0, dual_residual); f_t dual_residual_norm = vector_norm_inf(dual_residual); - settings.log.printf("|| A'*dy - dz || %e\n", dual_residual_norm); + settings.log.printf( + "|| A'*dy - dz || %e use transpose %d\n", dual_residual_norm, use_transpose); #endif // Ratio test @@ -1356,18 +2485,25 @@ dual::status_t dual_phase2(i_t phase, step_length, nonbasic_entering_index); } else if (bound_flip_ratio) { - entering_index = phase2::bound_flipping_ratio_test(lp, - settings, - start_time, - vstatus, - nonbasic_list, - x, - z, - delta_z, - direction, - leaving_index, - step_length, - nonbasic_entering_index); + timers.start_timer(); + f_t slope = direction == 1 ? (lp.lower[leaving_index] - x[leaving_index]) + : (x[leaving_index] - lp.upper[leaving_index]); + bound_flipping_ratio_test_t bfrt(settings, + start_time, + m, + n, + slope, + lp.lower, + lp.upper, + bounded_variables, + vstatus, + nonbasic_list, + z, + delta_z, + delta_z_indices, + nonbasic_mark); + entering_index = bfrt.compute_step_length(step_length, nonbasic_entering_index); + timers.bfrt_time += timers.stop_timer(); } else { entering_index = phase2::phase2_ratio_test( lp, settings, vstatus, nonbasic_list, z, delta_z, step_length, nonbasic_entering_index); @@ -1375,33 +2511,135 @@ dual::status_t dual_phase2(i_t phase, if (entering_index == -2) { return dual::status_t::TIME_LIMIT; } if (entering_index == -3) { return dual::status_t::CONCURRENT_LIMIT; } if (entering_index == -1) { - if (primal_infeasibility > settings.primal_tol && - max_val < settings.steepest_edge_primal_tol) { - // We could be done - settings.log.printf("Exiting due to small primal infeasibility se %e\n", max_val); - phase2::prepare_optimality(lp, - settings, - ft, - objective, - basic_list, - nonbasic_list, - vstatus, - phase, - start_time, - max_val, - iter, - x, - y, - z, - sol); - status = dual::status_t::OPTIMAL; - break; + settings.log.printf("No entering variable found. Iter %d\n", iter); + settings.log.printf("Scaled infeasibility %e\n", max_val); + f_t perturbation = phase2::amount_of_perturbation(lp, objective); + + if (perturbation > 0.0 && phase == 2) { + // Try to remove perturbation + std::vector unperturbed_y(m); + std::vector unperturbed_z(n); + phase2::compute_dual_solution_from_basis( + lp, ft, basic_list, nonbasic_list, unperturbed_y, unperturbed_z); + { + const f_t dual_infeas = phase2::dual_infeasibility( + lp, settings, vstatus, unperturbed_z, settings.tight_tol, settings.dual_tol); + settings.log.printf("Dual infeasibility after removing perturbation %e\n", dual_infeas); + if (dual_infeas <= settings.dual_tol) { + settings.log.printf("Removed perturbation of %.2e.\n", perturbation); + z = unperturbed_z; + y = unperturbed_y; + perturbation = 0.0; + + std::vector unperturbed_x(n); + phase2::compute_primal_solution_from_basis( + lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); + x = unperturbed_x; + primal_infeasibility = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + settings.log.printf("Updated primal infeasibility: %e\n", primal_infeasibility); + + objective = lp.objective; + // Need to reset the objective value, since we have recomputed x + obj = phase2::compute_perturbed_objective(objective, x); + if (dual_infeas <= settings.dual_tol && primal_infeasibility <= settings.primal_tol) { + phase2::prepare_optimality(lp, + settings, + ft, + objective, + basic_list, + nonbasic_list, + vstatus, + phase, + start_time, + max_val, + iter, + x, + y, + z, + sol); + status = dual::status_t::OPTIMAL; + break; + } + settings.log.printf( + "Continuing with perturbation removed and steepest edge norms reset\n"); + // Clear delta_z before restarting the iteration + phase2::clear_delta_z( + entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); + continue; + } else { + std::vector unperturbed_x(n); + phase2::compute_primal_solution_from_basis( + lp, ft, basic_list, nonbasic_list, vstatus, unperturbed_x); + x = unperturbed_x; + primal_infeasibility = phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); + + const f_t orig_dual_infeas = phase2::dual_infeasibility( + lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); + + if (primal_infeasibility <= settings.primal_tol && + orig_dual_infeas <= settings.dual_tol) { + phase2::prepare_optimality(lp, + settings, + ft, + objective, + basic_list, + nonbasic_list, + vstatus, + phase, + start_time, + max_val, + iter, + x, + y, + z, + sol); + status = dual::status_t::OPTIMAL; + break; + } + settings.log.printf("Failed to remove perturbation of %.2e.\n", perturbation); + } + } + } + + if (perturbation == 0.0 && phase == 2) { + constexpr bool use_farkas = false; + if constexpr (use_farkas) { + std::vector farkas_y; + std::vector farkas_zl; + std::vector farkas_zu; + f_t farkas_constant; + std::vector my_delta_y; + delta_y_sparse.to_dense(my_delta_y); + + // TODO(CMM): Do I use the perturbed or unperturbed objective? + const f_t obj_val = phase2::compute_perturbed_objective(objective, x); + phase2::compute_farkas_certificate(lp, + settings, + vstatus, + x, + y, + z, + my_delta_y, + delta_z, + direction, + leaving_index, + obj_val, + farkas_y, + farkas_zl, + farkas_zu, + farkas_constant); + } } + const f_t dual_infeas = phase2::dual_infeasibility(lp, settings, vstatus, z, settings.tight_tol, settings.dual_tol); settings.log.printf("Dual infeasibility %e\n", dual_infeas); const f_t primal_inf = phase2::primal_infeasibility(lp, settings, vstatus, x); settings.log.printf("Primal infeasibility %e\n", primal_inf); + settings.log.printf("Updates %d\n", ft.num_updates()); + settings.log.printf("Steepest edge %e\n", max_val); if (dual_infeas > settings.dual_tol) { settings.log.printf( "Numerical issues encountered. No entering variable found with large infeasibility.\n"); @@ -1410,121 +2648,111 @@ dual::status_t dual_phase2(i_t phase, return dual::status_t::DUAL_UNBOUNDED; } + timers.start_timer(); // Update dual variables // y <- y + steplength * delta_y - for (i_t i = 0; i < m; ++i) { - y[i] += step_length * delta_y[i]; - } - // z <- z + steplength * delta_z - for (i_t j = 0; j < n; ++j) { - z[j] += step_length * delta_z[j]; - } + phase2::update_dual_variables( + delta_y_sparse, delta_z_indices, delta_z, step_length, leaving_index, y, z); + timers.vector_time += timers.stop_timer(); #ifdef COMPUTE_DUAL_RESIDUAL - dual_res1 = z; - for (i_t j = 0; j < n; ++j) { - dual_res1[j] -= objective[j]; - } - matrix_transpose_vector_multiply(lp.A, 1.0, y, 1.0, dual_res1); + phase2::compute_dual_residual(lp.A, objective, y, z, dual_res1); f_t dual_res_norm = vector_norm_inf(dual_res1); if (dual_res_norm > settings.dual_tol) { settings.log.printf("|| A'*y + z - c || %e steplength %e\n", dual_res_norm, step_length); } #endif + timers.start_timer(); // Update primal variable - std::vector atilde(m); - std::vector delta_x_flip(n); - phase2::flip_bounds( - lp, settings, objective, z, nonbasic_list, entering_index, vstatus, delta_x_flip, atilde); - - // B*delta_xB_0 = atilde - std::vector delta_xB_0(m); - ft.b_solve(atilde, delta_xB_0); - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - x[j] += delta_xB_0[k]; - } - for (i_t k = 0; k < n - m; ++k) { - const i_t j = nonbasic_list[k]; - x[j] += delta_x_flip[j]; - } - - f_t delta_x_leaving; - if (direction == 1) { - delta_x_leaving = lp.lower[leaving_index] - x[leaving_index]; - } else { - delta_x_leaving = lp.upper[leaving_index] - x[leaving_index]; - } - // B*w = -A(:, entering) - std::vector scaled_delta_xB(m); - std::fill(rhs.begin(), rhs.end(), 0.0); - lp.A.load_a_column(entering_index, rhs); - std::vector utilde(m); - ft.b_solve(rhs, scaled_delta_xB, utilde); - for (i_t i = 0; i < m; ++i) { - scaled_delta_xB[i] *= -1.0; + const i_t num_flipped = phase2::flip_bounds(lp, + settings, + bounded_variables, + objective, + z, + delta_z_indices, + nonbasic_list, + entering_index, + vstatus, + delta_x_flip, + atilde_mark, + atilde, + atilde_index); + + timers.flip_time += timers.stop_timer(); + + sparse_vector_t delta_xB_0_sparse(m, 0); + if (num_flipped > 0) { + timers.start_timer(); + phase2::adjust_for_flips(ft, + basic_list, + delta_z_indices, + atilde_index, + atilde, + atilde_mark, + delta_xB_0_sparse, + delta_x_flip, + x); + timers.ftran_time += timers.stop_timer(); } -#ifdef COMPUTE_BSOLVE_RESIDUAL - { - std::vector residual_B(m); - b_multiply(lp, basic_list, scaled_delta_xB, residual_B); - f_t err_max = 0; - for (Int k = 0; k < m; ++k) { - const f_t err = std::abs(rhs[k] - residual_B[k]); - if (err >= 1e-5) { - settings.log.printf( - "Bsolve diff %d %e rhs %e residual %e\n", k, err, rhs[k], residual_B[k]); - } - err_max = std::max(err_max, err); - } - assert(err_max < 1e-4); + timers.start_timer(); + sparse_vector_t utilde_sparse(m, 0); + sparse_vector_t scaled_delta_xB_sparse(m, 0); + sparse_vector_t rhs_sparse(lp.A, entering_index); + if (phase2::compute_delta_x(lp, + ft, + entering_index, + leaving_index, + basic_leaving_index, + direction, + basic_list, + delta_x_flip, + rhs_sparse, + x, + utilde_sparse, + scaled_delta_xB_sparse, + delta_x) == -1) { + settings.log.printf("Failed to compute delta_x. Iter %d\n", iter); + return dual::status_t::NUMERICAL; } -#endif - f_t primal_step_length = delta_x_leaving / scaled_delta_xB[basic_leaving_index]; - for (i_t k = 0; k < m; ++k) { - const i_t j = basic_list[k]; - delta_x[j] = primal_step_length * scaled_delta_xB[k]; - } - delta_x[leaving_index] = delta_x_leaving; - for (i_t k = 0; k < n - m; k++) { - const i_t j = nonbasic_list[k]; - delta_x[j] = 0.0; - } - delta_x[entering_index] = primal_step_length; + timers.ftran_time += timers.stop_timer(); -#ifdef COMPUTE_PRIMAL_STEP_RESIDUAL +#ifdef CHECK_PRIMAL_STEP + std::vector residual(m); matrix_vector_multiply(lp.A, 1.0, delta_x, 1.0, residual); - f_t primal_step_err = vector_norm_inf(residual); + f_t primal_step_err = vector_norm_inf(residual); if (primal_step_err > 1e-4) { settings.log.printf("|| A * dx || %e\n", primal_step_err); } #endif + timers.start_timer(); const i_t steepest_edge_status = phase2::update_steepest_edge_norms(settings, basic_list, ft, direction, - delta_y, - scaled_delta_xB, + delta_y_sparse, + steepest_edge_norm_check, + scaled_delta_xB_sparse, basic_leaving_index, entering_index, + v, delta_y_steepest_edge); #ifdef STEEPEST_EDGE_DEBUG if (steepest_edge_status == -1) { settings.log.printf("Num updates %d\n", ft.num_updates()); - settings.log.printf(" Primal step length %e\n", primal_step_length); - settings.log.printf("|| delta_xB || %e\n", vector_norm_inf(scaled_delta_xB)); settings.log.printf("|| rhs || %e\n", vector_norm_inf(rhs)); } #endif assert(steepest_edge_status == 0); + timers.se_norms_time += timers.stop_timer(); + timers.start_timer(); // x <- x + delta_x - for (i_t j = 0; j < n; ++j) { - x[j] += delta_x[j]; - } + phase2::update_primal_variables(scaled_delta_xB_sparse, basic_list, delta_x, entering_index, x); + timers.vector_time += timers.stop_timer(); + #ifdef COMPUTE_PRIMAL_RESIDUAL residual = lp.rhs; matrix_vector_multiply(lp.A, 1.0, x, -1.0, residual); @@ -1534,10 +2762,68 @@ dual::status_t dual_phase2(i_t phase, } #endif + timers.start_timer(); + // TODO(CMM): Do I also need to update the objective due to the bound flips? + // TODO(CMM): I'm using the unperturbed objective here, should this be the perturbed objective? + phase2::update_objective( + basic_list, scaled_delta_xB_sparse.i, lp.objective, delta_x, entering_index, obj); + timers.objective_time += timers.stop_timer(); + + timers.start_timer(); + // Update primal infeasibilities due to changes in basic variables + // from flipping bounds +#ifdef CHECK_BASIC_INFEASIBILITIES + phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 2); +#endif + phase2::update_primal_infeasibilities(lp, + settings, + basic_list, + x, + entering_index, + leaving_index, + delta_xB_0_sparse.i, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); + // Update primal infeasibilities due to changes in basic variables + // from the leaving and entering variables + phase2::update_primal_infeasibilities(lp, + settings, + basic_list, + x, + entering_index, + leaving_index, + scaled_delta_xB_sparse.i, + squared_infeasibilities, + infeasibility_indices, + primal_infeasibility); + // Update the entering variable + phase2::update_single_primal_infeasibility(lp.lower, + lp.upper, + x, + settings.primal_tol, + squared_infeasibilities, + infeasibility_indices, + entering_index, + primal_infeasibility); + + phase2::clean_up_infeasibilities(squared_infeasibilities, infeasibility_indices); + +#if CHECK_PRIMAL_INFEASIBILITIES + phase2::check_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); +#endif + timers.update_infeasibility_time += timers.stop_timer(); + + // Clear delta_x + phase2::clear_delta_x(basic_list, entering_index, scaled_delta_xB_sparse, delta_x); + + timers.start_timer(); f_t sum_perturb = 0.0; - phase2::compute_perturbation(lp, settings, z, objective, sum_perturb); + phase2::compute_perturbation(lp, settings, delta_z_indices, z, objective, sum_perturb); + timers.perturb_time += timers.stop_timer(); - // Update basis + // Update basis information vstatus[entering_index] = variable_status_t::BASIC; if (lp.lower[leaving_index] != lp.upper[leaving_index]) { vstatus[leaving_index] = static_cast(-direction); @@ -1546,66 +2832,89 @@ dual::status_t dual_phase2(i_t phase, } basic_list[basic_leaving_index] = entering_index; nonbasic_list[nonbasic_entering_index] = leaving_index; + nonbasic_mark[entering_index] = -1; + nonbasic_mark[leaving_index] = nonbasic_entering_index; + basic_mark[leaving_index] = -1; + basic_mark[entering_index] = basic_leaving_index; + +#ifdef CHECK_BASIC_INFEASIBILITIES + phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 5); +#endif - // Refactor or Update + timers.start_timer(); + // Refactor or update the basis factorization bool should_refactor = ft.num_updates() > settings.refactor_frequency; if (!should_refactor) { - i_t recommend_refactor = ft.update(utilde, basic_leaving_index); -#ifdef CHECK_FT - { - csc_matrix_t Btest(m, m, 1); - ft.multiply_lu(Btest); - { - csc_matrix_t B(m, m, 1); - form_b(lp, basic_list, B); - csc_matrix_t Diff(m, m, 1); - add(Btest, B, 1.0, -1.0, Diff); - const f_t err = Diff.norm1(); - if (err > settings.primal_tol) { - settings.log.printf("|| B - L*U || %e\n", Diff.norm1()); - } - assert(err < settings.primal_tol); - } - } + i_t recommend_refactor = ft.update(utilde_sparse, UTsol_sparse, basic_leaving_index); +#ifdef CHECK_UPDATE + phase2::check_update(lp, settings, ft, basic_list, basic_leaving_index); #endif should_refactor = recommend_refactor == 1; } +#ifdef CHECK_BASIC_INFEASIBILITIES + phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 6); +#endif if (should_refactor) { if (factorize_basis(lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { + if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } basis_repair(lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); - if (factorize_basis( - lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { - return dual::status_t::NUMERICAL; + i_t count = 0; + while (factorize_basis( + lp.A, settings, basic_list, L, U, p, pinv, q, deficient, slacks_needed) == -1) { + settings.log.printf("Failed to repair basis. %d deficient columns.\n", + static_cast(deficient.size())); + if (toc(start_time) > settings.time_limit) { return dual::status_t::TIME_LIMIT; } + settings.threshold_partial_pivoting_tol = 1.0; + count++; + if (count > 100) { return dual::status_t::NUMERICAL; } + basis_repair( + lp.A, settings, deficient, slacks_needed, basic_list, nonbasic_list, vstatus); } } reorder_basic_list(q, basic_list); ft.reset(L, U, p); + phase2::reset_basis_mark(basic_list, nonbasic_list, basic_mark, nonbasic_mark); + phase2::compute_initial_primal_infeasibilities( + lp, settings, basic_list, x, squared_infeasibilities, infeasibility_indices); } +#ifdef CHECK_BASIC_INFEASIBILITIES + phase2::check_basic_infeasibilities(basic_list, basic_mark, infeasibility_indices, 7); +#endif + timers.lu_update_time += timers.stop_timer(); + timers.start_timer(); phase2::compute_steepest_edge_norm_entering( settings, m, ft, basic_leaving_index, entering_index, delta_y_steepest_edge); + timers.se_entering_time += timers.stop_timer(); #ifdef STEEPEST_EDGE_DEBUG if (iter < 100 || iter % 100 == 0)) - { - phase2::check_steepest_edge_norms(settings, basic_list, ft, delta_y_steepest_edge); - } + { + phase2::check_steepest_edge_norms(settings, basic_list, ft, delta_y_steepest_edge); + } +#endif + +#ifdef CHECK_BASIS_MARK + phase2::check_basis_mark(settings, basic_list, nonbasic_list, basic_mark, nonbasic_mark); #endif iter++; - const f_t obj = compute_objective(lp, x); - f_t now = toc(start_time); + // Clear delta_z + phase2::clear_delta_z(entering_index, leaving_index, delta_z_mark, delta_z_indices, delta_z); + + f_t now = toc(start_time); if ((iter - start_iter) < settings.first_iteration_log || (iter % settings.iteration_log_frequency) == 0) { if (phase == 1 && iter == 1) { - settings.log.printf(" Iter Objective Primal Infeas Perturb Time\n"); + settings.log.printf(" Iter Objective Num Inf. Sum Inf. Perturb Time\n"); } - settings.log.printf("%5d %+.8e %.8e %.2e %.2f\n", + settings.log.printf("%5d %+.16e %7d %.8e %.2e %.2f\n", iter, compute_user_objective(lp, obj), + infeasibility_indices.size(), primal_infeasibility, sum_perturb, now); @@ -1624,6 +2933,20 @@ dual::status_t dual_phase2(i_t phase, } } if (iter >= iter_limit) { status = dual::status_t::ITERATION_LIMIT; } + + if (phase == 2) { + timers.print_timers(settings); + constexpr bool print_stats = false; + if constexpr (print_stats) { + settings.log.printf("Sparse delta_z %8d %8.2f%\n", + sparse_delta_z, + 100.0 * sparse_delta_z / (sparse_delta_z + dense_delta_z)); + settings.log.printf("Dense delta_z %8d %8.2f%\n", + dense_delta_z, + 100.0 * dense_delta_z / (sparse_delta_z + dense_delta_z)); + ft.print_stats(); + } + } return status; } diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp index d94fd0f6c..68043e06a 100644 --- a/cpp/src/dual_simplex/presolve.cpp +++ b/cpp/src/dual_simplex/presolve.cpp @@ -23,6 +23,128 @@ namespace cuopt::linear_programming::dual_simplex { +template +void bound_strengthening(const std::vector& row_sense, + const simplex_solver_settings_t& settings, + lp_problem_t& problem) +{ + const i_t m = problem.num_rows; + const i_t n = problem.num_cols; + + std::vector constraint_lower(m); + std::vector num_lower_infinity(m); + std::vector num_upper_infinity(m); + + csc_matrix_t Arow(1, 1, 1); + problem.A.transpose(Arow); + + std::vector less_rows; + less_rows.reserve(m); + + for (i_t i = 0; i < m; ++i) { + if (row_sense[i] == 'L') { less_rows.push_back(i); } + } + + std::vector lower = problem.lower; + std::vector upper = problem.upper; + + std::vector updated_variables_list; + updated_variables_list.reserve(n); + std::vector updated_variables_mark(n, 0); + + i_t iter = 0; + const i_t iter_limit = 10; + i_t total_strengthened_variables = 0; + settings.log.printf("Less equal rows %d\n", less_rows.size()); + while (iter < iter_limit && less_rows.size() > 0) { + // Derive bounds on the constraints + settings.log.printf("Running bound strengthening on %d rows\n", + static_cast(less_rows.size())); + for (i_t i : less_rows) { + const i_t row_start = Arow.col_start[i]; + const i_t row_end = Arow.col_start[i + 1]; + num_lower_infinity[i] = 0; + num_upper_infinity[i] = 0; + + f_t lower_limit = 0.0; + for (i_t p = row_start; p < row_end; ++p) { + const i_t j = Arow.i[p]; + const f_t a_ij = Arow.x[p]; + if (a_ij > 0) { + lower_limit += a_ij * lower[j]; + } else if (a_ij < 0) { + lower_limit += a_ij * upper[j]; + } + if (lower[j] == -inf && a_ij > 0) { + num_lower_infinity[i]++; + lower_limit = -inf; + } + if (upper[j] == inf && a_ij < 0) { + num_lower_infinity[i]++; + lower_limit = -inf; + } + } + constraint_lower[i] = lower_limit; + } + + // Use the constraint bounds to derive new bounds on the variables + for (i_t i : less_rows) { + if (std::isfinite(constraint_lower[i]) && num_lower_infinity[i] == 0) { + const i_t row_start = Arow.col_start[i]; + const i_t row_end = Arow.col_start[i + 1]; + for (i_t p = row_start; p < row_end; ++p) { + const i_t k = Arow.i[p]; + const f_t a_ik = Arow.x[p]; + if (a_ik > 0) { + const f_t new_upper = lower[k] + (problem.rhs[i] - constraint_lower[i]) / a_ik; + if (new_upper < upper[k]) { + upper[k] = new_upper; + if (lower[k] > upper[k]) { + settings.log.printf( + "\t INFEASIBLE!!!!!!!!!!!!!!!!! constraint_lower %e lower %e rhs %e\n", + constraint_lower[i], + lower[k], + problem.rhs[i]); + } + if (!updated_variables_mark[k]) { updated_variables_list.push_back(k); } + } + } else if (a_ik < 0) { + const f_t new_lower = upper[k] + (problem.rhs[i] - constraint_lower[i]) / a_ik; + if (new_lower > lower[k]) { + lower[k] = new_lower; + if (lower[k] > upper[k]) { + settings.log.printf("\t INFEASIBLE !!!!!!!!!!!!!!!!!!1\n"); + } + if (!updated_variables_mark[k]) { updated_variables_list.push_back(k); } + } + } + } + } + } + less_rows.clear(); + + // Update the bounds on the constraints + settings.log.printf("Round %d: Strengthend %d variables\n", + iter, + static_cast(updated_variables_list.size())); + total_strengthened_variables += updated_variables_list.size(); + for (i_t j : updated_variables_list) { + updated_variables_mark[j] = 0; + const i_t col_start = problem.A.col_start[j]; + const i_t col_end = problem.A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = problem.A.i[p]; + less_rows.push_back(i); + } + } + updated_variables_list.clear(); + iter++; + } + settings.log.printf("Total strengthened variables %d\n", total_strengthened_variables); + problem.lower = lower; + problem.upper = upper; +} + template i_t remove_empty_cols(lp_problem_t& problem, i_t& num_empty_cols, @@ -500,6 +622,7 @@ i_t add_artifical_variables(lp_problem_t& problem, template void convert_user_problem(const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, lp_problem_t& problem, std::vector& new_slacks) { @@ -559,6 +682,14 @@ void convert_user_problem(const user_problem_t& user_problem, convert_greater_to_less(user_problem, row_sense, problem, greater_rows, less_rows); } + // At this point the problem representation is in the form: A*x {<=, =} b + // This is the time to run bound strengthening + constexpr bool run_bound_strengthening = false; + if constexpr (run_bound_strengthening) { + settings.log.printf("Running bound strengthening\n"); + bound_strengthening(row_sense, settings, problem); + } + // The original problem may have a variable without a lower bound // but a finite upper bound // -inf < x_j <= u_j @@ -669,7 +800,8 @@ void convert_user_lp_with_guess(const user_problem_t& user_problem, lp_solution_t& converted_solution) { std::vector new_slacks; - convert_user_problem(user_problem, problem, new_slacks); + simplex_solver_settings_t settings; + convert_user_problem(user_problem, settings, problem, new_slacks); crush_primal_solution_with_slack( user_problem, problem, initial_solution.x, initial_slack, new_slacks, converted_solution.x); crush_dual_solution(user_problem, @@ -900,9 +1032,11 @@ void uncrush_solution(const presolve_info_t& presolve_info, #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE -template void convert_user_problem(const user_problem_t& user_problem, - lp_problem_t& problem, - std::vector& new_slacks); +template void convert_user_problem( + const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, + lp_problem_t& problem, + std::vector& new_slacks); template void convert_user_lp_with_guess( const user_problem_t& user_problem, diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp index 947c637cb..7a307e6f7 100644 --- a/cpp/src/dual_simplex/presolve.hpp +++ b/cpp/src/dual_simplex/presolve.hpp @@ -63,6 +63,7 @@ struct presolve_info_t { template void convert_user_problem(const user_problem_t& user_problem, + const simplex_solver_settings_t& settings, lp_problem_t& problem, std::vector& new_slacks); diff --git a/cpp/src/dual_simplex/random.hpp b/cpp/src/dual_simplex/random.hpp index e1ad01fef..dfc60dbd5 100644 --- a/cpp/src/dual_simplex/random.hpp +++ b/cpp/src/dual_simplex/random.hpp @@ -21,7 +21,7 @@ namespace cuopt::linear_programming::dual_simplex { -template +template class random_t { public: random_t(i_t seed) : gen(seed) {} @@ -34,6 +34,12 @@ class random_t { return distrib(gen); } + f_t random() + { + std::uniform_real_distribution<> distrib(0.0, 1.0); + return distrib(gen); + } + private: std::mt19937 gen; }; diff --git a/cpp/src/dual_simplex/right_looking_lu.cpp b/cpp/src/dual_simplex/right_looking_lu.cpp index 57eb9b01d..7fe276ec6 100644 --- a/cpp/src/dual_simplex/right_looking_lu.cpp +++ b/cpp/src/dual_simplex/right_looking_lu.cpp @@ -571,12 +571,12 @@ i_t right_looking_lu(const csc_matrix_t& A, // Find pivot that satisfies // abs(pivot) >= abstol, // abs(pivot) >= threshold_tol * max abs[pivot column] - i_t pivot_i = -1; - i_t pivot_j = -1; - i_t pivot_p = kNone; - constexpr f_t pivot_tol = 1e-11; - constexpr f_t drop_tol = 1e-13; - constexpr f_t threshold_tol = 1.0 / 10.0; + i_t pivot_i = -1; + i_t pivot_j = -1; + i_t pivot_p = kNone; + constexpr f_t pivot_tol = 1e-11; + const f_t drop_tol = tol == 1.0 ? 0.0 : 1e-13; + const f_t threshold_tol = tol; markowitz_search(Cdegree, Rdegree, col_count, diff --git a/cpp/src/dual_simplex/simplex_solver_settings.hpp b/cpp/src/dual_simplex/simplex_solver_settings.hpp index df5e4e1d0..a51ed19bc 100644 --- a/cpp/src/dual_simplex/simplex_solver_settings.hpp +++ b/cpp/src/dual_simplex/simplex_solver_settings.hpp @@ -46,6 +46,8 @@ struct simplex_solver_settings_t { cut_off(std::numeric_limits::infinity()), steepest_edge_ratio(0.5), steepest_edge_primal_tol(1e-9), + hypersparse_threshold(0.05), + threshold_partial_pivoting_tol(1.0 / 10.0), use_steepest_edge_pricing(true), use_harris_ratio(false), use_bound_flip_ratio(true), @@ -86,7 +88,9 @@ struct simplex_solver_settings_t { f_t cut_off; // If the dual objective is greater than the cutoff we stop f_t steepest_edge_ratio; // the ratio of computed steepest edge mismatch from updated steepest edge - f_t steepest_edge_primal_tol; // Primal tolerance divided by steepest edge norm + f_t steepest_edge_primal_tol; // Primal tolerance divided by steepest edge norm + f_t hypersparse_threshold; + mutable f_t threshold_partial_pivoting_tol; bool use_steepest_edge_pricing; // true if using steepest edge pricing, false if using max // infeasibility pricing bool use_harris_ratio; // true if using the harris ratio test diff --git a/cpp/src/dual_simplex/solve.cpp b/cpp/src/dual_simplex/solve.cpp index 464bd7047..e665bae97 100644 --- a/cpp/src/dual_simplex/solve.cpp +++ b/cpp/src/dual_simplex/solve.cpp @@ -244,7 +244,7 @@ lp_status_t solve_linear_program(const user_problem_t& user_problem, f_t start_time = tic(); lp_problem_t original_lp(1, 1, 1); std::vector new_slacks; - convert_user_problem(user_problem, original_lp, new_slacks); + convert_user_problem(user_problem, settings, original_lp, new_slacks); solution.resize(user_problem.num_rows, user_problem.num_cols); lp_solution_t lp_solution(original_lp.num_rows, original_lp.num_cols); std::vector vstatus; @@ -283,7 +283,7 @@ i_t solve(const user_problem_t& problem, lp_problem_t original_lp( problem.num_rows, problem.num_cols, problem.A.col_start[problem.A.n]); std::vector new_slacks; - convert_user_problem(problem, original_lp, new_slacks); + convert_user_problem(problem, settings, original_lp, new_slacks); lp_solution_t solution(original_lp.num_rows, original_lp.num_cols); std::vector vstatus; std::vector edge_norms; diff --git a/cpp/src/dual_simplex/sparse_matrix.cpp b/cpp/src/dual_simplex/sparse_matrix.cpp index 830838bf5..dc4df3990 100644 --- a/cpp/src/dual_simplex/sparse_matrix.cpp +++ b/cpp/src/dual_simplex/sparse_matrix.cpp @@ -16,6 +16,7 @@ */ #include +#include #include @@ -148,6 +149,61 @@ i_t csc_matrix_t::load_a_column(i_t j, std::vector& Aj) const return (col_end - col_start); } +template +void csc_matrix_t::append_column(const std::vector& x) +{ + const i_t m = this->m; + assert(x.size() == m); + const i_t xsz = x.size(); + i_t nz = this->col_start[this->n]; + for (i_t j = 0; j < xsz; ++j) { + if (x[j] != 0.0) { + this->i[nz] = j; + this->x[nz] = x[j]; + nz++; + } + } + this->col_start[this->n + 1] = nz; + this->n++; +} + +template +void csc_matrix_t::append_column(const sparse_vector_t& x) +{ + const i_t m = this->m; + assert(x.n == m); + i_t nz = this->col_start[this->n]; + const i_t xnz = x.i.size(); + for (i_t k = 0; k < xnz; ++k) { + const i_t i = x.i[k]; + const f_t x_val = x.x[k]; + if (x_val != 0.0) { + this->i[nz] = i; + this->x[nz] = x_val; + nz++; + } + } + this->col_start[this->n + 1] = nz; + this->n++; +} + +template +void csc_matrix_t::append_column(i_t x_nz, i_t* i, f_t* x) +{ + i_t nz = this->col_start[this->n]; + for (i_t k = 0; k < x_nz; ++k) { + const i_t i_val = i[k]; + const f_t x_val = x[i_val]; + if (x_val != 0.0) { + this->i[nz] = i_val; + this->x[nz] = x_val; + nz++; + } + } + this->col_start[this->n + 1] = nz; + this->n++; +} + template i_t csc_matrix_t::transpose(csc_matrix_t& AT) const { @@ -360,6 +416,28 @@ void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vecto } } +// x <- x + alpha * A(:, j) +template +void scatter_dense(const csc_matrix_t& A, + i_t j, + f_t alpha, + std::vector& x, + std::vector& mark, + std::vector& indices) +{ + const i_t col_start = A.col_start[j]; + const i_t col_end = A.col_start[j + 1]; + for (i_t p = col_start; p < col_end; ++p) { + const i_t i = A.i[p]; + const f_t ax = A.x[p]; + x[i] += alpha * ax; + if (!mark[i]) { + mark[i] = 1; + indices.push_back(i); + } + } +} + // Compute C = A*B where C is m x n, A is m x k, and B = k x n // Do this by computing C(:, j) = A*B(:, j) = sum (i=1 to k) A(:, k)*B(i, j) template @@ -695,6 +773,13 @@ template void scatter_dense(const csc_matrix_t& A, double alpha, std::vector& x); +template void scatter_dense(const csc_matrix_t& A, + int j, + double alpha, + std::vector& x, + std::vector& mark, + std::vector& indices); + template int multiply(const csc_matrix_t& A, const csc_matrix_t& B, csc_matrix_t& C); diff --git a/cpp/src/dual_simplex/sparse_matrix.hpp b/cpp/src/dual_simplex/sparse_matrix.hpp index 29e6a0cf4..9cc3d6380 100644 --- a/cpp/src/dual_simplex/sparse_matrix.hpp +++ b/cpp/src/dual_simplex/sparse_matrix.hpp @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -29,6 +30,9 @@ namespace cuopt::linear_programming::dual_simplex { template class csr_matrix_t; // Forward declaration of CSR matrix needed to define CSC matrix +template +class sparse_vector_t; // Forward declaration of sparse vector needed to define CSC matrix + // A sparse matrix stored in compressed sparse column format template class csc_matrix_t { @@ -59,6 +63,15 @@ class csc_matrix_t { // Compute the transpose of A i_t transpose(csc_matrix_t& AT) const; + // Append a dense column to the matrix. Assumes the matrix has already been resized accordingly + void append_column(const std::vector& x); + + // Append a sparse column to the matrix. Assumes the matrix has already been resized accordingly + void append_column(const sparse_vector_t& x); + + // Append a sparse column to the matrix. Assumes the matrix has already been resized accordingly + void append_column(i_t nz, i_t* i, f_t* x); + // Remove columns from the matrix i_t remove_columns(const std::vector& cols_to_remove); @@ -131,6 +144,14 @@ i_t scatter(const csc_matrix_t& A, template void scatter_dense(const csc_matrix_t& A, i_t j, f_t alpha, std::vector& x); +template +void scatter_dense(const csc_matrix_t& A, + i_t j, + f_t alpha, + std::vector& x, + std::vector& mark, + std::vector& indices); + // Compute C = A*B where C is m x n, A is m x k, and B = k x n // Do this by computing C(:, j) = A*B(:, j) = sum (i=1 to k) A(:, k)*B(i, j) template diff --git a/cpp/src/dual_simplex/sparse_vector.cpp b/cpp/src/dual_simplex/sparse_vector.cpp new file mode 100644 index 000000000..73a0c0a8f --- /dev/null +++ b/cpp/src/dual_simplex/sparse_vector.cpp @@ -0,0 +1,224 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include + +namespace cuopt::linear_programming::dual_simplex { + +template +sparse_vector_t::sparse_vector_t(const csc_matrix_t& A, i_t col) +{ + const i_t col_start = A.col_start[col]; + const i_t col_end = A.col_start[col + 1]; + n = A.m; + const i_t nz = col_end - col_start; + i.reserve(nz); + x.reserve(nz); + for (i_t k = col_start; k < col_end; ++k) { + i.push_back(A.i[k]); + x.push_back(A.x[k]); + } +} + +template +void sparse_vector_t::from_dense(const std::vector& in) +{ + i.clear(); + x.clear(); + n = in.size(); + i.reserve(n); + x.reserve(n); + for (i_t k = 0; k < n; ++k) { + if (in[k] != 0) { + i.push_back(k); + x.push_back(in[k]); + } + } +} + +template +void sparse_vector_t::to_csc(csc_matrix_t& A) const +{ + A.m = n; + A.n = 1; + A.nz_max = i.size(); + A.col_start.resize(2); + A.col_start[0] = 0; + A.col_start[1] = i.size(); + A.i = i; + A.x = x; +} + +template +void sparse_vector_t::to_dense(std::vector& x_dense) const +{ + x_dense.clear(); + x_dense.resize(n, 0.0); + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + x_dense[i[k]] = x[k]; + } +} + +template +void sparse_vector_t::scatter(std::vector& x_dense) const +{ + // Assumes x_dense is already cleared + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + x_dense[i[k]] += x[k]; + } +} + +template +void sparse_vector_t::inverse_permute_vector(const std::vector& p) +{ + assert(p.size() == n); + i_t nz = i.size(); + std::vector i_perm(nz); + for (i_t k = 0; k < nz; ++k) { + i_perm[k] = p[i[k]]; + } + i = i_perm; +} + +template +void sparse_vector_t::inverse_permute_vector(const std::vector& p, + sparse_vector_t& y) const +{ + i_t m = p.size(); + assert(n == m); + i_t nz = i.size(); + y.n = n; + y.x = x; + std::vector i_perm(nz); + for (i_t k = 0; k < nz; ++k) { + i_perm[k] = p[i[k]]; + } + y.i = i_perm; +} + +template +f_t sparse_vector_t::sparse_dot(const csc_matrix_t& Y, i_t y_col) const +{ + const i_t col_start = Y.col_start[y_col]; + const i_t col_end = Y.col_start[y_col + 1]; + const i_t ny = col_end - col_start; + const i_t nx = i.size(); + f_t dot = 0.0; + for (i_t h = 0, k = col_start; h < nx && k < col_end;) { + const i_t p = i[h]; + const i_t q = Y.i[k]; + if (p == q) { + dot += Y.x[k] * x[h]; + h++; + k++; + } else if (p < q) { + h++; + } else if (q < p) { + k++; + } + } + return dot; +} + +template +void sparse_vector_t::sort() +{ + if (i.size() == 1) { return; } + // If the number of nonzeros is large, use a O(n) bucket sort + if (i.size() > 0.3 * n) { + std::vector bucket(n, 0.0); + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + bucket[i[k]] = x[k]; + } + i.clear(); + i.reserve(nz); + x.clear(); + x.reserve(nz); + for (i_t k = 0; k < n; ++k) { + if (bucket[k] != 0.0) { + i.push_back(k); + x.push_back(bucket[k]); + } + } + } else { + // Use a n log n sort + const i_t nz = i.size(); + std::vector i_sorted(nz); + std::vector x_sorted(nz); + std::vector perm(nz); + for (i_t k = 0; k < nz; ++k) { + perm[k] = k; + } + std::vector& iunsorted = i; + std::sort( + perm.begin(), perm.end(), [&iunsorted](i_t a, i_t b) { return iunsorted[a] < iunsorted[b]; }); + for (i_t k = 0; k < nz; ++k) { + i_sorted[k] = i[perm[k]]; + x_sorted[k] = x[perm[k]]; + } + i = i_sorted; + x = x_sorted; + } + + // Check +#ifdef CHECK_SORT + if (!std::is_sorted(i.begin(), i.end())) { printf("Sort error\n"); } +#endif +} + +template +f_t sparse_vector_t::norm2_squared() const +{ + f_t dot = 0.0; + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + dot += x[k] * x[k]; + } + return dot; +} + +template +void sparse_vector_t::negate() +{ + const i_t nz = x.size(); + for (i_t k = 0; k < nz; ++k) { + x[k] *= -1.0; + } +} + +template +f_t sparse_vector_t::find_coefficient(i_t index) const +{ + const i_t nz = i.size(); + for (i_t k = 0; k < nz; ++k) { + if (i[k] == index) { return x[k]; } + } + return std::numeric_limits::quiet_NaN(); +} + +#ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE +template class sparse_vector_t; +#endif + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/sparse_vector.hpp b/cpp/src/dual_simplex/sparse_vector.hpp new file mode 100644 index 000000000..cf970acda --- /dev/null +++ b/cpp/src/dual_simplex/sparse_vector.hpp @@ -0,0 +1,64 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include + +namespace cuopt::linear_programming::dual_simplex { + +// A sparse vector stored as a list of nonzero coefficients and their indices +template +class sparse_vector_t { + public: + // Construct a sparse vector of dimension n with nz nonzero coefficients + sparse_vector_t(i_t n, i_t nz) : n(n), i(nz), x(nz) {} + // Construct a sparse vector from a dense vector. + sparse_vector_t(const std::vector& in) { from_dense(in); } + // Construct a sparse vector from a column of a CSC matrix + sparse_vector_t(const csc_matrix_t& A, i_t col); + // gather a dense vector into a sparse vector + void from_dense(const std::vector& in); + // convert a sparse vector into a CSC matrix with a single column + void to_csc(csc_matrix_t& A) const; + // convert a sparse vector into a dense vector. Dense vector is cleared and resized. + void to_dense(std::vector& x_dense) const; + // scatter a sparse vector into a dense vector. Assumes x_dense is already cleared or + // preinitialized + void scatter(std::vector& x_dense) const; + // inverse permute the current sparse vector + void inverse_permute_vector(const std::vector& p); + // inverse permute a sparse vector into another sparse vector + void inverse_permute_vector(const std::vector& p, sparse_vector_t& y) const; + // compute the dot product of a sparse vector with a column of a CSC matrix + f_t sparse_dot(const csc_matrix_t& Y, i_t y_col) const; + // ensure the coefficients in the sparse vectory are sorted in terms of increasing index + void sort(); + // compute the squared 2-norm of the sparse vector + f_t norm2_squared() const; + void negate(); + f_t find_coefficient(i_t index) const; + + i_t n; + std::vector i; + std::vector x; +}; + +} // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/triangle_solve.cpp b/cpp/src/dual_simplex/triangle_solve.cpp index eddf04843..13a42d2f9 100644 --- a/cpp/src/dual_simplex/triangle_solve.cpp +++ b/cpp/src/dual_simplex/triangle_solve.cpp @@ -89,26 +89,25 @@ i_t upper_triangular_transpose_solve(const csc_matrix_t& U, std::vecto return 0; } -// \brief Reach computes the reach of b=B(:, col) in the graph of G -// \param[in] B - Sparse CSC matrix containing rhs -// \param[in] col - column of B +// \brief Reach computes the reach of b in the graph of G +// \param[in] b - Sparse vector containing the rhs // \param[in] pinv - inverse permuation vector // \param[in, out] G - Sparse CSC matrix G. The column pointers of G are // modified (but restored) during this call \param[out] xi - stack of size 2*n. // xi[top] .. xi[n-1] contains the reachable indicies \returns top - the size of // the stack template -i_t reach(const csc_matrix_t& B, - i_t col, +i_t reach(const sparse_vector_t& b, const std::optional>& pinv, csc_matrix_t& G, std::vector& xi) { - const i_t m = G.m; - i_t top = m; - for (i_t p = B.col_start[col]; p < B.col_start[col + 1]; ++p) { - if (!MARKED(G.col_start, B.i[p])) { // start a DFS at unmarked node i - top = depth_first_search(B.i[p], pinv, G, top, xi, xi.begin() + m); + const i_t m = G.m; + i_t top = m; + const i_t bnz = b.i.size(); + for (i_t p = 0; p < bnz; ++p) { + if (!MARKED(G.col_start, b.i[p])) { // start a DFS at unmarked node i + top = depth_first_search(b.i[p], pinv, G, top, xi, xi.begin() + m); } } for (i_t p = top; p < m; ++p) { // restore G @@ -152,7 +151,7 @@ i_t depth_first_search(i_t j, } done = 1; // Node j is done if no unvisited neighbors i_t p2 = (jnew < 0) ? 0 : UNFLIP(G.col_start[jnew + 1]); - for (i_t p = pstack[head]; p < p2; ++p) { // Examin all neighbors of j + for (i_t p = pstack[head]; p < p2; ++p) { // Examine all neighbors of j i_t i = G.i[p]; // Consider neighbor i if (MARKED(G.col_start, i)) { continue; // skip visited node i @@ -163,29 +162,31 @@ i_t depth_first_search(i_t j, break; // break to start dfs at node i } if (done) { - head--; // remove j from the recursion stack - xi[--top] = j; // and place it the output stack + pstack[head] = 0; // restore pstack so it can be used again in other routines + xi[head] = 0; // restore xi so it can be used again in other routines + head--; // remove j from the recursion stack + xi[--top] = j; // and place it the output stack } } return top; } template -i_t sparse_triangle_solve(const csc_matrix_t& B, - i_t col, +i_t sparse_triangle_solve(const sparse_vector_t& b, const std::optional>& pinv, std::vector& xi, csc_matrix_t& G, f_t* x) { i_t m = G.m; - assert(B.m == m); - i_t top = reach(B, col, pinv, G, xi); + assert(b.n == m); + i_t top = reach(b, pinv, G, xi); for (i_t p = top; p < m; ++p) { x[xi[p]] = 0; // Clear x vector } - for (i_t p = B.col_start[col]; p < B.col_start[col + 1]; ++p) { - x[B.i[p]] = B.x[p]; // Scatter b + const i_t bnz = b.i.size(); + for (i_t p = 0; p < bnz; ++p) { + x[b.i[p]] = b.x[p]; // Scatter b } for (i_t px = top; px < m; ++px) { i_t j = xi[px]; // x(j) is nonzero @@ -225,8 +226,7 @@ template int upper_triangular_solve(const csc_matrix_t template int upper_triangular_transpose_solve(const csc_matrix_t& U, std::vector& x); -template int reach(const csc_matrix_t& B, - int col, +template int reach(const sparse_vector_t& b, const std::optional>& pinv, csc_matrix_t& G, std::vector& xi); @@ -238,12 +238,17 @@ template int depth_first_search(int j, std::vector& xi, std::vector::iterator pstack); -template int sparse_triangle_solve(const csc_matrix_t& B, - int col, +template int sparse_triangle_solve(const sparse_vector_t& b, const std::optional>& pinv, std::vector& xi, csc_matrix_t& G, double* x); + +template int sparse_triangle_solve(const sparse_vector_t& b, + const std::optional>& pinv, + std::vector& xi, + csc_matrix_t& G, + double* x); #endif } // namespace cuopt::linear_programming::dual_simplex diff --git a/cpp/src/dual_simplex/triangle_solve.hpp b/cpp/src/dual_simplex/triangle_solve.hpp index fc01613c7..5016332da 100644 --- a/cpp/src/dual_simplex/triangle_solve.hpp +++ b/cpp/src/dual_simplex/triangle_solve.hpp @@ -18,6 +18,7 @@ #pragma once #include +#include #include #include @@ -52,17 +53,15 @@ i_t upper_triangular_solve(const csc_matrix_t& U, std::vector& x) template i_t upper_triangular_transpose_solve(const csc_matrix_t& U, std::vector& x); -// \brief Reach computes the reach of b=B(:, col) in the graph of G -// \param[in] B - Sparse CSC matrix containing rhs -// \param[in] col - column of B +// \brief Reach computes the reach of b in the graph of G +// \param[in] b - sparse vector containing the rhs // \param[in] pinv - inverse permuation vector // \param[in, out] G - Sparse CSC matrix G. The column pointers of G are // modified (but restored) during this call \param[out] xi - stack of size 2*n. // xi[top] .. xi[n-1] contains the reachable indicies \returns top - the size of // the stack template -i_t reach(const csc_matrix_t& B, - i_t col, +i_t reach(const sparse_vector_t& b, const std::optional>& pinv, csc_matrix_t& G, std::vector& xi); @@ -95,8 +94,7 @@ i_t depth_first_search(i_t j, // and U is a sparse upper triangular matrix, and b is a sparse // right-hand side. The vector b is obtained from the column of a sparse // matrix. -// \param[in] B - Sparse CSC matrix contain the rhs -// \param[in] col - the column of B to use as b. b = B(:, col) +// \param[in] b - Sparse vector contain the rhs // \param[in] pinv - optional inverse permutation vector // \param[in, out] xi - An array of size 2*m, on output it contains the non-zero // pattern of x in xi[top] through xi[m-1] @@ -104,8 +102,7 @@ i_t depth_first_search(i_t j, // G.col_start is marked and restored during the algorithm // \param[out] - The solution vector xi_t template -i_t sparse_triangle_solve(const csc_matrix_t& B, - i_t col, +i_t sparse_triangle_solve(const sparse_vector_t& b, const std::optional>& pinv, std::vector& xi, csc_matrix_t& G, diff --git a/cpp/src/dual_simplex/vector_math.cpp b/cpp/src/dual_simplex/vector_math.cpp index aa05d5743..239848ac3 100644 --- a/cpp/src/dual_simplex/vector_math.cpp +++ b/cpp/src/dual_simplex/vector_math.cpp @@ -67,6 +67,47 @@ f_t dot(const std::vector& x, const std::vector& y) return dot; } +template +f_t sparse_dot( + i_t const* xind, f_t const* xval, i_t nx, i_t const* yind, i_t ny, f_t const* y_scatter_val) +{ + f_t dot = 0.0; + for (i_t i = 0, j = 0; i < nx && j < ny;) { + const i_t p = xind[i]; + const i_t q = yind[j]; + if (p == q) { + dot += xval[i] * y_scatter_val[q]; + i++; + j++; + } else if (p < q) { + i++; + } else if (q < p) { + j++; + } + } + return dot; +} + +template +f_t sparse_dot(i_t* xind, f_t* xval, i_t nx, i_t* yind, f_t* yval, i_t ny) +{ + f_t dot = 0.0; + for (i_t i = 0, j = 0; i < nx && j < ny;) { + const i_t p = xind[i]; + const i_t q = yind[j]; + if (p == q) { + dot += xval[i] * yval[j]; + i++; + j++; + } else if (p < q) { + i++; + } else if (q < p) { + j++; + } + } + return dot; +} + template f_t sparse_dot(const std::vector& xind, const std::vector& xval, @@ -146,6 +187,16 @@ template double sparse_dot(const std::vector& xind, const std::vector& yind, const std::vector& yval); +template double sparse_dot(int const* xind, + double const* xval, + int nx, + int const* yind, + int ny, + double const* y_scatter_val); + +template double sparse_dot( + int* xind, double* xval, int nx, int* yind, double* yval, int ny); + template int permute_vector(const std::vector& p, const std::vector& b, std::vector& x); diff --git a/cpp/src/dual_simplex/vector_math.hpp b/cpp/src/dual_simplex/vector_math.hpp index 962b21743..c5bd12863 100644 --- a/cpp/src/dual_simplex/vector_math.hpp +++ b/cpp/src/dual_simplex/vector_math.hpp @@ -44,6 +44,13 @@ f_t sparse_dot(const std::vector& xind, const std::vector& yind, const std::vector& yval); +template +f_t sparse_dot( + i_t const* xind, f_t const* xval, i_t nx, i_t const* yind, i_t ny, f_t const* y_scatter_val); + +template +f_t sparse_dot(i_t* xind, f_t* xval, i_t nx, i_t* yind, f_t* yval, i_t ny); + // Computes x = P*b or x=b(p) in MATLAB. template i_t permute_vector(const std::vector& p, const std::vector& b, std::vector& x); diff --git a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp index e675a3d5d..bfd100946 100644 --- a/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp +++ b/cpp/tests/linear_programming/c_api_tests/c_api_tests.cpp @@ -57,7 +57,7 @@ TEST_P(TimeLimitTestFixture, time_limit) method), CUOPT_SUCCESS); EXPECT_EQ(termination_status, CUOPT_TERIMINATION_STATUS_TIME_LIMIT); - EXPECT_NEAR(solve_time, target_solve_time, 0.1); + EXPECT_NEAR(solve_time, target_solve_time, 1.0); } INSTANTIATE_TEST_SUITE_P( c_api,