diff --git a/benchmarks/linear_programming/cuopt/run_mip.cpp b/benchmarks/linear_programming/cuopt/run_mip.cpp
index a4f52cb4e..4a6121f8e 100644
--- a/benchmarks/linear_programming/cuopt/run_mip.cpp
+++ b/benchmarks/linear_programming/cuopt/run_mip.cpp
@@ -80,10 +80,11 @@ void merge_result_files(const std::string& out_dir,
 void write_to_output_file(const std::string& out_dir,
                           const std::string& base_filename,
                           int gpu_id,
+                          int n_gpus,
                           int batch_id,
                           const std::string& data)
 {
-  int output_id        = batch_id * 8 + gpu_id;
+  int output_id        = batch_id * n_gpus + gpu_id;
   std::string filename = out_dir + "/result_" + std::to_string(output_id) + ".txt";
   std::ofstream outfile(filename, std::ios_base::app);
   if (outfile.is_open()) {
@@ -149,6 +150,7 @@ std::vector<std::vector<double>> read_solution_from_dir(const std::string file_p
 int run_single_file(std::string file_path,
                     int device,
                     int batch_id,
+                    int n_gpus,
                     std::string out_dir,
                     std::optional<std::string> initial_solution_dir,
                     bool heuristics_only,
@@ -243,7 +245,7 @@ int run_single_file(std::string file_path,
      << obj_val << "," << benchmark_info.objective_of_initial_population << ","
      << benchmark_info.last_improvement_of_best_feasible << ","
      << benchmark_info.last_improvement_after_recombination << "\n";
-  write_to_output_file(out_dir, base_filename, device, batch_id, ss.str());
+  write_to_output_file(out_dir, base_filename, device, n_gpus, batch_id, ss.str());
   CUOPT_LOG_INFO("Results written to the file %s", base_filename.c_str());
   return sol_found;
 }
@@ -251,6 +253,7 @@ int run_single_file(std::string file_path,
 void run_single_file_mp(std::string file_path,
                         int device,
                         int batch_id,
+                        int n_gpus,
                         std::string out_dir,
                         std::optional<std::string> input_file_dir,
                         bool heuristics_only,
@@ -265,6 +268,7 @@ void run_single_file_mp(std::string file_path,
   int sol_found = run_single_file(file_path,
                                   device,
                                   batch_id,
+                                  n_gpus,
                                   out_dir,
                                   input_file_dir,
                                   heuristics_only,
@@ -462,6 +466,7 @@ int main(int argc, char* argv[])
             run_single_file_mp(file_name,
                                gpu_id,
                                batch_num,
+                               n_gpus,
                                out_dir,
                                initial_solution_file,
                                heuristics_only,
@@ -501,6 +506,7 @@ int main(int argc, char* argv[])
     run_single_file(path,
                     0,
                     0,
+                    n_gpus,
                     out_dir,
                     initial_solution_file,
                     heuristics_only,
diff --git a/ci/test_python.sh b/ci/test_python.sh
index 0d3d1e596..7d504f473 100755
--- a/ci/test_python.sh
+++ b/ci/test_python.sh
@@ -60,6 +60,9 @@ EXITCODE=0
 trap "EXITCODE=1" ERR
 set +e
 
+# Due to a race condition, UCX might not be able to clean up properly in certain cases, so we set OMP_NUM_THREADS to 1
+export OMP_NUM_THREADS=1
+
 rapids-logger "Test cuopt_cli"
 timeout 10m bash ./python/libcuopt/libcuopt/tests/test_cli.sh
 
diff --git a/ci/test_wheel_cuopt.sh b/ci/test_wheel_cuopt.sh
index 1b37ed020..61dabd67f 100755
--- a/ci/test_wheel_cuopt.sh
+++ b/ci/test_wheel_cuopt.sh
@@ -66,11 +66,14 @@ cd -
 RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)"
 export RAPIDS_DATASET_ROOT_DIR
 
-# Please enable this once ISSUE https://github.com/NVIDIA/cuopt/issues/94 is fixed
 # Run CLI tests
 timeout 10m bash ./python/libcuopt/libcuopt/tests/test_cli.sh
 
 # Run Python tests
+
+# Due to a race condition, UCX might not be able to clean up properly in certain cases, so we set OMP_NUM_THREADS to 1
+export OMP_NUM_THREADS=1
+
 RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest --verbose --capture=no ./python/cuopt/cuopt/tests/
 
 # run jump tests and cvxpy integration tests for only nightly builds
diff --git a/ci/test_wheel_cuopt_server.sh b/ci/test_wheel_cuopt_server.sh
index 5f0b874ba..de4a52f47 100755
--- a/ci/test_wheel_cuopt_server.sh
+++ b/ci/test_wheel_cuopt_server.sh
@@ -37,4 +37,7 @@ rapids-pip-retry install \
 ./datasets/linear_programming/download_pdlp_test_dataset.sh
 ./datasets/mip/download_miplib_test_dataset.sh
 
+# Due to a race condition, UCX might not be able to clean up properly in certain cases, so we set OMP_NUM_THREADS to 1
+export OMP_NUM_THREADS=1
+
 RAPIDS_DATASET_ROOT_DIR=./datasets timeout 30m python -m pytest --verbose --capture=no ./python/cuopt_server/cuopt_server/tests/
diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml
index 2fdaa8dab..f87e9981d 100644
--- a/conda/environments/all_cuda-129_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-129_arch-aarch64.yaml
@@ -59,10 +59,9 @@ dependencies:
 - pytest-cov
 - pytest<8
 - python>=3.10,<3.14
-- raft-dask==25.12.*,>=0.0.0a0
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
 - rapids-dask-dependency==25.12.*,>=0.0.0a0
-- rapids-logger==0.1.*,>=0.0.0a0
+- rapids-logger==0.2.*,>=0.0.0a0
 - requests
 - rmm==25.12.*,>=0.0.0a0
 - scikit-build-core>=0.10.0
diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml
index 91b638bb1..8105bf9ab 100644
--- a/conda/environments/all_cuda-129_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-129_arch-x86_64.yaml
@@ -59,10 +59,9 @@ dependencies:
 - pytest-cov
 - pytest<8
 - python>=3.10,<3.14
-- raft-dask==25.12.*,>=0.0.0a0
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
 - rapids-dask-dependency==25.12.*,>=0.0.0a0
-- rapids-logger==0.1.*,>=0.0.0a0
+- rapids-logger==0.2.*,>=0.0.0a0
 - requests
 - rmm==25.12.*,>=0.0.0a0
 - scikit-build-core>=0.10.0
diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml
index cb5cfe00e..bf626dd52 100644
--- a/conda/environments/all_cuda-130_arch-aarch64.yaml
+++ b/conda/environments/all_cuda-130_arch-aarch64.yaml
@@ -59,10 +59,9 @@ dependencies:
 - pytest-cov
 - pytest<8
 - python>=3.10,<3.14
-- raft-dask==25.12.*,>=0.0.0a0
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
 - rapids-dask-dependency==25.12.*,>=0.0.0a0
-- rapids-logger==0.1.*,>=0.0.0a0
+- rapids-logger==0.2.*,>=0.0.0a0
 - requests
 - rmm==25.12.*,>=0.0.0a0
 - scikit-build-core>=0.10.0
diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml
index d64cc7537..72691938c 100644
--- a/conda/environments/all_cuda-130_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-130_arch-x86_64.yaml
@@ -59,10 +59,9 @@ dependencies:
 - pytest-cov
 - pytest<8
 - python>=3.10,<3.14
-- raft-dask==25.12.*,>=0.0.0a0
 - rapids-build-backend>=0.4.0,<0.5.0.dev0
 - rapids-dask-dependency==25.12.*,>=0.0.0a0
-- rapids-logger==0.1.*,>=0.0.0a0
+- rapids-logger==0.2.*,>=0.0.0a0
 - requests
 - rmm==25.12.*,>=0.0.0a0
 - scikit-build-core>=0.10.0
diff --git a/conda/recipes/libcuopt/recipe.yaml b/conda/recipes/libcuopt/recipe.yaml
index e18cea2a6..fdfd75257 100644
--- a/conda/recipes/libcuopt/recipe.yaml
+++ b/conda/recipes/libcuopt/recipe.yaml
@@ -67,7 +67,7 @@ cache:
       - gtest ${{ gtest_version }}
       - libraft-headers =${{ dep_minor_version }}
       - librmm =${{ dep_minor_version }}
-      - rapids-logger =0.1
+      - rapids-logger =0.2
       - cuda-nvtx-dev
       - libcudss-dev >=0.7
       - libcurand-dev
@@ -148,7 +148,7 @@ outputs:
         - ${{ pin_subpackage("libmps-parser", exact=True) }}
         - boost
         - cuda-version =${{ cuda_version }}
-        - rapids-logger =0.1
+        - rapids-logger =0.2
         - librmm =${{ dep_minor_version }}
         - cuda-cudart-dev
         - libcublas
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index cb17f0c4a..90bb1c57f 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -190,9 +190,9 @@ FetchContent_Declare(
   # does not have some of the presolvers and settings that we need
   # Mainly, probing and clique merging.
   # This is the reason we are using the development branch
-  # commit from Oct 8, 2025. Once these changes are merged into the main branch,
+  # commit from the cliquemergebug branch. Once these changes are merged into the main branch,
   #we can switch to the main branch.
-  GIT_TAG "24ccf5752656df0f15dd9aabe5b97feae829b9ec"
+  GIT_TAG "8f710e33d352bf319d30b9c57e70516222f3f5ca"
   GIT_PROGRESS TRUE
   SYSTEM
 )
@@ -201,8 +201,6 @@ find_package(TBB REQUIRED)
 set(BUILD_TESTING OFF CACHE BOOL "Disable test build for papilo")
 set(PAPILO_NO_BINARIES ON)
 option(LUSOL "Disable LUSOL" OFF)
-# Disable TBB because of a bug in CliqueMerging parallel version
-set(TBB OFF CACHE BOOL "Disable TBB for papilo")
 
 FetchContent_MakeAvailable(papilo)
 
diff --git a/cpp/src/dual_simplex/barrier.cu b/cpp/src/dual_simplex/barrier.cu
index 7a9c2ee24..47f1218f3 100644
--- a/cpp/src/dual_simplex/barrier.cu
+++ b/cpp/src/dual_simplex/barrier.cu
@@ -1487,7 +1487,6 @@ int barrier_solver_t<i_t, f_t>::initial_point(iteration_data_t<i_t, f_t>& data)
   // Perform a numerical factorization
   i_t status;
   if (use_augmented) {
-    settings.log.printf("Factorizing augmented\n");
     status = data.chol->factorize(data.augmented);
   } else {
     if (use_gpu) {
@@ -3243,217 +3242,54 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time,
                                               const barrier_solver_settings_t<i_t, f_t>& options,
                                               lp_solution_t<i_t, f_t>& solution)
 {
-  raft::common::nvtx::range fun_scope("Barrier: solve");
+  try {
+    raft::common::nvtx::range fun_scope("Barrier: solve");
 
-  i_t n = lp.num_cols;
-  i_t m = lp.num_rows;
+    i_t n = lp.num_cols;
+    i_t m = lp.num_rows;
 
-  solution.resize(m, n);
-  settings.log.printf(
-    "Barrier solver: %d constraints, %d variables, %ld nonzeros\n", m, n, lp.A.col_start[n]);
-  settings.log.printf("\n");
-
-  // Compute the number of free variables
-  i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2;
-  if (num_free_variables > 0) {
-    settings.log.printf("Free variables              : %d\n", num_free_variables);
-  }
-
-  // Compute the number of upper bounds
-  i_t num_upper_bounds = 0;
-  for (i_t j = 0; j < n; j++) {
-    if (lp.upper[j] < inf) { num_upper_bounds++; }
-  }
-
-  iteration_data_t<i_t, f_t> data(lp, num_upper_bounds, settings);
-  if (data.symbolic_status != 0) {
-    settings.log.printf("Error in symbolic analysis\n");
-    return lp_status_t::NUMERICAL_ISSUES;
-  }
-  if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
-    settings.log.printf("Barrier solver halted\n");
-    return lp_status_t::CONCURRENT_LIMIT;
-  }
-  data.cusparse_dual_residual_ = data.cusparse_view_.create_vector(data.d_dual_residual_);
-  data.cusparse_r1_            = data.cusparse_view_.create_vector(data.d_r1_);
-  data.cusparse_tmp4_          = data.cusparse_view_.create_vector(data.d_tmp4_);
-  data.cusparse_h_             = data.cusparse_view_.create_vector(data.d_h_);
-  data.cusparse_dx_residual_   = data.cusparse_view_.create_vector(data.d_dx_residual_);
-  data.cusparse_u_             = data.cusparse_view_.create_vector(data.d_u_);
-  data.cusparse_y_residual_    = data.cusparse_view_.create_vector(data.d_y_residual_);
-  data.restrict_u_.resize(num_upper_bounds);
-
-  if (toc(start_time) > settings.time_limit) {
-    settings.log.printf("Barrier time limit exceeded\n");
-    return lp_status_t::TIME_LIMIT;
-  }
-
-  i_t initial_status = initial_point(data);
-  if (toc(start_time) > settings.time_limit) {
-    settings.log.printf("Barrier time limit exceeded\n");
-    return lp_status_t::TIME_LIMIT;
-  }
-  if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
-    settings.log.printf("Barrier solver halted\n");
-    return lp_status_t::CONCURRENT_LIMIT;
-  }
-  if (initial_status != 0) {
-    settings.log.printf("Unable to compute initial point\n");
-    return lp_status_t::NUMERICAL_ISSUES;
-  }
-  compute_residuals<PinnedHostAllocator<f_t>>(data.w, data.x, data.y, data.v, data.z, data);
-
-  f_t primal_residual_norm = std::max(vector_norm_inf<i_t, f_t>(data.primal_residual, stream_view_),
-                                      vector_norm_inf<i_t, f_t>(data.bound_residual, stream_view_));
-  f_t dual_residual_norm   = vector_norm_inf<i_t, f_t>(data.dual_residual, stream_view_);
-  f_t complementarity_residual_norm =
-    std::max(vector_norm_inf<i_t, f_t>(data.complementarity_xz_residual, stream_view_),
-             vector_norm_inf<i_t, f_t>(data.complementarity_wv_residual, stream_view_));
-  f_t mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) /
-           (static_cast<f_t>(n) + static_cast<f_t>(num_upper_bounds));
-
-  f_t norm_b = vector_norm_inf<i_t, f_t>(data.b, stream_view_);
-  f_t norm_c = vector_norm_inf<i_t, f_t>(data.c, stream_view_);
-
-  f_t primal_objective = data.c.inner_product(data.x);
-
-  f_t relative_primal_residual = primal_residual_norm / (1.0 + norm_b);
-  f_t relative_dual_residual   = dual_residual_norm / (1.0 + norm_c);
-  f_t relative_complementarity_residual =
-    complementarity_residual_norm / (1.0 + std::abs(primal_objective));
-
-  dense_vector_t<i_t, f_t> upper(lp.upper);
-  data.gather_upper_bounds(upper, data.restrict_u_);
-  f_t dual_objective = data.b.inner_product(data.y) - data.restrict_u_.inner_product(data.v);
-
-  i_t iter = 0;
-  settings.log.printf("\n");
-  settings.log.printf(
-    "        Objective                            Infeasibility              Time\n");
-  settings.log.printf(
-    "Iter    Primal               Dual            Primal   Dual    Compl.    Elapsed\n");
-  float64_t elapsed_time = toc(start_time);
-  settings.log.printf("%3d   %+.12e %+.12e %.2e %.2e %.2e %.1f\n",
-                      iter,
-                      primal_objective,
-                      dual_objective,
-                      primal_residual_norm,
-                      dual_residual_norm,
-                      complementarity_residual_norm,
-                      elapsed_time);
-
-  bool converged = primal_residual_norm < settings.barrier_relative_feasibility_tol &&
-                   dual_residual_norm < settings.barrier_relative_optimality_tol &&
-                   complementarity_residual_norm < settings.barrier_relative_complementarity_tol;
-
-  data.d_complementarity_xz_residual_.resize(data.complementarity_xz_residual.size(), stream_view_);
-  data.d_complementarity_wv_residual_.resize(data.complementarity_wv_residual.size(), stream_view_);
-  data.d_complementarity_xz_rhs_.resize(data.complementarity_xz_rhs.size(), stream_view_);
-  data.d_complementarity_wv_rhs_.resize(data.complementarity_wv_rhs.size(), stream_view_);
-  raft::copy(data.d_complementarity_xz_residual_.data(),
-             data.complementarity_xz_residual.data(),
-             data.complementarity_xz_residual.size(),
-             stream_view_);
-  raft::copy(data.d_complementarity_wv_residual_.data(),
-             data.complementarity_wv_residual.data(),
-             data.complementarity_wv_residual.size(),
-             stream_view_);
-  raft::copy(data.d_complementarity_xz_rhs_.data(),
-             data.complementarity_xz_rhs.data(),
-             data.complementarity_xz_rhs.size(),
-             stream_view_);
-  raft::copy(data.d_complementarity_wv_rhs_.data(),
-             data.complementarity_wv_rhs.data(),
-             data.complementarity_wv_rhs.size(),
-             stream_view_);
-
-  data.w_save = data.w;
-  data.x_save = data.x;
-  data.y_save = data.y;
-  data.v_save = data.v;
-  data.z_save = data.z;
-
-  const i_t iteration_limit = settings.iteration_limit;
-
-  while (iter < iteration_limit) {
-    raft::common::nvtx::range fun_scope("Barrier: iteration");
+    solution.resize(m, n);
+    settings.log.printf(
+      "Barrier solver: %d constraints, %d variables, %ld nonzeros\n", m, n, lp.A.col_start[n]);
+    settings.log.printf("\n");
 
-    if (toc(start_time) > settings.time_limit) {
-      settings.log.printf("Barrier time limit exceeded\n");
-      return lp_status_t::TIME_LIMIT;
-    }
-    if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
-      settings.log.printf("Barrier solver halted\n");
-      return lp_status_t::CONCURRENT_LIMIT;
+    // Compute the number of free variables
+    i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2;
+    if (num_free_variables > 0) {
+      settings.log.printf("Free variables              : %d\n", num_free_variables);
     }
 
-    // Compute the affine step
-    compute_affine_rhs(data);
-    f_t max_affine_residual = 0.0;
+    // Compute the number of upper bounds
+    i_t num_upper_bounds = 0;
+    for (i_t j = 0; j < n; j++) {
+      if (lp.upper[j] < inf) { num_upper_bounds++; }
+    }
 
-    i_t status = gpu_compute_search_direction(
-      data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff, max_affine_residual);
+    iteration_data_t<i_t, f_t> data(lp, num_upper_bounds, settings);
     if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
       settings.log.printf("Barrier solver halted\n");
       return lp_status_t::CONCURRENT_LIMIT;
     }
-    // Sync to make sure all the async copies to host done inside are finished
-    if (use_gpu) RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_));
-
-    if (status < 0) {
-      return check_for_suboptimal_solution(options,
-                                           data,
-                                           start_time,
-                                           iter,
-                                           primal_objective,
-                                           primal_residual_norm,
-                                           dual_residual_norm,
-                                           complementarity_residual_norm,
-                                           relative_primal_residual,
-                                           relative_dual_residual,
-                                           relative_complementarity_residual,
-                                           solution);
+    if (data.symbolic_status != 0) {
+      settings.log.printf("Error in symbolic analysis\n");
+      return lp_status_t::NUMERICAL_ISSUES;
     }
+
+    data.cusparse_dual_residual_ = data.cusparse_view_.create_vector(data.d_dual_residual_);
+    data.cusparse_r1_            = data.cusparse_view_.create_vector(data.d_r1_);
+    data.cusparse_tmp4_          = data.cusparse_view_.create_vector(data.d_tmp4_);
+    data.cusparse_h_             = data.cusparse_view_.create_vector(data.d_h_);
+    data.cusparse_dx_residual_   = data.cusparse_view_.create_vector(data.d_dx_residual_);
+    data.cusparse_u_             = data.cusparse_view_.create_vector(data.d_u_);
+    data.cusparse_y_residual_    = data.cusparse_view_.create_vector(data.d_y_residual_);
+    data.restrict_u_.resize(num_upper_bounds);
+
     if (toc(start_time) > settings.time_limit) {
       settings.log.printf("Barrier time limit exceeded\n");
       return lp_status_t::TIME_LIMIT;
     }
-    if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
-      settings.log.printf("Barrier solver halted\n");
-      return lp_status_t::CONCURRENT_LIMIT;
-    }
-
-    f_t mu_aff, sigma, new_mu;
-    compute_target_mu(data, mu, mu_aff, sigma, new_mu);
-
-    compute_cc_rhs(data, new_mu);
-
-    f_t max_corrector_residual = 0.0;
 
-    status = gpu_compute_search_direction(
-      data, data.dw, data.dx, data.dy, data.dv, data.dz, max_corrector_residual);
-    if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
-      settings.log.printf("Barrier solver halted\n");
-      return lp_status_t::CONCURRENT_LIMIT;
-    }
-    // Sync to make sure all the async copies to host done inside are finished
-    if (use_gpu) RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_));
-    if (status < 0) {
-      return check_for_suboptimal_solution(options,
-                                           data,
-                                           start_time,
-                                           iter,
-                                           primal_objective,
-                                           primal_residual_norm,
-                                           dual_residual_norm,
-                                           complementarity_residual_norm,
-                                           relative_primal_residual,
-                                           relative_dual_residual,
-                                           relative_complementarity_residual,
-                                           solution);
-    }
-    data.has_factorization = false;
-    data.has_solve_info    = false;
+    i_t initial_status = initial_point(data);
     if (toc(start_time) > settings.time_limit) {
       settings.log.printf("Barrier time limit exceeded\n");
       return lp_status_t::TIME_LIMIT;
@@ -3462,112 +3298,284 @@ lp_status_t barrier_solver_t<i_t, f_t>::solve(f_t start_time,
       settings.log.printf("Barrier solver halted\n");
       return lp_status_t::CONCURRENT_LIMIT;
     }
+    if (initial_status != 0) {
+      settings.log.printf("Unable to compute initial point\n");
+      return lp_status_t::NUMERICAL_ISSUES;
+    }
+    compute_residuals<PinnedHostAllocator<f_t>>(data.w, data.x, data.y, data.v, data.z, data);
 
-    compute_final_direction(data);
-    f_t step_primal, step_dual;
-    compute_primal_dual_step_length(data, options.step_scale, step_primal, step_dual);
-    compute_next_iterate(data, options.step_scale, step_primal, step_dual);
-
-    compute_residual_norms(
-      data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm);
+    f_t primal_residual_norm =
+      std::max(vector_norm_inf<i_t, f_t>(data.primal_residual, stream_view_),
+               vector_norm_inf<i_t, f_t>(data.bound_residual, stream_view_));
+    f_t dual_residual_norm = vector_norm_inf<i_t, f_t>(data.dual_residual, stream_view_);
+    f_t complementarity_residual_norm =
+      std::max(vector_norm_inf<i_t, f_t>(data.complementarity_xz_residual, stream_view_),
+               vector_norm_inf<i_t, f_t>(data.complementarity_wv_residual, stream_view_));
+    f_t mu = (data.complementarity_xz_residual.sum() + data.complementarity_wv_residual.sum()) /
+             (static_cast<f_t>(n) + static_cast<f_t>(num_upper_bounds));
 
-    compute_mu(data, mu);
+    f_t norm_b = vector_norm_inf<i_t, f_t>(data.b, stream_view_);
+    f_t norm_c = vector_norm_inf<i_t, f_t>(data.c, stream_view_);
 
-    compute_primal_dual_objective(data, primal_objective, dual_objective);
+    f_t primal_objective = data.c.inner_product(data.x);
 
-    relative_primal_residual = primal_residual_norm / (1.0 + norm_b);
-    relative_dual_residual   = dual_residual_norm / (1.0 + norm_c);
-    relative_complementarity_residual =
+    f_t relative_primal_residual = primal_residual_norm / (1.0 + norm_b);
+    f_t relative_dual_residual   = dual_residual_norm / (1.0 + norm_c);
+    f_t relative_complementarity_residual =
       complementarity_residual_norm / (1.0 + std::abs(primal_objective));
 
-    if (relative_primal_residual < settings.barrier_relaxed_feasibility_tol &&
-        relative_dual_residual < settings.barrier_relaxed_optimality_tol &&
-        relative_complementarity_residual < settings.barrier_relaxed_complementarity_tol) {
-      if (relative_primal_residual < data.relative_primal_residual_save &&
-          relative_dual_residual < data.relative_dual_residual_save &&
-          relative_complementarity_residual < data.relative_complementarity_residual_save) {
-        settings.log.debug(
-          "Saving solution: feasibility %.2e (%.2e), optimality %.2e (%.2e), complementarity "
-          "%.2e (%.2e)\n",
-          relative_primal_residual,
-          primal_residual_norm,
-          relative_dual_residual,
-          dual_residual_norm,
-          relative_complementarity_residual,
-          complementarity_residual_norm);
-        data.w_save                                 = data.w;
-        data.x_save                                 = data.x;
-        data.y_save                                 = data.y;
-        data.v_save                                 = data.v;
-        data.z_save                                 = data.z;
-        data.relative_primal_residual_save          = relative_primal_residual;
-        data.relative_dual_residual_save            = relative_dual_residual;
-        data.relative_complementarity_residual_save = relative_complementarity_residual;
-        data.primal_residual_norm_save              = primal_residual_norm;
-        data.dual_residual_norm_save                = dual_residual_norm;
-        data.complementarity_residual_norm_save     = complementarity_residual_norm;
-      }
-    }
-
-    iter++;
-    elapsed_time = toc(start_time);
-
-    if (primal_objective != primal_objective || dual_objective != dual_objective) {
-      settings.log.printf("Numerical error in objective\n");
-      return lp_status_t::NUMERICAL_ISSUES;
-    }
+    dense_vector_t<i_t, f_t> upper(lp.upper);
+    data.gather_upper_bounds(upper, data.restrict_u_);
+    f_t dual_objective = data.b.inner_product(data.y) - data.restrict_u_.inner_product(data.v);
 
+    i_t iter = 0;
+    settings.log.printf("\n");
+    settings.log.printf(
+      "                  Objective                         Infeasibility        Time\n");
+    settings.log.printf(
+      "Iter   Primal              Dual                Primal   Dual    Compl.   Elapsed\n");
+    float64_t elapsed_time = toc(start_time);
     settings.log.printf("%3d   %+.12e %+.12e %.2e %.2e %.2e %.1f\n",
                         iter,
-                        compute_user_objective(lp, primal_objective),
-                        compute_user_objective(lp, dual_objective),
-                        relative_primal_residual,
-                        relative_dual_residual,
-                        relative_complementarity_residual,
+                        primal_objective,
+                        dual_objective,
+                        primal_residual_norm,
+                        dual_residual_norm,
+                        complementarity_residual_norm,
                         elapsed_time);
 
-    bool primal_feasible = relative_primal_residual < settings.barrier_relative_feasibility_tol;
-    bool dual_feasible   = relative_dual_residual < settings.barrier_relative_optimality_tol;
-    bool small_gap =
-      relative_complementarity_residual < settings.barrier_relative_complementarity_tol;
+    bool converged = primal_residual_norm < settings.barrier_relative_feasibility_tol &&
+                     dual_residual_norm < settings.barrier_relative_optimality_tol &&
+                     complementarity_residual_norm < settings.barrier_relative_complementarity_tol;
+
+    data.d_complementarity_xz_residual_.resize(data.complementarity_xz_residual.size(),
+                                               stream_view_);
+    data.d_complementarity_wv_residual_.resize(data.complementarity_wv_residual.size(),
+                                               stream_view_);
+    data.d_complementarity_xz_rhs_.resize(data.complementarity_xz_rhs.size(), stream_view_);
+    data.d_complementarity_wv_rhs_.resize(data.complementarity_wv_rhs.size(), stream_view_);
+    raft::copy(data.d_complementarity_xz_residual_.data(),
+               data.complementarity_xz_residual.data(),
+               data.complementarity_xz_residual.size(),
+               stream_view_);
+    raft::copy(data.d_complementarity_wv_residual_.data(),
+               data.complementarity_wv_residual.data(),
+               data.complementarity_wv_residual.size(),
+               stream_view_);
+    raft::copy(data.d_complementarity_xz_rhs_.data(),
+               data.complementarity_xz_rhs.data(),
+               data.complementarity_xz_rhs.size(),
+               stream_view_);
+    raft::copy(data.d_complementarity_wv_rhs_.data(),
+               data.complementarity_wv_rhs.data(),
+               data.complementarity_wv_rhs.size(),
+               stream_view_);
+
+    data.w_save = data.w;
+    data.x_save = data.x;
+    data.y_save = data.y;
+    data.v_save = data.v;
+    data.z_save = data.z;
 
-    converged = primal_feasible && dual_feasible && small_gap;
+    const i_t iteration_limit = settings.iteration_limit;
 
-    if (converged) {
-      settings.log.printf("\n");
-      settings.log.printf(
-        "Optimal solution found in %d iterations and %.2fs\n", iter, toc(start_time));
-      settings.log.printf("Objective %+.8e\n", compute_user_objective(lp, primal_objective));
-      settings.log.printf("Primal infeasibility (abs/rel): %8.2e/%8.2e\n",
-                          primal_residual_norm,
-                          relative_primal_residual);
-      settings.log.printf("Dual infeasibility   (abs/rel): %8.2e/%8.2e\n",
-                          dual_residual_norm,
-                          relative_dual_residual);
-      settings.log.printf("Complementarity gap  (abs/rel): %8.2e/%8.2e\n",
-                          complementarity_residual_norm,
-                          relative_complementarity_residual);
-      settings.log.printf("\n");
-      data.to_solution(lp,
-                       iter,
-                       primal_objective,
-                       compute_user_objective(lp, primal_objective),
-                       primal_residual_norm,
-                       dual_residual_norm,
-                       data.cusparse_view_,
-                       solution);
-      return lp_status_t::OPTIMAL;
+    while (iter < iteration_limit) {
+      raft::common::nvtx::range fun_scope("Barrier: iteration");
+
+      if (toc(start_time) > settings.time_limit) {
+        settings.log.printf("Barrier time limit exceeded\n");
+        return lp_status_t::TIME_LIMIT;
+      }
+      if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
+        settings.log.printf("Barrier solver halted\n");
+        return lp_status_t::CONCURRENT_LIMIT;
+      }
+
+      // Compute the affine step
+      compute_affine_rhs(data);
+      f_t max_affine_residual = 0.0;
+
+      i_t status = gpu_compute_search_direction(
+        data, data.dw_aff, data.dx_aff, data.dy_aff, data.dv_aff, data.dz_aff, max_affine_residual);
+      if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
+        settings.log.printf("Barrier solver halted\n");
+        return lp_status_t::CONCURRENT_LIMIT;
+      }
+      // Sync to make sure all the async copies to host done inside are finished
+      if (use_gpu) RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_));
+
+      if (status < 0) {
+        return check_for_suboptimal_solution(options,
+                                             data,
+                                             start_time,
+                                             iter,
+                                             primal_objective,
+                                             primal_residual_norm,
+                                             dual_residual_norm,
+                                             complementarity_residual_norm,
+                                             relative_primal_residual,
+                                             relative_dual_residual,
+                                             relative_complementarity_residual,
+                                             solution);
+      }
+      if (toc(start_time) > settings.time_limit) {
+        settings.log.printf("Barrier time limit exceeded\n");
+        return lp_status_t::TIME_LIMIT;
+      }
+      if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
+        settings.log.printf("Barrier solver halted\n");
+        return lp_status_t::CONCURRENT_LIMIT;
+      }
+
+      f_t mu_aff, sigma, new_mu;
+      compute_target_mu(data, mu, mu_aff, sigma, new_mu);
+
+      compute_cc_rhs(data, new_mu);
+
+      f_t max_corrector_residual = 0.0;
+
+      status = gpu_compute_search_direction(
+        data, data.dw, data.dx, data.dy, data.dv, data.dz, max_corrector_residual);
+      if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
+        settings.log.printf("Barrier solver halted\n");
+        return lp_status_t::CONCURRENT_LIMIT;
+      }
+      // Sync to make sure all the async copies to host done inside are finished
+      if (use_gpu) RAFT_CUDA_TRY(cudaStreamSynchronize(stream_view_));
+      if (status < 0) {
+        return check_for_suboptimal_solution(options,
+                                             data,
+                                             start_time,
+                                             iter,
+                                             primal_objective,
+                                             primal_residual_norm,
+                                             dual_residual_norm,
+                                             complementarity_residual_norm,
+                                             relative_primal_residual,
+                                             relative_dual_residual,
+                                             relative_complementarity_residual,
+                                             solution);
+      }
+      data.has_factorization = false;
+      data.has_solve_info    = false;
+      if (toc(start_time) > settings.time_limit) {
+        settings.log.printf("Barrier time limit exceeded\n");
+        return lp_status_t::TIME_LIMIT;
+      }
+      if (settings.concurrent_halt != nullptr && *settings.concurrent_halt == 1) {
+        settings.log.printf("Barrier solver halted\n");
+        return lp_status_t::CONCURRENT_LIMIT;
+      }
+
+      compute_final_direction(data);
+      f_t step_primal, step_dual;
+      compute_primal_dual_step_length(data, options.step_scale, step_primal, step_dual);
+      compute_next_iterate(data, options.step_scale, step_primal, step_dual);
+
+      compute_residual_norms(
+        data, primal_residual_norm, dual_residual_norm, complementarity_residual_norm);
+
+      compute_mu(data, mu);
+
+      compute_primal_dual_objective(data, primal_objective, dual_objective);
+
+      relative_primal_residual = primal_residual_norm / (1.0 + norm_b);
+      relative_dual_residual   = dual_residual_norm / (1.0 + norm_c);
+      relative_complementarity_residual =
+        complementarity_residual_norm / (1.0 + std::abs(primal_objective));
+
+      if (relative_primal_residual < settings.barrier_relaxed_feasibility_tol &&
+          relative_dual_residual < settings.barrier_relaxed_optimality_tol &&
+          relative_complementarity_residual < settings.barrier_relaxed_complementarity_tol) {
+        if (relative_primal_residual < data.relative_primal_residual_save &&
+            relative_dual_residual < data.relative_dual_residual_save &&
+            relative_complementarity_residual < data.relative_complementarity_residual_save) {
+          settings.log.debug(
+            "Saving solution: feasibility %.2e (%.2e), optimality %.2e (%.2e), complementarity "
+            "%.2e (%.2e)\n",
+            relative_primal_residual,
+            primal_residual_norm,
+            relative_dual_residual,
+            dual_residual_norm,
+            relative_complementarity_residual,
+            complementarity_residual_norm);
+          data.w_save                                 = data.w;
+          data.x_save                                 = data.x;
+          data.y_save                                 = data.y;
+          data.v_save                                 = data.v;
+          data.z_save                                 = data.z;
+          data.relative_primal_residual_save          = relative_primal_residual;
+          data.relative_dual_residual_save            = relative_dual_residual;
+          data.relative_complementarity_residual_save = relative_complementarity_residual;
+          data.primal_residual_norm_save              = primal_residual_norm;
+          data.dual_residual_norm_save                = dual_residual_norm;
+          data.complementarity_residual_norm_save     = complementarity_residual_norm;
+        }
+      }
+
+      iter++;
+      elapsed_time = toc(start_time);
+
+      if (primal_objective != primal_objective || dual_objective != dual_objective) {
+        settings.log.printf("Numerical error in objective\n");
+        return lp_status_t::NUMERICAL_ISSUES;
+      }
+
+      settings.log.printf("%3d   %+.12e %+.12e %.2e %.2e %.2e %.1f\n",
+                          iter,
+                          compute_user_objective(lp, primal_objective),
+                          compute_user_objective(lp, dual_objective),
+                          relative_primal_residual,
+                          relative_dual_residual,
+                          relative_complementarity_residual,
+                          elapsed_time);
+
+      bool primal_feasible = relative_primal_residual < settings.barrier_relative_feasibility_tol;
+      bool dual_feasible   = relative_dual_residual < settings.barrier_relative_optimality_tol;
+      bool small_gap =
+        relative_complementarity_residual < settings.barrier_relative_complementarity_tol;
+
+      converged = primal_feasible && dual_feasible && small_gap;
+
+      if (converged) {
+        settings.log.printf("\n");
+        settings.log.printf(
+          "Optimal solution found in %d iterations and %.2fs\n", iter, toc(start_time));
+        settings.log.printf("Objective %+.8e\n", compute_user_objective(lp, primal_objective));
+        settings.log.printf("Primal infeasibility (abs/rel): %8.2e/%8.2e\n",
+                            primal_residual_norm,
+                            relative_primal_residual);
+        settings.log.printf("Dual infeasibility   (abs/rel): %8.2e/%8.2e\n",
+                            dual_residual_norm,
+                            relative_dual_residual);
+        settings.log.printf("Complementarity gap  (abs/rel): %8.2e/%8.2e\n",
+                            complementarity_residual_norm,
+                            relative_complementarity_residual);
+        settings.log.printf("\n");
+        data.to_solution(lp,
+                         iter,
+                         primal_objective,
+                         compute_user_objective(lp, primal_objective),
+                         primal_residual_norm,
+                         dual_residual_norm,
+                         data.cusparse_view_,
+                         solution);
+        return lp_status_t::OPTIMAL;
+      }
     }
+    data.to_solution(lp,
+                     iter,
+                     primal_objective,
+                     compute_user_objective(lp, primal_objective),
+                     vector_norm2<i_t, f_t>(data.primal_residual),
+                     vector_norm2<i_t, f_t>(data.dual_residual),
+                     data.cusparse_view_,
+                     solution);
+    return lp_status_t::ITERATION_LIMIT;
+  } catch (const raft::cuda_error& e) {
+    settings.log.debug("Error in barrier_solver_t: %s\n", e.what());
+    return lp_status_t::NUMERICAL_ISSUES;
   }
-  data.to_solution(lp,
-                   iter,
-                   primal_objective,
-                   compute_user_objective(lp, primal_objective),
-                   vector_norm2<i_t, f_t>(data.primal_residual),
-                   vector_norm2<i_t, f_t>(data.dual_residual),
-                   data.cusparse_view_,
-                   solution);
-  return lp_status_t::ITERATION_LIMIT;
 }
 
 #ifdef DUAL_SIMPLEX_INSTANTIATE_DOUBLE
diff --git a/cpp/src/dual_simplex/branch_and_bound.cpp b/cpp/src/dual_simplex/branch_and_bound.cpp
index cf6fd6979..2ce3ee0b4 100644
--- a/cpp/src/dual_simplex/branch_and_bound.cpp
+++ b/cpp/src/dual_simplex/branch_and_bound.cpp
@@ -562,24 +562,16 @@ node_status_t branch_and_bound_t<i_t, f_t>::solve_node(search_tree_t<i_t, f_t>&
 {
   f_t abs_fathom_tol = settings_.absolute_mip_gap_tol / 10;
 
-  std::vector<variable_status_t>& leaf_vstatus = node_ptr->vstatus;
   lp_solution_t<i_t, f_t> leaf_solution(leaf_problem.num_rows, leaf_problem.num_cols);
+  std::vector<variable_status_t>& leaf_vstatus = node_ptr->vstatus;
   assert(leaf_vstatus.size() == leaf_problem.num_cols);
 
-  // Set the correct bounds for the leaf problem
-  leaf_problem.lower = original_lp_.lower;
-  leaf_problem.upper = original_lp_.upper;
-
   std::vector<bool> bounds_changed(leaf_problem.num_cols, false);
   // Technically, we can get the already strengthened bounds from the node/parent instead of
   // getting it from the original problem and re-strengthening. But this requires storing
   // two vectors at each node and potentially cause memory issues
   node_ptr->get_variable_bounds(leaf_problem.lower, leaf_problem.upper, bounds_changed);
 
-  i_t node_iter                    = 0;
-  f_t lp_start_time                = tic();
-  std::vector<f_t> leaf_edge_norms = edge_norms_;  // = node.steepest_edge_norms;
-
   simplex_solver_settings_t lp_settings = settings_;
   lp_settings.set_log(false);
   lp_settings.cut_off    = upper_bound + settings_.dual_tol;
@@ -594,6 +586,10 @@ node_status_t branch_and_bound_t<i_t, f_t>::solve_node(search_tree_t<i_t, f_t>&
   dual::status_t lp_status = dual::status_t::DUAL_UNBOUNDED;
 
   if (feasible) {
+    i_t node_iter                    = 0;
+    f_t lp_start_time                = tic();
+    std::vector<f_t> leaf_edge_norms = edge_norms_;  // = node.steepest_edge_norms;
+
     lp_status = dual_phase2(2,
                             0,
                             lp_start_time,
@@ -610,10 +606,10 @@ node_status_t branch_and_bound_t<i_t, f_t>::solve_node(search_tree_t<i_t, f_t>&
         leaf_problem, lp_start_time, lp_settings, leaf_solution, leaf_vstatus, leaf_edge_norms);
       lp_status = convert_lp_status_to_dual_status(second_status);
     }
-  }
 
-  stats_.total_lp_solve_time += toc(lp_start_time);
-  stats_.total_lp_iters += node_iter;
+    stats_.total_lp_solve_time += toc(lp_start_time);
+    stats_.total_lp_iters += node_iter;
+  }
 
   if (lp_status == dual::status_t::DUAL_UNBOUNDED) {
     // Node was infeasible. Do not branch
@@ -695,17 +691,18 @@ void branch_and_bound_t<i_t, f_t>::exploration_ramp_up(search_tree_t<i_t, f_t>*
                                                        i_t initial_heap_size)
 {
   if (status_ != mip_exploration_status_t::RUNNING) { return; }
-  if (omp_get_thread_num() == 0) { repair_heuristic_solutions(); }
+
+  // Note that we do not know which thread will execute the
+  // `exploration_ramp_up` task, so we allow any thread
+  // to repair the heuristic solution.
+  repair_heuristic_solutions();
 
   f_t lower_bound      = node->lower_bound;
   f_t upper_bound      = get_upper_bound();
   f_t rel_gap          = user_relative_gap(original_lp_, upper_bound, lower_bound);
   f_t abs_gap          = upper_bound - lower_bound;
-  i_t nodes_explored   = 0;
-  i_t nodes_unexplored = 0;
-
-  nodes_explored   = (stats_.nodes_explored++);
-  nodes_unexplored = (stats_.nodes_unexplored--);
+  i_t nodes_explored   = (++stats_.nodes_explored);
+  i_t nodes_unexplored = (--stats_.nodes_unexplored);
   stats_.nodes_since_last_log++;
 
   if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) {
@@ -714,14 +711,18 @@ void branch_and_bound_t<i_t, f_t>::exploration_ramp_up(search_tree_t<i_t, f_t>*
     return;
   }
 
-  f_t now = toc(stats_.start_time);
+  f_t now                 = toc(stats_.start_time);
+  f_t time_since_last_log = stats_.last_log == 0 ? 1.0 : toc(stats_.last_log);
 
-  if (omp_get_thread_num() == 0) {
-    f_t time_since_last_log = stats_.last_log == 0 ? 1.0 : toc(stats_.last_log);
+  if (((stats_.nodes_since_last_log >= 10 || abs_gap < 10 * settings_.absolute_mip_gap_tol) &&
+       (time_since_last_log >= 1)) ||
+      (time_since_last_log > 30) || now > settings_.time_limit) {
+    // Check whether any other thread has explored a node since this thread
+    // incremented the counter. If not, this is the latest thread and it reports the progress.
+    if (stats_.nodes_explored.load() == nodes_explored) {
+      stats_.nodes_since_last_log = 0;
+      stats_.last_log             = tic();
 
-    if (((stats_.nodes_since_last_log >= 10 || abs_gap < 10 * settings_.absolute_mip_gap_tol) &&
-         (time_since_last_log >= 1)) ||
-        (time_since_last_log > 30) || now > settings_.time_limit) {
       f_t obj              = compute_user_objective(original_lp_, upper_bound);
       f_t user_lower       = compute_user_objective(original_lp_, root_objective_);
       std::string gap_user = user_mip_gap<f_t>(obj, user_lower);
@@ -735,8 +736,6 @@ void branch_and_bound_t<i_t, f_t>::exploration_ramp_up(search_tree_t<i_t, f_t>*
                            nodes_explored > 0 ? stats_.total_lp_iters / nodes_explored : 0,
                            gap_user.c_str(),
                            now);
-
-      stats_.nodes_since_last_log = 0;
     }
   }
 
@@ -744,6 +743,11 @@ void branch_and_bound_t<i_t, f_t>::exploration_ramp_up(search_tree_t<i_t, f_t>*
     status_ = mip_exploration_status_t::TIME_LIMIT;
     return;
   }
+
+  // Set the correct bounds for the leaf problem
+  leaf_problem.lower = original_lp_.lower;
+  leaf_problem.upper = original_lp_.upper;
+
   node_status_t node_status =
     solve_node(*search_tree, node, leaf_problem, Arow, upper_bound, settings_.log, 'B');
 
@@ -774,7 +778,7 @@ void branch_and_bound_t<i_t, f_t>::exploration_ramp_up(search_tree_t<i_t, f_t>*
 }
 
 template <typename i_t, typename f_t>
-void branch_and_bound_t<i_t, f_t>::explore_subtree(i_t id,
+void branch_and_bound_t<i_t, f_t>::explore_subtree(i_t task_id,
                                                    search_tree_t<i_t, f_t>& search_tree,
                                                    mip_node_t<i_t, f_t>* start_node,
                                                    lp_problem_t<i_t, f_t>& leaf_problem,
@@ -784,7 +788,7 @@ void branch_and_bound_t<i_t, f_t>::explore_subtree(i_t id,
   stack.push_front(start_node);
 
   while (stack.size() > 0 && status_ == mip_exploration_status_t::RUNNING) {
-    if (omp_get_thread_num() == 0) { repair_heuristic_solutions(); }
+    if (task_id == 0) { repair_heuristic_solutions(); }
 
     mip_node_t<i_t, f_t>* node_ptr = stack.front();
     stack.pop_front();
@@ -800,10 +804,10 @@ void branch_and_bound_t<i_t, f_t>::explore_subtree(i_t id,
     // - The current node and its siblings uses the lower bound of the parent before solving the LP
     // relaxation
     // - The lower bound of the parent is lower or equal to its children
-    assert(id < local_lower_bounds_.size());
-    local_lower_bounds_[id] = lower_bound;
-    i_t nodes_explored      = stats_.nodes_explored++;
-    i_t nodes_unexplored    = stats_.nodes_unexplored--;
+    assert(task_id < local_lower_bounds_.size());
+    local_lower_bounds_[task_id] = lower_bound;
+    i_t nodes_explored           = (++stats_.nodes_explored);
+    i_t nodes_unexplored         = (--stats_.nodes_unexplored);
     stats_.nodes_since_last_log++;
 
     if (lower_bound > upper_bound || rel_gap < settings_.relative_mip_gap_tol) {
@@ -814,7 +818,7 @@ void branch_and_bound_t<i_t, f_t>::explore_subtree(i_t id,
 
     f_t now = toc(stats_.start_time);
 
-    if (id == 0) {
+    if (task_id == 0) {
       f_t time_since_last_log = stats_.last_log == 0 ? 1.0 : toc(stats_.last_log);
 
       if (((stats_.nodes_since_last_log >= 1000 || abs_gap < 10 * settings_.absolute_mip_gap_tol) &&
@@ -842,6 +846,10 @@ void branch_and_bound_t<i_t, f_t>::explore_subtree(i_t id,
       return;
     }
 
+    // Set the correct bounds for the leaf problem
+    leaf_problem.lower = original_lp_.lower;
+    leaf_problem.upper = original_lp_.upper;
+
     node_status_t node_status =
       solve_node(search_tree, node_ptr, leaf_problem, Arow, upper_bound, settings_.log, 'B');
 
@@ -866,7 +874,7 @@ void branch_and_bound_t<i_t, f_t>::explore_subtree(i_t id,
         // would be better if we discard the node instead.
         if (get_heap_size() > settings_.num_bfs_threads) {
           mutex_dive_queue_.lock();
-          dive_queue_.push(node->detach_copy());
+          dive_queue_.emplace(node->detach_copy(), leaf_problem.lower, leaf_problem.upper);
           mutex_dive_queue_.unlock();
         }
 
@@ -950,16 +958,16 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(lp_problem_t<i_t, f_t>& leaf_pr
 
   while (status_ == mip_exploration_status_t::RUNNING &&
          (active_subtrees_ > 0 || get_heap_size() > 0)) {
-    std::optional<mip_node_t<i_t, f_t>> start_node;
+    std::optional<diving_root_t<i_t, f_t>> start_node;
 
     mutex_dive_queue_.lock();
     if (dive_queue_.size() > 0) { start_node = dive_queue_.pop(); }
     mutex_dive_queue_.unlock();
 
     if (start_node.has_value()) {
-      if (get_upper_bound() < start_node->lower_bound) { continue; }
+      if (get_upper_bound() < start_node->node.lower_bound) { continue; }
 
-      search_tree_t<i_t, f_t> subtree(std::move(start_node.value()));
+      search_tree_t<i_t, f_t> subtree(std::move(start_node->node));
       std::deque<mip_node_t<i_t, f_t>*> stack;
       stack.push_front(&subtree.root);
 
@@ -975,6 +983,10 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(lp_problem_t<i_t, f_t>& leaf_pr
 
         if (toc(stats_.start_time) > settings_.time_limit) { return; }
 
+        // Set the correct bounds for the leaf problem
+        leaf_problem.lower = start_node->lp_lower;
+        leaf_problem.upper = start_node->lp_upper;
+
         node_status_t node_status =
           solve_node(subtree, node_ptr, leaf_problem, Arow, upper_bound, log, 'D');
 
@@ -985,7 +997,9 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(lp_problem_t<i_t, f_t>& leaf_pr
           auto [first, second] = child_selection(node_ptr);
           stack.push_front(second);
           stack.push_front(first);
+        }
 
+        if (stack.size() > 1) {
           // If the diving thread is consuming the nodes faster than the
           // best first search, then we split the current subtree at the
           // lowest possible point and move to the queue, so it can
@@ -994,7 +1008,7 @@ void branch_and_bound_t<i_t, f_t>::diving_thread(lp_problem_t<i_t, f_t>& leaf_pr
             mutex_dive_queue_.lock();
             mip_node_t<i_t, f_t>* new_node = stack.back();
             stack.pop_back();
-            dive_queue_.push(new_node->detach_copy());
+            dive_queue_.emplace(new_node->detach_copy(), leaf_problem.lower, leaf_problem.upper);
             mutex_dive_queue_.unlock();
           }
         }
diff --git a/cpp/src/dual_simplex/branch_and_bound.hpp b/cpp/src/dual_simplex/branch_and_bound.hpp
index 7b80f88fa..23fb9eb7f 100644
--- a/cpp/src/dual_simplex/branch_and_bound.hpp
+++ b/cpp/src/dual_simplex/branch_and_bound.hpp
@@ -55,36 +55,64 @@ enum class mip_exploration_status_t {
 template <typename i_t, typename f_t>
 void upper_bound_callback(f_t upper_bound);
 
+template <typename i_t, typename f_t>
+struct diving_root_t {  // Starting point of a dive: a detached node plus LP variable bounds
+  mip_node_t<i_t, f_t> node;  // node from which the dive starts
+  std::vector<f_t> lp_lower;  // LP lower bounds captured when the node was queued
+  std::vector<f_t> lp_upper;  // LP upper bounds captured when the node was queued
+
+  diving_root_t(mip_node_t<i_t, f_t>&& node,
+                const std::vector<f_t>& lower,
+                const std::vector<f_t>& upper)
+    : node(std::move(node)), lp_lower(lower), lp_upper(upper)
+  {
+  }
+
+  friend bool operator>(const diving_root_t<i_t, f_t>& a, const diving_root_t<i_t, f_t>& b)
+  {
+    return a.node.lower_bound > b.node.lower_bound;  // compares by node lower bound only
+  }
+};
+
 // A min-heap for storing the starting nodes for the dives.
-// This has a maximum size of 8192, such that the container
+// This has a maximum size of 256, such that the container
 // will discard the least promising node if the queue is full.
 template <typename i_t, typename f_t>
 class dive_queue_t {
  private:
-  std::vector<mip_node_t<i_t, f_t>> buffer;
-  static constexpr i_t max_size_ = 2048;
+  std::vector<diving_root_t<i_t, f_t>> buffer;
+  static constexpr i_t max_size_ = 256;
 
  public:
   dive_queue_t() { buffer.reserve(max_size_); }
 
-  void push(mip_node_t<i_t, f_t>&& node)
+  void push(diving_root_t<i_t, f_t>&& node)
   {
     buffer.push_back(std::move(node));
-    std::push_heap(buffer.begin(), buffer.end(), node_compare_t<i_t, f_t>());
+    std::push_heap(buffer.begin(), buffer.end(), std::greater<>());
+    if (buffer.size() > max_size()) { buffer.pop_back(); }
+  }
+
+  void emplace(mip_node_t<i_t, f_t>&& node,
+               const std::vector<f_t>& lower,
+               const std::vector<f_t>& upper)
+  {
+    buffer.emplace_back(std::move(node), lower, upper);
+    std::push_heap(buffer.begin(), buffer.end(), std::greater<>());
     if (buffer.size() > max_size()) { buffer.pop_back(); }
   }
 
-  mip_node_t<i_t, f_t> pop()
+  diving_root_t<i_t, f_t> pop()
   {
-    std::pop_heap(buffer.begin(), buffer.end(), node_compare_t<i_t, f_t>());
-    mip_node_t<i_t, f_t> node = std::move(buffer.back());
+    std::pop_heap(buffer.begin(), buffer.end(), std::greater<>());
+    diving_root_t<i_t, f_t> node = std::move(buffer.back());
     buffer.pop_back();
     return node;
   }
 
   i_t size() const { return buffer.size(); }
   constexpr i_t max_size() const { return max_size_; }
-  const mip_node_t<i_t, f_t>& top() const { return buffer.front(); }
+  const diving_root_t<i_t, f_t>& top() const { return buffer.front(); }
   void clear() { buffer.clear(); }
 };
 
@@ -188,7 +216,7 @@ class branch_and_bound_t {
   // Set the final solution.
   mip_status_t set_final_solution(mip_solution_t<i_t, f_t>& solution, f_t lower_bound);
 
-  // Update the incumbent solution with the new feasible solution.
+  // Update the incumbent solution with the new feasible solution
   // found during branch and bound.
   void add_feasible_solution(f_t leaf_objective,
                              const std::vector<f_t>& leaf_solution,
@@ -207,7 +235,7 @@ class branch_and_bound_t {
                            i_t initial_heap_size);
 
   // Explore the search tree using the best-first search with plunging strategy.
-  void explore_subtree(i_t id,
+  void explore_subtree(i_t task_id,
                        search_tree_t<i_t, f_t>& search_tree,
                        mip_node_t<i_t, f_t>* start_node,
                        lp_problem_t<i_t, f_t>& leaf_problem,
diff --git a/cpp/src/dual_simplex/folding.cpp b/cpp/src/dual_simplex/folding.cpp
index 913d86b0a..8628f69eb 100644
--- a/cpp/src/dual_simplex/folding.cpp
+++ b/cpp/src/dual_simplex/folding.cpp
@@ -1536,10 +1536,12 @@ void folding(lp_problem_t<i_t, f_t>& problem,
   problem.lower     = std::vector<f_t>(reduced_cols, 0.0);
   problem.upper     = std::vector<f_t>(reduced_cols, inf);
 
-  presolve_info.folding_info.c_tilde          = c_tilde;
-  presolve_info.folding_info.A_tilde          = A_tilde;
-  presolve_info.folding_info.is_folded        = true;
-  presolve_info.folding_info.num_upper_bounds = nz_ub;
+  presolve_info.folding_info.c_tilde                      = c_tilde;
+  presolve_info.folding_info.A_tilde                      = A_tilde;
+  presolve_info.folding_info.is_folded                    = true;
+  presolve_info.folding_info.num_upper_bounds             = nz_ub;
+  presolve_info.folding_info.previous_free_variable_pairs = presolve_info.free_variable_pairs;
+  presolve_info.free_variable_pairs.clear();
 
   settings.log.printf("Folding: time %.2f seconds\n", toc(start_time));
 
diff --git a/cpp/src/dual_simplex/presolve.cpp b/cpp/src/dual_simplex/presolve.cpp
index e2790858e..8d80337c7 100644
--- a/cpp/src/dual_simplex/presolve.cpp
+++ b/cpp/src/dual_simplex/presolve.cpp
@@ -1419,9 +1419,10 @@ void uncrush_solution(const presolve_info_t<i_t, f_t>& presolve_info,
                       std::vector<f_t>& uncrushed_y,
                       std::vector<f_t>& uncrushed_z)
 {
-  std::vector<f_t> input_x = crushed_x;
-  std::vector<f_t> input_y = crushed_y;
-  std::vector<f_t> input_z = crushed_z;
+  std::vector<f_t> input_x             = crushed_x;
+  std::vector<f_t> input_y             = crushed_y;
+  std::vector<f_t> input_z             = crushed_z;
+  std::vector<i_t> free_variable_pairs = presolve_info.free_variable_pairs;
   if (presolve_info.folding_info.is_folded) {
     // We solved a foled problem in the form
     // minimize c_prime^T x_prime
@@ -1474,15 +1475,18 @@ void uncrush_solution(const presolve_info_t<i_t, f_t>& presolve_info,
     input_y.resize(previous_rows - presolve_info.folding_info.num_upper_bounds);
     input_z = ztilde;
     input_z.resize(previous_cols - presolve_info.folding_info.num_upper_bounds);
+
+    // If the original problem had free variables we need to reinstate them
+    free_variable_pairs = presolve_info.folding_info.previous_free_variable_pairs;
   }
 
-  const i_t num_free_variables = presolve_info.free_variable_pairs.size() / 2;
+  const i_t num_free_variables = free_variable_pairs.size() / 2;
   if (num_free_variables > 0) {
     settings.log.printf("Post-solve: Handling free variables %d\n", num_free_variables);
     // We added free variables so we need to map the crushed solution back to the original variables
     for (i_t k = 0; k < 2 * num_free_variables; k += 2) {
-      const i_t u = presolve_info.free_variable_pairs[k];
-      const i_t v = presolve_info.free_variable_pairs[k + 1];
+      const i_t u = free_variable_pairs[k];
+      const i_t v = free_variable_pairs[k + 1];
       input_x[u] -= input_x[v];
     }
     input_z.resize(input_z.size() - num_free_variables);
diff --git a/cpp/src/dual_simplex/presolve.hpp b/cpp/src/dual_simplex/presolve.hpp
index fa8a8db58..bf0aab899 100644
--- a/cpp/src/dual_simplex/presolve.hpp
+++ b/cpp/src/dual_simplex/presolve.hpp
@@ -60,6 +60,7 @@ struct folding_info_t {
       c_tilde(0),
       A_tilde(0, 0, 0),
       num_upper_bounds(0),
+      previous_free_variable_pairs({}),
       is_folded(false)
   {
   }
@@ -69,6 +70,7 @@ struct folding_info_t {
   std::vector<f_t> c_tilde;
   csc_matrix_t<i_t, f_t> A_tilde;
   i_t num_upper_bounds;
+  std::vector<i_t> previous_free_variable_pairs;
   bool is_folded;
 };
 
diff --git a/cpp/src/dual_simplex/sparse_cholesky.cuh b/cpp/src/dual_simplex/sparse_cholesky.cuh
index 51145a36b..7e792998f 100644
--- a/cpp/src/dual_simplex/sparse_cholesky.cuh
+++ b/cpp/src/dual_simplex/sparse_cholesky.cuh
@@ -100,13 +100,17 @@ class sparse_cholesky_base_t {
 template <typename mem_pool_t>
 int cudss_device_alloc(void* ctx, void** ptr, size_t size, cudaStream_t stream)
 {
-  return cudaMallocAsync(ptr, size, stream);
+  int status = cudaMallocAsync(ptr, size, stream);
+  if (status != cudaSuccess) { throw raft::cuda_error("Cuda error in cudss_device_alloc"); }
+  return status;
 }
 
 template <typename mem_pool_t>
 int cudss_device_dealloc(void* ctx, void* ptr, size_t size, cudaStream_t stream)
 {
-  return cudaFreeAsync(ptr, stream);
+  int status = cudaFreeAsync(ptr, stream);
+  if (status != cudaSuccess) { throw raft::cuda_error("Cuda error in cudss_device_dealloc"); }
+  return status;
 }
 
 template <class T>
@@ -534,7 +538,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
     }
 
     if (first_factor) {
-      settings_.log.printf("Factorization time          : %.2fs\n", numeric_time);
+      settings_.log.debug("Factorization time          : %.2fs\n", numeric_time);
       first_factor = false;
     }
     if (status != CUDSS_STATUS_SUCCESS) {
@@ -635,7 +639,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
 
     f_t symbolic_time = toc(start_symbolic);
     f_t analysis_time = toc(start_analysis);
-    settings_.log.printf("Symbolic factorization time: %.2fs\n", symbolic_time);
+    settings_.log.printf("Symbolic factorization time : %.2fs\n", symbolic_time);
     if (settings_.concurrent_halt != nullptr && *settings_.concurrent_halt == 1) {
       RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
       handle_ptr_->get_stream().synchronize();
@@ -647,7 +651,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
       cudssDataGet(handle, solverData, CUDSS_DATA_LU_NNZ, &lu_nz, sizeof(int64_t), &size_written),
       status,
       "cudssDataGet for LU_NNZ");
-    settings_.log.printf("Symbolic nonzeros in factor: %e\n", static_cast<f_t>(lu_nz) / 2.0);
+    settings_.log.printf("Symbolic nonzeros in factor : %.2e\n", static_cast<f_t>(lu_nz) / 2.0);
     RAFT_CUDA_TRY(cudaStreamSynchronize(stream));
     handle_ptr_->get_stream().synchronize();
     // TODO: Is there any way to get nonzeros in the factors?
@@ -703,7 +707,7 @@ class sparse_cholesky_cudss_t : public sparse_cholesky_base_t<i_t, f_t> {
     }
 
     if (first_factor) {
-      settings_.log.printf("Factor time %.2fs\n", numeric_time);
+      settings_.log.debug("Factorization time          : %.2fs\n", numeric_time);
       first_factor = false;
     }
     if (status != CUDSS_STATUS_SUCCESS) {
diff --git a/cpp/src/mip/diversity/diversity_manager.cu b/cpp/src/mip/diversity/diversity_manager.cu
index ce96a317a..b11f98e10 100644
--- a/cpp/src/mip/diversity/diversity_manager.cu
+++ b/cpp/src/mip/diversity/diversity_manager.cu
@@ -245,7 +245,7 @@ bool diversity_manager_t<i_t, f_t>::run_presolve(f_t time_limit)
   lp_dual_optimal_solution.resize(problem_ptr->n_constraints,
                                   problem_ptr->handle_ptr->get_stream());
   problem_ptr->handle_ptr->sync_stream();
-  CUOPT_LOG_INFO("After trivial presolve #constraints %d #variables %d objective offset %f.",
+  CUOPT_LOG_INFO("After trivial presolve: %d constraints, %d variables, objective offset %f.",
                  problem_ptr->n_constraints,
                  problem_ptr->n_variables,
                  problem_ptr->presolve_data.objective_offset);
diff --git a/cpp/src/mip/diversity/weights.cuh b/cpp/src/mip/diversity/weights.cuh
index 9f53b8847..d8563ea73 100644
--- a/cpp/src/mip/diversity/weights.cuh
+++ b/cpp/src/mip/diversity/weights.cuh
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <thrust/fill.h>
 #include <raft/core/handle.hpp>
 #include <rmm/device_scalar.hpp>
 #include <rmm/device_uvector.hpp>
diff --git a/cpp/src/mip/problem/presolve_data.cuh b/cpp/src/mip/problem/presolve_data.cuh
index 20ae5fe93..e9ba5f3b3 100644
--- a/cpp/src/mip/problem/presolve_data.cuh
+++ b/cpp/src/mip/problem/presolve_data.cuh
@@ -20,6 +20,7 @@
 #include <cuopt/linear_programming/optimization_problem.hpp>
 
 #include <thrust/sequence.h>
+#include <thrust/uninitialized_fill.h>
 #include <rmm/device_uvector.hpp>
 
 namespace cuopt {
diff --git a/cpp/src/mip/relaxed_lp/lp_state.cuh b/cpp/src/mip/relaxed_lp/lp_state.cuh
index 3bfa00955..8662df754 100644
--- a/cpp/src/mip/relaxed_lp/lp_state.cuh
+++ b/cpp/src/mip/relaxed_lp/lp_state.cuh
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <thrust/fill.h>
 #include <raft/util/cudart_utils.hpp>
 #include <rmm/device_uvector.hpp>
 
diff --git a/dependencies.yaml b/dependencies.yaml
index 419fdce8f..5db5a9ee1 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -46,7 +46,6 @@ files:
       - depends_on_cudf
       - depends_on_cuvs
       - depends_on_pylibraft
-      - depends_on_raft_dask
       - depends_on_rapids_logger
       - rapids_build_backend
   test_cpp:
@@ -195,7 +194,6 @@ files:
       - depends_on_rmm
       - depends_on_cudf
       - depends_on_cuvs
-      - depends_on_raft_dask
       - depends_on_pylibraft
       - depends_on_rapids_logger
   py_test_cuopt:
@@ -328,7 +326,7 @@ dependencies:
     common:
       - output_types: [conda]
         packages:
-          - libcuopt-tests==25.10.*,>=0.0.0a0
+          - libcuopt-tests==25.12.*,>=0.0.0a0
   build_wheels:
     common:
       - output_types: [requirements, pyproject]
@@ -575,7 +573,7 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - rapids-logger==0.1.*,>=0.0.0a0
+          - rapids-logger==0.2.*,>=0.0.0a0
       - output_types: requirements
         packages:
           # pip recognizes the index as a global option for the requirements.txt file
@@ -660,32 +658,6 @@ dependencies:
             packages:
               - *cuvs_unsuffixed
 
-  depends_on_raft_dask:
-    common:
-      - output_types: conda
-        packages:
-          - &raft_dask_unsuffixed raft-dask==25.12.*,>=0.0.0a0
-      - output_types: requirements
-        packages:
-          - --extra-index-url=https://pypi.nvidia.com
-          - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple
-    specific:
-      - output_types: [requirements, pyproject]
-        matrices:
-          - matrix:
-              cuda: "12.*"
-              cuda_suffixed: "true"
-            packages:
-              - raft-dask-cu12==25.12.*,>=0.0.0a0
-          - matrix:
-              cuda: "13.*"
-              cuda_suffixed: "true"
-            packages:
-              - raft-dask-cu13==25.12.*,>=0.0.0a0
-          - matrix:
-            packages:
-              - *raft_dask_unsuffixed
-
   depends_on_pylibraft:
     common:
       - output_types: conda
diff --git a/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst b/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst
index 6d942bde6..e321e319d 100644
--- a/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst
+++ b/docs/cuopt/source/cuopt-c/lp-milp/lp-milp-c-api.rst
@@ -16,6 +16,13 @@ You may use the following functions to determine the number of bytes used to rep
 .. doxygenfunction:: cuOptGetIntSize
 .. doxygenfunction:: cuOptGetFloatSize
 
+Version Information
+-------------------
+
+You may use the following function to get the version of the cuOpt library
+
+.. doxygenfunction:: cuOptGetVersion
+
 Status Codes
 ------------
 
@@ -25,6 +32,9 @@ Every function in the C API returns a status code that indicates success or fail
 .. doxygendefine:: CUOPT_INVALID_ARGUMENT
 .. doxygendefine:: CUOPT_MPS_FILE_ERROR
 .. doxygendefine:: CUOPT_MPS_PARSE_ERROR
+.. doxygendefine:: CUOPT_VALIDATION_ERROR
+.. doxygendefine:: CUOPT_OUT_OF_MEMORY
+.. doxygendefine:: CUOPT_RUNTIME_ERROR
 
 Optimization Problem
 --------------------
@@ -156,9 +166,22 @@ These constants are used as parameter names in the :c:func:`cuOptSetParameter`,
 .. doxygendefine:: CUOPT_MIP_ABSOLUTE_TOLERANCE
 .. doxygendefine:: CUOPT_MIP_RELATIVE_TOLERANCE
 .. doxygendefine:: CUOPT_MIP_INTEGRALITY_TOLERANCE
+.. doxygendefine:: CUOPT_MIP_ABSOLUTE_GAP
+.. doxygendefine:: CUOPT_MIP_RELATIVE_GAP
 .. doxygendefine:: CUOPT_MIP_SCALING
 .. doxygendefine:: CUOPT_MIP_HEURISTICS_ONLY
+.. doxygendefine:: CUOPT_MIP_PRESOLVE
 .. doxygendefine:: CUOPT_PRESOLVE
+.. doxygendefine:: CUOPT_LOG_TO_CONSOLE
+.. doxygendefine:: CUOPT_CROSSOVER
+.. doxygendefine:: CUOPT_FOLDING
+.. doxygendefine:: CUOPT_AUGMENTED
+.. doxygendefine:: CUOPT_DUALIZE
+.. doxygendefine:: CUOPT_ORDERING
+.. doxygendefine:: CUOPT_ELIMINATE_DENSE_COLUMNS
+.. doxygendefine:: CUOPT_CUDSS_DETERMINISTIC
+.. doxygendefine:: CUOPT_BARRIER_DUAL_INITIAL_POINT
+.. doxygendefine:: CUOPT_DUAL_POSTSOLVE
 .. doxygendefine:: CUOPT_SOLUTION_FILE
 .. doxygendefine:: CUOPT_NUM_CPU_THREADS
 .. doxygendefine:: CUOPT_USER_PROBLEM_FILE
@@ -186,6 +209,7 @@ These constants are used to configure `CUOPT_METHOD` via :c:func:`cuOptSetIntege
 .. doxygendefine:: CUOPT_METHOD_CONCURRENT
 .. doxygendefine:: CUOPT_METHOD_PDLP
 .. doxygendefine:: CUOPT_METHOD_DUAL_SIMPLEX
+.. doxygendefine:: CUOPT_METHOD_BARRIER
 
 
 Solving an LP or MIP
@@ -206,12 +230,15 @@ The output of a solve is a `cuOptSolution` object.
 The following functions may be used to access information from a `cuOptSolution`
 
 .. doxygenfunction:: cuOptGetTerminationStatus
+.. doxygenfunction:: cuOptGetErrorStatus
+.. doxygenfunction:: cuOptGetErrorString
 .. doxygenfunction:: cuOptGetPrimalSolution
 .. doxygenfunction:: cuOptGetObjectiveValue
 .. doxygenfunction:: cuOptGetSolveTime
 .. doxygenfunction:: cuOptGetMIPGap
 .. doxygenfunction:: cuOptGetSolutionBound
 .. doxygenfunction:: cuOptGetDualSolution
+.. doxygenfunction:: cuOptGetDualObjectiveValue
 .. doxygenfunction:: cuOptGetReducedCosts
 
 When you are finished with a `cuOptSolution` object you should destory it with
diff --git a/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb b/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb
index 2cf903c46..b376ac8e4 100644
--- a/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb
+++ b/docs/cuopt/source/cuopt-python/routing/routing-example.ipynb
@@ -12,10 +12,62 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 1,
    "id": "2cb694f7",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/luffy/.local/lib/python3.12/site-packages/cudf/utils/_ptxcompiler.py:64: UserWarning: Error getting driver and runtime versions:\n",
+      "\n",
+      "stdout:\n",
+      "\n",
+      "\n",
+      "\n",
+      "stderr:\n",
+      "\n",
+      "Traceback (most recent call last):\n",
+      "  File \"<string>\", line 4, in <module>\n",
+      "  File \"/home/luffy/miniforge3/envs/cuopt/lib/python3.12/site-packages/numba_cuda/numba/cuda/cudadrv/driver.py\", line 393, in safe_cuda_api_call\n",
+      "    return self._check_cuda_python_error(fname, libfn(*args))\n",
+      "                                                ^^^^^^^^^^^^\n",
+      "TypeError: cuDriverGetVersion() takes no arguments (1 given)\n",
+      "\n",
+      "\n",
+      "Not patching Numba\n",
+      "  warnings.warn(msg, UserWarning)\n",
+      "/home/luffy/.local/lib/python3.12/site-packages/cupy/_environment.py:596: UserWarning: \n",
+      "--------------------------------------------------------------------------------\n",
+      "\n",
+      "  CuPy may not function correctly because multiple CuPy packages are installed\n",
+      "  in your environment:\n",
+      "\n",
+      "    cupy, cupy-cuda12x\n",
+      "\n",
+      "  Follow these steps to resolve this issue:\n",
+      "\n",
+      "    1. For all packages listed above, run the following command to remove all\n",
+      "       existing CuPy installations:\n",
+      "\n",
+      "         $ pip uninstall <package_name>\n",
+      "\n",
+      "      If you previously installed CuPy via conda, also run the following:\n",
+      "\n",
+      "         $ conda uninstall cupy\n",
+      "\n",
+      "    2. Install the appropriate CuPy package.\n",
+      "       Refer to the Installation Guide for detailed instructions.\n",
+      "\n",
+      "         https://docs.cupy.dev/en/stable/install.html\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\n",
+      "  warnings.warn(f'''\n"
+     ]
+    }
+   ],
    "source": [
     "from cuopt import routing\n",
     "from cuopt import distance_engine\n",
@@ -61,7 +113,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 2,
    "id": "5d12f05d",
    "metadata": {},
    "outputs": [],
@@ -100,7 +152,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 3,
    "id": "2c824c99",
    "metadata": {},
    "outputs": [],
@@ -122,7 +174,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 4,
    "id": "4e08f664",
    "metadata": {},
    "outputs": [],
@@ -152,22 +204,50 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 5,
    "id": "9975bf1a",
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Waypoint graph node to time matrix index mapping \n",
-      "{np.int64(0): 0, np.int64(4): 1, np.int64(5): 2, np.int64(6): 3}\n",
-      "\n",
-      "     0    1    2    3\n",
-      "0  0.0  6.0  4.0  6.0\n",
-      "1  6.0  0.0  4.0  6.0\n",
-      "2  4.0  4.0  0.0  4.0\n",
-      "3  6.0  6.0  4.0  0.0\n"
+     "ename": "RuntimeError",
+     "evalue": "CuPy failed to load libnvrtc.so.12: OSError: libnvrtc.so.12: cannot open shared object file: No such file or directory",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
+      "\u001b[31mOSError\u001b[39m                                   Traceback (most recent call last)",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/_softlink.pyx:25\u001b[39m, in \u001b[36mcupy_backends.cuda._softlink.SoftLink.__init__\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/cuopt/lib/python3.12/ctypes/__init__.py:379\u001b[39m, in \u001b[36mCDLL.__init__\u001b[39m\u001b[34m(self, name, mode, handle, use_errno, use_last_error, winmode)\u001b[39m\n\u001b[32m    378\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m handle \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m379\u001b[39m     \u001b[38;5;28mself\u001b[39m._handle = \u001b[43m_dlopen\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    380\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n",
+      "\u001b[31mOSError\u001b[39m: libnvrtc.so.12: cannot open shared object file: No such file or directory",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[31mRuntimeError\u001b[39m                              Traceback (most recent call last)",
+      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 6\u001b[39m\n\u001b[32m      1\u001b[39m waypoint_graph = distance_engine.WaypointMatrix(\n\u001b[32m      2\u001b[39m     offsets,\n\u001b[32m      3\u001b[39m     edges,\n\u001b[32m      4\u001b[39m     weights\n\u001b[32m      5\u001b[39m )\n\u001b[32m----> \u001b[39m\u001b[32m6\u001b[39m cost_matrix = \u001b[43mwaypoint_graph\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcompute_cost_matrix\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtarget_locations\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m      7\u001b[39m transit_time_matrix = cost_matrix.copy(deep=\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[32m      8\u001b[39m target_map = {v:k \u001b[38;5;28;01mfor\u001b[39;00m k, v \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(target_locations)}\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/cuopt/lib/python3.12/site-packages/cuopt/utilities/exception_handler.py:60\u001b[39m, in \u001b[36mcatch_cuopt_exception.<locals>.func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m     58\u001b[39m             \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(err[\u001b[33m\"\u001b[39m\u001b[33mmsg\u001b[39m\u001b[33m\"\u001b[39m])\n\u001b[32m     59\u001b[39m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m60\u001b[39m         \u001b[38;5;28;01mraise\u001b[39;00m e\n\u001b[32m     61\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m     62\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m e\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/cuopt/lib/python3.12/site-packages/cuopt/utilities/exception_handler.py:36\u001b[39m, in \u001b[36mcatch_cuopt_exception.<locals>.func\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m     33\u001b[39m \u001b[38;5;129m@functools\u001b[39m.wraps(f)\n\u001b[32m     34\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mfunc\u001b[39m(*args, **kwargs):\n\u001b[32m     35\u001b[39m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m36\u001b[39m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     37\u001b[39m     \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m     38\u001b[39m         err_msg = \u001b[38;5;28mstr\u001b[39m(e)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/cuopt/lib/python3.12/site-packages/cuopt/distance_engine/waypoint_matrix.py:133\u001b[39m, in \u001b[36mWaypointMatrix.compute_cost_matrix\u001b[39m\u001b[34m(self, target_locations)\u001b[39m\n\u001b[32m    130\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m target_locations.shape[\u001b[32m0\u001b[39m] <= \u001b[32m0\u001b[39m:\n\u001b[32m    131\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m\"\"\"\u001b[39m\u001b[33mTarget_locations length must be positive\u001b[39m\u001b[33m\"\"\"\u001b[39m)\n\u001b[32m--> \u001b[39m\u001b[32m133\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcompute_cost_matrix\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtarget_locations\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/miniforge3/envs/cuopt/lib/python3.12/site-packages/cuopt/distance_engine/waypoint_matrix_wrapper.pyx:81\u001b[39m, in \u001b[36mcuopt.distance_engine.waypoint_matrix_wrapper.WaypointMatrix.compute_cost_matrix\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/cudf/utils/performance_tracking.py:51\u001b[39m, in \u001b[36m_performance_tracking.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m     43\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m nvtx.enabled():\n\u001b[32m     44\u001b[39m     stack.enter_context(\n\u001b[32m     45\u001b[39m         nvtx.annotate(\n\u001b[32m     46\u001b[39m             message=func.\u001b[34m__qualname__\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m     49\u001b[39m         )\n\u001b[32m     50\u001b[39m     )\n\u001b[32m---> \u001b[39m\u001b[32m51\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/cudf/core/dataframe.py:810\u001b[39m, in \u001b[36mDataFrame.__init__\u001b[39m\u001b[34m(self, data, index, columns, dtype, copy, nan_as_null)\u001b[39m\n\u001b[32m    808\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mdescr\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m arr_interface:\n\u001b[32m    809\u001b[39m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(arr_interface[\u001b[33m\"\u001b[39m\u001b[33mdescr\u001b[39m\u001b[33m\"\u001b[39m]) == \u001b[32m1\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m810\u001b[39m         new_df = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_from_arrays\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    811\u001b[39m \u001b[43m            \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m=\u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m=\u001b[49m\u001b[43mcolumns\u001b[49m\n\u001b[32m    812\u001b[39m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    813\u001b[39m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m    814\u001b[39m         new_df = \u001b[38;5;28mself\u001b[39m.from_records(\n\u001b[32m    815\u001b[39m             data, index=index, columns=columns\n\u001b[32m    816\u001b[39m         )\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/cudf/utils/performance_tracking.py:51\u001b[39m, in \u001b[36m_performance_tracking.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m     43\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m nvtx.enabled():\n\u001b[32m     44\u001b[39m     stack.enter_context(\n\u001b[32m     45\u001b[39m         nvtx.annotate(\n\u001b[32m     46\u001b[39m             message=func.\u001b[34m__qualname__\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m     49\u001b[39m         )\n\u001b[32m     50\u001b[39m     )\n\u001b[32m---> \u001b[39m\u001b[32m51\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/cudf/core/dataframe.py:5947\u001b[39m, in \u001b[36mDataFrame._from_arrays\u001b[39m\u001b[34m(cls, data, index, columns, nan_as_null)\u001b[39m\n\u001b[32m   5945\u001b[39m array_data: np.ndarray | cupy.ndarray\n\u001b[32m   5946\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(data, \u001b[33m\"\u001b[39m\u001b[33m__cuda_array_interface__\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m-> \u001b[39m\u001b[32m5947\u001b[39m     array_data = \u001b[43mcupy\u001b[49m\u001b[43m.\u001b[49m\u001b[43masarray\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mF\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[32m   5948\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(data, \u001b[33m\"\u001b[39m\u001b[33m__array_interface__\u001b[39m\u001b[33m\"\u001b[39m):\n\u001b[32m   5949\u001b[39m     array_data = np.asarray(data, order=\u001b[33m\"\u001b[39m\u001b[33mF\u001b[39m\u001b[33m\"\u001b[39m)\n",
+      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/lib/python3.12/site-packages/cupy/_creation/from_data.py:88\u001b[39m, in \u001b[36masarray\u001b[39m\u001b[34m(a, dtype, order, blocking)\u001b[39m\n\u001b[32m     56\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34masarray\u001b[39m(a, dtype=\u001b[38;5;28;01mNone\u001b[39;00m, order=\u001b[38;5;28;01mNone\u001b[39;00m, *, blocking=\u001b[38;5;28;01mFalse\u001b[39;00m):\n\u001b[32m     57\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Converts an object to array.\u001b[39;00m\n\u001b[32m     58\u001b[39m \n\u001b[32m     59\u001b[39m \u001b[33;03m    This is equivalent to ``array(a, dtype, copy=False, order=order)``.\u001b[39;00m\n\u001b[32m   (...)\u001b[39m\u001b[32m     86\u001b[39m \n\u001b[32m     87\u001b[39m \u001b[33;03m    \"\"\"\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m88\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_core\u001b[49m\u001b[43m.\u001b[49m\u001b[43marray\u001b[49m\u001b[43m(\u001b[49m\u001b[43ma\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mblocking\u001b[49m\u001b[43m=\u001b[49m\u001b[43mblocking\u001b[49m\u001b[43m)\u001b[49m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:2502\u001b[39m, in \u001b[36mcupy._core.core.array\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:2512\u001b[39m, in \u001b[36mcupy._core.core.array\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:2543\u001b[39m, in \u001b[36mcupy._core.core._array_from_cupy_ndarray\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:618\u001b[39m, in \u001b[36mcupy._core.core._ndarray_base.astype\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:686\u001b[39m, in \u001b[36mcupy._core.core._ndarray_base.astype\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/_kernel.pyx:1374\u001b[39m, in \u001b[36mcupy._core._kernel.ufunc.__call__\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/_kernel.pyx:1401\u001b[39m, in \u001b[36mcupy._core._kernel.ufunc._get_ufunc_kernel\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/_kernel.pyx:1082\u001b[39m, in \u001b[36mcupy._core._kernel._get_ufunc_kernel\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/_kernel.pyx:94\u001b[39m, in \u001b[36mcupy._core._kernel._get_simple_elementwise_kernel\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/_kernel.pyx:82\u001b[39m, in \u001b[36mcupy._core._kernel._get_simple_elementwise_kernel_from_code\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:2375\u001b[39m, in \u001b[36mcupy._core.core.compile_with_cache\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy/_core/core.pyx:2320\u001b[39m, in \u001b[36mcupy._core.core.assemble_cupy_compiler_options\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/libs/nvrtc.pyx:57\u001b[39m, in \u001b[36mcupy_backends.cuda.libs.nvrtc.getVersion\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/libs/_cnvrtc.pxi:72\u001b[39m, in \u001b[36mcupy_backends.cuda.libs.nvrtc.initialize\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/libs/_cnvrtc.pxi:75\u001b[39m, in \u001b[36mcupy_backends.cuda.libs.nvrtc._initialize\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/libs/_cnvrtc.pxi:153\u001b[39m, in \u001b[36mcupy_backends.cuda.libs.nvrtc._get_softlink\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[36mFile \u001b[39m\u001b[32mcupy_backends/cuda/_softlink.pyx:32\u001b[39m, in \u001b[36mcupy_backends.cuda._softlink.SoftLink.__init__\u001b[39m\u001b[34m()\u001b[39m\n",
+      "\u001b[31mRuntimeError\u001b[39m: CuPy failed to load libnvrtc.so.12: OSError: libnvrtc.so.12: cannot open shared object file: No such file or directory"
      ]
     }
    ],
@@ -230,7 +310,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": null,
    "id": "72b715c7",
    "metadata": {},
    "outputs": [
@@ -409,7 +489,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": null,
    "id": "9e17e899",
    "metadata": {},
    "outputs": [
@@ -496,7 +576,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 43,
+   "execution_count": null,
    "id": "2e765325",
    "metadata": {},
    "outputs": [],
@@ -525,7 +605,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
+   "execution_count": null,
    "id": "c936b137",
    "metadata": {},
    "outputs": [
@@ -567,7 +647,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
+   "execution_count": null,
    "id": "87c2d9f8",
    "metadata": {},
    "outputs": [],
@@ -596,7 +676,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 46,
+   "execution_count": null,
    "id": "1d325f4b",
    "metadata": {},
    "outputs": [
@@ -642,7 +722,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
+   "execution_count": null,
    "id": "064978ca",
    "metadata": {},
    "outputs": [],
@@ -666,7 +746,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 48,
+   "execution_count": null,
    "id": "b3f328e3",
    "metadata": {},
    "outputs": [],
@@ -708,7 +788,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 49,
+   "execution_count": null,
    "id": "a6babc11",
    "metadata": {
     "scrolled": true
@@ -732,7 +812,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
+   "execution_count": null,
    "id": "28a05ace",
    "metadata": {},
    "outputs": [
@@ -792,7 +872,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 51,
+   "execution_count": null,
    "id": "e0d98709",
    "metadata": {},
    "outputs": [
@@ -838,7 +918,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
+   "execution_count": null,
    "id": "c13cfbf3",
    "metadata": {
     "scrolled": true
@@ -945,7 +1025,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.10"
+   "version": "3.12.11"
   }
  },
  "nbformat": 4,
diff --git a/docs/cuopt/source/introduction.rst b/docs/cuopt/source/introduction.rst
index 100282128..de24f4746 100644
--- a/docs/cuopt/source/introduction.rst
+++ b/docs/cuopt/source/introduction.rst
@@ -66,9 +66,15 @@ This is a linear program.
 
 How cuOpt Solves the Linear Programming Problem
 ------------------------------------------------
-cuOpt includes an LP solver based on `PDLP <https://arxiv.org/abs/2106.04756>`__, a new First-Order Method (FOM) used to solve large-scale LPs. This solver implements gradient descent, enhanced by heuristics, and performing massively parallel operations efficiently by leveraging the latest NVIDIA GPUs.
+cuOpt includes three LP solving methods:
 
-In addition to PDLP, cuOpt includes a dual simplex solver that runs on the CPU. Both algorithms can be run concurrently on the GPU and CPU.
+* **PDLP**: Based on `PDLP <https://arxiv.org/abs/2106.04756>`__, a First-Order Method (FOM) for solving large-scale LPs. This solver implements primal-dual hybrid gradient enhanced by heuristics. Sparse matrix-vector products are performed efficiently on NVIDIA GPUs.
+
+* **Barrier (Interior-Point)**: A primal-dual interior-point method that uses GPU-accelerated sparse Cholesky and LDLT solves via cuDSS, and sparse matrix operations via cuSparse.
+
+* **Dual Simplex**: A CPU-based dual simplex solver for small to medium-sized problems.
+
+All three algorithms can be run concurrently (PDLP and barrier on the GPU, dual simplex on the CPU), and the solution from whichever algorithm finishes first is returned automatically.
 
 Mixed Integer Linear Programming (MILP)
 =========================================
@@ -121,6 +127,7 @@ cuOpt supports the following APIs:
    - `AMPL <https://www.ampl.com/>`_
    - `GAMS <https://www.gams.com/>`_
    - `PuLP <https://pypi.org/project/PuLP/>`_
+   - `JuMP <https://github.com/jump-dev/cuOpt.jl>`_
 
 
 ==================================
diff --git a/docs/cuopt/source/lp-features.rst b/docs/cuopt/source/lp-features.rst
index c5d589907..fc450736b 100644
--- a/docs/cuopt/source/lp-features.rst
+++ b/docs/cuopt/source/lp-features.rst
@@ -13,6 +13,7 @@ The LP solver can be accessed in the following ways:
    -  AMPL
    -  GAMS
    -  PuLP
+   -  JuMP
 
 - **C API**: A native C API that provides direct low-level access to cuOpt's LP capabilities, enabling integration into any application or system that can interface with C.
 
@@ -65,9 +66,11 @@ Users can control how the solver will operate by specifying the PDLP solver mode
 Method
 ------
 
-**Concurrent**: The default method for solving linear programs. When concurrent is selected, cuOpt runs two algorithms at the same time: PDLP on the GPU and dual simplex on the CPU. A solution is returned from the algorithm that finishes first.
+**Concurrent**: The default method for solving linear programs. When concurrent is selected, cuOpt runs three algorithms in parallel: PDLP on the GPU, barrier (interior-point) on the GPU, and dual simplex on the CPU. A solution is returned from the algorithm that finishes first.
 
-**PDLP**: Primal-Dual Hybrid Gradient for Linear Program is an algorithm for solving large-scale linear programming problems on the GPU. PDLP does not attempt to any matrix factorizations during the course of the solve. Select this method if your LP is so large that factorization will not fit into memory. By default PDLP solves to low relative tolerance and the solutions it returns do not lie at a vertex of the feasible region. Enable crossover to obtain a highly accurate basic solution from a PDLP solution.
+**PDLP**: Primal-Dual Hybrid Gradient for Linear Program is an algorithm for solving large-scale linear programming problems on the GPU. PDLP does not attempt any matrix factorizations during the course of the solve. Select this method if your LP is so large that factorization will not fit into memory. By default PDLP solves to low relative tolerance and the solutions it returns do not lie at a vertex of the feasible region. Enable crossover to obtain a highly accurate basic solution from a PDLP solution.
+
+**Barrier**: The barrier method (also known as interior-point method) solves linear programs using a primal-dual predictor-corrector algorithm. This method uses GPU-accelerated sparse Cholesky and sparse LDLT solves via cuDSS, and GPU-accelerated sparse matrix-vector and matrix-matrix operations via cuSparse. Barrier is particularly effective for large-scale problems and can automatically apply techniques like folding, dualization, and dense column elimination to improve performance. This method solves the linear systems at each iteration using the augmented system or the normal equations (ADAT). Enable crossover to obtain a highly accurate basic solution from a barrier solution.
 
 **Dual Simplex**: Dual simplex is the simplex method applied to the dual of the linear program. Dual simplex requires the basis factorization of linear program fit into memory. Select this method if your LP is small to medium sized, or if you require a high-quality basic solution.
 
@@ -75,7 +78,7 @@ Method
 Crossover
 ---------
 
-Crossover allows you to obtain a high-quality basic solution from the results of a PDLP solve. More details can be found :ref:`here <crossover>`.
+Crossover allows you to obtain a high-quality basic solution from the results of a PDLP or barrier solve. When enabled, crossover converts these solutions to a vertex solution (basic solution) with high accuracy. More details can be found :ref:`here <crossover>`.
 
 
 Presolve
diff --git a/docs/cuopt/source/lp-milp-settings.rst b/docs/cuopt/source/lp-milp-settings.rst
index 258695419..d755d5a97 100644
--- a/docs/cuopt/source/lp-milp-settings.rst
+++ b/docs/cuopt/source/lp-milp-settings.rst
@@ -23,8 +23,8 @@ may run slightly over the limit. If set along with the iteration limit, cuOpt wi
 the first limit (iteration or time) is hit.
 
 
-Note: by default there is no time limit. So cuOpt will run until it finds an optimal solution,
-or proves the problem is infeasible or unbounded.
+.. note:: By default there is no time limit, so cuOpt will run until it finds an optimal solution,
+   or proves the problem is infeasible or unbounded.
 
 
 
@@ -33,25 +33,25 @@ Log to Console
 ``CUOPT_LOG_TO_CONSOLE`` controls whether cuOpt should log information to the console during a solve.
 If true, a logging info is written to the console, if false no logging info is written to the console (logs may still be written to a file.)
 
-Note: the default value is true.
+.. note:: the default value is true.
 
 Log File
 ^^^^^^^^
 ``CUOPT_LOG_FILE`` controls the name of a log file where cuOpt should write information about the solve.
 
-Note: the default value is ``""`` and no log file is written. This setting is ignored by the cuOpt service, use the log callback feature instead.
+.. note:: the default value is ``""`` and no log file is written. This setting is ignored by the cuOpt service, use the log callback feature instead.
 
 Solution File
 ^^^^^^^^^^^^^
 ``CUOPT_SOLUTION_FILE`` controls the name of a file where cuOpt should write the solution.
 
-Note: the default value is ``""`` and no solution file is written. This setting is ignored by the cuOpt service.
+.. note:: the default value is ``""`` and no solution file is written. This setting is ignored by the cuOpt service.
 
 User Problem File
 ^^^^^^^^^^^^^^^^^
 ``CUOPT_USER_PROBLEM_FILE`` controls the name of a file where cuOpt should write the user problem.
 
-Note: the default value is ``""`` and no user problem file is written. This setting is ignored by the cuOpt service.
+.. note:: the default value is ``""`` and no user problem file is written. This setting is ignored by the cuOpt service.
 
 Num CPU Threads
 ^^^^^^^^^^^^^^^
@@ -59,7 +59,7 @@ Num CPU Threads
 the amount of CPU resources cuOpt uses. Set this to a large value to improve solve times for CPU
 parallel parts of the solvers.
 
-Note: by default the number of CPU threads is automatically determined based on the number of CPU cores.
+.. note:: by default the number of CPU threads is automatically determined based on the number of CPU cores.
 
 Presolve
 ^^^^^^^^
@@ -78,20 +78,19 @@ We now describe the parameter settings used to control cuOpt's Linear Programmin
 Method
 ^^^^^^
 
-``CUOPT_METHOD`` controls the method to solve the linear programming problem. Three methods are available:
+``CUOPT_METHOD`` controls the method to solve the linear programming problem. Four methods are available:
 
-* ``Concurrent``: Use both PDLP and dual simplex in parallel.
+* ``Concurrent``: Use PDLP, dual simplex, and barrier in parallel (default).
 * ``PDLP``: Use the PDLP method.
 * ``Dual Simplex``: Use the dual simplex method.
+* ``Barrier``: Use the barrier (interior-point) method.
 
-Note: The default method is ``Concurrent``.
+.. note:: The default method is ``Concurrent``.
 
 C API users should use the constants defined in :ref:`method-constants` for this parameter.
 
 Server Thin client users should use the :class:`cuopt_sh_client.SolverMethod` for this parameter.
 
-
-
 PDLP Solver Mode
 ^^^^^^^^^^^^^^^^
 
@@ -117,8 +116,8 @@ For performance reasons, cuOpt's does not constantly checks for iteration limit,
 the solver might run a few extra iterations over the limit. If set along with the time limit,
 cuOpt will stop at the first limit (iteration or time) reached.
 
-Note: by default there is no iteration limit. So, cuOpt will run until it finds an optimal solution,
-or proves the problem is infeasible or unbounded.
+.. note:: by default there is no iteration limit. So, cuOpt will run until it finds an optimal solution,
+   or proves the problem is infeasible or unbounded.
 
 
 Infeasiblity Detection
@@ -129,8 +128,8 @@ is not always accurate. Some problems detected as infeasible may converge under
 Detecting infeasibility consumes both more runtime and memory. The added runtime is between 3% and 7%,
 the added memory consumpution is between 10% and 20%.
 
-Note: by default PDLP will not detect infeasibility. Dual simplex will always detect infeasibility
-regardless of this setting.
+.. note:: by default PDLP will not detect infeasibility. Dual simplex will always detect infeasibility
+   regardless of this setting.
 
 Strict Infeasibility
 ^^^^^^^^^^^^^^^^^^^^
@@ -139,21 +138,21 @@ Strict Infeasibility
 is detected as infeasible, PDLP will stop. When false both the current and average solution need to be
 detected as infeasible for PDLP to stop.
 
-Note: the default value is false.
+.. note:: the default value is false.
 
 .. _crossover:
 
 Crossover
 ^^^^^^^^^
 
-``CUOPT_CROSSOVER`` controls whether PDLP should crossover to a basic solution after a optimal solution is found.
+``CUOPT_CROSSOVER`` controls whether PDLP or barrier should crossover to a basic solution after an optimal solution is found.
 Changing this value has a significant impact on accuracy and runtime.
-By default the solutions provided by PDLP are low accuracy and may have many variables that lie
+By default the solutions provided by PDLP and barrier do not lie at a vertex and thus may have many variables that lie
 between their bounds. Enabling crossover allows the user to obtain a high-quality basic solution
 that lies at a vertex of the feasible region. If n is the number of variables, and m is the number of
 constraints, n - m variables will be on their bounds in a basic solution.
 
-Note: the default value is false.
+.. note:: the default value is false.
 
 Save Best Primal So Far
 ^^^^^^^^^^^^^^^^^^^^^^^
@@ -164,21 +163,104 @@ With this parameter set to true, PDLP
 * If no primal feasible was found, the one with the lowest primal residual will be kept
 * If two have the same primal residual, the one with the best objective will be kept
 
-Note: the default value is false.
+.. note:: the default value is false.
 
 First Primal Feasible
 ^^^^^^^^^^^^^^^^^^^^^
 
 ``CUOPT_FIRST_PRIMAL_FEASIBLE`` controls whether PDLP should stop when the first primal feasible solution is found.
 
-Note: the default value is false.
+.. note:: the default value is false.
 
 Per Constraint Residual
 ^^^^^^^^^^^^^^^^^^^^^^^
 
 ``CUOPT_PER_CONSTRAINT_RESIDUAL`` controls whether PDLP should compute the primal & dual residual per constraint instead of globally.
 
-Note: the default value is false.
+.. note:: the default value is false.
+
+Barrier Solver Settings
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+The following settings control the behavior of the barrier (interior-point) method:
+
+Folding
+"""""""
+
+``CUOPT_FOLDING`` controls whether to fold the linear program. Folding can reduce problem size by exploiting symmetry in the problem.
+
+* ``-1``: Automatic (default) - cuOpt decides whether to fold based on problem characteristics
+* ``0``: Disable folding
+* ``1``: Force folding to run
+
+.. note:: the default value is ``-1`` (automatic).
+
+Dualize
+"""""""
+
+``CUOPT_DUALIZE`` controls whether to dualize the linear program in presolve. Dualizing can improve solve time for problems with inequality constraints that have more constraints than variables.
+
+* ``-1``: Automatic (default) - cuOpt decides whether to dualize based on problem characteristics
+* ``0``: Don't attempt to dualize
+* ``1``: Force dualize
+
+.. note:: the default value is ``-1`` (automatic).
+
+Ordering
+""""""""
+
+``CUOPT_ORDERING`` controls the ordering algorithm used by cuDSS for sparse factorizations. The ordering can significantly impact solver run time.
+
+* ``-1``: Automatic (default) - cuOpt selects the best ordering
+* ``0``: cuDSS default ordering
+* ``1``: AMD (Approximate Minimum Degree) ordering
+
+.. note:: the default value is ``-1`` (automatic).
+
+Augmented System
+""""""""""""""""
+
+``CUOPT_AUGMENTED`` controls which linear system to solve in the barrier method.
+
+* ``-1``: Automatic (default) - cuOpt selects the best linear system to solve
+* ``0``: Solve the ADAT system (normal equations)
+* ``1``: Solve the augmented system
+
+.. note:: the default value is ``-1`` (automatic). The augmented system may be more stable for some problems, while ADAT may be faster for others.
+
+Eliminate Dense Columns
+""""""""""""""""""""""""
+
+``CUOPT_ELIMINATE_DENSE_COLUMNS`` controls whether to eliminate dense columns from the constraint matrix before solving. Eliminating dense columns can improve performance by reducing fill-in during factorization.
+However, extra solves must be performed at each iteration.
+
+* ``true``: Eliminate dense columns (default)
+* ``false``: Don't eliminate dense columns
+
+This setting only has an effect when the ADAT (normal equation) system is solved.
+
+.. note:: the default value is ``true``.
+
+cuDSS Deterministic Mode
+"""""""""""""""""""""""""
+
+``CUOPT_CUDSS_DETERMINISTIC`` controls whether cuDSS operates in deterministic mode. Deterministic mode ensures reproducible results across runs but may be slower.
+
+* ``true``: Use deterministic mode
+* ``false``: Use non-deterministic mode (default)
+
+.. note:: the default value is ``false``. Enable deterministic mode if reproducibility is more important than performance.
+
+Dual Initial Point
+""""""""""""""""""
+
+``CUOPT_BARRIER_DUAL_INITIAL_POINT`` controls the method used to compute the dual initial point for the barrier solver. The choice of initial point will affect the number of iterations performed by barrier.
+
+* ``-1``: Automatic (default) - cuOpt selects the best method
+* ``0``: Use an initial point from a heuristic approach based on the paper "On Implementing Mehrotra's Predictor–Corrector Interior-Point Method for Linear Programming" (SIAM J. Optimization, 1992) by Lustig, Marsten, and Shanno.
+* ``1``: Use an initial point from solving a least squares problem that minimizes the norms of the dual variables and reduced costs while satisfying the dual equality constraints.
+
+.. note:: the default value is ``-1`` (automatic).
 
 Absolute Primal Tolerance
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -204,7 +286,7 @@ The primal feasibility condition is computed as follows::
 
    primal_feasiblity < absolute_primal_tolerance + relative_primal_tolerance * l2_norm(b)
 
-Note: the default value is ``1e-4``.
+.. note:: the default value is ``1e-4``.
 
 Absolute Dual Tolerance
 ^^^^^^^^^^^^^^^^^^^^^^^
@@ -217,7 +299,7 @@ The dual feasibility condition is computed as follows::
 
    dual_feasiblity < absolute_dual_tolerance + relative_dual_tolerance * l2_norm(c)
 
-Note: the default value is ``1e-4``.
+.. note:: the default value is ``1e-4``.
 
 Relative Dual Tolerance
 ^^^^^^^^^^^^^^^^^^^^^^^
@@ -228,7 +310,7 @@ The dual feasibility condition is computed as follows::
 
    dual_feasiblity < absolute_dual_tolerance + relative_dual_tolerance * l2_norm(c)
 
-Note: the default value is ``1e-4``.
+.. note:: the default value is ``1e-4``.
 
 
 Absolute Gap Tolerance
@@ -241,7 +323,7 @@ The duality gap is computed as follows::
 
    duality_gap < absolute_gap_tolerance + relative_gap_tolerance * (|primal_objective| + |dual_objective|)
 
-Note: the default value is ``1e-4``.
+.. note:: the default value is ``1e-4``.
 
 
 Relative Gap Tolerance
@@ -253,7 +335,7 @@ The duality gap is computed as follows::
 
    duality_gap < absolute_gap_tolerance + relative_gap_tolerance * (|primal_objective| + |dual_objective|)
 
-Note: the default value is ``1e-4``.
+.. note:: the default value is ``1e-4``.
 
 
 Mixed Integer Linear Programming
@@ -269,7 +351,7 @@ Heuristics only
 bound is improved via the GPU. When set to false, both the GPU and CPU are used and
 the dual bound is improved on the CPU.
 
-Note: the default value is false.
+.. note:: the default value is false.
 
 Scaling
 ^^^^^^^
@@ -277,7 +359,7 @@ Scaling
 ``CUOPT_MIP_SCALING`` controls if scaling should be applied to the MIP problem. When true scaling is applied,
 when false, no scaling is applied.
 
-Note: the defaulte value is true.
+.. note:: the default value is true.
 
 
 Absolute Tolerance
@@ -285,14 +367,14 @@ Absolute Tolerance
 
 ``CUOPT_MIP_ABSOLUTE_TOLERANCE`` controls the MIP absolute tolerance.
 
-Note: the default value is ``1e-6``.
+.. note:: the default value is ``1e-6``.
 
 Relative Tolerance
 ^^^^^^^^^^^^^^^^^^
 
 ``CUOPT_MIP_RELATIVE_TOLERANCE`` controls the MIP relative tolerance.
 
-Note: the default value is ``1e-12``.
+.. note:: the default value is ``1e-12``.
 
 
 Integrality Tolerance
@@ -301,7 +383,7 @@ Integrality Tolerance
 ``CUOPT_INTEGRALITY_TOLERANCE`` controls the MIP integrality tolerance. A variable is considered to be integral, if
 it is within the integrality tolerance of an integer.
 
-Note: the default value is ``1e-5``.
+.. note:: the default value is ``1e-5``.
 
 Absolute MIP Gap
 ^^^^^^^^^^^^^^^^
@@ -316,7 +398,7 @@ when minimizing or
 
 when maximizing.
 
-Note: the default value is ``1e-10``.
+.. note:: the default value is ``1e-10``.
 
 Relative MIP Gap
 ^^^^^^^^^^^^^^^^
@@ -328,4 +410,4 @@ Relative MIP Gap
 If the Best Objective and the Dual Bound are both zero the gap is zero. If the best objective value is zero, the
 gap is infinity.
 
-Note: the default value is ``1e-4``.
+.. note:: the default value is ``1e-4``.
diff --git a/docs/cuopt/source/milp-features.rst b/docs/cuopt/source/milp-features.rst
index 40eba5c40..429bc0530 100644
--- a/docs/cuopt/source/milp-features.rst
+++ b/docs/cuopt/source/milp-features.rst
@@ -13,6 +13,7 @@ The MILP solver can be accessed in the following ways:
    - AMPL
    - GAMS
    - PuLP
+   - JuMP
 
 - **C API**: A native C API that provides direct low-level access to cuOpt's MILP solver, enabling integration into any application or system that can interface with C.
 
diff --git a/docs/cuopt/source/release-notes.rst b/docs/cuopt/source/release-notes.rst
index 6443782d8..e01866f7c 100644
--- a/docs/cuopt/source/release-notes.rst
+++ b/docs/cuopt/source/release-notes.rst
@@ -2,6 +2,93 @@
 Release Notes
 =====================
 
+====================
+Release Notes 25.10
+====================
+
+New Features (25.10)
+--------------------
+
+- New barrier method for solving LPs. Uses cuDSS for sparse Cholesky / LDLT.
+- Concurrent mode for LPs now uses PDLP, dual simplex, and barrier
+- New PDLP solver mode Stable3.
+- MIP presolve using Papilo (enabled by default). LP presolve using Papilo (optional).
+- Parallel branch and bound on the CPU: multiple best-first search and diving threads
+
+Breaking Changes (25.10)
+------------------------
+
+- New PDLP Solver mode Stable3 is the default
+
+
+Improvements (25.10)
+--------------------
+
+- Add setting "CUOPT_BARRIER_DUAL_INITIAL_POINT" to change the dual initial point used by barrier
+- CPUFJ for local search + simple rounding
+- FP as a local search
+- Sub-MIP recombiner and B&B global variable changes
+- Implement GF(2) presolve reduction
+- Implement node presolve
+- CUDA 13/12.9 support
+- Build and test with CUDA 13.0.0
+- Add read/write MPS and relaxation to python API
+- Decompression for ``.mps.gz`` and ``.mps.bz2`` files
+- Enable parallelism for root node presolve
+- Enable singleton stuffing and use Papilo default params
+- Make infeasibility checks consistent between the main solver and presolver
+- Add maximization support for root node presolve
+- Performance improvement in dual simplex's right-looking LU factorization
+- Fix high GPU memory usage
+- Print cuOpt version / machine info before solving
+- ``cuopt-server``: update dependencies (drop httpx, add psutil)
+- Add nightly testing of cuOpt JuMP interface
+- Compression tests are not run when compression is disabled
+- Add sanitizer build option; heuristic improvements: balance between generation and improvement heuristics
+- Loosen presolve tolerance and update timers to report cumulative presolve/solve time
+- Warn in case a dependent library is not found in libcuopt load
+- Combined variable bounds
+- Add Commit Sha to container for reference
+- Use GCC 14, consolidate dependency groups, update pre-commit hooks
+- Add support for nightly ``cuopt-examples`` notebook testing
+- Reduce hard-coded version usage in repo
+- Container to work on all different users including root
+- Changes to download LP and MILP datasets, and also disable cvxpy testing for 3.10
+- Faster engine compile time
+- Fix pre-commit for trailing whitespace and end of file
+- Merge update version and fix version format bugs
+- This library now supports the QPS format, which is an extension of the standard MPS format for representing quadratic programming problems.
+
+
+Bug Fixes (25.10)
+-----------------
+
+- Fix variables out of bounds caused by CPUFJ LP scratch thread
+- Fix the maybe-uninitialized compilation error
+- Fix linking errors in the test suite when disabling C adaptor
+- Compute relative gap with respect to user objectives
+- Add http timeout values for general, send, and receive to client
+- Fix bug in ``fixed_problem_computation``
+- Remove ``limiting_resource_adaptor`` leftover
+- Add support for cuda13 container and fix cuda13 lib issues in wheel
+- Return Infeasible if the user problem contains crossing bounds
+- Fix out-of-bound access in ``clean_up_infeasibilities``
+- Empty columns with infinite bounds are not removed
+
+
+Documentation (25.10)
+---------------------
+
+- Add tutorial video links to Decompression
+- Add warmstart, model update, update docs
+- Add docs on CI workflow inputs
+- Add name to drop-down for video link
+- Add video link to the docs and to the Readme
+- Add documentation on nightly installation commands
+- Fix version in version tab, change log, and fix typos
+- Doc update for container version update, and add ``nvidia-cuda-runtime`` as a dependency
+
+
 ====================
 Release Notes 25.08
 ====================
diff --git a/docs/cuopt/source/system-requirements.rst b/docs/cuopt/source/system-requirements.rst
index e7d963ae5..7ad702194 100644
--- a/docs/cuopt/source/system-requirements.rst
+++ b/docs/cuopt/source/system-requirements.rst
@@ -47,6 +47,7 @@ Dependencies are installed automatically when using the pip and Conda installati
       - CUDA 12.2 with Driver 535.86.10+
       - CUDA 12.5 with Driver 555.42.06+
       - CUDA 12.9 with Driver 570.42.01+
+      - CUDA 13.0 with Driver 580.65.06+
 
 .. dropdown:: Recommended Requirements for Best Performance
 
diff --git a/docs/cuopt/source/thirdparty_modeling_languages/index.rst b/docs/cuopt/source/thirdparty_modeling_languages/index.rst
index 3fa6c5466..0acda399a 100644
--- a/docs/cuopt/source/thirdparty_modeling_languages/index.rst
+++ b/docs/cuopt/source/thirdparty_modeling_languages/index.rst
@@ -21,3 +21,10 @@ PuLP Support
 
 PuLP can be used with near zero code changes: simply switch to cuOpt as a solver to solve linear and mixed-integer programming problems.
 Please refer to the `PuLP documentation <https://pypi.org/project/PuLP/>`_ for more information. Also, see the example notebook in the `cuopt-examples <https://github.com/NVIDIA/cuopt-examples>`_ repository.
+
+--------------------------
+JuMP Support
+--------------------------
+
+JuMP can be used with near zero code changes: simply switch to cuOpt as a solver to solve linear and mixed-integer programming problems.
+Please refer to the `JuMP documentation <https://github.com/jump-dev/cuOpt.jl>`_ for more information.
diff --git a/docs/cuopt/source/versions1.json b/docs/cuopt/source/versions1.json
index da50d0918..1c5b37349 100644
--- a/docs/cuopt/source/versions1.json
+++ b/docs/cuopt/source/versions1.json
@@ -1,24 +1,28 @@
 [
-    {
-      "version": "25.10.00",
-      "url": "../25.10.00/",
-      "name": "latest",
-      "preferred": true
-    },
-    {
-      "version": "25.08.00",
-      "url": "../25.08.00/"
-    },
-    {
-      "version": "25.05",
-      "url": "../25.05/"
-    },
-    {
-      "version": "25.02",
-      "url": "../25.02/"
-    },
-    {
-      "version": "24.11",
-      "url": "../24.11/"
-    }
-  ]
+  {
+    "version": "25.12.00",
+    "url": "../25.12.00/",
+    "name": "latest",
+    "preferred": true
+  },
+  {
+    "version": "25.10.00",
+    "url": "../25.10.00/"
+  },
+  {
+    "version": "25.08.00",
+    "url": "../25.08.00/"
+  },
+  {
+    "version": "25.05",
+    "url": "../25.05/"
+  },
+  {
+    "version": "25.02",
+    "url": "../25.02/"
+  },
+  {
+    "version": "24.11",
+    "url": "../24.11/"
+  }
+]
diff --git a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx
index 50641d331..2c196751f 100644
--- a/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx
+++ b/python/cuopt/cuopt/linear_programming/data_model/data_model_wrapper.pyx
@@ -25,8 +25,6 @@ import warnings
 
 import numpy as np
 
-import cudf
-
 from libc.stdint cimport uintptr_t
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
@@ -46,13 +44,11 @@ def type_cast(np_obj, np_type, name):
 
 
 def get_data_ptr(array):
-    if isinstance(array, cudf.Series):
-        return array.__cuda_array_interface__['data'][0]
-    elif isinstance(array, np.ndarray):
+    if isinstance(array, np.ndarray):
         return array.__array_interface__['data'][0]
     else:
         raise Exception(
-            "get_data_ptr must be called with cudf.Series or np.ndarray"
+            "get_data_ptr must be called with np.ndarray"
         )
 
 
diff --git a/python/cuopt/cuopt/linear_programming/pyproject.toml b/python/cuopt/cuopt/linear_programming/pyproject.toml
index 1e55450fc..5a3809234 100644
--- a/python/cuopt/cuopt/linear_programming/pyproject.toml
+++ b/python/cuopt/cuopt/linear_programming/pyproject.toml
@@ -20,7 +20,7 @@ license = { text = "Apache-2.0" }
 requires-python = ">=3.10"
 dependencies = [
     "numpy>=1.23.5,<3.0a0",
-    "rapids-logger==0.1.*,>=0.0.0a0",
+    "rapids-logger==0.2.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
     "Intended Audience :: Developers",
@@ -39,7 +39,7 @@ Source = "https://github.com/nvidia/cuopt"
 test = [
     "pytest-cov",
     "pytest<8",
-    "rapids-logger==0.1.*,>=0.0.0a0",
+    "rapids-logger==0.2.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [tool.setuptools]
@@ -83,5 +83,5 @@ requires = [
     "cython>=3.0.3",
     "ninja",
     "numpy>=1.23.5,<3.0a0",
-    "rapids-logger==0.1.*,>=0.0.0a0",
+    "rapids-logger==0.2.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../../../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/cuopt/cuopt/routing/vehicle_routing.py b/python/cuopt/cuopt/routing/vehicle_routing.py
index d4a96748c..623f15477 100644
--- a/python/cuopt/cuopt/routing/vehicle_routing.py
+++ b/python/cuopt/cuopt/routing/vehicle_routing.py
@@ -34,7 +34,7 @@
 class DataModel(vehicle_routing_wrapper.DataModel):
     """
 
-    DataModel(n_locations, n_fleet, n_orders: int = -1, session_id=None)
+    DataModel(n_locations, n_fleet, n_orders: int = -1)
 
     Initialize a Data Model.
 
@@ -46,8 +46,6 @@ class DataModel(vehicle_routing_wrapper.DataModel):
         number of vehicles/technician in the fleet.
     n_orders : Integer
         number of orders.
-    session_id : Integer
-        This is used with dask for Multi GPU scenario.
 
     Note:
       - A cost matrix must be set before passing
@@ -67,16 +65,8 @@ class DataModel(vehicle_routing_wrapper.DataModel):
     """
 
     @catch_cuopt_exception
-    def __init__(
-        self,
-        n_locations,
-        n_fleet,
-        n_orders: int = -1,
-        session_id=None,
-    ):
-        super().__init__(
-            n_locations, n_fleet, n_orders=n_orders, session_id=session_id
-        )
+    def __init__(self, n_locations, n_fleet, n_orders: int = -1):
+        super().__init__(n_locations, n_fleet, n_orders=n_orders)
 
     @catch_cuopt_exception
     def add_cost_matrix(self, cost_mat, vehicle_type=0):
diff --git a/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx b/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx
index d2f3b091b..afde21304 100644
--- a/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx
+++ b/python/cuopt/cuopt/routing/vehicle_routing_wrapper.pyx
@@ -34,8 +34,6 @@ from datetime import date, datetime
 
 from dateutil.relativedelta import relativedelta
 
-from raft_dask.common import Comms, local_handle
-
 from cuopt.routing.assignment import Assignment
 from cuopt.utilities import type_cast
 
@@ -161,14 +159,11 @@ cdef class DataModel:
     cdef unique_ptr[data_model_view_t[int, float]] c_data_model_view
     cdef unique_ptr[handle_t] handle_ptr
 
-    def __init__(self, int num_locations, int fleet_size, int n_orders=-1,
-                 session_id=None):
+    def __init__(self, int num_locations, int fleet_size, int n_orders=-1):
         cdef handle_t* handle_ = <handle_t*><size_t>NULL
-        if session_id is None:
-            self.handle_ptr.reset(new handle_t())
-            handle_ = self.handle_ptr.get()
-        else:
-            handle_ = <handle_t*><size_t>local_handle(session_id).getHandle()
+
+        self.handle_ptr.reset(new handle_t())
+        handle_ = self.handle_ptr.get()
 
         self.c_data_model_view.reset(new data_model_view_t[int, float](
             handle_,
diff --git a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py
index 1f0ade10e..42059bf3d 100644
--- a/python/cuopt/cuopt/tests/linear_programming/test_python_API.py
+++ b/python/cuopt/cuopt/tests/linear_programming/test_python_API.py
@@ -34,10 +34,21 @@
     sense,
 )
 from cuopt.linear_programming.solver.solver_parameters import (
+    CUOPT_AUGMENTED,
+    CUOPT_BARRIER_DUAL_INITIAL_POINT,
+    CUOPT_CUDSS_DETERMINISTIC,
+    CUOPT_DUALIZE,
+    CUOPT_ELIMINATE_DENSE_COLUMNS,
+    CUOPT_FOLDING,
     CUOPT_INFEASIBILITY_DETECTION,
+    CUOPT_METHOD,
+    CUOPT_ORDERING,
     CUOPT_PDLP_SOLVER_MODE,
 )
-from cuopt.linear_programming.solver_settings import PDLPSolverMode
+from cuopt.linear_programming.solver_settings import (
+    PDLPSolverMode,
+    SolverMethod,
+)
 
 RAPIDS_DATASET_ROOT_DIR = os.getenv("RAPIDS_DATASET_ROOT_DIR")
 if RAPIDS_DATASET_ROOT_DIR is None:
@@ -395,6 +406,7 @@ def test_warm_start():
 
     settings = SolverSettings()
     settings.set_parameter(CUOPT_PDLP_SOLVER_MODE, PDLPSolverMode.Stable2)
+    settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP)
     settings.set_optimality_tolerance(1e-3)
     settings.set_parameter(CUOPT_INFEASIBILITY_DETECTION, False)
 
@@ -449,3 +461,225 @@ def test_problem_update():
     prob.updateObjective(constant=5, sense=MINIMIZE)
     prob.solve()
     assert prob.ObjValue == pytest.approx(5)
+
+
+@pytest.mark.parametrize(
+    "test_name,settings_config",
+    [
+        (
+            "automatic",
+            {
+                CUOPT_FOLDING: -1,
+                CUOPT_DUALIZE: -1,
+                CUOPT_ORDERING: -1,
+                CUOPT_AUGMENTED: -1,
+            },
+        ),
+        (
+            "forced_on",
+            {
+                CUOPT_FOLDING: 1,
+                CUOPT_DUALIZE: 1,
+                CUOPT_ORDERING: 1,
+                CUOPT_AUGMENTED: 1,
+                CUOPT_ELIMINATE_DENSE_COLUMNS: True,
+                CUOPT_CUDSS_DETERMINISTIC: True,
+            },
+        ),
+        (
+            "disabled",
+            {
+                CUOPT_FOLDING: 0,
+                CUOPT_DUALIZE: 0,
+                CUOPT_ORDERING: 0,
+                CUOPT_AUGMENTED: 0,
+                CUOPT_ELIMINATE_DENSE_COLUMNS: False,
+                CUOPT_CUDSS_DETERMINISTIC: False,
+            },
+        ),
+        (
+            "mixed",
+            {
+                CUOPT_FOLDING: 1,
+                CUOPT_DUALIZE: 0,
+                CUOPT_ORDERING: -1,
+                CUOPT_AUGMENTED: 1,
+            },
+        ),
+        (
+            "folding_on",
+            {
+                CUOPT_FOLDING: 1,
+            },
+        ),
+        (
+            "folding_off",
+            {
+                CUOPT_FOLDING: 0,
+            },
+        ),
+        (
+            "dualize_on",
+            {
+                CUOPT_DUALIZE: 1,
+            },
+        ),
+        (
+            "dualize_off",
+            {
+                CUOPT_DUALIZE: 0,
+            },
+        ),
+        (
+            "amd_ordering",
+            {
+                CUOPT_ORDERING: 1,
+            },
+        ),
+        (
+            "cudss_ordering",
+            {
+                CUOPT_ORDERING: 0,
+            },
+        ),
+        (
+            "augmented_system",
+            {
+                CUOPT_AUGMENTED: 1,
+            },
+        ),
+        (
+            "adat_system",
+            {
+                CUOPT_AUGMENTED: 0,
+            },
+        ),
+        (
+            "no_dense_elim",
+            {
+                CUOPT_ELIMINATE_DENSE_COLUMNS: False,
+            },
+        ),
+        (
+            "cudss_deterministic",
+            {
+                CUOPT_CUDSS_DETERMINISTIC: True,
+            },
+        ),
+        (
+            "combo1",
+            {
+                CUOPT_FOLDING: 1,
+                CUOPT_DUALIZE: 1,
+                CUOPT_ORDERING: 1,
+            },
+        ),
+        (
+            "combo2",
+            {
+                CUOPT_FOLDING: 0,
+                CUOPT_AUGMENTED: 0,
+                CUOPT_ELIMINATE_DENSE_COLUMNS: False,
+            },
+        ),
+        (
+            "dual_initial_point_automatic",
+            {
+                CUOPT_BARRIER_DUAL_INITIAL_POINT: -1,
+            },
+        ),
+        (
+            "dual_initial_point_lustig",
+            {
+                CUOPT_BARRIER_DUAL_INITIAL_POINT: 0,
+            },
+        ),
+        (
+            "dual_initial_point_least_squares",
+            {
+                CUOPT_BARRIER_DUAL_INITIAL_POINT: 1,
+            },
+        ),
+        (
+            "combo3_with_dual_init",
+            {
+                CUOPT_AUGMENTED: 1,
+                CUOPT_BARRIER_DUAL_INITIAL_POINT: 1,
+                CUOPT_ELIMINATE_DENSE_COLUMNS: True,
+            },
+        ),
+    ],
+)
+def test_barrier_solver_settings(test_name, settings_config):
+    """
+    Parameterized test for barrier solver with different configurations.
+
+    Tests the barrier solver across various settings combinations to ensure
+    correctness and robustness. Each configuration tests different aspects
+    of the barrier solver implementation.
+
+    Problem:
+        maximize   5*xs + 20*xl
+        subject to  1*xs +  3*xl <= 200
+                    3*xs +  2*xl <= 160
+                    xs, xl >= 0
+
+    Expected Solution:
+        Optimal objective: 1333.33
+        xs = 0, xl = 66.67 (corner solution where constraint 1 is binding)
+
+    Args
+    ----
+        test_name: Descriptive name for the test configuration
+        settings_config: Dictionary of barrier solver parameters to set
+    """
+    prob = Problem(f"Barrier Test - {test_name}")
+
+    # Add variables
+    xs = prob.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xs")
+    xl = prob.addVariable(lb=0, vtype=VType.CONTINUOUS, name="xl")
+
+    # Add constraints
+    prob.addConstraint(xs + 3 * xl <= 200, name="constraint1")
+    prob.addConstraint(3 * xs + 2 * xl <= 160, name="constraint2")
+
+    # Set objective: maximize 5*xs + 20*xl
+    prob.setObjective(5 * xs + 20 * xl, sense=MAXIMIZE)
+
+    # Configure solver settings
+    settings = SolverSettings()
+    settings.set_parameter(CUOPT_METHOD, SolverMethod.Barrier)
+    settings.set_parameter("time_limit", 10)
+
+    # Apply test-specific settings
+    for param_name, param_value in settings_config.items():
+        settings.set_parameter(param_name, param_value)
+
+    print(f"\nTesting configuration: {test_name}")
+    print(f"Settings: {settings_config}")
+
+    # Solve the problem
+    prob.solve(settings)
+
+    print(f"Status: {prob.Status.name}")
+    print(f"Objective: {prob.ObjValue}")
+    print(f"xs = {xs.Value}, xl = {xl.Value}")
+
+    # Verify solution
+    assert prob.solved, f"Problem not solved for {test_name}"
+    assert prob.Status.name == "Optimal", f"Not optimal for {test_name}"
+    assert prob.ObjValue == pytest.approx(
+        1333.33, rel=0.01
+    ), f"Incorrect objective for {test_name}"
+    assert xs.Value == pytest.approx(
+        0.0, abs=1e-4
+    ), f"Incorrect xs value for {test_name}"
+    assert xl.Value == pytest.approx(
+        66.67, rel=0.01
+    ), f"Incorrect xl value for {test_name}"
+
+    # Verify constraint slacks are non-negative
+    for c in prob.getConstraints():
+        assert (
+            c.Slack >= -1e-6
+        ), f"Negative slack for {c.getConstraintName()} in {test_name}"
diff --git a/python/cuopt/pyproject.toml b/python/cuopt/pyproject.toml
index c2b7df786..b7d32e463 100644
--- a/python/cuopt/pyproject.toml
+++ b/python/cuopt/pyproject.toml
@@ -42,9 +42,8 @@ dependencies = [
     "numpy>=1.23.5,<3.0a0",
     "pandas>=2.0",
     "pylibraft==25.12.*,>=0.0.0a0",
-    "raft-dask==25.12.*,>=0.0.0a0",
     "rapids-dask-dependency==25.12.*,>=0.0.0a0",
-    "rapids-logger==0.1.*,>=0.0.0a0",
+    "rapids-logger==0.2.*,>=0.0.0a0",
     "rmm==25.12.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [
@@ -62,7 +61,7 @@ test = [
     "numpy>=1.23.5,<3.0a0",
     "pytest-cov",
     "pytest<8",
-    "rapids-logger==0.1.*,>=0.0.0a0",
+    "rapids-logger==0.2.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]
@@ -91,7 +90,6 @@ known_rapids = [
     "cudf",
     "pylibraft",
     "rmm",
-    "raft_dask",
 ]
 known_first_party = [
     "cuopt",
@@ -157,6 +155,6 @@ requires = [
     "libcuopt==25.12.*,>=0.0.0a0",
     "ninja",
     "pylibraft==25.12.*,>=0.0.0a0",
-    "rapids-logger==0.1.*,>=0.0.0a0",
+    "rapids-logger==0.2.*,>=0.0.0a0",
     "rmm==25.12.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
diff --git a/python/cuopt_self_hosted/pyproject.toml b/python/cuopt_self_hosted/pyproject.toml
index 5d726afa6..a21c09933 100644
--- a/python/cuopt_self_hosted/pyproject.toml
+++ b/python/cuopt_self_hosted/pyproject.toml
@@ -86,7 +86,6 @@ known_rapids = [
     "cudf",
     "pylibraft",
     "rmm",
-    "raft_dask",
     "cuopt",
 ]
 known_first_party = [
diff --git a/python/cuopt_server/cuopt_server/tests/test_lp.py b/python/cuopt_server/cuopt_server/tests/test_lp.py
index 8fc85aa3a..4a01daaca 100644
--- a/python/cuopt_server/cuopt_server/tests/test_lp.py
+++ b/python/cuopt_server/cuopt_server/tests/test_lp.py
@@ -146,3 +146,77 @@ def test_sample_milp(
         res.json()["response"]["solver_response"],
         expected_status,
     )
+
+
+# @pytest.mark.skip(reason="Skipping barrier solver options test")
+@pytest.mark.parametrize(
+    "folding, dualize, ordering, augmented, eliminate_dense, cudss_determ, "
+    "dual_initial_point",
+    [
+        # Test automatic settings (default)
+        (-1, -1, -1, -1, True, False, -1),
+        # Test folding off, no dualization, cuDSS default ordering, ADAT system
+        (0, 0, 0, 0, True, False, 0),
+        # Test folding on, force dualization, AMD ordering, augmented system
+        (1, 1, 1, 1, True, True, 1),
+        # Mixed: auto folding, no dualize, AMD, augmented, dense columns kept
+        (-1, 0, 1, 1, False, False, 0),
+        # Test no folding, automatic dualize, cuDSS default, ADAT
+        (0, -1, 0, 0, True, True, -1),
+        # Test dual initial point with Lustig-Marsten-Shanno
+        (-1, -1, -1, -1, True, False, 0),
+        # Test dual least-squares initial point with the augmented system
+        (-1, -1, -1, 1, True, False, 1),
+    ],
+)
+def test_barrier_solver_options(
+    cuoptproc,  # noqa
+    folding,
+    dualize,
+    ordering,
+    augmented,
+    eliminate_dense,
+    cudss_determ,
+    dual_initial_point,
+):
+    """
+    Solve the LP from get_std_data_for_lp() with the barrier solver (method=3)
+    and expect HTTP 200 with a result validated as Optimal. Options varied:
+    - folding: (-1) automatic, (0) off, (1) on
+    - dualize: (-1) automatic, (0) don't dualize, (1) force dualize
+    - ordering: (-1) automatic, (0) cuDSS default, (1) AMD
+    - augmented: (-1) automatic, (0) ADAT, (1) augmented system
+    - eliminate_dense_columns: True to eliminate, False to not
+    - cudss_deterministic: True deterministic, False nondeterministic
+    - barrier_dual_initial_point: (-1) automatic, (0) Lustig-Marsten-Shanno,
+      (1) dual least squares
+    """
+    data = get_std_data_for_lp()
+
+    # Use barrier solver (method=3)
+    data["solver_config"]["method"] = 3
+
+    # Each parametrized value is passed as its own solver_config entry
+    data["solver_config"]["folding"] = folding
+    data["solver_config"]["dualize"] = dualize
+    data["solver_config"]["ordering"] = ordering
+    data["solver_config"]["augmented"] = augmented
+    data["solver_config"]["eliminate_dense_columns"] = eliminate_dense
+    data["solver_config"]["cudss_deterministic"] = cudss_determ
+    data["solver_config"]["barrier_dual_initial_point"] = dual_initial_point
+
+    res = get_lp(client, data)
+
+    assert res.status_code == 200
+
+    print("\n=== Barrier Solver Test Configuration ===")
+    print(f"folding={folding}, dualize={dualize}, ordering={ordering}")
+    print(f"augmented={augmented}, eliminate_dense={eliminate_dense}")
+    print(f"cudss_deterministic={cudss_determ}")
+    print(f"barrier_dual_initial_point={dual_initial_point}")
+    print(res.json())
+
+    validate_lp_result(
+        res.json()["response"]["solver_response"],
+        LPTerminationStatus.Optimal.name,
+    )
diff --git a/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py b/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py
index be67894be..242b34545 100644
--- a/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py
+++ b/python/cuopt_server/cuopt_server/tests/test_pdlp_warmstart.py
@@ -21,9 +21,13 @@
 from cuopt.linear_programming import solver_settings
 from cuopt.linear_programming.solver.solver_parameters import (
     CUOPT_INFEASIBILITY_DETECTION,
+    CUOPT_METHOD,
     CUOPT_PDLP_SOLVER_MODE,
 )
-from cuopt.linear_programming.solver_settings import PDLPSolverMode
+from cuopt.linear_programming.solver_settings import (
+    PDLPSolverMode,
+    SolverMethod,
+)
 
 from cuopt_server.tests.utils.utils import cuoptproc  # noqa
 from cuopt_server.tests.utils.utils import (
@@ -45,6 +49,7 @@ def test_warmstart(cuoptproc):  # noqa
     settings.set_optimality_tolerance(1e-4)
     settings.set_parameter(CUOPT_INFEASIBILITY_DETECTION, False)
     settings.set_parameter(CUOPT_PDLP_SOLVER_MODE, PDLPSolverMode.Stable2)
+    settings.set_parameter(CUOPT_METHOD, SolverMethod.PDLP)
     data["solver_config"] = settings.toDict()
 
     headers = {"CLIENT-VERSION": "custom"}
diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py
index 8eeca3645..b74d66b09 100644
--- a/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py
+++ b/python/cuopt_server/cuopt_server/utils/linear_programming/data_definition.py
@@ -520,6 +520,8 @@ class SolverConfig(StrictModel):
         "<br>"
         "- Dual Simplex: 2, Dual Simplex method"
         "<br>"
+        "- Barrier: 3, Barrier method"
+        "<br>"
         "Note: Not supported for MILP. ",
     )
     mip_scaling: Optional[bool] = Field(
diff --git a/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py b/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py
index b05974217..1dbfaf51e 100644
--- a/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py
+++ b/python/cuopt_server/cuopt_server/utils/linear_programming/solver.py
@@ -440,15 +440,15 @@ def is_mip(var_types):
             solver_settings.set_parameter(
                 CUOPT_LOG_FILE, solver_config.log_file
             )
-        if solver_config.augmented != "":
+        if solver_config.augmented is not None:
             solver_settings.set_parameter(
                 CUOPT_AUGMENTED, solver_config.augmented
             )
-        if solver_config.folding != "":
+        if solver_config.folding is not None:
             solver_settings.set_parameter(CUOPT_FOLDING, solver_config.folding)
-        if solver_config.dualize != "":
+        if solver_config.dualize is not None:
             solver_settings.set_parameter(CUOPT_DUALIZE, solver_config.dualize)
-        if solver_config.ordering != "":
+        if solver_config.ordering is not None:
             solver_settings.set_parameter(
                 CUOPT_ORDERING, solver_config.ordering
             )
diff --git a/python/cuopt_server/pyproject.toml b/python/cuopt_server/pyproject.toml
index 41e0b6daf..b1d397b77 100644
--- a/python/cuopt_server/pyproject.toml
+++ b/python/cuopt_server/pyproject.toml
@@ -96,7 +96,6 @@ known_rapids = [
     "cudf",
     "pylibraft",
     "rmm",
-    "raft_dask",
     "cuopt",
 ]
 known_first_party = [
diff --git a/python/libcuopt/CMakeLists.txt b/python/libcuopt/CMakeLists.txt
index 175e501e4..b6fbb6b2b 100644
--- a/python/libcuopt/CMakeLists.txt
+++ b/python/libcuopt/CMakeLists.txt
@@ -86,6 +86,7 @@ set(rpaths
   "$ORIGIN/../../nvidia/curand/lib"
   "$ORIGIN/../../nvidia/cusolver/lib"
   "$ORIGIN/../../nvidia/cusparse/lib"
+  "$ORIGIN/../../nvidia/nvjitlink/lib"
 )
 
 # Add CUDA version-specific paths based on CUDA compiler version
diff --git a/python/libcuopt/pyproject.toml b/python/libcuopt/pyproject.toml
index fbbbdd087..278092e29 100644
--- a/python/libcuopt/pyproject.toml
+++ b/python/libcuopt/pyproject.toml
@@ -52,7 +52,7 @@ dependencies = [
     "nvidia-cusolver",
     "nvidia-cusparse",
     "nvidia-nvtx",
-    "rapids-logger==0.1.*,>=0.0.0a0",
+    "rapids-logger==0.2.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 
 [project.urls]
@@ -97,5 +97,5 @@ requires = [
     "cuopt-mps-parser==25.12.*,>=0.0.0a0",
     "librmm==25.12.*,>=0.0.0a0",
     "ninja",
-    "rapids-logger==0.1.*,>=0.0.0a0",
+    "rapids-logger==0.2.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.