38 changes: 32 additions & 6 deletions benchmarks/linear_programming/cuopt/run_mip.cpp
@@ -28,7 +28,10 @@
#include <raft/core/handle.hpp>

#include <rmm/mr/device/cuda_async_memory_resource.hpp>
#include <rmm/mr/device/limiting_resource_adaptor.hpp>
#include <rmm/mr/device/logging_resource_adaptor.hpp>
#include <rmm/mr/device/pool_memory_resource.hpp>
#include <rmm/mr/device/tracking_resource_adaptor.hpp>

#include <rmm/mr/device/owning_wrapper.hpp>

@@ -256,7 +259,9 @@ void run_single_file_mp(std::string file_path,
{
std::cout << "running file " << file_path << " on gpu : " << device << std::endl;
auto memory_resource = make_async();
rmm::mr::set_current_device_resource(memory_resource.get());
auto limiting_adaptor =
rmm::mr::limiting_resource_adaptor(memory_resource.get(), 6ULL * 1024ULL * 1024ULL * 1024ULL);
rmm::mr::set_current_device_resource(&limiting_adaptor);
int sol_found = run_single_file(file_path,
device,
batch_id,
@@ -340,6 +345,15 @@ int main(int argc, char* argv[])
.scan<'g', double>()
.default_value(std::numeric_limits<double>::max());

program.add_argument("--memory-limit")
.help("memory limit in MB")
.scan<'g', double>()
.default_value(0.0);

program.add_argument("--track-allocations")
.help("track allocations (t/f)")
.default_value(std::string("f"));

// Parse arguments
try {
program.parse_args(argc, argv);
@@ -362,10 +376,12 @@ int main(int argc, char* argv[])
std::string result_file;
int batch_num = -1;

bool heuristics_only = program.get<std::string>("--heuristics-only")[0] == 't';
int num_cpu_threads = program.get<int>("--num-cpu-threads");
bool write_log_file = program.get<std::string>("--write-log-file")[0] == 't';
bool log_to_console = program.get<std::string>("--log-to-console")[0] == 't';
bool heuristics_only = program.get<std::string>("--heuristics-only")[0] == 't';
int num_cpu_threads = program.get<int>("--num-cpu-threads");
bool write_log_file = program.get<std::string>("--write-log-file")[0] == 't';
bool log_to_console = program.get<std::string>("--log-to-console")[0] == 't';
double memory_limit = program.get<double>("--memory-limit");
bool track_allocations = program.get<std::string>("--track-allocations")[0] == 't';

if (program.is_used("--out-dir")) {
out_dir = program.get<std::string>("--out-dir");
@@ -469,7 +485,17 @@ int main(int argc, char* argv[])
merge_result_files(out_dir, result_file, n_gpus, batch_num);
} else {
auto memory_resource = make_async();
rmm::mr::set_current_device_resource(memory_resource.get());
// Construct the adaptors at this scope so the current device resource
// outlives the run below; a branch-local adaptor would leave a dangling
// pointer once its block ends.
auto limiting_adaptor = rmm::mr::limiting_resource_adaptor(
memory_resource.get(), static_cast<std::size_t>(memory_limit * 1024.0 * 1024.0));
auto tracking_adaptor =
rmm::mr::tracking_resource_adaptor(memory_resource.get(), /*capture_stacks=*/true);
if (memory_limit > 0) {
rmm::mr::set_current_device_resource(&limiting_adaptor);
} else if (track_allocations) {
rmm::mr::set_current_device_resource(&tracking_adaptor);
} else {
rmm::mr::set_current_device_resource(memory_resource.get());
}
run_single_file(path,
0,
0,
...
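For reference, a minimal standalone sketch (not part of this PR) of the pattern the benchmark now uses: wrapping the async resource in a limiting adaptor so allocations beyond a cap fail fast. The 6 GiB cap mirrors the hard-coded value in run_single_file_mp above; this assumes RMM's public limiting_resource_adaptor API.

#include <rmm/mr/device/cuda_async_memory_resource.hpp>
#include <rmm/mr/device/limiting_resource_adaptor.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

int main()
{
  rmm::mr::cuda_async_memory_resource async_mr;
  // The adaptor must outlive every allocation made through it, so keep it
  // at a scope that spans the whole run.
  rmm::mr::limiting_resource_adaptor limiting_mr{&async_mr, 6ULL * 1024 * 1024 * 1024};
  rmm::mr::set_current_device_resource(&limiting_mr);
  // Requests past the 6 GiB cap now throw rmm::out_of_memory instead of
  // exhausting the device.
  return 0;
}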
12 changes: 9 additions & 3 deletions cpp/src/mip/problem/problem.cu
@@ -21,6 +21,7 @@
#include "problem_kernels.cuh"

#include <utilities/copy_helpers.hpp>
#include <utilities/cuda_helpers.cuh>
#include <utilities/macros.cuh>

#include <linear_programming/utils.cuh>
@@ -810,16 +811,21 @@ void problem_t<i_t, f_t>::compute_related_variables(double time_limit)

handle_ptr->sync_stream();

// Previously used constants assumed 40 GB of device memory; scale them down accordingly on
// smaller GPUs. We can't rely on querying free memory or on allocation try/catch, since that
// would break determinism guarantees (the GPU may be shared with other processes).
f_t size_factor = std::min(1.0, cuopt::get_device_memory_size() / 1e9 / 40.0);

// TODO: determine optimal number of slices based on available GPU memory? This used to be 2e9 /
// n_variables
i_t max_slice_size = 6e8 / n_variables;
i_t max_slice_size = 6e8 * size_factor / n_variables;

rmm::device_uvector<i_t> varmap(max_slice_size * n_variables, handle_ptr->get_stream());
rmm::device_uvector<i_t> offsets(max_slice_size * n_variables, handle_ptr->get_stream());

related_variables.resize(0, handle_ptr->get_stream());
// TODO: this used to be 1e8
related_variables.reserve(1e8, handle_ptr->get_stream()); // reserve space
related_variables.reserve(1e8 * size_factor, handle_ptr->get_stream()); // reserve space
related_variables_offsets.resize(n_variables + 1, handle_ptr->get_stream());
related_variables_offsets.set_element_to_zero_async(0, handle_ptr->get_stream());

@@ -863,7 +869,7 @@ void problem_t<i_t, f_t>::compute_related_variables(double time_limit)
auto current_time = std::chrono::high_resolution_clock::now();
// if the related variable array would wind up being too large for available memory, abort
// TODO this used to be 1e9
if (related_variables.size() > 1e9 ||
if (related_variables.size() > 1e9 * size_factor ||
std::chrono::duration_cast<std::chrono::seconds>(current_time - start_time).count() >
time_limit) {
CUOPT_LOG_DEBUG(
...
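To make the scaling concrete, a small worked sketch (hypothetical numbers, not from the PR): on a 24 GB card the factor comes out to 0.6, so the slice size and reserve budget shrink proportionally.

#include <algorithm>
#include <cstdio>

int main()
{
  double total_bytes = 24e9;  // hypothetical: get_device_memory_size() on a 24 GB card
  double size_factor = std::min(1.0, total_bytes / 1e9 / 40.0);             // 24/40 = 0.6
  int n_variables    = 1'000'000;
  int max_slice_size = static_cast<int>(6e8 * size_factor / n_variables);   // = 360
  std::printf("size_factor=%.2f max_slice_size=%d\n", size_factor, max_slice_size);
  return 0;
}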
25 changes: 25 additions & 0 deletions cpp/src/utilities/cuda_helpers.cuh
@@ -24,6 +24,8 @@
#include <raft/util/cuda_utils.cuh>
#include <raft/util/cudart_utils.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/cuda_async_memory_resource.hpp>
#include <rmm/mr/device/limiting_resource_adaptor.hpp>

namespace cuopt {

@@ -208,4 +210,27 @@ DI void sorted_insert(T* array, T item, int curr_size, int max_size)
array[0] = item;
}

inline size_t get_device_memory_size()
{
// Query device memory via CUDA; only total_mem is used below, free_mem is a placeholder
size_t free_mem, total_mem;
cudaMemGetInfo(&free_mem, &total_mem);
rg20 (Contributor) commented on Aug 26, 2025:
With a pool memory allocator we would probably have allocated most of the device memory already, but the allocator itself might still have memory available.

Author (Contributor) replied:
This is only used to get the total device memory :) free_mem is a placeholder here; only total_mem is used.

auto res = rmm::mr::get_current_device_resource();
auto limiting_adaptor =
dynamic_cast<rmm::mr::limiting_resource_adaptor<rmm::mr::cuda_async_memory_resource>*>(res);
// Did we specify an explicit memory limit?
if (limiting_adaptor) {
printf("limiting_adaptor->get_allocation_limit(): %fMB\n",
limiting_adaptor->get_allocation_limit() / 1e6);
printf("used_mem: %fMB\n", limiting_adaptor->get_allocated_bytes() / 1e6);
printf("free_mem: %fMB\n",
(limiting_adaptor->get_allocation_limit() - limiting_adaptor->get_allocated_bytes()) /
1e6);
return std::min(total_mem, limiting_adaptor->get_allocation_limit());
} else {
return total_mem;
}
}

} // namespace cuopt
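
The --track-allocations path sets the tracking adaptor as the current resource, but this diff doesn't show how its statistics are read back. A hypothetical sketch of inspecting them, assuming RMM's tracking_resource_adaptor API (get_allocated_bytes / get_outstanding_allocations_str):

#include <rmm/mr/device/cuda_async_memory_resource.hpp>
#include <rmm/mr/device/per_device_resource.hpp>
#include <rmm/mr/device/tracking_resource_adaptor.hpp>

#include <iostream>

int main()
{
  rmm::mr::cuda_async_memory_resource async_mr;
  // capture_stacks=true records a call stack for each outstanding allocation
  rmm::mr::tracking_resource_adaptor tracking_mr{&async_mr, /*capture_stacks=*/true};
  rmm::mr::set_current_device_resource(&tracking_mr);
  // ... run the solver ...
  std::cout << "outstanding bytes: " << tracking_mr.get_allocated_bytes() << "\n"
            << tracking_mr.get_outstanding_allocations_str();
  return 0;
}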