Skip to content

Commit

Permalink
Allow max_num_of_gh_pair_per_launch to be customized
Browse files Browse the repository at this point in the history
  • Loading branch information
YuanTingHsieh committed Dec 5, 2024
1 parent e855561 commit 3fa8b94
Showing 1 changed file with 11 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,16 @@ class CUDAPlugin: public LocalPlugin {
double total_agg_time_ = 0;
double total_prepare_bin_time_ = 0;
size_t gh_pair_array_size = 0;
int threads_per_block_ = 512;
int max_num_of_gh_pair_per_launch_ = 1 << 23;

public:
explicit CUDAPlugin(std::vector<std::pair<std::string_view, std::string_view> > const &args): LocalPlugin(args) {
bool fix_seed = get_bool(args, "fix_seed");
threads_per_block_ = get_int(args, "threads_per_block");
bool fix_seed = get_bool(args, "fix_seed", false);
// the maximum number of instances (big number) is limited by
// (1) CPU memory, as the gh_pair_array needs to be held in CPU memory
// (2) GPU memory, when the active gh_pair_array is copied into GPU for calculation

max_num_of_gh_pair_per_launch_ = get_int(args, "max_num_of_gh_pair_per_launch", 1 << 23);
paillier_cipher_ptr_ = new PaillierCipher<bits>(bits/2, fix_seed, debug_);
encrypted_gh_pairs_ = nullptr;
gh_pair_array = nullptr;
Expand Down Expand Up @@ -568,11 +572,7 @@ class CUDAPlugin: public LocalPlugin {
int tuple_length = 2;
size_t IPB = threads_per_block_ / TPI;

// the maximum number of instances (big number) is limited by
// (1) CPU memory, as the gh_pair_array needs to be held in CPU memory
// (2) GPU memory, when the active gh_pair_array is copied into GPU for calculation
size_t max_num_of_instances_per_launch = 1 << 23; // maximum numbers that can fit into GPU memory
unsigned int max_blocks = max_num_of_instances_per_launch / IPB;
unsigned int max_blocks = max_num_of_gh_pair_per_launch_ / IPB;

if (debug_) std::cout << overall_timer_.now() << ": Preparing bin_xxx" << std::endl;

Expand All @@ -589,15 +589,15 @@ class CUDAPlugin: public LocalPlugin {

if (debug_) std::cout << overall_timer_.now() << ": before prepareBinArray" << std::endl;
size_t total_sample_ids = prepareBinArray(ridx, size);
if (debug_) std::cout << overall_timer_.now() << ": after prepareBinArray, max_num_of_instances_per_launch: " << max_num_of_instances_per_launch << " , total_sample_ids: " << total_sample_ids << std::endl;
if (debug_) std::cout << overall_timer_.now() << ": after prepareBinArray, max_num_of_gh_pair_per_launch_: " << max_num_of_gh_pair_per_launch_ << " , total_sample_ids: " << total_sample_ids << std::endl;

// weird situation where all things are empty
if (total_sample_ids == 0) {
fillResult(result, total_bin_size);
return;
}

int num_tuples_per_array = std::min(total_sample_ids, max_num_of_instances_per_launch) / tuple_length;
int num_tuples_per_array = std::min(total_sample_ids, max_num_of_gh_pair_per_launch_) / tuple_length;
int num_gh_pairs = num_tuples_per_array * tuple_length; // needs to be a multiple of tuple
size_t array_size = sizeof(GHPair) * num_gh_pairs;
if (debug_) std::cout << "gh pair array size is " << array_size << std::endl;
Expand All @@ -616,7 +616,7 @@ class CUDAPlugin: public LocalPlugin {
total_prepare_bin_time_ += elapsed;
#endif

if (debug_) std::cout << overall_timer_.now() << ": max_num_of_instances_per_launch: " << max_num_of_instances_per_launch << " num_gh_pairs: " << num_gh_pairs << std::endl;
if (debug_) std::cout << overall_timer_.now() << ": max_num_of_gh_pair_per_launch_: " << max_num_of_gh_pair_per_launch_ << " num_gh_pairs: " << num_gh_pairs << std::endl;

int* rbt = (int*)malloc(sizeof(int) * num_tuples_per_array);

Expand Down

0 comments on commit 3fa8b94

Please sign in to comment.