[GPUGraph] fix infer && add infer_table_cap (PaddlePaddle#108)
* fix ins no

* fix FillOnestep args

* fix infer && add infer table cap

* fix infer
Thunderbrook authored Sep 14, 2022
1 parent cc71f56 commit 1462c54
Showing 4 changed files with 33 additions and 26 deletions.
45 changes: 23 additions & 22 deletions paddle/fluid/framework/data_feed.cu
@@ -369,9 +369,8 @@ int GraphDataGenerator::FillIdShowClkTensor(int total_instance,
   } else {
     uint64_t *d_type_keys =
         reinterpret_cast<uint64_t *>(d_device_keys_[cursor]->ptr());
-    auto &infer_node_type_start = infer_node_type_start_;
-    d_type_keys += infer_node_type_start[cursor];
-    infer_node_type_start[cursor] += total_instance / 2;
+    d_type_keys += infer_node_start_;
+    infer_node_start_ += total_instance / 2;
     CopyDuplicateKeys<<<GET_BLOCKS(total_instance / 2),
                         CUDA_NUM_THREADS,
                         0,
@@ -547,11 +546,9 @@ int GraphDataGenerator::GenerateBatch() {
   int res = 0;
   auto gpu_graph_ptr = GraphGpuWrapper::GetInstance();
   if (!gpu_graph_training_) {
-    size_t device_key_size = h_device_keys_len_[cursor_];
-    total_instance =
-        (infer_node_type_start_[cursor_] + batch_size_ <= device_key_size)
-            ? batch_size_
-            : device_key_size - infer_node_type_start_[cursor_];
+    total_instance = (infer_node_start_ + batch_size_ <= infer_node_end_)
+                         ? batch_size_
+                         : infer_node_end_ - infer_node_start_;
     VLOG(1) << "in graph_data generator:batch_size = " << batch_size_
             << " instance = " << total_instance;
     total_instance *= 2;
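For context: in inference mode, GenerateBatch() now clamps each batch to the per-pass window [infer_node_start_, infer_node_end_) instead of indexing a per-node-type start vector. A minimal Python sketch of that clamping, assuming names that mirror the C++ members (an illustration, not the actual implementation; the final doubling corresponds to CopyDuplicateKeys writing each key twice):

def next_infer_batch(infer_node_start, infer_node_end, batch_size):
    # Take a full batch if it fits in the remaining window,
    # otherwise take whatever is left (the final, short batch).
    remaining = infer_node_end - infer_node_start
    keys = batch_size if remaining >= batch_size else remaining
    total_instance = keys * 2  # each key is duplicated by CopyDuplicateKeys
    return total_instance, infer_node_start + keys

# A window of 10 keys with batch_size = 4 yields 8, 8, 4 instances.
start, end = 0, 10
while start < end:
    total_instance, start = next_infer_batch(start, end, 4)
    print(total_instance)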
@@ -866,9 +863,9 @@ int GraphDataGenerator::InsertTable(const unsigned long *d_keys,
                     sample_stream_);
   cudaStreamSynchronize(sample_stream_);
   // Enough nodes have been generated; stop sampling.
-  VLOG(2) << "table capcity: " << table_capcity_ << ", " << h_uniq_node_num
+  VLOG(2) << "table capcity: " << train_table_cap_ << ", " << h_uniq_node_num
           << " used";
-  if (h_uniq_node_num + len >= table_capcity_) {
+  if (h_uniq_node_num + len >= train_table_cap_) {
     return 1;
   }
   table_->insert(d_keys, len, d_uniq_node_num, sample_stream_);
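InsertTable() now gates sampling on the training-table capacity: when the unique-node count plus the incoming batch would reach train_table_cap_, it returns 1 to end the pass. A rough Python sketch of the gate, with a plain set standing in for the GPU hash table (hypothetical helper, not from this commit):

def try_insert(uniq_nodes, keys, train_table_cap):
    # uniq_nodes plays the role of table_ / h_uniq_node_num.
    if len(uniq_nodes) + len(keys) >= train_table_cap:
        return 1  # enough nodes generated; stop sampling
    uniq_nodes.update(keys)  # table_->insert(...) on the GPU
    return 0

uniq = set()
print(try_insert(uniq, {1, 2, 3}, train_table_cap=4))  # 0: inserted
print(try_insert(uniq, {4, 5}, train_table_cap=4))     # 1: would hit the cap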
@@ -901,9 +898,9 @@ int GraphDataGenerator::FillInferBuf() {
   }
   size_t device_key_size = h_device_keys_len_[infer_cursor];
   total_row_ =
-      (global_infer_node_type_start[infer_cursor] + table_capcity_ <=
+      (global_infer_node_type_start[infer_cursor] + infer_table_cap_ <=
        device_key_size)
-          ? table_capcity_
+          ? infer_table_cap_
           : device_key_size - global_infer_node_type_start[infer_cursor];

   host_vec_.resize(total_row_);
@@ -918,7 +915,9 @@ int GraphDataGenerator::FillInferBuf() {
     VLOG(1) << "cursor: " << infer_cursor
             << " start: " << global_infer_node_type_start[infer_cursor]
             << " num: " << total_row_;
+    infer_node_start_ = global_infer_node_type_start[infer_cursor];
     global_infer_node_type_start[infer_cursor] += total_row_;
+    infer_node_end_ = global_infer_node_type_start[infer_cursor];
     cursor_ = infer_cursor;
   }
   return 0;
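Taken together, the FillInferBuf() hunks carve the device keys into per-pass windows: each pass scans at most infer_table_cap_ keys from the global cursor, and [infer_node_start_, infer_node_end_) records the slice that GenerateBatch() may consume. A sketch of the bookkeeping, assuming a single node type:

def fill_infer_buf(global_start, device_key_size, infer_table_cap):
    # Clamp the window to whatever keys remain for this node type.
    remaining = device_key_size - global_start
    total_row = infer_table_cap if remaining >= infer_table_cap else remaining
    infer_node_start = global_start            # window begin for this pass
    infer_node_end = global_start + total_row  # window end (exclusive)
    return total_row, infer_node_start, infer_node_end

# 25 device keys with infer_table_cap = 10 -> windows [0,10), [10,20), [20,25).
cursor, n_keys = 0, 25
while cursor < n_keys:
    total_row, s, e = fill_infer_buf(cursor, n_keys, 10)
    print(total_row, (s, e))
    cursor = e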
@@ -957,7 +956,7 @@ int GraphDataGenerator::FillWalkBuf() {
   auto &node_type_start = gpu_graph_ptr->node_type_start_[gpuid_];
   auto &finish_node_type = gpu_graph_ptr->finish_node_type_[gpuid_];
   auto &type_to_index = gpu_graph_ptr->get_graph_type_to_index();
-  auto& cursor = gpu_graph_ptr->cursor_[thread_id_];
+  auto &cursor = gpu_graph_ptr->cursor_[thread_id_];
   size_t node_type_len = first_node_type.size();
   int remain_size =
       buf_size_ - walk_degree_ * once_sample_startid_len_ * walk_len_;
@@ -1006,9 +1005,9 @@ int GraphDataGenerator::FillWalkBuf() {
       int step = 1;
       VLOG(2) << "sample edge type: " << path[0] << " step: " << 1;
       jump_rows_ = sample_res.total_sample_size;
-      VLOG(2) << "i = " << i << " start = " << start
-              << " tmp_len = " << tmp_len << " cursor = " << node_type
-              << " cur_node_idx = " << cur_node_idx << " jump row: " << jump_rows_;
+      VLOG(2) << "i = " << i << " start = " << start << " tmp_len = " << tmp_len
+              << " cursor = " << node_type << " cur_node_idx = " << cur_node_idx
+              << " jump row: " << jump_rows_;
       VLOG(2) << "jump_row: " << jump_rows_;
       if (jump_rows_ == 0) {
         node_type_start[node_type] = tmp_len + start;
@@ -1311,14 +1310,13 @@ void GraphDataGenerator::AllocResource(int thread_id,
   place_ = platform::CUDAPlace(gpuid_);

   platform::CUDADeviceGuard guard(gpuid_);
-  table_capcity_ = once_sample_startid_len_ * repeat_time_ * 10 * 24;
   if (FLAGS_gpugraph_storage_mode != GpuGraphStorageMode::WHOLE_HBM) {
     table_ = new HashTable<uint64_t, uint64_t>(
-        table_capcity_ / FLAGS_gpugraph_hbm_table_load_factor);
+        train_table_cap_ / FLAGS_gpugraph_hbm_table_load_factor);
   }
   VLOG(1) << "AllocResource gpuid " << gpuid_
           << " feed_vec.size: " << feed_vec.size()
-          << " table cap: " << table_capcity_;
+          << " table cap: " << train_table_cap_;
   sample_stream_ = gpu_graph_ptr->get_local_stream(gpuid_);
   train_stream_ = dynamic_cast<phi::GPUContext *>(
                       platform::DeviceContextPool::Instance().Get(place_))
@@ -1349,7 +1347,6 @@ void GraphDataGenerator::AllocResource(int thread_id,
     h_device_keys_len_.push_back(h_graph_all_type_keys_len[i][thread_id]);
   }
   VLOG(2) << "h_device_keys size: " << h_device_keys_len_.size();
-  infer_node_type_start_ = std::vector<int>(h_device_keys_len_.size(), 0);
   size_t once_max_sample_keynum = walk_degree_ * once_sample_startid_len_;
   d_prefix_sum_ = memory::AllocShared(
       place_,
@@ -1440,11 +1437,15 @@ void GraphDataGenerator::SetConfig(
   repeat_time_ = graph_config.sample_times_one_chunk();
   buf_size_ =
       once_sample_startid_len_ * walk_len_ * walk_degree_ * repeat_time_;
-  VLOG(2) << "Confirm GraphConfig, walk_degree : " << walk_degree_
+  train_table_cap_ = graph_config.train_table_cap();
+  infer_table_cap_ = graph_config.infer_table_cap();
+  VLOG(0) << "Confirm GraphConfig, walk_degree : " << walk_degree_
           << ", walk_len : " << walk_len_ << ", window : " << window_
           << ", once_sample_startid_len : " << once_sample_startid_len_
           << ", sample_times_one_chunk : " << repeat_time_
-          << ", batch_size: " << batch_size_;
+          << ", batch_size: " << batch_size_
+          << ", train_table_cap: " << train_table_cap_
+          << ", infer_table_cap: " << infer_table_cap_;
   std::string first_node_type = graph_config.first_node_type();
   std::string meta_path = graph_config.meta_path();
   auto gpu_graph_ptr = GraphGpuWrapper::GetInstance();
8 changes: 4 additions & 4 deletions paddle/fluid/framework/data_feed.h
@@ -940,8 +940,6 @@ class GraphDataGenerator {
   int gpuid_;
   // start ids
   // int64_t* device_keys_;
-  // size_t device_key_size_;
-  // point to device_keys_
   size_t cursor_;
   int thread_id_;
   size_t jump_rows_;
@@ -979,7 +977,6 @@ class GraphDataGenerator {
   size_t buf_size_;
   int repeat_time_;
   std::vector<int> window_step_;
-  std::vector<int> infer_node_type_start_;
   BufState buf_state_;
   int batch_size_;
   int slot_num_;
@@ -988,8 +985,11 @@
   bool gpu_graph_training_;
   std::vector<uint64_t> host_vec_;
   std::vector<uint64_t> h_device_keys_len_;
-  uint64_t table_capcity_;
+  uint64_t train_table_cap_;
+  uint64_t infer_table_cap_;
   int total_row_;
+  size_t infer_node_start_;
+  size_t infer_node_end_;
 };

 class DataFeed {
2 changes: 2 additions & 0 deletions paddle/fluid/framework/data_feed.proto
@@ -38,6 +38,8 @@ message GraphConfig {
   optional string first_node_type = 8;
   optional string meta_path = 9;
   optional bool gpu_graph_training = 10 [ default = true ];
+  optional int64 train_table_cap = 11 [ default = 80000 ];
+  optional int64 infer_table_cap = 12 [ default = 80000 ];
 }

 message DataFeedDesc {
4 changes: 4 additions & 0 deletions python/paddle/fluid/dataset.py
@@ -1080,6 +1080,10 @@ def set_graph_config(self, config):
         self.proto_desc.graph_config.meta_path = config.get("meta_path", "")
         self.proto_desc.graph_config.gpu_graph_training = config.get(
             "gpu_graph_training", True)
+        self.proto_desc.graph_config.train_table_cap = config.get(
+            "train_table_cap", 800000)
+        self.proto_desc.graph_config.infer_table_cap = config.get(
+            "infer_table_cap", 800000)
         self.dataset.set_gpu_graph_mode(True)
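With the two fields plumbed through the proto, the caps become part of the graph config dict. A hypothetical usage sketch — the dataset type and the non-cap keys are assumptions for illustration; only train_table_cap / infer_table_cap (Python-side default 800000) come from this commit:

import paddle.fluid as fluid

dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
dataset.set_graph_config({
    "first_node_type": "user",          # illustrative values
    "meta_path": "user2item-item2user",
    "gpu_graph_training": False,        # inference pass
    "train_table_cap": 800000,          # unique-node cap for the train hash table
    "infer_table_cap": 800000,          # keys scanned per FillInferBuf() pass
})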


