diff --git a/paddle/fluid/framework/data_feed.cu b/paddle/fluid/framework/data_feed.cu
index 5e7be45c8f619..20e17a3f4ebf2 100644
--- a/paddle/fluid/framework/data_feed.cu
+++ b/paddle/fluid/framework/data_feed.cu
@@ -369,9 +369,8 @@ int GraphDataGenerator::FillIdShowClkTensor(int total_instance,
   } else {
     uint64_t *d_type_keys =
         reinterpret_cast<uint64_t *>(d_device_keys_[cursor]->ptr());
-    auto &infer_node_type_start = infer_node_type_start_;
-    d_type_keys += infer_node_type_start[cursor];
-    infer_node_type_start[cursor] += total_instance / 2;
+    d_type_keys += infer_node_start_;
+    infer_node_start_ += total_instance / 2;
     CopyDuplicateKeys<<<GET_BLOCKS(total_instance / 2),
                         CUDA_NUM_THREADS,
                         0,
@@ ... @@ int GraphDataGenerator::InsertTable(const uint64_t *d_keys,
-  if (h_uniq_node_num + len >= table_capcity_) {
+  if (h_uniq_node_num + len >= train_table_cap_) {
     return 1;
   }
   table_->insert(d_keys, len, d_uniq_node_num, sample_stream_);
@@ -901,9 +898,9 @@ int GraphDataGenerator::FillInferBuf() {
     }
     size_t device_key_size = h_device_keys_len_[infer_cursor];
     total_row_ =
-        (global_infer_node_type_start[infer_cursor] + table_capcity_ <=
+        (global_infer_node_type_start[infer_cursor] + infer_table_cap_ <=
          device_key_size)
-            ? table_capcity_
+            ? infer_table_cap_
             : device_key_size - global_infer_node_type_start[infer_cursor];
     host_vec_.resize(total_row_);
@@ -918,7 +915,9 @@ int GraphDataGenerator::FillInferBuf() {
     VLOG(1) << "cursor: " << infer_cursor
             << " start: " << global_infer_node_type_start[infer_cursor]
             << " num: " << total_row_;
+    infer_node_start_ = global_infer_node_type_start[infer_cursor];
     global_infer_node_type_start[infer_cursor] += total_row_;
+    infer_node_end_ = global_infer_node_type_start[infer_cursor];
     cursor_ = infer_cursor;
   }
   return 0;
@@ -957,7 +956,7 @@ int GraphDataGenerator::FillWalkBuf() {
   auto &node_type_start = gpu_graph_ptr->node_type_start_[gpuid_];
   auto &finish_node_type = gpu_graph_ptr->finish_node_type_[gpuid_];
   auto &type_to_index = gpu_graph_ptr->get_graph_type_to_index();
-  auto& cursor = gpu_graph_ptr->cursor_[thread_id_];
+  auto &cursor = gpu_graph_ptr->cursor_[thread_id_];
   size_t node_type_len = first_node_type.size();
   int remain_size =
       buf_size_ - walk_degree_ * once_sample_startid_len_ * walk_len_;
@@ -1006,9 +1005,9 @@ int GraphDataGenerator::FillWalkBuf() {
       int step = 1;
       VLOG(2) << "sample edge type: " << path[0] << " step: " << 1;
      jump_rows_ = sample_res.total_sample_size;
-      VLOG(2) << "i = " << i << " start = " << start
-              << " tmp_len = " << tmp_len << " cursor = " << node_type
-              << " cur_node_idx = " << cur_node_idx << " jump row: " << jump_rows_;
+      VLOG(2) << "i = " << i << " start = " << start << " tmp_len = " << tmp_len
+              << " cursor = " << node_type << " cur_node_idx = " << cur_node_idx
+              << " jump row: " << jump_rows_;
       VLOG(2) << "jump_row: " << jump_rows_;
       if (jump_rows_ == 0) {
         node_type_start[node_type] = tmp_len + start;
@@ -1311,14 +1310,13 @@ void GraphDataGenerator::AllocResource(int thread_id,
   place_ = platform::CUDAPlace(gpuid_);
   platform::CUDADeviceGuard guard(gpuid_);
-  table_capcity_ = once_sample_startid_len_ * repeat_time_ * 10 * 24;
   if (FLAGS_gpugraph_storage_mode != GpuGraphStorageMode::WHOLE_HBM) {
     table_ = new HashTable<uint64_t, uint64_t>(
-        table_capcity_ / FLAGS_gpugraph_hbm_table_load_factor);
+        train_table_cap_ / FLAGS_gpugraph_hbm_table_load_factor);
   }
   VLOG(1) << "AllocResource gpuid " << gpuid_
           << " feed_vec.size: " << feed_vec.size()
-          << " table cap: " << table_capcity_;
+          << " table cap: " << train_table_cap_;
   sample_stream_ = gpu_graph_ptr->get_local_stream(gpuid_);
   train_stream_ = dynamic_cast<phi::GPUContext *>(
                       platform::DeviceContextPool::Instance().Get(place_))
                       ->stream();
@@ -1349,7 +1347,6 @@ void GraphDataGenerator::AllocResource(int thread_id,
     h_device_keys_len_.push_back(h_graph_all_type_keys_len[i][thread_id]);
   }
   VLOG(2) << "h_device_keys size: " << h_device_keys_len_.size();
-  infer_node_type_start_ = std::vector<int>(h_device_keys_len_.size(), 0);
   size_t once_max_sample_keynum = walk_degree_ * once_sample_startid_len_;
   d_prefix_sum_ = memory::AllocShared(
       place_,
@@ -1440,11 +1437,15 @@ void GraphDataGenerator::SetConfig(
   repeat_time_ = graph_config.sample_times_one_chunk();
   buf_size_ =
       once_sample_startid_len_ * walk_len_ * walk_degree_ * repeat_time_;
-  VLOG(2) << "Confirm GraphConfig, walk_degree : " << walk_degree_
+  train_table_cap_ = graph_config.train_table_cap();
+  infer_table_cap_ = graph_config.infer_table_cap();
+  VLOG(0) << "Confirm GraphConfig, walk_degree : " << walk_degree_
           << ", walk_len : " << walk_len_ << ", window : " << window_
           << ", once_sample_startid_len : " << once_sample_startid_len_
           << ", sample_times_one_chunk : " << repeat_time_
-          << ", batch_size: " << batch_size_;
+          << ", batch_size: " << batch_size_
+          << ", train_table_cap: " << train_table_cap_
+          << ", infer_table_cap: " << infer_table_cap_;
   std::string first_node_type = graph_config.first_node_type();
   std::string meta_path = graph_config.meta_path();
   auto gpu_graph_ptr = GraphGpuWrapper::GetInstance();
diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h
index e7d0d9d8e297b..5e48e3b466a9a 100644
--- a/paddle/fluid/framework/data_feed.h
+++ b/paddle/fluid/framework/data_feed.h
@@ -940,8 +940,6 @@ class GraphDataGenerator {
   int gpuid_;
   // start ids
   // int64_t* device_keys_;
-  // size_t device_key_size_;
-  // point to device_keys_
   size_t cursor_;
   int thread_id_;
   size_t jump_rows_;
@@ -979,7 +977,6 @@
   size_t buf_size_;
   int repeat_time_;
   std::vector<int> window_step_;
-  std::vector<int> infer_node_type_start_;
   BufState buf_state_;
   int batch_size_;
   int slot_num_;
@@ -988,8 +985,11 @@
   bool gpu_graph_training_;
   std::vector<uint64_t> host_vec_;
   std::vector<size_t> h_device_keys_len_;
-  uint64_t table_capcity_;
+  uint64_t train_table_cap_;
+  uint64_t infer_table_cap_;
   int total_row_;
+  size_t infer_node_start_;
+  size_t infer_node_end_;
 };
 
 class DataFeed {
diff --git a/paddle/fluid/framework/data_feed.proto b/paddle/fluid/framework/data_feed.proto
index a7ab70948795f..38791a124c56e 100644
--- a/paddle/fluid/framework/data_feed.proto
+++ b/paddle/fluid/framework/data_feed.proto
@@ -38,6 +38,8 @@ message GraphConfig {
   optional string first_node_type = 8;
   optional string meta_path = 9;
   optional bool gpu_graph_training = 10 [ default = true ];
+  optional int64 train_table_cap = 11 [ default = 80000 ];
+  optional int64 infer_table_cap = 12 [ default = 80000 ];
 }
 
 message DataFeedDesc {
diff --git a/python/paddle/fluid/dataset.py b/python/paddle/fluid/dataset.py
index 9fba7bb70f189..cf5a77298074c 100644
--- a/python/paddle/fluid/dataset.py
+++ b/python/paddle/fluid/dataset.py
@@ -1080,6 +1080,10 @@ def set_graph_config(self, config):
         self.proto_desc.graph_config.meta_path = config.get("meta_path", "")
         self.proto_desc.graph_config.gpu_graph_training = config.get(
             "gpu_graph_training", True)
+        self.proto_desc.graph_config.train_table_cap = config.get(
+            "train_table_cap", 800000)
+        self.proto_desc.graph_config.infer_table_cap = config.get(
+            "infer_table_cap", 800000)
         self.dataset.set_gpu_graph_mode(True)
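Usage note (illustrative, not part of the patch): the two new keys flow from set_graph_config through the GraphConfig proto into GraphDataGenerator::SetConfig, where train_table_cap_ sizes the dedup HashTable used during training and infer_table_cap_ bounds how many start ids FillInferBuf materializes per pass. Below is a minimal, hypothetical sketch of setting them from Python; the DatasetFactory/InMemoryDataset setup and all parameter values are assumptions for illustration, not taken from this PR.

    # Hypothetical usage sketch; assumes an "InMemoryDataset" created through
    # paddle.fluid's DatasetFactory, with GPU-graph mode configured elsewhere.
    import paddle.fluid as fluid

    dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset")
    dataset.set_graph_config({
        "walk_degree": 10,        # illustrative sampling parameters
        "walk_len": 24,
        "window": 3,
        "batch_size": 800,
        "gpu_graph_training": True,
        # New in this patch: cap for the training-side dedup table and for
        # the number of keys pulled per inference buffer fill.
        "train_table_cap": 800000,
        "infer_table_cap": 800000,
    })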