Skip to content

Commit 789fe48

Browse files
authored
[Improve] Add auxiliary functions to get vertex chunk num or edge chunk num with infos (#95)
1 parent c31a88b commit 789fe48

File tree

5 files changed

+44
-10
lines changed

5 files changed

+44
-10
lines changed

include/gar/graph.h

+7-7
Original file line numberDiff line numberDiff line change
@@ -649,7 +649,7 @@ class EdgesCollection<AdjListType::ordered_by_source> {
649649
fs->GetFileNumOfDir(base_dir));
650650
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
651651
chunk_end_ = 0;
652-
for (size_t i = 0; i < vertex_chunk_num; ++i) {
652+
for (IdType i = 0; i < vertex_chunk_num; ++i) {
653653
std::string chunk_dir = base_dir + "/part" + std::to_string(i);
654654
GAR_ASSIGN_OR_RAISE_ERROR(edge_chunk_nums[i],
655655
fs->GetFileNumOfDir(chunk_dir));
@@ -684,7 +684,7 @@ class EdgesCollection<AdjListType::ordered_by_source> {
684684
GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num,
685685
fs->GetFileNumOfDir(base_dir));
686686
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
687-
for (size_t i = 0; i < vertex_chunk_num; ++i) {
687+
for (IdType i = 0; i < vertex_chunk_num; ++i) {
688688
std::string chunk_dir = base_dir + "/part" + std::to_string(i);
689689
GAR_ASSIGN_OR_RAISE_ERROR(edge_chunk_nums[i],
690690
fs->GetFileNumOfDir(chunk_dir));
@@ -870,7 +870,7 @@ class EdgesCollection<AdjListType::ordered_by_dest> {
870870
fs->GetFileNumOfDir(base_dir));
871871
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
872872
chunk_end_ = 0;
873-
for (size_t i = 0; i < vertex_chunk_num; ++i) {
873+
for (IdType i = 0; i < vertex_chunk_num; ++i) {
874874
std::string chunk_dir = base_dir + "/part" + std::to_string(i);
875875
GAR_ASSIGN_OR_RAISE_ERROR(edge_chunk_nums[i],
876876
fs->GetFileNumOfDir(chunk_dir));
@@ -905,7 +905,7 @@ class EdgesCollection<AdjListType::ordered_by_dest> {
905905
GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num,
906906
fs->GetFileNumOfDir(base_dir));
907907
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
908-
for (size_t i = 0; i < vertex_chunk_num; ++i) {
908+
for (IdType i = 0; i < vertex_chunk_num; ++i) {
909909
std::string chunk_dir = base_dir + "/part" + std::to_string(i);
910910
GAR_ASSIGN_OR_RAISE_ERROR(edge_chunk_nums[i],
911911
fs->GetFileNumOfDir(chunk_dir));
@@ -1091,7 +1091,7 @@ class EdgesCollection<AdjListType::unordered_by_source> {
10911091
fs->GetFileNumOfDir(base_dir));
10921092
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
10931093
chunk_end_ = 0;
1094-
for (size_t i = 0; i < vertex_chunk_num; ++i) {
1094+
for (IdType i = 0; i < vertex_chunk_num; ++i) {
10951095
std::string chunk_dir = base_dir + "/part" + std::to_string(i);
10961096
GAR_ASSIGN_OR_RAISE_ERROR(edge_chunk_nums[i],
10971097
fs->GetFileNumOfDir(chunk_dir));
@@ -1126,7 +1126,7 @@ class EdgesCollection<AdjListType::unordered_by_source> {
11261126
GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num,
11271127
fs->GetFileNumOfDir(base_dir));
11281128
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
1129-
for (size_t i = 0; i < vertex_chunk_num; ++i) {
1129+
for (IdType i = 0; i < vertex_chunk_num; ++i) {
11301130
std::string chunk_dir = base_dir + "/part" + std::to_string(i);
11311131
GAR_ASSIGN_OR_RAISE_ERROR(edge_chunk_nums[i],
11321132
fs->GetFileNumOfDir(chunk_dir));
@@ -1317,7 +1317,7 @@ class EdgesCollection<AdjListType::unordered_by_dest> {
13171317
GAR_ASSIGN_OR_RAISE_ERROR(auto vertex_chunk_num,
13181318
fs->GetFileNumOfDir(base_dir));
13191319
std::vector<IdType> edge_chunk_nums(vertex_chunk_num, 0);
1320-
for (size_t i = 0; i < vertex_chunk_num; ++i) {
1320+
for (IdType i = 0; i < vertex_chunk_num; ++i) {
13211321
std::string chunk_dir = base_dir + "/part" + std::to_string(i);
13221322
GAR_ASSIGN_OR_RAISE_ERROR(edge_chunk_nums[i],
13231323
fs->GetFileNumOfDir(chunk_dir));

include/gar/utils/filesystem.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ class FileSystem {
113113
*
114114
* Note: the counted entries are not limited to regular files; an entry can be a directory or another type of file.
115115
*/
116-
Result<size_t> GetFileNumOfDir(const std::string& dir_path,
116+
Result<IdType> GetFileNumOfDir(const std::string& dir_path,
117117
bool recursive = false) const noexcept;
118118

119119
private:

include/gar/utils/reader_utils.h

+8
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,14 @@ Result<std::pair<IdType, IdType>> GetAdjListOffsetOfVertex(
2929
const EdgeInfo& edge_info, const std::string& prefix,
3030
AdjListType adj_list_type, IdType vid) noexcept;
3131

32+
/**
 * @brief Compute the number of vertex chunks of a vertex type.
 *
 * The total vertex count is read from the vertices-number file described by
 * vertex_info, then divided by the chunk size, rounding up.
 *
 * @param prefix Path or URI prefix; it is resolved to a file system and a
 *        path prefix to which the vertices-number file path is appended.
 * @param vertex_info Vertex metadata providing the vertices-number file path
 *        and the chunk size.
 * @return The number of vertex chunks, or an error result if a step fails.
 */
Result<IdType> GetVertexChunkNum(const std::string& prefix,
33+
const VertexInfo& vertex_info) noexcept;
34+
35+
/**
 * @brief Get the number of edge chunks belonging to one vertex chunk.
 *
 * The result is the number of entries found in the "part<vertex_chunk_index>"
 * directory under the adjacency-list path prefix of the given type.
 *
 * @param prefix Path or URI prefix; it is resolved to a file system and a
 *        path prefix to which the adjacency-list path prefix is appended.
 * @param edge_info Edge metadata providing the adjacency-list path prefix.
 * @param adj_list_type Adjacency list type whose directory is inspected.
 * @param vertex_chunk_index Index of the vertex chunk, used to form the
 *        "part<index>" directory name.
 * @return The number of entries in that directory, or an error result if a
 *         step fails.
 */
Result<IdType> GetEdgeChunkNum(const std::string& prefix,
36+
const EdgeInfo& edge_info,
37+
AdjListType adj_list_type,
38+
IdType vertex_chunk_index) noexcept;
39+
3240
} // namespace utils
3341
} // namespace GAR_NAMESPACE_INTERNAL
3442
#endif // GAR_UTILS_READER_UTILS_H_

src/filesystem.cc

+2-2
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ Status FileSystem::CopyFile(const std::string& src_path,
166166
return Status::OK();
167167
}
168168

169-
Result<size_t> FileSystem::GetFileNumOfDir(const std::string& dir_path,
169+
Result<IdType> FileSystem::GetFileNumOfDir(const std::string& dir_path,
170170
bool recursive) const noexcept {
171171
arrow::fs::FileSelector file_selector;
172172
file_selector.base_dir = dir_path;
@@ -175,7 +175,7 @@ Result<size_t> FileSystem::GetFileNumOfDir(const std::string& dir_path,
175175
arrow::fs::FileInfoVector file_infos;
176176
GAR_RETURN_ON_ARROW_ERROR_AND_ASSIGN(file_infos,
177177
arrow_fs_->GetFileInfo(file_selector));
178-
return file_infos.size();
178+
return static_cast<IdType>(file_infos.size());
179179
}
180180

181181
Result<std::shared_ptr<FileSystem>> FileSystemFromUriOrPath(

src/reader_utils.cc

+26
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,32 @@ Result<std::pair<IdType, IdType>> GetAdjListOffsetOfVertex(
6767
static_cast<IdType>(array->Value(1)));
6868
}
6969

70+
// Computes how many vertex chunks a vertex type has: reads the total vertex
// count from the vertices-number file and divides it by the chunk size,
// rounding up.
// NOTE(review): GAR_ASSIGN_OR_RAISE is defined elsewhere; presumably it
// returns the error from this function when the wrapped call fails — confirm.
Result<IdType> GetVertexChunkNum(const std::string& prefix,
71+
const VertexInfo& vertex_info) noexcept {
72+
// Filled by FileSystemFromUriOrPath with the path part used for file access.
std::string out_prefix;
73+
GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
74+
GAR_ASSIGN_OR_RAISE(auto vertex_num_file_suffix,
75+
vertex_info.GetVerticesNumFilePath());
76+
// The path from vertex_info is appended directly to the resolved prefix.
std::string vertex_num_file_path = out_prefix + vertex_num_file_suffix;
77+
// The file content is read as a single IdType value (the vertex count).
GAR_ASSIGN_OR_RAISE(auto vertex_num,
78+
fs->ReadFileToValue<IdType>(vertex_num_file_path));
79+
// Ceiling division: a partially filled last chunk still counts as a chunk.
// NOTE(review): a chunk size of 0 would divide by zero here — confirm that
// VertexInfo guarantees a positive chunk size.
return (vertex_num + vertex_info.GetChunkSize() - 1) /
80+
vertex_info.GetChunkSize();
81+
}
82+
83+
// Returns the number of edge chunks stored for one vertex chunk, obtained by
// counting the entries of the "part<vertex_chunk_index>" directory under the
// adjacency-list path prefix of the given type.
// NOTE(review): GAR_ASSIGN_OR_RAISE is defined elsewhere; presumably it
// returns the error from this function when the wrapped call fails — confirm.
Result<IdType> GetEdgeChunkNum(const std::string& prefix,
84+
const EdgeInfo& edge_info,
85+
AdjListType adj_list_type,
86+
IdType vertex_chunk_index) noexcept {
87+
// Filled by FileSystemFromUriOrPath with the path part used for file access.
std::string out_prefix;
88+
GAR_ASSIGN_OR_RAISE(auto fs, FileSystemFromUriOrPath(prefix, &out_prefix));
89+
GAR_ASSIGN_OR_RAISE(auto adj_prefix,
90+
edge_info.GetAdjListPathPrefix(adj_list_type));
91+
// NOTE(review): "part" is appended without a separator, so this assumes
// adj_prefix already ends with '/' — confirm against GetAdjListPathPrefix.
std::string chunk_dir =
92+
out_prefix + adj_prefix + "part" + std::to_string(vertex_chunk_index);
93+
// Called with the default (non-recursive) mode: counts the entries directly
// inside chunk_dir.
return fs->GetFileNumOfDir(chunk_dir);
94+
}
95+
7096
} // namespace utils
7197

7298
} // namespace GAR_NAMESPACE_INTERNAL

0 commit comments

Comments
 (0)