Skip to content

Commit

Permalink
add parameter 'label' for createinfos
Browse files Browse the repository at this point in the history
  • Loading branch information
Elssky committed Oct 25, 2024
1 parent dd0fff6 commit f87900b
Show file tree
Hide file tree
Showing 10 changed files with 148 additions and 73 deletions.
2 changes: 1 addition & 1 deletion cpp/examples/bgl_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ int main(int argc, char* argv[]) {
int chunk_size = 100;
auto version = graphar::InfoVersion::Parse("gar/v1").value();
auto new_info = graphar::CreateVertexInfo(vertex_type, chunk_size, {group},
vertex_prefix, version);
{}, vertex_prefix, version);
// dump new vertex info
ASSERT(new_info->IsValidated());
ASSERT(new_info->Dump().status().ok());
Expand Down
4 changes: 2 additions & 2 deletions cpp/examples/construct_info_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ int main(int argc, char* argv[]) {
graphar::CreatePropertyGroup(property_vector_2, graphar::FileType::ORC);

// create vertex info
auto vertex_info = graphar::CreateVertexInfo(type, chunk_size, {group1},
auto vertex_info = graphar::CreateVertexInfo(type, chunk_size, {group1}, {},
vertex_prefix, version);

ASSERT(vertex_info != nullptr);
Expand Down Expand Up @@ -150,7 +150,7 @@ int main(int argc, char* argv[]) {

// create graph info
auto graph_info = graphar::CreateGraphInfo(name, {vertex_info}, {edge_info},
prefix, version);
{}, prefix, version);
ASSERT(graph_info->GetName() == name);
ASSERT(graph_info->GetPrefix() == prefix);
ASSERT(graph_info->GetVertexInfos().size() == 1);
Expand Down
6 changes: 3 additions & 3 deletions cpp/examples/snap_dataset_to_graphar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ int main(int argc, char* argv[]) {
std::string type = "node", vertex_prefix = "vertex/node/";

// create vertex info
auto vertex_info = graphar::CreateVertexInfo(type, VERTEX_CHUNK_SIZE, {},
auto vertex_info = graphar::CreateVertexInfo(type, VERTEX_CHUNK_SIZE, {}, {},
vertex_prefix, version);

// save & dump
Expand All @@ -75,8 +75,8 @@ int main(int argc, char* argv[]) {

/*------------------construct graph info------------------*/
// create graph info
auto graph_info = graphar::CreateGraphInfo(graph_name, {vertex_info},
{edge_info}, save_path, version);
auto graph_info = graphar::CreateGraphInfo(
graph_name, {vertex_info}, {edge_info}, {}, save_path, version);
// save & dump
ASSERT(!graph_info->Dump().has_error());
ASSERT(graph_info->Save(save_path + graph_name + ".graph.yml").ok());
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/graphar/filesystem.cc
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "graphar/expression.h"
#include "graphar/filesystem.h"
#include "graphar/fwd.h"
#include "graphar/general_params.h"

namespace graphar::detail {
template <typename U, typename T>
Expand Down Expand Up @@ -233,8 +234,11 @@ Status FileSystem::WriteTableToFile(const std::shared_ptr<arrow::Table>& table,
break;
}
case FileType::PARQUET: {
auto schema = table->schema();
auto column_num = schema->num_fields();
parquet::WriterProperties::Builder builder;
builder.compression(arrow::Compression::type::ZSTD); // enable compression
builder.encoding(graphar::GeneralParams::kLabelCol, parquet::Encoding::RLE);
RETURN_NOT_ARROW_OK(parquet::arrow::WriteTable(
*table, arrow::default_memory_pool(), output_stream, 64 * 1024 * 1024,
builder.build(), parquet::default_arrow_writer_properties()));
Expand Down
7 changes: 5 additions & 2 deletions cpp/src/graphar/fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,8 @@ std::shared_ptr<AdjacentList> CreateAdjacentList(
*/
std::shared_ptr<VertexInfo> CreateVertexInfo(
const std::string& type, IdType chunk_size,
const PropertyGroupVector& property_groups, const std::string& prefix = "",
const PropertyGroupVector& property_groups,
const std::vector<std::string>& labels = {}, const std::string& prefix = "",
std::shared_ptr<const InfoVersion> version = nullptr);

/**
Expand Down Expand Up @@ -167,6 +168,7 @@ std::shared_ptr<EdgeInfo> CreateEdgeInfo(
* @param name The name of the graph
* @param vertex_infos The vertex info vector of the graph
* @param edge_infos The edge info vector of the graph
* @param labels The vertex labels of the graph.
* @param prefix The absolute path prefix to store chunk files of the graph.
* Defaults to "./"
* @param version The version of the graph info
Expand All @@ -175,7 +177,8 @@ std::shared_ptr<EdgeInfo> CreateEdgeInfo(
*/
std::shared_ptr<GraphInfo> CreateGraphInfo(
const std::string& name, const VertexInfoVector& vertex_infos,
const EdgeInfoVector& edge_infos, const std::string& prefix,
const EdgeInfoVector& edge_infos, const std::vector<std::string>& labels,
const std::string& prefix,
std::shared_ptr<const InfoVersion> version = nullptr,
const std::unordered_map<std::string, std::string>& extra_info = {});

Expand Down
1 change: 1 addition & 0 deletions cpp/src/graphar/general_params.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ struct GeneralParams {
static constexpr const char* kDstIndexCol = "_graphArDstIndex";
static constexpr const char* kOffsetCol = "_graphArOffset";
static constexpr const char* kPrimaryCol = "_graphArPrimary";
static constexpr const char* kLabelCol = "_graphArLabel";
};

} // namespace graphar
91 changes: 69 additions & 22 deletions cpp/src/graphar/graph_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -191,10 +191,12 @@ class VertexInfo::Impl {
public:
Impl(const std::string& type, IdType chunk_size, const std::string& prefix,
const PropertyGroupVector& property_groups,
const std::vector<std::string>& labels,
std::shared_ptr<const InfoVersion> version)
: type_(type),
chunk_size_(chunk_size),
property_groups_(std::move(property_groups)),
labels_(labels),
prefix_(prefix),
version_(std::move(version)) {
if (prefix_.empty()) {
Expand Down Expand Up @@ -241,6 +243,7 @@ class VertexInfo::Impl {
std::string type_;
IdType chunk_size_;
PropertyGroupVector property_groups_;
std::vector<std::string> labels_;
std::string prefix_;
std::shared_ptr<const InfoVersion> version_;
std::unordered_map<std::string, int> property_name_to_index_;
Expand All @@ -252,9 +255,11 @@ class VertexInfo::Impl {

VertexInfo::VertexInfo(const std::string& type, IdType chunk_size,
const PropertyGroupVector& property_groups,
const std::vector<std::string>& labels,
const std::string& prefix,
std::shared_ptr<const InfoVersion> version)
: impl_(new Impl(type, chunk_size, prefix, property_groups, version)) {}
: impl_(new Impl(type, chunk_size, prefix, property_groups, labels,
version)) {}

VertexInfo::~VertexInfo() = default;

Expand All @@ -264,6 +269,10 @@ IdType VertexInfo::GetChunkSize() const { return impl_->chunk_size_; }

// Accessor for the prefix string held by the private implementation object.
// The returned reference aliases impl_->prefix_; no copy is made.
const std::string& VertexInfo::GetPrefix() const {
  const auto& stored_prefix = impl_->prefix_;
  return stored_prefix;
}

// Accessor for the label list held by the private implementation object.
// The returned reference aliases impl_->labels_; no copy is made.
const std::vector<std::string>& VertexInfo::GetLabels() const {
  const auto& stored_labels = impl_->labels_;
  return stored_labels;
}

// Accessor for the version pointer held by the private implementation object.
// The returned reference aliases impl_->version_.
const std::shared_ptr<const InfoVersion>& VertexInfo::version() const {
  const auto& stored_version = impl_->version_;
  return stored_version;
}
Expand Down Expand Up @@ -367,21 +376,22 @@ Result<std::shared_ptr<VertexInfo>> VertexInfo::AddPropertyGroup(
}
return std::make_shared<VertexInfo>(
impl_->type_, impl_->chunk_size_,
AddVectorElement(impl_->property_groups_, property_group), impl_->prefix_,
impl_->version_);
AddVectorElement(impl_->property_groups_, property_group), impl_->labels_,
impl_->prefix_, impl_->version_);
}

// Reports the result of the implementation object's is_validated() check.
bool VertexInfo::IsValidated() const {
  const bool validated = impl_->is_validated();
  return validated;
}

std::shared_ptr<VertexInfo> CreateVertexInfo(
const std::string& type, IdType chunk_size,
const PropertyGroupVector& property_groups, const std::string& prefix,
const PropertyGroupVector& property_groups,
const std::vector<std::string>& labels, const std::string& prefix,
std::shared_ptr<const InfoVersion> version) {
if (type.empty() || chunk_size <= 0) {
return nullptr;
}
return std::make_shared<VertexInfo>(type, chunk_size, property_groups, prefix,
version);
return std::make_shared<VertexInfo>(type, chunk_size, property_groups, labels,
prefix, version);
}

Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(
Expand All @@ -396,6 +406,13 @@ Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(
if (!yaml->operator[]("prefix").IsNone()) {
prefix = yaml->operator[]("prefix").As<std::string>();
}
std::vector<std::string> labels;
const auto& labels_node = yaml->operator[]("labels");
if (labels_node.IsSequence()) {
for (auto it = labels_node.Begin(); it != labels_node.End(); it++) {
labels.push_back((*it).second.As<std::string>());
}
}
std::shared_ptr<const InfoVersion> version = nullptr;
if (!yaml->operator[]("version").IsNone()) {
GAR_ASSIGN_OR_RAISE(
Expand Down Expand Up @@ -430,8 +447,8 @@ Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(
std::make_shared<PropertyGroup>(property_vec, file_type, pg_prefix));
}
}
return std::make_shared<VertexInfo>(type, chunk_size, property_groups, prefix,
version);
return std::make_shared<VertexInfo>(type, chunk_size, property_groups, labels,
prefix, version);
}

Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(const std::string& input) {
Expand All @@ -449,6 +466,13 @@ Result<std::string> VertexInfo::Dump() const noexcept {
node["type"] = impl_->type_;
node["chunk_size"] = std::to_string(impl_->chunk_size_);
node["prefix"] = impl_->prefix_;
if (impl_->labels_.size() > 0) {
node["labels"];
for (const auto& label : impl_->labels_) {
node["labels"].PushBack();
node["labels"][node["labels"].Size() - 1] = label;
}
}
for (const auto& pg : impl_->property_groups_) {
::Yaml::Node pg_node;
if (!pg->GetPrefix().empty()) {
Expand Down Expand Up @@ -1042,21 +1066,32 @@ static Result<std::shared_ptr<GraphInfo>> ConstructGraphInfo(
edge_infos.push_back(edge_info);
}
}
return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, prefix,
version, extra_info);

std::vector<std::string> labels;
if (!graph_meta->operator[]("labels").IsNone()) {
const auto& labels_node = graph_meta->operator[]("labels");
if (labels_node.IsSequence()) {
for (auto it = labels_node.Begin(); it != labels_node.End(); it++) {
labels.push_back((*it).second.As<std::string>());
}
}
}
return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, labels,
prefix, version, extra_info);
}

} // namespace

class GraphInfo::Impl {
public:
Impl(const std::string& graph_name, VertexInfoVector vertex_infos,
EdgeInfoVector edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version,
EdgeInfoVector edge_infos, const std::vector<std::string>& labels,
const std::string& prefix, std::shared_ptr<const InfoVersion> version,
const std::unordered_map<std::string, std::string>& extra_info)
: name_(graph_name),
vertex_infos_(std::move(vertex_infos)),
edge_infos_(std::move(edge_infos)),
labels_(labels),
prefix_(prefix),
version_(std::move(version)),
extra_info_(extra_info) {
Expand Down Expand Up @@ -1099,6 +1134,7 @@ class GraphInfo::Impl {
std::string name_;
VertexInfoVector vertex_infos_;
EdgeInfoVector edge_infos_;
std::vector<std::string> labels_;
std::string prefix_;
std::shared_ptr<const InfoVersion> version_;
std::unordered_map<std::string, std::string> extra_info_;
Expand All @@ -1108,16 +1144,20 @@ class GraphInfo::Impl {

GraphInfo::GraphInfo(
const std::string& graph_name, VertexInfoVector vertex_infos,
EdgeInfoVector edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version,
EdgeInfoVector edge_infos, const std::vector<std::string>& labels,
const std::string& prefix, std::shared_ptr<const InfoVersion> version,
const std::unordered_map<std::string, std::string>& extra_info)
: impl_(new Impl(graph_name, std::move(vertex_infos), std::move(edge_infos),
prefix, version, extra_info)) {}
labels, prefix, version, extra_info)) {}

GraphInfo::~GraphInfo() = default;

// Accessor for the graph name held by the private implementation object.
// The returned reference aliases impl_->name_; no copy is made.
const std::string& GraphInfo::GetName() const {
  const auto& stored_name = impl_->name_;
  return stored_name;
}

// Accessor for the label list held by the private implementation object.
// The returned reference aliases impl_->labels_; no copy is made.
const std::vector<std::string>& GraphInfo::GetLabels() const {
  const auto& stored_labels = impl_->labels_;
  return stored_labels;
}

// Accessor for the prefix string held by the private implementation object.
// The returned reference aliases impl_->prefix_; no copy is made.
const std::string& GraphInfo::GetPrefix() const {
  const auto& stored_prefix = impl_->prefix_;
  return stored_prefix;
}

const std::shared_ptr<const InfoVersion>& GraphInfo::version() const {
Expand Down Expand Up @@ -1196,7 +1236,7 @@ Result<std::shared_ptr<GraphInfo>> GraphInfo::AddVertex(
}
return std::make_shared<GraphInfo>(
impl_->name_, AddVectorElement(impl_->vertex_infos_, vertex_info),
impl_->edge_infos_, impl_->prefix_, impl_->version_);
impl_->edge_infos_, impl_->labels_, impl_->prefix_, impl_->version_);
}

Result<std::shared_ptr<GraphInfo>> GraphInfo::AddEdge(
Expand All @@ -1210,20 +1250,20 @@ Result<std::shared_ptr<GraphInfo>> GraphInfo::AddEdge(
}
return std::make_shared<GraphInfo>(
impl_->name_, impl_->vertex_infos_,
AddVectorElement(impl_->edge_infos_, edge_info), impl_->prefix_,
impl_->version_);
AddVectorElement(impl_->edge_infos_, edge_info), impl_->labels_,
impl_->prefix_, impl_->version_);
}

std::shared_ptr<GraphInfo> CreateGraphInfo(
const std::string& name, const VertexInfoVector& vertex_infos,
const EdgeInfoVector& edge_infos, const std::string& prefix,
std::shared_ptr<const InfoVersion> version,
const EdgeInfoVector& edge_infos, const std::vector<std::string>& labels,
const std::string& prefix, std::shared_ptr<const InfoVersion> version,
const std::unordered_map<std::string, std::string>& extra_info) {
if (name.empty()) {
return nullptr;
}
return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, prefix,
version, extra_info);
return std::make_shared<GraphInfo>(name, vertex_infos, edge_infos, labels,
prefix, version, extra_info);
}

Result<std::shared_ptr<GraphInfo>> GraphInfo::Load(const std::string& path) {
Expand Down Expand Up @@ -1275,6 +1315,13 @@ Result<std::string> GraphInfo::Dump() const {
edge->GetDstType()) +
".edge.yaml";
}
if (impl_->labels_.size() > 0) {
node["labels"];
for (const auto& label : impl_->labels_) {
node["labels"].PushBack();
node["labels"][node["labels"].Size() - 1] = label;
}
}
if (impl_->version_ != nullptr) {
node["version"] = impl_->version_->ToString();
}
Expand Down
18 changes: 17 additions & 1 deletion cpp/src/graphar/graph_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,12 +180,14 @@ class VertexInfo {
* @param type The type of the vertex.
* @param chunk_size The number of vertices in each vertex chunk.
* @param property_groups The property group vector of the vertex.
* @param labels The labels of the vertex.
* @param prefix The prefix of the vertex info. If left empty, the default
* prefix will be set to the type of the vertex.
* @param version The format version of the vertex info.
*/
explicit VertexInfo(const std::string& type, IdType chunk_size,
const PropertyGroupVector& property_groups,
const std::vector<std::string>& labels = {},
const std::string& prefix = "",
std::shared_ptr<const InfoVersion> version = nullptr);

Expand Down Expand Up @@ -227,6 +229,12 @@ class VertexInfo {
*/
const std::shared_ptr<const InfoVersion>& version() const;

/**
* Get the labels of the vertex.
* @return The labels of the vertex.
*/
const std::vector<std::string>& GetLabels() const;

/**
* Get the number of property groups of the vertex.
*
Expand Down Expand Up @@ -694,14 +702,16 @@ class GraphInfo {
* @param graph_name The name of the graph.
* @param vertex_infos The vertex info vector of the graph.
* @param edge_infos The edge info vector of the graph.
* @param labels The vertex labels of the graph.
* @param prefix The absolute path prefix to store chunk files of the graph.
* Defaults to "./".
* @param version The version of the graph info.
* @param extra_info The extra metadata of the graph info.
*/
explicit GraphInfo(
const std::string& graph_name, VertexInfoVector vertex_infos,
EdgeInfoVector edge_infos, const std::string& prefix = "./",
EdgeInfoVector edge_infos, const std::vector<std::string>& labels = {},
const std::string& prefix = "./",
std::shared_ptr<const InfoVersion> version = nullptr,
const std::unordered_map<std::string, std::string>& extra_info = {});

Expand Down Expand Up @@ -753,6 +763,12 @@ class GraphInfo {
*/
const std::string& GetName() const;

/**
* @brief Get the vertex labels of the graph.
* @return The vertex labels of the graph.
*/
const std::vector<std::string>& GetLabels() const;

/**
* @brief Get the absolute path prefix of the chunk files.
* @return The absolute path prefix of the chunk files.
Expand Down
Loading

0 comments on commit f87900b

Please sign in to comment.