-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Generate latest docs on CI, from commit 106912d.
github-actions
committed
Jan 24, 2024
0 parents
commit 48a1f3b
Showing
744 changed files
with
275,998 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Copyright 2022-2023 Alibaba Group Holding Limited. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
|
||
// For format details, see https://aka.ms/devcontainer.json. For config options, see the | ||
// README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node | ||
{ | ||
"name": "GraphAr", | ||
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile | ||
"image": "registry.cn-hongkong.aliyuncs.com/graphscope/graphar-dev:latest", | ||
// "image": "ubuntu:22.04", | ||
|
||
// Features to add to the dev container. More info: https://containers.dev/features. | ||
"features": { | ||
"ghcr.io/devcontainers/features/common-utils:2":{ | ||
"installZsh": "true", | ||
"configureZshAsDefaultShell": "true", | ||
"installOhMyZsh": true, | ||
"upgradePackages": "false" | ||
} | ||
}, | ||
// Configure tool-specific properties. | ||
"customizations": { | ||
// Configure properties specific to VS Code. | ||
"vscode": { | ||
"settings": {}, | ||
"extensions": [ | ||
"streetsidesoftware.code-spell-checker", | ||
"eamodio.gitlens", | ||
"github.copilot", | ||
"github.copilot-labs" | ||
] | ||
} | ||
}, | ||
|
||
// Set `remoteUser` to `root` to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. | ||
"remoteUser": "graphar", | ||
|
||
// Use 'initializeCommand' to run commands on the host machine before the container is created. | ||
"initializeCommand": "sudo docker pull registry.cn-hongkong.aliyuncs.com/graphscope/graphar-dev:latest", | ||
|
||
// Uncomment this to enable C++ and Rust debugging in containers | ||
// "capAdd": ["SYS_PTRACE"], | ||
// "securityOpt": ["seccomp=unconfined"], | ||
|
||
// Use 'forwardPorts' to make a list of ports inside the container available locally. | ||
// "forwardPorts": [3000], | ||
|
||
// Use 'portsAttributes' to set default properties for specific forwarded ports. | ||
// More info: https://containers.dev/implementors/json_reference/#port-attributes | ||
// "portsAttributes": { | ||
// "9000": { | ||
// "label": "Hello Remote World", | ||
// "onAutoForward": "notify" | ||
// } | ||
// }, | ||
|
||
// Use 'postCreateCommand' to run commands after the container is created. | ||
// "postCreateCommand": "yarn install" | ||
|
||
// Improve performance | ||
|
||
// Uncomment these to mount a folder to a volume | ||
// https://code.visualstudio.com/remote/advancedcontainers/improve-performance#_use-a-targeted-named-volume | ||
// "mounts": [ | ||
// "source=${localWorkspaceFolderBasename}-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume" | ||
// ], | ||
|
||
|
||
// Uncomment these to use a named volume for your entire source tree | ||
// https://code.visualstudio.com/remote/advancedcontainers/improve-performance#_use-a-named-volume-for-your-entire-source-tree | ||
// "workspaceMount": "source=gs,target=/workspaces,type=volume", | ||
// "workspaceFolder": "/workspaces" | ||
"postCreateCommand": "sudo chown -R graphar /workspaces && bash pre-commit/install-hook.sh && bash pre-commit/prepare-commit-msg" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
title = "Gitleaks for GraphAr" | ||
|
||
[extend] | ||
useDefault = true | ||
|
||
[[rules]] | ||
description = "Alibaba AccessKey ID" | ||
id = "alibaba-access-key-id" | ||
regex = '''(?i)((LTAI)[a-z0-9]+)''' | ||
keywords = [ | ||
"ltai", | ||
] | ||
|
||
[[rules]] | ||
description = "Alibaba AccessKey ID" | ||
id = "alibaba-access-id-in-config" | ||
regex = '''(?i)((access).?id\s*=\s*.+)''' | ||
keywords = [ | ||
"access", | ||
] | ||
|
||
[[rules]] | ||
description = "Alibaba AccessKey ID" | ||
id = "alibaba-access-key-in-config" | ||
regex = '''(?i)((access).?key\s*=\s*.+)''' | ||
keywords = [ | ||
"access", | ||
] | ||
|
||
[[rules]] | ||
description = "Alibaba AccessKey ID" | ||
id = "alibaba-access-secret-in-config" | ||
regex = '''(?i)((access).?secret\s*=\s*.+)''' | ||
keywords = [ | ||
"access", | ||
"secret", | ||
] | ||
|
||
[[rules]] | ||
description = "Alibaba AccessKey ID" | ||
id = "alibaba-access-key-id-in-config" | ||
regex = '''(?i)((access).?key.?id\s*=\s*.+)''' | ||
keywords = [ | ||
"access", | ||
] | ||
|
||
[rules.allowlist] | ||
paths = [ | ||
'''modules/io/python/drivers/io/tests/test_open.py''', | ||
'''modules/io/python/drivers/io/tests/test_serialize.py''', | ||
] | ||
|
||
[[rules]] | ||
description = "Alibaba AccessKey ID" | ||
id = "alibaba-access-key-secret-in-config" | ||
regex = '''(?i)((access).?key.?secret\s*=\s*.+)''' | ||
keywords = [ | ||
"access", | ||
"secret", | ||
] | ||
|
||
[rules.allowlist] | ||
paths = [ | ||
'''modules/io/python/drivers/io/tests/test_open.py''', | ||
'''modules/io/python/drivers/io/tests/test_serialize.py''', | ||
] | ||
|
||
[[rules]] | ||
description = "Alibaba AccessKey ID" | ||
id = "alibaba-secret-access-key-in-config" | ||
regex = '''(?i)((secret).?access.?key\s*=\s*.+)''' | ||
keywords = [ | ||
"access", | ||
"secret", | ||
] | ||
|
||
[allowlist] | ||
paths = [ | ||
'''build''', | ||
'''docs/_build''', | ||
'''docs/_templates/footer.html''', | ||
'''thirdparty''', | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
header: | ||
license: | ||
spdx-id: Apache-2.0 | ||
copyright-owner: Apache Software Foundation | ||
content: | | ||
Copyright 2022-2023 Alibaba Group Holding Limited. | ||
Licensed under the Apache License, Version 2.0 (the "License"); | ||
you may not use this file except in compliance with the License. | ||
You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, software | ||
distributed under the License is distributed on an "AS IS" BASIS, | ||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
See the License for the specific language governing permissions and | ||
limitations under the License. | ||
paths-ignore: | ||
- 'dist' | ||
- 'licenses' | ||
- '**/*.md' | ||
- 'LICENSE' | ||
- 'NOTICE' | ||
- 'testing' | ||
- 'spark/src/test/resources' | ||
- 'java/src/test/resources' | ||
- '.licenserc.yaml' | ||
- '.gitignore' | ||
- '.gitleaks.toml' | ||
- '.gitmodules' | ||
- 'pre-commit-config.yaml' | ||
- 'docs' | ||
- '**/.gitignore' | ||
- 'spark/.scalafix.conf' | ||
- 'spark/.scalafmt.conf' | ||
- 'cpp/apidoc' | ||
- 'spark/src/main/scala/com/alibaba/graphar/datasources' | ||
- '*.md' | ||
- '*.rst' | ||
- '**/*.json' | ||
- 'pyspark/poetry.lock' # This file is generated automatically by Poetry-tool; there is no way to add license header | ||
|
||
comment: on-failure | ||
|
||
# If you don't want to check dependencies' license compatibility, remove the following part | ||
dependency: | ||
files: | ||
- spark/pom.xml # If this is a maven project. | ||
- java/pom.xml # If this is a maven project. |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Copyright 2022-2023 Alibaba Group Holding Limited. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
repos: | ||
- repo: https://github.com/zricethezav/gitleaks | ||
rev: v8.15.0 | ||
hooks: | ||
- id: gitleaks | ||
args: | ||
- '--verbose' | ||
|
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions
1
_panels_static/panels-bootstrap.5fd3999ee7762ccc51105388f4a9d115.css
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
_panels_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
7 changes: 7 additions & 0 deletions
7
_panels_static/panels-variables.06eb56fa6e07937060861dad626602ad.css
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
:root { | ||
--tabs-color-label-active: hsla(231, 99%, 66%, 1); | ||
--tabs-color-label-inactive: rgba(178, 206, 245, 0.62); | ||
--tabs-color-overline: rgb(207, 236, 238); | ||
--tabs-color-underline: rgb(207, 236, 238); | ||
--tabs-size-label: 1rem; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
Co-Work with BGL | ||
============================ | ||
|
||
The `Boost Graph Library (BGL) <https://cs.brown.edu/~jwicks/boost/libs/graph/doc/>`_ is the first C++ library to apply the principles of generic programming to the construction of the advanced data structures and algorithms used in graph computations. The BGL graph interface and graph components are generic in the same sense as the Standard Template Library (STL). And it provides some built-in algorithms which cover a core set of algorithm patterns and a larger set of graph algorithms. | ||
|
||
We take computing connected components (CC) as an example to demonstrate how BGL works with GraphAr. A weakly connected component is a maximal subgraph of a graph such that for every pair of vertices in it, there is an undirected path connecting them. The CC algorithm identifies all such components in a graph. Learn more about `the CC algorithm <https://en.wikipedia.org/wiki/Connected_component>`_. | ||
|
||
The source code of CC based on BGL can be found at `bgl_example.cc`_. In this program, the graph information file is first read to get the metadata: | ||
|
||
.. code:: C++ | ||
|
||
std::string path = ... // the path of the graph information file | ||
auto graph_info = GraphArchive::GraphInfo::Load(path).value(); | ||
|
||
And then, the vertex collection and the edge collection are established as the handles to access the graph data: | ||
|
||
.. code:: C++ | ||
|
||
auto maybe_vertices = GraphArchive::VerticesCollection::Make(graph_info, "person"); | ||
auto vertices = maybe_vertices.value(); | ||
auto maybe_edges = GraphArchive::EdgesCollection::Make(graph_info, "person", "knows", "person", GraphArchive::AdjListType::ordered_by_source); | ||
auto edges = maybe_edges.value(); | ||
|
||
Next, we construct the in-memory graph data structure for BGL by traversing the vertices and edges via GraphAr's high-level reading interface (the vertex iterator and the edge iterator): | ||
|
||
.. code:: C++ | ||
|
||
// define the Graph type in BGL | ||
typedef boost::adjacency_list<boost::vecS, // use vector to store edges | ||
boost::vecS, // use vector to store vertices | ||
boost::undirectedS, // undirected | ||
boost::property<boost::vertex_name_t, int64_t>, // vertex property | ||
boost::no_property> Graph; // no edge property | ||
// descriptors for vertex in BGL | ||
typedef typename boost::graph_traits<Graph>::vertex_descriptor Vertex; | ||
|
||
// declare a graph object with (num_vertices) vertices and an edge iterator | ||
std::vector<std::pair<GraphArchive::IdType, GraphArchive::IdType>> edges_array; | ||
auto it_begin = edges->begin(), it_end = edges->end(); | ||
for (auto it = it_begin; it != it_end; ++it) | ||
edges_array.push_back(std::make_pair(it.source(), it.destination())); | ||
Graph g(edges_array.begin(), edges_array.end(), num_vertices); | ||
|
||
// define the internal vertex property "id" | ||
boost::property_map<Graph, boost::vertex_name_t>::type id = get(boost::vertex_name_t(), g); | ||
auto v_it_begin = vertices->begin(), v_it_end = vertices->end(); | ||
for (auto it = v_it_begin; it != v_it_end; ++it) { | ||
auto vertex = *it; | ||
boost::put(id, vertex.id(), vertex.property<int64_t>("id").value()); | ||
} | ||
After that, an internal CC algorithm provided by BGL is called: | ||
|
||
.. code:: C++ | ||
|
||
// define the external vertex property "component" | ||
std::vector<int> component(num_vertices); | ||
// call algorithm: cc | ||
int cc_num = boost::connected_components(g, &component[0]); | ||
std::cout << "Total number of components: " << cc_num << std::endl; | ||
|
||
Finally, we can use a **VerticesBuilder** of GraphAr to write the results to newly generated GAR files: | ||
|
||
.. code:: C++ | ||
|
||
// construct a new property group | ||
GraphArchive::Property cc = {"cc", GraphArchive::int32(), false}; | ||
std::vector<GraphArchive::Property> property_vector = {cc}; | ||
auto group = GraphArchive::CreatePropertyGroup(property_vector, GraphArchive::FileType::PARQUET); | ||
|
||
// construct the new vertex info | ||
std::string vertex_label = "cc_result", vertex_prefix = "result/"; | ||
int chunk_size = 100; | ||
auto new_info = GraphArchive::CreateVertexInfo(vertex_label, chunk_size, {group}, vertex_prefix); | ||
|
||
// access the vertices via the index map and vertex iterator of BGL | ||
typedef boost::property_map<Graph, boost::vertex_index_t>::type IndexMap; | ||
IndexMap index = boost::get(boost::vertex_index, g); | ||
typedef boost::graph_traits<Graph>::vertex_iterator vertex_iter; | ||
std::pair<vertex_iter, vertex_iter> vp; | ||
|
||
// dump the results through the VerticesBuilder | ||
GraphArchive::builder::VerticesBuilder builder(new_info, "/tmp/"); | ||
for (vp = boost::vertices(g); vp.first!= vp.second; ++vp.first) { | ||
Vertex v = *vp.first; | ||
GraphArchive::builder::Vertex vertex(index[v]); | ||
vertex.AddProperty(cc.name, component[index[v]]); | ||
builder.AddVertex(vertex); | ||
} | ||
builder.Dump(); | ||
|
||
.. _bgl_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/bgl_example.cc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
Integrate into GraphScope | ||
============================ | ||
|
||
`GraphScope <https://graphscope.io/>`_ is a unified distributed graph computing platform that provides a one-stop environment for performing diverse graph operations on a cluster through a user-friendly Python interface. As an important application case of GraphAr, we have integrated it into GraphScope. | ||
|
||
GraphScope works on a graph G fragmented via a partition strategy picked by the user and each worker maintains a fragment of G. Given a query, it posts the same query to all the workers and computes following the BSP (Bulk Synchronous Parallel) model. More specifically, each worker first executes processing against its local fragment, to compute partial answers in parallel. And then each worker may exchange partial results with other workers via synchronous message passing. | ||
|
||
To integrate GraphAr into GraphScope, we implemented *ArrowFragmentBuilder* and *ArrowFragmentWriter*. *ArrowFragmentBuilder* establishes the fragments for workers of GraphScope through reading GAR files in parallel. Conversely, *ArrowFragmentWriter* can take the GraphScope fragments and save them as GAR files. If you're interested in knowing more about the implementation, please refer to the `source code <https://github.com/v6d-io/v6d/commit/0eda2067e45fbb4ac46892398af0edc84fe1c27b>`_. | ||
|
||
|
||
Performance Report | ||
------------------------ | ||
|
||
Parameter settings | ||
`````````````````` | ||
The time performance of *ArrowFragmentBuilder* and *ArrowFragmentWriter* in GraphScope is heavily dependent on the partitioning of the graph into GAR files, that is, the *vertex chunk size* and *edge chunk size*, which are specified in the vertex information file and in the edge information file, respectively. See `GraphAr File Format <../user-guide/file-format.html>`_ to understand the chunk size definitions in GAR. | ||
|
||
Generally speaking, fewer chunks are created if the file size is large. On small graphs, this can be disadvantageous as it reduces the degree of parallelism, prolonging disk I/O time. On the other hand, having too many small files increases the overhead associated with the file system and the file parser. | ||
|
||
We have conducted micro benchmarks to compare the time performance for reading/writing GAR files by *ArrowFragmentBuilder*/*ArrowFragmentWriter*, across different *vertex chunk size* and *edge chunk size* configurations. The settings we recommend for *vertex chunk size* and *edge chunk size* are **2^18** and **2^22**, respectively, which lead to efficient performance in most cases. These settings can be used as the reference values when integrating GraphAr into other systems besides GraphScope. | ||
|
||
Time performance results | ||
```````````````````````` | ||
Here we report the performance results of *ArrowFragmentBuilder*, and compare it with loading the same graph through the default loading strategy of GraphScope (through reading the CSV files in parallel). The execution time reported below includes loading the graph data from the disk into memory, as well as building GraphScope fragments from such data. The experiments are conducted on a cluster of 4 AliCloud ecs.r6.6xlarge instances (24vCPU, 192GB memory), and using `com-friendster <https://snap.stanford.edu/data/com-Friendster.html>`_ (a simple graph) and `ldbc-snb-30 <https://ldbcouncil.org/benchmarks/snb/>`_ (a multi-labeled property graph) as datasets. | ||
|
||
+----------------+---------+-----------------+-----------------+ | ||
| Dataset | Workers | Default Loading | GraphAr Loading | | ||
+================+=========+=================+=================+ | ||
| com-friendster | 4 | 282s | 54s | | ||
+----------------+---------+-----------------+-----------------+ | ||
| ldbc-snb-30 | 4 | 196s | 40s | | ||
+----------------+---------+-----------------+-----------------+ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
Examples | ||
--------- | ||
|
||
.. toctree:: | ||
:maxdepth: 2 | ||
|
||
bgl | ||
graphscope | ||
out-of-core |
Oops, something went wrong.