diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 000000000..9a995a9ad --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,88 @@ +/* + * Copyright 2022-2023 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node +{ + "name": "GraphAr", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + "image": "registry.cn-hongkong.aliyuncs.com/graphscope/graphar-dev:latest", + // "image": "ubuntu:22.04", + + // Features to add to the dev container. More info: https://containers.dev/features. + "features": { + "ghcr.io/devcontainers/features/common-utils:2":{ + "installZsh": "true", + "configureZshAsDefaultShell": "true", + "installOhMyZsh": true, + "upgradePackages": "false" + } + }, + // Configure tool-specific properties. + "customizations": { + // Configure properties specific to VS Code. + "vscode": { + "settings": {}, + "extensions": [ + "streetsidesoftware.code-spell-checker", + "eamodio.gitlens", + "github.copilot", + "github.copilot-labs" + ] + } + }, + + // Set `remoteUser` to `root` to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. + "remoteUser": "graphar", + + // Use 'postCreateCommand' to run commands before the container is created. + "initializeCommand": "sudo docker pull registry.cn-hongkong.aliyuncs.com/graphscope/graphar-dev:latest", + + // Uncomment this to enable C++ and Rust debugging in containers + // "capAdd": ["SYS_PTRACE"], + // "securityOpt": ["seccomp=unconfined"], + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [3000], + + // Use 'portsAttributes' to set default properties for specific forwarded ports. + // More info: https://containers.dev/implementors/json_reference/#port-attributes + // "portsAttributes": { + // "9000": { + // "label": "Hello Remote World", + // "onAutoForward": "notify" + // } + // }, + + // Use 'postCreateCommand' to run commands after the container is created. 
+ // "postCreateCommand": "yarn install" + + // Improve performance + + // Uncomment these to mount a folder to a volume + // https://code.visualstudio.com/remote/advancedcontainers/improve-performance#_use-a-targeted-named-volume + // "mounts": [ + // "source=${localWorkspaceFolderBasename}-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume" + // ], + + + // Uncomment these to use a named volume for your entire source tree + // https://code.visualstudio.com/remote/advancedcontainers/improve-performance#_use-a-named-volume-for-your-entire-source-tree + // "workspaceMount": "source=gs,target=/workspaces,type=volume", + // "workspaceFolder": "/workspaces" + "postCreateCommand": "sudo chown -R graphar /workspaces && bash pre-commit/install-hook.sh && bash pre-commit/prepare-commit-msg" +} diff --git a/.gitleaks.toml b/.gitleaks.toml new file mode 100644 index 000000000..4451dfaa2 --- /dev/null +++ b/.gitleaks.toml @@ -0,0 +1,83 @@ +title = "Gitleaks for Vineyard" + +[extend] +useDefault = true + +[[rules]] +description = "Alibaba AccessKey ID" +id = "alibaba-access-key-id" +regex = '''(?i)((LTAI)[a-z0-9]+)''' +keywords = [ + "ltai", +] + +[[rules]] +description = "Alibaba AccessKey ID" +id = "alibaba-access-id-in-config" +regex = '''(?i)((access).?id\s*=\s*.+)''' +keywords = [ + "access", +] + +[[rules]] +description = "Alibaba AccessKey ID" +id = "alibaba-access-key-in-config" +regex = '''(?i)((access).?key\s*=\s*.+)''' +keywords = [ + "access", +] + +[[rules]] +description = "Alibaba AccessKey ID" +id = "alibaba-access-secret-in-config" +regex = '''(?i)((access).?secret\s*=\s*.+)''' +keywords = [ + "access", + "secret", +] + +[[rules]] +description = "Alibaba AccessKey ID" +id = "alibaba-access-key-id-in-config" +regex = '''(?i)((access).?key.?id\s*=\s*.+)''' +keywords = [ + "access", +] + +[rules.allowlist] +paths = [ + '''modules/io/python/drivers/io/tests/test_open.py''', + '''modules/io/python/drivers/io/tests/test_serialize.py''', +] + +[[rules]] +description = "Alibaba AccessKey ID" +id = "alibaba-access-key-secret-in-config" +regex = '''(?i)((access).?key.?secret\s*=\s*.+)''' +keywords = [ + "access", + "secret", +] + +[rules.allowlist] +paths = [ + '''modules/io/python/drivers/io/tests/test_open.py''', + '''modules/io/python/drivers/io/tests/test_serialize.py''', +] + +[[rules]] +description = "Alibaba AccessKey ID" +id = "alibaba-secret-access-key-in-config" +regex = '''(?i)((secret).?access.?key\s*=\s*.+)''' +keywords = [ + "access", + "secret", +] + +[allowlist] +paths = [ + '''build''', + '''docs/_build''', + '''docs/_templates/footer.html''', + '''thirdparty''', +] diff --git a/.licenserc.yaml b/.licenserc.yaml new file mode 100644 index 000000000..c6b69dc01 --- /dev/null +++ b/.licenserc.yaml @@ -0,0 +1,52 @@ +header: + license: + spdx-id: Apache-2.0 + copyright-owner: Apache Software Foundation + content: | + Copyright 2022-2023 Alibaba Group Holding Limited. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + paths-ignore: + - 'dist' + - 'licenses' + - '**/*.md' + - 'LICENSE' + - 'NOTICE' + - 'testing' + - 'spark/graphar/src/test/resources' + - 'java/src/test/resources' + - '.licenserc.yaml' + - '.gitignore' + - '.gitleaks.toml' + - '.gitmodules' + - 'pre-commit-config.yaml' + - 'docs' + - '**/.gitignore' + - '**/.scalafix.conf' + - '**/.scalafmt.conf' + - 'cpp/apidoc' + - 'spark/datasources-32/src/main/scala/com/alibaba/graphar/datasources' + - 'spark/datasources-33/src/main/scala/com/alibaba/graphar/datasources' + - '*.md' + - '*.rst' + - '**/*.json' + - 'pyspark/poetry.lock' # This file is generated automatically by Poetry-tool; there is no way to add license header + + comment: on-failure + +# If you don't want to check dependencies' license compatibility, remove the following part +dependency: + files: + - spark/pom.xml # If this is a maven project. + - java/pom.xml # If this is a maven project. diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 000000000..e69de29bb diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..338f09c93 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +# Copyright 2022-2023 Alibaba Group Holding Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +repos: + - repo: https://github.com/zricethezav/gitleaks + rev: v8.15.0 + hooks: + - id: gitleaks + args: + - '--verbose' + diff --git a/_images/edge_logical_table.png b/_images/edge_logical_table.png new file mode 100644 index 000000000..4ac9b001b Binary files /dev/null and b/_images/edge_logical_table.png differ diff --git a/_images/edge_physical_table1.png b/_images/edge_physical_table1.png new file mode 100644 index 000000000..589867c71 Binary files /dev/null and b/_images/edge_physical_table1.png differ diff --git a/_images/edge_physical_table2.png b/_images/edge_physical_table2.png new file mode 100644 index 000000000..047c01f7a Binary files /dev/null and b/_images/edge_physical_table2.png differ diff --git a/_images/overview.png b/_images/overview.png new file mode 100644 index 000000000..b8c101ee7 Binary files /dev/null and b/_images/overview.png differ diff --git a/_images/property_graph.png b/_images/property_graph.png new file mode 100644 index 000000000..5da8c035d Binary files /dev/null and b/_images/property_graph.png differ diff --git a/_images/vertex_logical_table.png b/_images/vertex_logical_table.png new file mode 100644 index 000000000..da3769de0 Binary files /dev/null and b/_images/vertex_logical_table.png differ diff --git a/_images/vertex_physical_table.png b/_images/vertex_physical_table.png new file mode 100644 index 000000000..2794c5938 Binary files /dev/null and b/_images/vertex_physical_table.png differ diff --git a/_modules/graphar_pyspark.html b/_modules/graphar_pyspark.html new file mode 100644 index 000000000..01ec68f98 --- /dev/null +++ b/_modules/graphar_pyspark.html @@ -0,0 +1,356 @@ + + + + + + + + graphar_pyspark - GraphAr + + + + + + + + + + + + + + + + + + + + + + + + Contents + + + + + + Menu + + + + + + + + 
Source code for graphar_pyspark
+# Copyright 2022-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""GraphSession and initialization."""
+
+from pyspark.sql import SparkSession
+
+from graphar_pyspark.errors import GraphArIsNotInitializedError
+
+
+class _GraphArSession:
+    """Singleton GraphAr helper object, that contains SparkSession and JVM.
+
+    It is implemented as a module-level instance of the class.
+    """
+
+    def __init__(self) -> None:
+        self.ss = None
+        self.sc = None
+        self.jvm = None
+        self.graphar = None
+        self.jsc = None
+        self.jss = None
+
+    def set_spark_session(self, spark_session: SparkSession) -> None:
+        self.ss = spark_session  # Python SparkSession
+        self.sc = spark_session.sparkContext  # Python SparkContext
+        self.jvm = spark_session._jvm  # JVM
+        self.graphar = spark_session._jvm.com.alibaba.graphar  # Alias to scala graphar
+        self.jsc = spark_session._jsc  # Java SparkContext
+        self.jss = spark_session._jsparkSession  # Java SparkSession
+
+    def is_initialized(self) -> bool:
+        return self.ss is not None
+
+
+GraphArSession = _GraphArSession()
+
+
+
[docs]def initialize(spark: SparkSession) -> None: + """Initialize GraphAr session. + + :param spark: pyspark SparkSession object. + """ + GraphArSession.set_spark_session( + spark, + ) # modify the global GraphArSession singleton.
+ + +def _check_session() -> None: + if not GraphArSession.is_initialized(): + msg = "GraphArSession is not initialized. Call `graphar_pyspark.initialize` first!" + raise GraphArIsNotInitializedError(msg) +
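A minimal usage sketch for the module above, assuming a SparkSession built with the GraphAr Spark jar already on its classpath (the jar path below is hypothetical):

from pyspark.sql import SparkSession
from graphar_pyspark import initialize

spark = (
    SparkSession.builder
    .appName("graphar-example")
    .config("spark.jars", "/path/to/graphar-spark.jar")  # hypothetical jar location
    .getOrCreate()
)
initialize(spark)  # populates the module-level GraphArSession singleton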
+ + + + + + + + + \ No newline at end of file diff --git a/_modules/graphar_pyspark/enums.html b/_modules/graphar_pyspark/enums.html new file mode 100644 index 000000000..a0d7472d2 --- /dev/null +++ b/_modules/graphar_pyspark/enums.html @@ -0,0 +1,398 @@ + graphar_pyspark.enums - GraphAr
Source code for graphar_pyspark.enums
+# Copyright 2022-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Enumerations and constants."""
+
+from enum import Enum
+
+from py4j.java_gateway import JavaObject
+
+from graphar_pyspark import GraphArSession, _check_session
+
+
+
[docs]class GarType(Enum): + """Main data type in gar enumeration.""" + + BOOL = "bool" + INT32 = "int32" + INT64 = "int64" + FLOAT = "float" + DOUBLE = "double" + STRING = "string" + LIST = "list" + +
[docs] @staticmethod + def from_scala(jvm_obj: JavaObject) -> "GarType": + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + _check_session() + return GarType(GraphArSession.graphar.GarType.GarTypeToString(jvm_obj))
+ +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + _check_session() + return GraphArSession.graphar.GarType.StringToGarType(self.value)
+ + +
[docs]class FileType(Enum): + """Type of file format.""" + + CSV = "csv" + PARQUET = "parquet" + ORC = "orc" + +
[docs] @staticmethod + def from_scala(jvm_obj: JavaObject) -> "FileType": + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + _check_session() + return FileType(GraphArSession.graphar.FileType.FileTypeToString(jvm_obj))
+ +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + _check_session() + return GraphArSession.graphar.FileType.StringToFileType(self.value)
+ + +
[docs]class AdjListType(Enum): + """Adj list type enumeration for adjacency list of graph.""" + + UNORDERED_BY_SOURCE = "unordered_by_source" + UNORDERED_BY_DEST = "unordered_by_dest" + ORDERED_BY_SOURCE = "ordered_by_source" + ORDERED_BY_DEST = "ordered_by_dest" + +
[docs] @staticmethod + def from_scala(jvm_obj: JavaObject) -> "AdjListType": + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + _check_session() + return AdjListType( + GraphArSession.graphar.AdjListType.AdjListTypeToString(jvm_obj), + )
+ +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + _check_session() + return GraphArSession.graphar.AdjListType.StringToAdjListType(self.value)
+
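A small sketch of how these enums behave on the Python side: they are plain string-backed Enums whose values mirror the literals used in GraphAr YAML files, so constructing them needs no Spark session (only from_scala/to_scala go through the JVM helpers above):

from graphar_pyspark.enums import AdjListType, FileType, GarType

assert GarType("int64") is GarType.INT64                 # lookup by string value
assert FileType.PARQUET.value == "parquet"
assert AdjListType("ordered_by_source") is AdjListType.ORDERED_BY_SOURCE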
+ + + + + + + + + \ No newline at end of file diff --git a/_modules/graphar_pyspark/errors.html b/_modules/graphar_pyspark/errors.html new file mode 100644 index 000000000..457c7618f --- /dev/null +++ b/_modules/graphar_pyspark/errors.html @@ -0,0 +1,315 @@ + graphar_pyspark.errors - GraphAr
Source code for graphar_pyspark.errors
+# Copyright 2022-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Custom Exceptions."""
+
+
+
[docs]class InvalidGraphFormatError(ValueError): + """Exception raised when graph format arguments have the wrong format."""
+ + +
[docs]class GraphArIsNotInitializedError(ValueError): + """Exception raised when the GraphAr session is not initialized."""
+ + + + + + + + + \ No newline at end of file diff --git a/_modules/graphar_pyspark/graph.html b/_modules/graphar_pyspark/graph.html new file mode 100644 index 000000000..d353126ef --- /dev/null +++ b/_modules/graphar_pyspark/graph.html @@ -0,0 +1,553 @@ + graphar_pyspark.graph - GraphAr
Source code for graphar_pyspark.graph
+# Copyright 2022-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Bidnings to com.alibaba.graphar.graph."""
+
+from __future__ import annotations
+
+from collections.abc import Mapping
+from dataclasses import dataclass
+from typing import Optional, Union
+
+from py4j.java_gateway import JavaObject
+from pyspark.sql import DataFrame
+
+from graphar_pyspark import GraphArSession, _check_session
+from graphar_pyspark.enums import FileType
+from graphar_pyspark.errors import InvalidGraphFormatError
+from graphar_pyspark.info import GraphInfo
+
+
+
[docs]@dataclass(frozen=True) +class EdgeLabels: + """A triplet that describes an edge. Contains source, edge and dest labels. Immutable.""" + + src_label: str + edge_label: str + dst_label: str
+ + +
[docs]@dataclass(frozen=True) +class GraphReaderResult: + """A simple immutable class that represents the results of reading a graph with GraphReader.""" + + vertex_dataframes: Mapping[str, DataFrame] + edge_dataframes: Mapping[EdgeLabels, Mapping[str, DataFrame]] + +
[docs] @staticmethod + def from_scala( + jvm_result: tuple[ + dict[str, JavaObject], + dict[tuple[str, str, str], dict[str, JavaObject]], + ], + ) -> "GraphReaderResult": + """Create an instance of the Class from JVM method output. + + :param jvm_result: structure, returned from JVM. + :returns: instance of Python Class. + """ + first_dict = {} + first_scala_map = jvm_result._1() + first_scala_map_iter = first_scala_map.keySet().iterator() + + while first_scala_map_iter.hasNext(): + k = first_scala_map_iter.next() + first_dict[k] = DataFrame(first_scala_map.get(k).get(), GraphArSession.ss) + + second_dict = {} + second_scala_map = jvm_result._2() + second_scala_map_iter = second_scala_map.keySet().iterator() + + while second_scala_map_iter.hasNext(): + k = second_scala_map_iter.next() + nested_scala_map = second_scala_map.get(k).get() + nested_scala_map_iter = nested_scala_map.keySet().iterator() + inner_dict = {} + + while nested_scala_map_iter.hasNext(): + kk = nested_scala_map_iter.next() + inner_dict[kk] = DataFrame( + nested_scala_map.get(kk).get(), + GraphArSession.ss, + ) + + second_dict[EdgeLabels(k._1(), k._2(), k._3())] = inner_dict + + return GraphReaderResult( + vertex_dataframes=first_dict, + edge_dataframes=second_dict, + )
+ + +
[docs]class GraphReader: + """The helper object for reading a graph through the definitions of graph info.""" + +
[docs] @staticmethod + def read( + graph_info: Union[GraphInfo, str], + ) -> GraphReaderResult: + """Read the graph as vertex and edge DataFrames with the graph info yaml file or GraphInfo object. + + :param graph_info: The path of the graph info yaml or GraphInfo instance. + :returns: GraphReaderResult that contains vertex and edge dataframes. + """ + _check_session() + if isinstance(graph_info, str): + graph_info = GraphInfo.load_graph_info(graph_info) + + jvm_result = GraphArSession.graphar.graph.GraphReader.readWithGraphInfo( + graph_info.to_scala(), + GraphArSession.jss, + ) + return GraphReaderResult.from_scala(jvm_result)
+ + +
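A usage sketch for GraphReader, assuming an initialized GraphArSession and an existing GraphAr dataset on disk (the yaml path and labels below are hypothetical):

from graphar_pyspark import initialize
from graphar_pyspark.graph import EdgeLabels, GraphReader

initialize(spark)  # spark: an existing SparkSession with the GraphAr jar on its classpath
result = GraphReader.read("/tmp/ldbc_sample/parquet/ldbc_sample.graph.yml")
person_df = result.vertex_dataframes["person"]  # vertex DataFrames are keyed by label
# edge DataFrames are keyed by an EdgeLabels triplet and, in the inner mapping,
# by the adjacency-list type string
knows_dfs = result.edge_dataframes[EdgeLabels("person", "knows", "person")]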
[docs]class GraphWriter: + """The helper class for writing a graph.""" + + def __init__(self, jvm_obj: JavaObject) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + self._jvm_graph_writer_obj = jvm_obj +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_graph_writer_obj
+ +
[docs] @staticmethod + def from_scala(jvm_obj: JavaObject) -> "GraphWriter": + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return GraphWriter(jvm_obj)
+ +
[docs] @staticmethod + def from_python() -> "GraphWriter": + """Create an instance of the Class from Python arguments.""" + return GraphWriter(GraphArSession.graphar.graph.GraphWriter())
+ +
[docs] def put_vertex_data(self, label: str, df: DataFrame, primary_key: str) -> None: + """Put the vertex DataFrame into the writer. + + :param label: label of vertex. + :param df: DataFrame of the vertex type. + :param primary_key: primary key of the vertex type, default is empty, which takes the first property column as primary key. + """ + self._jvm_graph_writer_obj.PutVertexData(label, df._jdf, primary_key)
+ +
[docs] def put_edge_data(self, relation: tuple[str, str, str], df: DataFrame) -> None: + """Put the edge DataFrame into the writer. + + :param relation: 3-tuple (source label, edge label, target label) to indicate the edge type. + :param df: DataFrame of the edge type. + """ + relation_jvm = GraphArSession.jvm.scala.Tuple3( + relation[0], relation[1], relation[2], + ) + self._jvm_graph_writer_obj.PutEdgeData(relation_jvm, df._jdf)
+ +
[docs] def write_with_graph_info(self, graph_info: Union[GraphInfo, str]) -> None: + """Write the graph data in graphar format with graph info. + + Note: the original method is `write` but there is no direct overloading in Python. + + :param graph_info: the graph info object for the graph or the path to the graph info object. + """ + if isinstance(graph_info, str): + self._jvm_graph_writer_obj.write(graph_info, GraphArSession.jss) + else: + self._jvm_graph_writer_obj.write(graph_info.to_scala(), GraphArSession.jss)
+ +
[docs] def write( + self, + path: str, + name: str = "graph", + vertex_chunk_size: Optional[int] = None, + edge_chunk_size: Optional[int] = None, + file_type: Optional[FileType] = None, + version: Optional[str] = None, + ) -> None: + """Write graph data in graphar format. + + Note: for default parameters check com.alibaba.graphar.GeneralParams; + For this method, None for any of the arguments means that the default value will be used. + + :param path: the directory to write. + :param name: the name of the graph, default is 'graph' + :param vertex_chunk_size: the chunk size for vertices, default is 2^18 + :param edge_chunk_size: the chunk size for edges, default is 2^22 + :param file_type: the file type for data payload file, support [parquet, orc, csv], default is parquet. + :param version: version of graphar format, default is v1. + """ + if vertex_chunk_size is None: + vertex_chunk_size = ( + GraphArSession.graphar.GeneralParams.defaultVertexChunkSize + ) + + if edge_chunk_size is None: + edge_chunk_size = GraphArSession.graphar.GeneralParams.defaultEdgeChunkSize + + file_type = ( + GraphArSession.graphar.GeneralParams.defaultFileType + if file_type is None + else file_type.value + ) + + if version is None: + version = GraphArSession.graphar.GeneralParams.defaultVersion + + self._jvm_graph_writer_obj.write( + path, + GraphArSession.jss, + name, + vertex_chunk_size, + edge_chunk_size, + file_type, + version, + )
+ + +
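A sketch of the write path with GraphWriter, reusing the spark session from the earlier initialization sketch; the DataFrames, column names, output path and graph name are illustrative:

from graphar_pyspark.graph import GraphWriter

person_df = spark.createDataFrame([(1, "Alice"), (2, "Bob")], "id long, name string")
knows_df = spark.createDataFrame([(1, 2)], "src long, dst long")

writer = GraphWriter.from_python()
writer.put_vertex_data("person", person_df, primary_key="id")
writer.put_edge_data(("person", "knows", "person"), knows_df)
# None for the optional arguments falls back to the GeneralParams defaults
writer.write("/tmp/graphar_output", name="ldbc_sample")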
[docs]class GraphTransformer: + """The helper object for transforming graphs through the definitions of their infos.""" + +
[docs] @staticmethod + def transform( + source_graph_info: Union[str, GraphInfo], + dest_graph_info: Union[str, GraphInfo], + ) -> None: + """Transform the graphs following the metadata provided or defined in info files. + + Note: both arguments should be strings or GraphInfo instances! Mixed argument types are not supported. + + :param source_graph_info: The path of the graph info yaml file for the source graph OR the info object for the source graph. + :param dest_graph_info: The path of the graph info yaml file for the destination graph OR the info object for the destination graph. + :raise InvalidGraphFormatError: if you pass a mixed format of source and dest graph info. + """ + _check_session() + if isinstance(source_graph_info, str) and isinstance(dest_graph_info, str): + GraphArSession.graphar.graph.GraphTransformer.transform( + source_graph_info, + dest_graph_info, + GraphArSession.jss, + ) + elif isinstance(source_graph_info, GraphInfo) and isinstance( + dest_graph_info, + GraphInfo, + ): + GraphArSession.graphar.graph.GraphTransformer.transform( + source_graph_info.to_scala(), + dest_graph_info.to_scala(), + GraphArSession.jss, + ) + else: + msg = "Both src and dst graph info objects should be of the same type. " + msg += f"But {type(source_graph_info)} and {type(dest_graph_info)} were provided!" + raise InvalidGraphFormatError(msg)
+
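And a sketch for GraphTransformer: both arguments must be of the same kind (two yaml paths or two GraphInfo objects), the paths here being hypothetical:

from graphar_pyspark.graph import GraphTransformer

GraphTransformer.transform(
    "/tmp/ldbc_sample/parquet/ldbc_sample.graph.yml",  # source graph info yaml
    "/tmp/ldbc_sample/orc/ldbc_sample.graph.yml",      # destination graph info yaml
)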
+ + + + + + + + + \ No newline at end of file diff --git a/_modules/graphar_pyspark/info.html b/_modules/graphar_pyspark/info.html new file mode 100644 index 000000000..fb68f1e3f --- /dev/null +++ b/_modules/graphar_pyspark/info.html @@ -0,0 +1,1799 @@ + graphar_pyspark.info - GraphAr
Source code for graphar_pyspark.info
+# Copyright 2022-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Bindings to com.alibaba.graphar info classes."""
+
+# Because we are using type hints, we need to define a few custom TypeVars
+# to describe the return types of classmethods.
+
+from __future__ import annotations
+
+import os
+from collections.abc import Sequence
+from typing import Optional, TypeVar, Union
+
+from py4j.java_collections import JavaList
+from py4j.java_gateway import JavaObject
+
+from graphar_pyspark import GraphArSession, _check_session
+from graphar_pyspark.enums import AdjListType, FileType, GarType
+
+# Return type of Property classmethods
+PropertyType = TypeVar("PropertyType", bound="Property")
+
+
+
[docs]class Property: + """The property information of vertex or edge.""" + + def __init__( + self, + name: Optional[str], + data_type: Optional[GarType], + is_primary: Optional[bool], + is_nullable: Optional[bool], + jvm_obj: Optional[JavaObject] = None, + ) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + if jvm_obj is not None: + self._jvm_property_obj = jvm_obj + else: + property_pyobj = GraphArSession.graphar.Property() + property_pyobj.setName(name) + property_pyobj.setData_type(data_type.value) + property_pyobj.setIs_primary(is_primary) + property_pyobj.setIs_nullable(is_nullable) + + self._jvm_property_obj = property_pyobj + +
[docs] def get_name(self) -> str: + """Get name from corresponding JVM object. + + :returns: name + """ + return self._jvm_property_obj.getName()
+ +
[docs] def set_name(self, name: str) -> None: + """Mutate corresponding JVM object. + + :param name: name + """ + self._jvm_property_obj.setName(name)
+ +
[docs] def get_data_type(self) -> GarType: + """Get data type from corresponding JVM object. + + :returns: data type + """ + return GarType(self._jvm_property_obj.getData_type())
+ +
[docs] def set_data_type(self, data_type: GarType) -> None: + """Mutate corresponding JVM object. + + :param data_type: data type + """ + self._jvm_property_obj.setData_type(data_type.value)
+ +
[docs] def get_is_primary(self) -> bool: + """Get is primary flag from corresponding JVM object. + + :returns: is primary + """ + return self._jvm_property_obj.getIs_primary()
+ +
[docs] def set_is_primary(self, is_primary: bool) -> None: + """Mutate corresponding JVM object. + + :param is_primary: is primary + """ + self._jvm_property_obj.setIs_primary(is_primary)
+ +
[docs] def set_is_nullable(self, is_nullable: bool) -> None: + """Mutate corresponding JVM object. + + :param is_nullable: is nullable + """ + self._jvm_property_obj.setIs_nullable(is_nullable)
+ +
[docs] def get_is_nullable(self) -> bool: + """Get is nullable flag from corresponding JVM object. + + :returns: is nullable + """ + return self._jvm_property_obj.getIs_nullable()
+ +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_property_obj
+ +
[docs] @classmethod + def from_scala(cls: type[PropertyType], jvm_obj: JavaObject) -> PropertyType: + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return cls(None, None, None, None, jvm_obj)
+ +
[docs] @classmethod + def from_python( + cls: type[PropertyType], + name: str, + data_type: GarType, + is_primary: bool, + is_nullable: Optional[bool] = None, + ) -> PropertyType: + """Create an instance of the Class from Python arguments. + + :param name: property name + :param data_type: property data type + :param is_primary: flag that property is primary + :param is_nullable: flag that property is nullable (optional, default is None) + :returns: instance of Python Class. + """ + return cls(name, data_type, is_primary, is_nullable, None)
+ + def __eq__(self, other: object) -> bool: + if not isinstance(other, Property): + return False + + return ( + (self.get_name() == other.get_name()) + and (self.get_data_type() == other.get_data_type()) + and (self.get_is_primary() == other.get_is_primary()) + and (self.get_is_nullable() == other.get_is_nullable()) + )
+ + +# Return type of PropertyGroup classmethods +PropertyGroupType = TypeVar("PropertyGroupType", bound="PropertyGroup") + + +
[docs]class PropertyGroup: + """PropertyGroup is a class to store the property group information.""" + + def __init__( + self, + prefix: Optional[str], + file_type: Optional[FileType], + properties: Optional[Sequence[Property]], + jvm_obj: Optional[JavaObject], + ) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + if jvm_obj is not None: + self._jvm_property_group_obj = jvm_obj + else: + property_group = GraphArSession.graphar.PropertyGroup() + property_group.setPrefix(prefix) + property_group.setFile_type(file_type.value) + property_group.setProperties( + [py_property.to_scala() for py_property in properties], + ) + self._jvm_property_group_obj = property_group + +
[docs] def get_prefix(self) -> str: + """Get prefix from the corresponding JVM object. + + :returns: prefix + """ + return self._jvm_property_group_obj.getPrefix()
+ +
[docs] def set_prefix(self, prefix: str) -> None: + """Mutate the corresponding JVM object. + + :param prefix: prefix + """ + self._jvm_property_group_obj.setPrefix(prefix)
+ +
[docs] def get_file_type(self) -> FileType: + """Get file type from the corresponding JVM object. + + :returns: FileType + """ + return FileType(self._jvm_property_group_obj.getFile_type())
+ +
[docs] def set_file_type(self, file_type: FileType) -> None: + """Mutate the corresponding JVM object. + + :param file_type: FileType + """ + self._jvm_property_group_obj.setFile_type(file_type.value)
+ +
[docs] def get_properties(self) -> Sequence[Property]: + """Get properties from the corresponding JVM object. + + :returns: list of Properties + """ + return [ + Property.from_scala(jvm_property) + for jvm_property in self._jvm_property_group_obj.getProperties() + ]
+ +
[docs] def set_properties(self, properties: Sequence[Property]) -> None: + """Mutate the corresponding JVM object. + + :param properties: list of Properties + """ + self._jvm_property_group_obj.setProperties( + [py_property.to_scala() for py_property in properties], + )
+ +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_property_group_obj
+ +
[docs] @classmethod + def from_scala( + cls: type[PropertyGroupType], + jvm_obj: JavaObject, + ) -> PropertyGroupType: + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return cls(None, None, None, jvm_obj)
+ +
[docs] @classmethod + def from_python( + cls: type[PropertyGroupType], + prefix: str, + file_type: FileType, + properties: Sequence[Property], + ) -> PropertyGroupType: + """Create an instance of the class from Python args. + + :param prefix: path prefix + :param file_type: type of file + :param properties: list of properties + """ + return cls(prefix, file_type, properties, None)
+ + def __eq__(self, other: object) -> bool: + if not isinstance(other, PropertyGroup): + return False + + return ( + (self.get_prefix() == other.get_prefix()) + and (self.get_file_type() == other.get_file_type()) + and (len(self.get_properties()) == len(other.get_properties())) + and all( + p_left == p_right + for p_left, p_right in zip( + self.get_properties(), + other.get_properties(), + ) + ) + )
+ + +# Return type of VertexInfo classmethods +VertexInfoType = TypeVar("VertexInfoType", bound="VertexInfo") + + +
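A construction sketch for Property and PropertyGroup; it requires an initialized GraphArSession, because from_python creates the backing JVM objects, and the property names, prefix and file type are illustrative:

from graphar_pyspark.enums import FileType, GarType
from graphar_pyspark.info import Property, PropertyGroup

id_prop = Property.from_python("id", GarType.INT64, is_primary=True)
name_prop = Property.from_python("name", GarType.STRING, is_primary=False)
id_name_group = PropertyGroup.from_python(
    "id_name/", FileType.PARQUET, [id_prop, name_prop]
)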
[docs]class VertexInfo: + """VertexInfo is a class to store the vertex meta information.""" + + def __init__( + self, + label: Optional[str], + chunk_size: Optional[int], + prefix: Optional[str], + property_groups: Optional[Sequence[PropertyGroup]], + version: Optional[str], + jvm_obj: Optional[JavaObject], + ) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + if jvm_obj is not None: + self._jvm_vertex_info_obj = jvm_obj + else: + vertex_info = GraphArSession.graphar.VertexInfo() + vertex_info.setLabel(label) + vertex_info.setChunk_size(chunk_size) + vertex_info.setPrefix(prefix) + vertex_info.setProperty_groups( + [py_property_group.to_scala() for py_property_group in property_groups], + ) + vertex_info.setVersion(version) + self._jvm_vertex_info_obj = vertex_info + +
[docs] def get_label(self) -> str: + """Get label from the corresponding JVM object. + + :returns: label + """ + return self._jvm_vertex_info_obj.getLabel()
+ +
[docs] def set_label(self, label: str) -> None: + """Mutate the corresponding JVM object. + + :param label: new label + """ + self._jvm_vertex_info_obj.setLabel(label)
+ +
[docs] def get_chunk_size(self) -> int: + """Get chunk size from the corresponding JVM object. + + :returns: chunk size + """ + return self._jvm_vertex_info_obj.getChunk_size()
+ +
[docs] def set_chunk_size(self, chunk_size: int) -> None: + """Mutate the corresponding JVM object. + + :param chunk_size: new chunk size + """ + self._jvm_vertex_info_obj.setChunk_size(chunk_size)
+ +
[docs] def get_prefix(self) -> str: + """Get prefix from the corresponding JVM object. + + :returns: prefix + """ + return self._jvm_vertex_info_obj.getPrefix()
+ +
[docs] def set_prefix(self, prefix: str) -> None: + """Mutate the corresponding JVM object. + + :param prefix: the new prefix. + """ + self._jvm_vertex_info_obj.setPrefix(prefix)
+ +
[docs] def get_property_groups(self) -> Sequence[PropertyGroup]: + """Get property groups from the corresponding JVM object. + + :returns: property groups + """ + return [ + PropertyGroup.from_scala(jvm_property_group) + for jvm_property_group in self._jvm_vertex_info_obj.getProperty_groups() + ]
+ +
[docs] def set_property_groups(self, property_groups: Sequence[PropertyGroup]) -> None: + """Mutate the corresponding JVM object. + + :param property_groups: new property groups + """ + self._jvm_vertex_info_obj.setProperty_groups( + [py_property_group.to_scala() for py_property_group in property_groups], + )
+ +
[docs] def get_version(self) -> str: + """Get version from the corresponding JVM object. + + :returns: version + """ + return self._jvm_vertex_info_obj.getVersion()
+ +
[docs] def set_version(self, version: str) -> None: + """Mutate the corresponding JVM object. + + :param version: the new version. + """ + self._jvm_vertex_info_obj.setVersion(version)
+ +
[docs] def contain_property_group(self, property_group: PropertyGroup) -> bool: + """Check if the vertex info contains the property group. + + :param property_group: the property group to check. + :returns: true if the vertex info contains the property group, otherwise false. + """ + return self._jvm_vertex_info_obj.containPropertyGroup(property_group.to_scala())
+ +
[docs] def contain_property(self, property_name: str) -> bool: + """Check if the vertex info contains certain property. + + :param property_name: name of the property. + :returns: true if the vertex info contains the property, otherwise false. + """ + return self._jvm_vertex_info_obj.containProperty(property_name)
+ +
[docs] def get_property_group(self, property_name: str) -> PropertyGroup: + """Get the property group that contains property. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_name: name of the property. + :returns: property group that contains the property, otherwise raise IllegalArgumentException error. + """ + return PropertyGroup.from_scala( + self._jvm_vertex_info_obj.getPropertyGroup(property_name), + )
+ +
[docs] def get_property_type(self, property_name: str) -> GarType: + """Get the data type of property. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_name: name of the property. + :returns: the data type in gar of the property. If the vertex info does not contains the property, raise IllegalArgumentException error. + """ + return GarType.from_scala( + self._jvm_vertex_info_obj.getPropertyType(property_name), + )
+ +
[docs] def is_primary_key(self, property_name: str) -> bool: + """Check if the property is the primary key. + + :param property_name: name of the property to check. + :returns: true if the property is the primary key of vertex info, otherwise return false. + """ + return self._jvm_vertex_info_obj.isPrimaryKey(property_name)
+ +
[docs] def get_primary_key(self) -> str: + """Get primary key of vertex info. + + :returns: name of the primary key. + """ + return self._jvm_vertex_info_obj.getPrimaryKey()
+ +
[docs] def is_nullable_key(self, property_name: str) -> bool: + """Check if the property is a nullable key. + + :param property_name: name of the property to check. + :returns: true if the property is a nullable key of vertex info, otherwise return false. + """ + return self._jvm_vertex_info_obj.isNullableKey(property_name)
+ +
[docs] def is_validated(self) -> bool: + """Check if the vertex info is validated. + + :returns: true if the vertex info is validated, otherwise return false. + """ + return self._jvm_vertex_info_obj.isValidated()
+ +
[docs] def get_vertices_num_file_path(self) -> str: + """Get the vertex num file path of vertex info. + + :returns: vertex num file path of vertex info. + """ + return self._jvm_vertex_info_obj.getVerticesNumFilePath()
+ +
[docs] def get_file_path(self, property_group: PropertyGroup, chunk_index: int) -> str: + """Get the chunk file path of property group of vertex chunk. + + :param property_group: the property group. + :param chunk_index: the index of vertex chunk + :returns: chunk file path. + + """ + return self._jvm_vertex_info_obj.getFilePath( + property_group.to_scala(), + chunk_index, + )
+ +
[docs] def get_path_prefix(self, property_group: PropertyGroup) -> str: + """Get the path prefix for the specified property group. + + :param property_group: the property group. + :returns: the path prefix of the property group chunk files. + """ + return self._jvm_vertex_info_obj.getPathPrefix(property_group.to_scala())
+ +
[docs] def dump(self) -> str: + """Dump to Yaml string. + + :returns: yaml string + """ + return self._jvm_vertex_info_obj.dump()
+ +
[docs] @staticmethod + def load_vertex_info(vertex_info_path: str) -> "VertexInfo": + """Load a yaml file from path and construct a VertexInfo from it. + + :param vertex_info_path: yaml file path + :returns: VertexInfo object + """ + return VertexInfo.from_scala( + GraphArSession.graphar.VertexInfo.loadVertexInfo( + vertex_info_path, + GraphArSession.jss, + ), + )
+ +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_vertex_info_obj
+ +
[docs] @classmethod + def from_scala(cls: type[VertexInfoType], jvm_obj: JavaObject) -> VertexInfoType: + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return VertexInfo( + None, + None, + None, + None, + None, + jvm_obj, + )
+ +
[docs] @classmethod + def from_python( + cls: type[VertexInfoType], + label: str, + chunk_size: int, + prefix: str, + property_groups: Sequence[PropertyGroup], + version: str, + ) -> VertexInfoType: + """Create an instance of the class based on python args. + + :param label: label of the vertex + :param chunk_size: chunk size + :param prefix: vertex prefix + :param property_groups: list of property groups + :param version: version of GAR + """ + return VertexInfo(label, chunk_size, prefix, property_groups, version, None)
+ + +# Return type of AdjList classmethods +AdjListClassType = TypeVar("AdjListClassType", bound="AdjList") + + +
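A construction sketch for VertexInfo, reusing the property group from the previous sketch; the label, chunk size, prefix and version strings are illustrative values:

from graphar_pyspark.info import VertexInfo

person_info = VertexInfo.from_python(
    label="person",
    chunk_size=1024,
    prefix="vertex/person/",
    property_groups=[id_name_group],
    version="gar/v1",
)
print(person_info.get_primary_key())  # expected to be "id" for the group above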
[docs]class AdjList: + """AdjList is a class to store the adj list information of edge.""" + + def __init__( + self, + ordered: Optional[bool], + aligned_by: Optional[str], + prefix: Optional[str], + file_type: Optional[FileType], + jvm_obj: Optional[JavaObject], + ) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + if jvm_obj is not None: + self._jvm_adj_list_obj = jvm_obj + else: + jvm_adj_list = GraphArSession.graphar.AdjList() + jvm_adj_list.setOrdered(ordered) + jvm_adj_list.setAligned_by(aligned_by) + jvm_adj_list.setPrefix(prefix) + jvm_adj_list.setFile_type(file_type.value) + self._jvm_adj_list_obj = jvm_adj_list + +
[docs] def get_ordered(self) -> bool: + """Get ordered flag from the corresponding JVM object. + + :returns: ordered + """ + return self._jvm_adj_list_obj.getOrdered()
+ +
[docs] def set_ordered(self, ordered: bool) -> None: + """Mutate the corresponding JVM object. + + :param ordered: new ordered flag + """ + self._jvm_adj_list_obj.setOrdered(ordered)
+ +
[docs] def get_aligned_by(self) -> str: + """Get aligned_by from the corresponding JVM object. + + :returns: aligned by as a string ("src", "dst") + """ + return self._jvm_adj_list_obj.getAligned_by()
+ +
[docs] def set_aligned_by(self, aligned_by: str) -> None: + """Mutate the corresponding JVM object. + + :param aligned_by: the new aligned_by (recommended to use "src" or "dst") + + """ + self._jvm_adj_list_obj.setAligned_by(aligned_by)
+ +
[docs] def get_prefix(self) -> str: + """Get prefix from the corresponding JVM object. + + :returns: prefix + """ + return self._jvm_adj_list_obj.getPrefix()
+ +
[docs] def set_prefix(self, prefix: str) -> None: + """Mutate the corresponding JVM object. + + :param prefix: the new prefix + + """ + self._jvm_adj_list_obj.setPrefix(prefix)
+ +
[docs] def get_file_type(self) -> FileType: + """Get FileType (as Enum) from the corresponding JVM object. + + :returns: file type + """ + return FileType(self._jvm_adj_list_obj.getFile_type())
+ +
[docs] def set_file_type(self, file_type: FileType) -> None: + """Mutate the corresponding JVM object. + + :param file_type: the new file type + """ + self._jvm_adj_list_obj.setFile_type(file_type.value)
+ +
[docs] def get_adj_list_type(self) -> AdjListType: + """Get adj list type. + + :returns: adj list type. + """ + return AdjListType(self._jvm_adj_list_obj.getAdjList_type())
+ +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_adj_list_obj
+ +
[docs] @classmethod + def from_scala( + cls: type[AdjListClassType], + jvm_obj: JavaObject, + ) -> AdjListClassType: + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return AdjList(None, None, None, None, jvm_obj)
+ +
[docs] @classmethod + def from_python( + cls: type[AdjListClassType], + ordered: bool, + aligned_by: str, + prefix: str, + file_type: FileType, + ) -> AdjListClassType: + """Create an instance of the class from python arguments. + + :param ordered: ordered flag + :param aligned_by: recommended values are "src" or "dst" + :param prefix: path prefix + :param file_type: file type + """ + if not prefix.endswith(os.sep): + prefix += os.sep + return AdjList(ordered, aligned_by, prefix, file_type, None)
+ + def __eq__(self, other: object) -> bool: + if not isinstance(other, AdjList): + return False + + return ( + (self.get_ordered() == other.get_ordered()) + and (self.get_aligned_by() == other.get_aligned_by()) + and (self.get_prefix() == other.get_prefix()) + and (self.get_file_type() == other.get_file_type()) + )
+ + +# Return type of EdgeInfo classmethods +EdgeInfoType = TypeVar("EdgeInfoType", bound="EdgeInfo") + + +
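A construction sketch for AdjList; from_python appends a path separator to the prefix when it is missing, and the prefix and file type here are illustrative:

from graphar_pyspark.enums import FileType
from graphar_pyspark.info import AdjList

ordered_by_src = AdjList.from_python(
    ordered=True,
    aligned_by="src",
    prefix="adj_list/ordered_by_source",
    file_type=FileType.PARQUET,
)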
[docs]class EdgeInfo: + """Edge info is a class to store the edge meta information.""" + + def __init__( + self, + src_label: Optional[str], + edge_label: Optional[str], + dst_label: Optional[str], + chunk_size: Optional[int], + src_chunk_size: Optional[int], + dst_chunk_size: Optional[int], + directed: Optional[bool], + prefix: Optional[str], + adj_lists: Sequence[AdjList], + property_groups: Optional[Sequence[PropertyGroup]], + version: Optional[str], + jvm_edge_info_obj: JavaObject, + ) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + if jvm_edge_info_obj is not None: + self._jvm_edge_info_obj = jvm_edge_info_obj + else: + edge_info = GraphArSession.graphar.EdgeInfo() + edge_info.setSrc_label(src_label) + edge_info.setEdge_label(edge_label) + edge_info.setDst_label(dst_label) + edge_info.setChunk_size(chunk_size) + edge_info.setSrc_chunk_size(src_chunk_size) + edge_info.setDst_chunk_size(dst_chunk_size) + edge_info.setDirected(directed) + edge_info.setPrefix(prefix) + edge_info.setAdj_lists( + [py_adj_list.to_scala() for py_adj_list in adj_lists], + ) + edge_info.setProperty_groups( + [py_property_group.to_scala() for py_property_group in property_groups], + ) + edge_info.setVersion(version) + self._jvm_edge_info_obj = edge_info + +
[docs] def get_src_label(self) -> str: + """Get src label from the corresponding JVM object. + + :returns: src label + """ + return self._jvm_edge_info_obj.getSrc_label()
+ +
[docs] def set_src_label(self, src_label: str) -> None: + """Mutate the corresponding JVM object. + + :param src_label: the new src label + """ + self._jvm_edge_info_obj.setSrc_label(src_label)
+ +
[docs] def get_edge_label(self) -> str: + """Get edge label from the corresponding JVM object. + + :returns: edge label + """ + return self._jvm_edge_info_obj.getEdge_label()
+ +
[docs] def set_edge_label(self, edge_label: str) -> None: + """Mutate the corresponding JVM object. + + :param edge_label: the new edge label + """ + self._jvm_edge_info_obj.setEdge_label(edge_label)
+ +
[docs] def get_dst_label(self) -> str: + """Get dst label from the corresponding JVM object. + + :returns: dst label + """ + return self._jvm_edge_info_obj.getDst_label()
+ +
[docs] def set_dst_label(self, dst_label: str) -> None: + """Mutate the corresponding JVM object. + + :param dst_label: the new dst label + """ + self._jvm_edge_info_obj.setDst_label(dst_label)
+ +
[docs] def get_chunk_size(self) -> int: + """Get chunk size from the corresponding JVM object. + + :returns: chunk size + """ + return self._jvm_edge_info_obj.getChunk_size()
+ +
[docs] def set_chunk_size(self, chunk_size: int) -> None: + """Mutate the corresponding JVM object. + + :param chunk_size: the new chunk size + """ + self._jvm_edge_info_obj.setChunk_size(chunk_size)
+ +
[docs] def get_src_chunk_size(self) -> int: + """Get source chunk size from the corresponding JVM object. + + :returns: source chunk size + """ + return self._jvm_edge_info_obj.getSrc_chunk_size()
+ +
[docs] def set_src_chunk_size(self, src_chunk_size: int) -> None: + """Mutate the corresponding JVM object. + + :param src_chunk_size: the new source chunk size. + """ + self._jvm_edge_info_obj.setSrc_chunk_size(src_chunk_size)
+ +
[docs] def get_dst_chunk_size(self) -> int: + """Get dest chunk size from the corresponding JVM object. + + :returns: destination chunk size + """ + return self._jvm_edge_info_obj.getDst_chunk_size()
+ +
[docs] def set_dst_chunk_size(self, dst_chunk_size: int) -> None: + """Mutate the corresponding JVM object. + + :param dst_chunk_size: the new destination chunk size. + """ + self._jvm_edge_info_obj.setDst_chunk_size(dst_chunk_size)
+ +
[docs] def get_directed(self) -> bool: + """Get directed flag from the corresponding JVM object. + + :returns: directed flag + """ + return self._jvm_edge_info_obj.getDirected()
+ +
[docs] def set_directed(self, directed: bool) -> None: + """Mutate the corresponding JVM object. + + :param directed: the new directed flag + """ + self._jvm_edge_info_obj.setDirected(directed)
+ +
[docs] def get_prefix(self) -> str: + """Get prefix from the corresponding JVM object. + + :returns: prefix + """ + return self._jvm_edge_info_obj.getPrefix()
+ +
[docs] def set_prefix(self, prefix: str) -> None: + """Mutate the corresponding JVM object. + + :param prefix: the new prefix + """ + self._jvm_edge_info_obj.setPrefix(prefix)
+ +
[docs] def get_adj_lists(self) -> Sequence[AdjList]: + """Get adj lists from the corresponding JVM object. + + :returns: sequence of AdjList + """ + return [ + AdjList.from_scala(jvm_adj_list) + for jvm_adj_list in self._jvm_edge_info_obj.getAdj_lists() + ]
+ +
[docs] def set_adj_lists(self, adj_lists: Sequence[AdjList]) -> None: + """Mutate the corresponding JVM object. + + :param adj_lists: the new adj lists, sequence of AdjList + """ + self._jvm_edge_info_obj.setAdj_lists( + [py_adj_list.to_scala() for py_adj_list in adj_lists], + )
+ +
[docs] def get_property_groups(self) -> Sequence[PropertyGroup]: + """Get the property groups of adj list type. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :returns: property groups of edge info. + """ + return [ + PropertyGroup.from_scala(jvm_property_group) + for jvm_property_group in self._jvm_edge_info_obj.getProperty_groups() + ]
+ +
[docs] def set_property_groups(self, property_groups: Sequence[PropertyGroup]) -> None: + """Mutate the corresponding JVM object. + + :param property_groups: the new property groups, sequence of PropertyGroup + """ + self._jvm_edge_info_obj.setProperty_groups( + [py_property_group.to_scala() for py_property_group in property_groups], + )
+ +
[docs] def get_version(self) -> str: + """Get GAR version from the corresponding JVM object. + + :returns: GAR version + """ + return self._jvm_edge_info_obj.getVersion()
+ +
[docs] def set_version(self, version: str) -> None: + """Mutate the corresponding JVM object. + + :param version: the new GAR version + """ + self._jvm_edge_info_obj.setVersion(version)
+ +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_edge_info_obj
+ +
[docs] @classmethod + def from_scala(cls: type[EdgeInfoType], jvm_obj: JavaObject) -> EdgeInfoType: + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return EdgeInfo( + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + jvm_obj, + )
+ +
[docs] @classmethod + def from_python( + cls: type[EdgeInfoType], + src_label: str, + edge_label: str, + dst_label: str, + chunk_size: int, + src_chunk_size: int, + dst_chunk_size: int, + directed: bool, + prefix: str, + adj_lists: Sequence[AdjList], + property_groups: Sequence[PropertyGroup], + version: str, + ) -> EdgeInfoType: + """Create an instance of the class from python arguments. + + :param src_label: source vertex label + :param edge_label: edge label + :param dst_label: destination vertex label + :param chunk_size: chunk size + :param src_chunk_size: source chunk size + :param dst_chunk_size: destination chunk size + :param directed: directed graph flag + :param prefix: path prefix + :param adj_lists: sequence of AdjList objects + :param property_groups: sequence of PropertyGroup objects + :param version: version of GAR format + """ + if not prefix.endswith(os.sep): + prefix += os.sep + + return EdgeInfo( + src_label, + edge_label, + dst_label, + chunk_size, + src_chunk_size, + dst_chunk_size, + directed, + prefix, + adj_lists, + property_groups, + version, + None, + )
+ +
[docs] def contain_adj_list(self, adj_list_type: AdjListType) -> bool: + """Check if the edge info supports the adj list type. + + :param adj_list_type: adjList type in gar to check. + :returns: true if edge info supports the adj list type, otherwise return false. + """ + return self._jvm_edge_info_obj.containAdjList(adj_list_type.to_scala())
+ +
[docs] def get_adj_list_prefix(self, adj_list_type: AdjListType) -> str: + """Get path prefix of adj list type. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param adj_list_type: The input adj list type in gar. + :returns: path prefix of the adj list type, if edge info not support the adj list type, raise an IllegalArgumentException error. + """ + return self._jvm_edge_info_obj.getAdjListPrefix(adj_list_type.to_scala())
+ +
[docs] def get_adj_list_file_type(self, adj_list_type: AdjListType) -> FileType: + """Get the adj list topology chunk file type of adj list type. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param adj_list_type: the input adj list type. + :returns: file format type in gar of the adj list type, if edge info not support the adj list type, + raise an IllegalArgumentException error. + """ + return FileType.from_scala( + self._jvm_edge_info_obj.getAdjListFileType(adj_list_type.to_scala()), + )
+ +
[docs] def contain_property_group( + self, + property_group: PropertyGroup, + ) -> bool: + """Check if the edge info contains the property group. + + :param property_group: the property group to check. + :returns: true if the edge info contains the property group, otherwise false. + """ + return self._jvm_edge_info_obj.containPropertyGroup( + property_group.to_scala(), + )
+ +
[docs] def contain_property(self, property_name: str) -> bool: + """Check if the edge info contains the property. + + :param property_name: name of the property. + :returns: true if edge info contains the property, otherwise false. + """ + return self._jvm_edge_info_obj.containProperty(property_name)
+ +
[docs] def get_property_group( + self, + property_name: str, + ) -> PropertyGroup: + """Get the property group that contains the property. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_name: name of the property. + :returns: property group that contains the property. If the edge info does not contain a property group with the property, + raise an error. + """ + return PropertyGroup.from_scala( + self._jvm_edge_info_obj.getPropertyGroup(property_name), + )
+ +
[docs] def get_property_type(self, property_name: str) -> GarType: + """Get the data type of the property. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_name: name of the property. + :returns: data type in gar of the property. If the edge info does not contain the property, raise an IllegalArgumentException error. + """ + return GarType.from_scala( + self._jvm_edge_info_obj.getPropertyType(property_name), + )
+ +
[docs] def is_primary_key(self, property_name: str) -> bool: + """Check whether the property is the primary key of the edge info. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_name: name of the property. + :returns: true if the property is the primary key of edge info, false if not. If + the edge info does not contain the property, raise an IllegalArgumentException error. + """ + return self._jvm_edge_info_obj.isPrimaryKey(property_name)
+ +
[docs] def is_nullable_key(self, property_name: str) -> bool: + """Check whether the property is a nullable key of the edge info. + + :param property_name: name of the property. + :returns: true if the property is a nullable key of edge info, false if not. If + the edge info does not contain the property, raise an IllegalArgumentException error. + """ + return self._jvm_edge_info_obj.isNullableKey(property_name)
+ +
[docs] def get_primary_key(self) -> str: + """Get the primary key of edge info. + + :returns: primary key of edge info. + """ + return self._jvm_edge_info_obj.getPrimaryKey()
+ +
[docs] def is_validated(self) -> bool: + """Check if the edge info is validated. + + :returns: true if edge info is validated or false if not. + """ + return self._jvm_edge_info_obj.isValidated()
+ +
[docs] def get_vertices_num_file_path(self, adj_list_type: AdjListType) -> str: + """Get the vertex num file path. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param adj_list_type: type of adj list structure. + :returns: the vertex num file path. If the edge info does not support the adj list type, + raise an IllegalArgumentException error. + """ + return self._jvm_edge_info_obj.getVerticesNumFilePath(adj_list_type.to_scala())
+ +
[docs] def get_edges_num_path_prefix(self, adj_list_type: AdjListType) -> str: + """Get the path prefix of the edge num file path. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param adj_list_type: type of adj list structure. + :returns: the edge num file path. If the edge info does not support the adj list type, raise + an IllegalArgumentException error. + """ + return self._jvm_edge_info_obj.getEdgesNumPathPrefix(adj_list_type.to_scala())
+ +
[docs] def get_edges_num_file_path( + self, + chunk_index: int, + adj_list_type: AdjListType, + ) -> str: + """Get the edge num file path of the vertex chunk. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param chunk_index: index of vertex chunk. + :param adj_list_type: type of adj list structure. + :returns: the edge num file path. If the edge info does not support the adj list type, raise + an IllegalArgumentException error. + """ + return self._jvm_edge_info_obj.getEdgesNumFilePath( + chunk_index, + adj_list_type.to_scala(), + )
+ +
[docs] def get_adj_list_offset_file_path( + self, + chunk_index: int, + adj_list_type: AdjListType, + ) -> str: + """Get the adj list offset chunk file path of the vertex chunk. The offset chunks are aligned with the vertex chunks. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param chunk_index: index of vertex chunk. + :param adj_list_type: type of adj list structure. + :returns: the offset chunk file path. If the edge info does not support the adj list type, raise an IllegalArgumentException error. + + """ + return self._jvm_edge_info_obj.getAdjListOffsetFilePath( + chunk_index, + adj_list_type.to_scala(), + )
+ +
[docs] def get_offset_path_prefix(self, adj_list_type: AdjListType) -> str: + """Get the path prefix of the adjacency list offset for the given adjacency list type. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param adj_list_type: type of adj list structure. + :returns: the path prefix of the offset. If the edge info does not support the adj list type, raise an IllegalArgumentException error. + + """ + return self._jvm_edge_info_obj.getOffsetPathPrefix(adj_list_type.to_scala())
+ +
[docs] def get_adj_list_file_path( + self, + vertex_chunk_index: int, + chunk_index: int, + adj_list_type: AdjListType, + ) -> str: + """Get the file path of adj list topology chunk. + + :param vertex_chunk_index: index of vertex chunk. + :param chunk_index: index of edge chunk. + :param adj_list_type: type of adj list structure. + :returns: adj list chunk file path. + """ + return self._jvm_edge_info_obj.getAdjListFilePath( + vertex_chunk_index, + chunk_index, + adj_list_type.to_scala(), + )
+ +
[docs] def get_adj_list_path_prefix( + self, + vertex_chunk_index: Optional[int], + adj_list_type: AdjListType, + ) -> str: + """Get the path prefix of adj list topology chunk of certain vertex chunk. + + :param vertex_chunk_index: index of vertex chunk (optional). + :param adj_list_type: type of adj list structure. + :returns: path prefix of the edge chunk of vertices of given vertex chunk. + """ + if vertex_chunk_index is None: + return self._jvm_edge_info_obj.getAdjListPathPrefix( + adj_list_type.to_scala(), + ) + + return self._jvm_edge_info_obj.getAdjListPathPrefix( + vertex_chunk_index, + adj_list_type.to_scala(), + )
+ +
[docs] def get_property_file_path( + self, + property_group: PropertyGroup, + adj_list_type: AdjListType, + vertex_chunk_index: int, + chunk_index: int, + ) -> str: + """Get the chunk file path of the adj list property group. The property group chunks are aligned with the adj list topology chunks. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_group: property group + :param adj_list_type: type of adj list structure. + :param vertex_chunk_index: index of vertex chunk. + :param chunk_index: index of edge chunk. + :returns: property group chunk file path. If the edge info does not contain the property group, raise an IllegalArgumentException error. + """ + return self._jvm_edge_info_obj.getPropertyFilePath( + property_group.to_scala(), + adj_list_type.to_scala(), + vertex_chunk_index, + chunk_index, + )
+ +
[docs] def get_property_group_path_prefix( + self, + property_group: PropertyGroup, + adj_list_type: AdjListType, + vertex_chunk_index: Optional[int] = None, + ) -> str: + """Get the path prefix of the adj list property group of a certain vertex chunk. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_group: property group. + :param adj_list_type: type of adj list structure. + :param vertex_chunk_index: index of vertex chunk (optional, default is None). + :returns: path prefix of property group chunks of vertices of the given vertex + chunk. If the edge info does not contain the property group, raise an IllegalArgumentException error. + """ + if vertex_chunk_index is not None: + return self._jvm_edge_info_obj.getPropertyGroupPathPrefix( + property_group.to_scala(), + adj_list_type.to_scala(), + vertex_chunk_index, + ) + + return self._jvm_edge_info_obj.getPropertyGroupPathPrefix( + property_group.to_scala(), + adj_list_type.to_scala(), + )
+ +
[docs] def get_concat_key(self) -> str: + """Get concat key. + + :returns: concat key + """ + return self._jvm_edge_info_obj.getConcatKey()
+ +
[docs] def dump(self) -> str: + """Dump to Yaml string. + + :returns: yaml-string representation. + """ + return self._jvm_edge_info_obj.dump()
+ +
[docs] @staticmethod + def load_edge_info(edge_info_path: str) -> "EdgeInfo": + """Load a yaml file from path and construct an EdgeInfo from it. + + :param edge_info_path: path of edge info YAML file. + :returns: EdgeInfo object. + """ + return EdgeInfo.from_scala( + GraphArSession.graphar.EdgeInfo.loadEdgeInfo( + edge_info_path, + GraphArSession.jss, + ), + )
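A minimal usage sketch of the EdgeInfo API above; the YAML path is hypothetical and the AdjListType member name follows the docstrings in this module. It assumes the GraphAr session has already been initialized.

from graphar_pyspark.enums import AdjListType

# Hypothetical path; any edge info YAML of a GAR dataset would do.
edge_info = EdgeInfo.load_edge_info("/tmp/graphar/person_knows_person.edge.yml")
if edge_info.contain_adj_list(AdjListType.ordered_by_source):
    # Prefix and first topology chunk of the ordered-by-source adj list.
    print(edge_info.get_adj_list_prefix(AdjListType.ordered_by_source))
    print(edge_info.get_adj_list_file_path(0, 0, AdjListType.ordered_by_source))
print(edge_info.dump())  # YAML representation of the edge info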
+ + +
[docs]class GraphInfo: + """GraphInfo is a class to store the graph meta information.""" + + def __init__( + self, + name: Optional[str], + prefix: Optional[str], + vertices: Optional[list[str]], + edges: Optional[list[str]], + version: Optional[str], + jvm_graph_info_obj: Optional[JavaObject], + ) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + if jvm_graph_info_obj is not None: + self._jvm_graph_info_obj = jvm_graph_info_obj + else: + graph_info = GraphArSession.graphar.GraphInfo() + graph_info.setName(name) + graph_info.setPrefix(prefix) + graph_info.setVertices(vertices) + graph_info.setEdges(edges) + graph_info.setVersion(version) + self._jvm_graph_info_obj = graph_info + +
[docs] def get_name(self) -> str: + """Get name from the corresponding JVM object. + + :returns: name + """ + return self._jvm_graph_info_obj.getName()
+ +
[docs] def set_name(self, name: str) -> None: + """Mutate the corresponding JVM object. + + :param name: new name + """ + self._jvm_graph_info_obj.setName(name)
+ +
[docs] def get_prefix(self) -> str: + """Get prefix from corresponding JVM object. + + :returns: prefix + """ + return self._jvm_graph_info_obj.getPrefix()
+ +
[docs] def set_prefix(self, prefix: str) -> None: + """Mutate the corresponding JVM object. + + :param prefix: new prefix + """ + self._jvm_graph_info_obj.setPrefix(prefix)
+ +
[docs] def get_vertices(self) -> JavaList: + """Get list of vertices from the corresponding JVM object. + + :returns: vertices + """ + return self._jvm_graph_info_obj.getVertices()
+ +
[docs] def set_vertices(self, vertices: Union[list[str], JavaList]) -> None: + """Mutate the corresponding JVM object. + + :param vertices: new list of vertices + """ + self._jvm_graph_info_obj.setVertices(vertices)
+ +
[docs] def get_edges(self) -> JavaList: + """Get list of edges from the corresponding JVM object. + + :returns: edges + """ + return self._jvm_graph_info_obj.getEdges()
+ +
[docs] def set_edges(self, edges: Union[list[str], JavaList]) -> None: + """Mutate the corresponding JVM object. + + :param edges: new list of edges. + """ + self._jvm_graph_info_obj.setEdges(edges)
+ +
[docs] def get_version(self) -> str: + """Get GAR version from the corresponding JVM object. + + :returns: version + """ + return self._jvm_graph_info_obj.getVersion()
+ +
[docs] def set_version(self, version: str) -> None: + """Mutate the corresponding JVM object. + + :param version: new version of GAR + """ + self._jvm_graph_info_obj.setVersion(version)
+ +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_graph_info_obj
+ +
[docs] @staticmethod + def from_scala(jvm_obj: JavaObject) -> "GraphInfo": + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return GraphInfo(None, None, None, None, None, jvm_obj)
+ +
[docs] @staticmethod + def from_python( + name: str, + prefix: str, + vertices: Sequence[str], + edges: Sequence[str], + version: str, + ) -> "GraphInfo": + """Create an instance of the class from python arguments. + + :param name: name of the graph + :param prefix: path prefix + :param vertices: list of vertices + :param edges: list of edges + :param version: version of GAR format + """ + if not prefix.endswith(os.sep): + prefix += os.sep + return GraphInfo(name, prefix, vertices, edges, version, None)
+ +
[docs] def add_vertex_info(self, vertex_info: VertexInfo) -> None: + """Add VertexInfo to GraphInfo. + + :param vertex_info: VertexInfo to add + """ + self._jvm_graph_info_obj.addVertexInfo(vertex_info.to_scala())
+ +
[docs] def add_edge_info(self, edge_info: EdgeInfo) -> None: + """Add EdgeInfo to GraphInfo. + + :param edge_info: EdgeInfo to add + """ + self._jvm_graph_info_obj.addEdgeInfo(edge_info.to_scala())
+ +
[docs] def get_vertex_info(self, label: str) -> VertexInfo: + """Get vertex info from the corresponding JVM object. + + :param label: label of vertex + """ + return VertexInfo.from_scala(self._jvm_graph_info_obj.getVertexInfo(label))
+ +
[docs] def get_edge_info( + self, + src_label: str, + edge_label: str, + dst_label: str, + ) -> EdgeInfo: + """Get edge info from the corresponding JVM object. + + :param src_label: source label + :param edge_label: edge label + :param dst_label: destination label + """ + return EdgeInfo.from_scala( + self._jvm_graph_info_obj.getEdgeInfo(src_label, edge_label, dst_label), + )
+ +
[docs] def get_vertex_infos(self) -> dict[str, VertexInfo]: + """Get all vertex infos from the corresponding JVM object. + + :returns: Mapping label -> VertexInfo + """ + scala_map = self._jvm_graph_info_obj.getVertexInfos() + keys_set_iter = scala_map.keySet().iterator() + res = {} + while keys_set_iter.hasNext(): + k = keys_set_iter.next() + res[k] = VertexInfo.from_scala(scala_map.get(k)) + + return res
+ +
[docs] def get_edge_infos(self) -> dict[str, EdgeInfo]: + """Get all edge infos from the corresponding JVM object. + + :returns: Mapping {src_label}_{edge_label}_{dst_label} -> EdgeInfo + """ + scala_map = self._jvm_graph_info_obj.getEdgeInfos() + keys_set_iter = scala_map.keySet().iterator() + res = {} + while keys_set_iter.hasNext(): + k = keys_set_iter.next() + res[k] = EdgeInfo.from_scala(scala_map.get(k)) + + return res
+ +
[docs] def dump(self) -> str: + """Dump to Yaml string. + + :returns: YAML-string representation of object. + """ + return self._jvm_graph_info_obj.dump()
+ +
[docs] @staticmethod + def load_graph_info(graph_info_path: str) -> "GraphInfo": + """Load a yaml file from path and construct a GraphInfo from it. + + :param graph_info_path: path of GraphInfo YAML file. + :returns: GraphInfo object. + """ + return GraphInfo.from_scala( + GraphArSession.graphar.GraphInfo.loadGraphInfo( + graph_info_path, + GraphArSession.jss, + ), + )
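A minimal usage sketch for GraphInfo; the YAML path is hypothetical and the GraphAr session is assumed to be initialized already.

# Hypothetical path to a graph info YAML file.
graph_info = GraphInfo.load_graph_info("/tmp/graphar/ldbc_sample.graph.yml")
print(graph_info.get_name(), graph_info.get_prefix())

# Keys of get_edge_infos() follow the "{src_label}_{edge_label}_{dst_label}" pattern.
for label in graph_info.get_vertex_infos():
    print("vertex type:", label)
for key in graph_info.get_edge_infos():
    print("edge type:", key)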
\ No newline at end of file
diff --git a/_modules/graphar_pyspark/reader.html b/_modules/graphar_pyspark/reader.html
new file mode 100644
index 000000000..e351a4abc
--- /dev/null
+++ b/_modules/graphar_pyspark/reader.html
@@ -0,0 +1,753 @@
+ graphar_pyspark.reader - GraphAr
Source code for graphar_pyspark.reader

+# Copyright 2022-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Bidnings to com.alibaba.graphar.graph."""
+
+from __future__ import annotations
+
+import os
+from typing import Optional
+
+from py4j.java_gateway import JavaObject
+from pyspark.sql import DataFrame
+
+from graphar_pyspark import GraphArSession, _check_session
+from graphar_pyspark.enums import AdjListType
+from graphar_pyspark.info import EdgeInfo, PropertyGroup, VertexInfo
+
+
+
[docs]class VertexReader: + """Reader for vertex chunks.""" + + def __init__( + self, + prefix: Optional[str], + vertex_info: Optional[VertexInfo], + jvm_obj: Optional[JavaObject], + ) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + if jvm_obj is not None: + self._jvm_vertex_reader_obj = jvm_obj + else: + self._jvm_vertex_reader_obj = GraphArSession.graphar.reader.VertexReader( + prefix, + vertex_info.to_scala(), + GraphArSession.jss, + ) + +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_vertex_reader_obj
+ +
[docs] @staticmethod + def from_scala(jvm_obj: JavaObject) -> "VertexReader": + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return VertexReader(None, None, jvm_obj)
+ +
[docs] @staticmethod + def from_python(prefix: str, vertex_info: VertexInfo) -> "VertexReader": + """Create an instance of the Class from Python arguments. + + :param prefix: the absolute prefix. + :param vertex_info: the vertex info that describes the vertex type. + """ + if not prefix.endswith(os.sep): + prefix += os.sep + return VertexReader(prefix, vertex_info, None)
+ +
[docs] def read_vertices_number(self) -> int: + """Load the total number of vertices for this vertex type. + + :returns: total number of vertices. + """ + return self._jvm_vertex_reader_obj.readVerticesNumber()
+ +
[docs] def read_vertex_property_chunk( + self, + property_group: PropertyGroup, + chunk_index: int, + ) -> DataFrame: + """Load a single vertex property chunk as a DataFrame. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_group: property group. + :param chunk_index: index of vertex chunk. + :returns: vertex property chunk DataFrame. Raise an IllegalArgumentException if the property group is not contained. + """ + return DataFrame( + self._jvm_vertex_reader_obj.readVertexPropertyChunk( + property_group.to_scala(), + chunk_index, + ), + GraphArSession.ss, + )
+ +
[docs] def read_vertex_property_group(self, property_group: PropertyGroup) -> DataFrame: + """Load all chunks for a property group as a DataFrame. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_group: property group. + :returns: DataFrame that contains all chunks of the property group. Raise an IllegalArgumentException if the property group is not contained. + """ + return DataFrame( + self._jvm_vertex_reader_obj.readVertexPropertyGroup( + property_group.to_scala(), + ), + GraphArSession.ss, + )
+ +
[docs] def read_multiple_vertex_property_groups( + self, + property_groups: list[PropertyGroup], + ) -> DataFrame: + """Load the chunks for multiple property groups as a DataFrame. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_groups: list of property groups. + :returns: DataFrame that contains all chunks of the property groups. Raise an IllegalArgumentException if a property group is not contained. + """ + return DataFrame( + self._jvm_vertex_reader_obj.readMultipleVertexPropertyGroups( + [py_property_group.to_scala() for py_property_group in property_groups], + ), + GraphArSession.ss, + )
+ +
[docs] def read_all_vertex_property_groups(self) -> DataFrame: + """Load the chunks for all property groups as a DataFrame. + + :returns: DataFrame that contains all property group chunks of vertex. + """ + return DataFrame( + self._jvm_vertex_reader_obj.readAllVertexPropertyGroups(), + GraphArSession.ss, + )
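A minimal usage sketch for VertexReader; the paths are hypothetical, and the `initialize` helper and `VertexInfo.load_vertex_info` are assumptions (mirroring `EdgeInfo.load_edge_info`), not confirmed by this page.

from pyspark.sql import SparkSession
from graphar_pyspark import initialize  # assumed session bootstrap helper
from graphar_pyspark.info import VertexInfo

spark = SparkSession.builder.getOrCreate()
initialize(spark)  # binds the GraphAr session to this SparkSession (assumption)

# Hypothetical vertex info YAML and data prefix.
vertex_info = VertexInfo.load_vertex_info("/tmp/graphar/person.vertex.yml")
reader = VertexReader.from_python("/tmp/graphar/", vertex_info)
print(reader.read_vertices_number())
reader.read_all_vertex_property_groups().show()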
+ + +
[docs]class EdgeReader: + """Reader for edge chunks.""" + + def __init__( + self, + prefix: Optional[str], + edge_info: Optional[EdgeInfo], + adj_list_type: Optional[AdjListType], + jvm_obj: Optional[JavaObject], + ) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + if jvm_obj is not None: + self._jvm_edge_reader_obj = jvm_obj + else: + self._jvm_edge_reader_obj = GraphArSession.graphar.reader.EdgeReader( + prefix, + edge_info.to_scala(), + adj_list_type.to_scala(), + GraphArSession.jss, + ) + +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_edge_reader_obj
+ +
[docs] @staticmethod + def from_scala(jvm_obj: JavaObject) -> "EdgeReader": + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return EdgeReader(None, None, None, jvm_obj)
+ +
[docs] @staticmethod + def from_python( + prefix: str, + edge_info: EdgeInfo, + adj_list_type: AdjListType, + ) -> "EdgeReader": + """Create an instance of the Class from Python arguments. + + Note that the constructor will raise an IllegalArgumentException if the edge info does not support the given adj list type. + + :param prefix: the absolute prefix. + :param edge_info: the edge info that describes the edge type. + :param adj_list_type: the adj list type for the edge. + """ + if not prefix.endswith(os.sep): + prefix += os.sep + return EdgeReader(prefix, edge_info, adj_list_type, None)
+ +
[docs] def read_vertices_number(self) -> int: + """Load the total number of src/dst vertices for this edge type. + + :returns: total number of vertices. + """ + return self._jvm_edge_reader_obj.readVerticesNumber()
+ +
[docs] def read_vertex_chunk_number(self) -> int: + """Load the chunk number of src/dst vertices. + + :returns: chunk number of vertices. + """ + return self._jvm_edge_reader_obj.readVertexChunkNumber()
+ +
[docs] def read_edges_number(self, chunk_index: Optional[int] = None) -> int: + """Load the number of edges for the vertex chunk or for this edge type. + + :param chunk_index: index of vertex chunk (optional, default is None) + if not provided, returns the number of edges for this edge type + if provided, returns the number of edges for the vertex chunk + :returns: the number of edges + """ + if chunk_index is None: + return self._jvm_edge_reader_obj.readEdgesNumber() + return self._jvm_edge_reader_obj.readEdgesNumber(chunk_index)
+ +
[docs] def read_offset(self, chunk_index: int) -> DataFrame: + """Load a single offset chunk as a DataFrame. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param chunk_index: index of offset chunk + :returns: offset chunk DataFrame. Raise IllegalArgumentException if adjListType is + not AdjListType.ordered_by_source or AdjListType.ordered_by_dest. + """ + return DataFrame( + self._jvm_edge_reader_obj.readOffset(chunk_index), + GraphArSession.ss, + )
+ +
[docs] def read_adj_list_chunk( + self, + vertex_chunk_index: int, + chunk_index: int, + ) -> DataFrame: + """Load a single AdjList chunk as a DataFrame. + + :param vertex_chunk_index: index of vertex chunk + :param chunk_index: index of AdjList chunk. + :returns: AdjList chunk DataFrame + """ + return DataFrame( + self._jvm_edge_reader_obj.readAdjListChunk(vertex_chunk_index, chunk_index), + GraphArSession.ss, + )
+ +
[docs] def read_adj_list_for_vertex_chunk( + self, + vertex_chunk_index: int, + add_index: bool = True, + ) -> DataFrame: + """Load all AdjList chunks for a vertex chunk as a DataFrame. + + :param vertex_chunk_index: index of vertex chunk. + :param add_index: flag that add edge index column or not in the final DataFrame. + :returns: DataFrame of all AdjList chunks of vertices in given vertex chunk. + """ + return DataFrame( + self._jvm_edge_reader_obj.readAdjListForVertexChunk( + vertex_chunk_index, + add_index, + ), + GraphArSession.ss, + )
+ +
[docs] def read_all_adj_list(self, add_index: bool = True) -> DataFrame: + """Load all AdjList chunks for this edge type as a DataFrame. + + :param add_index: flag that add index column or not in the final DataFrame. + :returns: DataFrame of all AdjList chunks. + """ + return DataFrame( + self._jvm_edge_reader_obj.readAllAdjList(add_index), + GraphArSession.ss, + )
+ +
[docs] def read_edge_property_chunk( + self, + property_group: PropertyGroup, + vertex_chunk_index: int, + chunk_index: int, + ) -> DataFrame: + """Load a single edge property chunk as a DataFrame. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_group: property group. + :param vertex_chunk_index: index of vertex chunk. + :param chunk_index: index of property group chunk. + :returns: property group chunk DataFrame. If edge info does not contain the + property group, raise an IllegalArgumentException error. + """ + return DataFrame( + self._jvm_edge_reader_obj.readEdgePropertyChunk( + property_group.to_scala(), + vertex_chunk_index, + chunk_index, + ), + GraphArSession.ss, + )
+ +
[docs] def read_edge_property_group_for_vertex_chunk( + self, + property_group: PropertyGroup, + vertex_chunk_index: int, + add_index: bool = True, + ) -> DataFrame: + """Load the chunks for a property group of a vertex chunk as a DataFrame. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_group: property group. + :param vertex_chunk_index: index of vertex chunk. + :param add_index: flag that add edge index column or not in the final DataFrame. + :returns: DataFrame that contains all property group chunks of vertices in given + vertex chunk. If edge info does not contain the property group, raise an IllegalArgumentException error. + """ + return DataFrame( + self._jvm_edge_reader_obj.readEdgePropertyGroupForVertexChunk( + property_group.to_scala(), + vertex_chunk_index, + add_index, + ), + GraphArSession.ss, + )
+ +
[docs] def read_edge_property_group( + self, + property_group: PropertyGroup, + add_index: bool = True, + ) -> DataFrame: + """Load all chunks for a property group as a DataFrame. + + WARNING! Exceptions from the JVM are not checked inside, it is just a proxy-method! + + :param property_group: property group. + :param add_index: flag that add edge index column or not in the final DataFrame. + :returns: DataFrame that contains all chunks of property group. If edge info does + not contain the property group, raise an IllegalArgumentException error. + """ + return DataFrame( + self._jvm_edge_reader_obj.readEdgePropertyGroup( + property_group.to_scala(), + add_index, + ), + GraphArSession.ss, + )
+ +
[docs] def read_multiple_edge_property_groups_for_vertex_chunk( + self, + property_groups: list[PropertyGroup], + vertex_chunk_index: int, + add_index: bool = True, + ) -> DataFrame: + """Load the chunks for multiple property groups of a vertex chunk as a DataFrame. + + :param property_groups: list of property groups. + :param vertex_chunk_index: index of vertex chunk. + :param add_index: flag that add edge index column or not in the final DataFrame. + :returns: DataFrame that contains all property groups chunks of a vertex chunk. + """ + return DataFrame( + self._jvm_edge_reader_obj.readMultipleEdgePropertyGroupsForVertexChunk( + [py_property_group.to_scala() for py_property_group in property_groups], + vertex_chunk_index, + add_index, + ), + GraphArSession.ss, + )
+ +
[docs] def read_multiple_edge_property_groups( + self, + property_groups: list[PropertyGroup], + add_index: bool = True, + ) -> DataFrame: + """Load the chunks for multiple property groups as a DataFrame. + + :param property_groups: list of property groups. + :param add_index: flag that add edge index column or not in the final DataFrame. + :returns: DataFrame that contains all property groups chunks of edge. + """ + return DataFrame( + self._jvm_edge_reader_obj.readMultipleEdgePropertyGroups( + [py_property_group.to_scala() for py_property_group in property_groups], + add_index, + ), + GraphArSession.ss, + )
+ +
[docs] def read_all_edge_property_groups_for_vertex_chunk( + self, + vertex_chunk_index: int, + add_index: bool = True, + ) -> DataFrame: + """Load the chunks for all property groups of a vertex chunk as a DataFrame. + + :param vertex_chunk_index: index of vertex chunk. + :param add_index: flag that add edge index column or not in the final DataFrame. + :returns: DataFrame that contains all property groups chunks of a vertex chunk. + """ + return DataFrame( + self._jvm_edge_reader_obj.readAllEdgePropertyGroupsForVertexChunk( + vertex_chunk_index, + add_index, + ), + GraphArSession.ss, + )
+ +
[docs] def read_all_edge_property_groups(self, add_index: bool = True) -> DataFrame: + """Load the chunks for all property groups as a DataFrame. + + :param add_index: flag that add edge index column or not in the final DataFrame. + :returns: DataFrame that contains all property groups chunks of edge. + """ + return DataFrame( + self._jvm_edge_reader_obj.readAllEdgePropertyGroups(add_index), + GraphArSession.ss, + )
+ +
[docs] def read_edges_for_vertex_chunk( + self, + vertex_chunk_index: int, + add_index: bool = True, + ) -> DataFrame: + """Load the chunks for the AdjList and all property groups for a vertex chunk as a DataFrame. + + :param vertex_chunk_index: index of vertex chunk + :param add_index: flag that add edge index column or not in the final DataFrame. + :returns: DataFrame that contains all chunks of AdjList and property groups of vertices in given vertex chunk. + """ + return DataFrame( + self._jvm_edge_reader_obj.readEdgesForVertexChunk( + vertex_chunk_index, + add_index, + ), + GraphArSession.ss, + )
+ +
[docs] def read_edges(self, add_index: bool = True) -> DataFrame: + """Load the chunks for the AdjList and all property groups as a DataFrame. + + :param add_index: flag that add edge index column or not in the final DataFrame. + :returns: DataFrame that contains all chunks of AdjList and property groups of edges. + """ + return DataFrame( + self._jvm_edge_reader_obj.readEdges(add_index), + GraphArSession.ss, + )
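A minimal usage sketch for EdgeReader; paths are hypothetical and the GraphAr session is assumed to be initialized already.

from graphar_pyspark.enums import AdjListType
from graphar_pyspark.info import EdgeInfo

# Hypothetical edge info YAML and data prefix.
edge_info = EdgeInfo.load_edge_info("/tmp/graphar/person_knows_person.edge.yml")
reader = EdgeReader.from_python("/tmp/graphar/", edge_info, AdjListType.ordered_by_source)

print(reader.read_edges_number())    # edges of the whole edge type
print(reader.read_edges_number(0))   # edges of vertex chunk 0
reader.read_edges(add_index=True).show()  # adj list joined with all property groups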
\ No newline at end of file
diff --git a/_modules/graphar_pyspark/util.html b/_modules/graphar_pyspark/util.html
new file mode 100644
index 000000000..fc2f2845d
--- /dev/null
+++ b/_modules/graphar_pyspark/util.html
@@ -0,0 +1,540 @@
+ graphar_pyspark.util - GraphAr
Source code for graphar_pyspark.util

+# Copyright 2022-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Bindings to com.alibaba.graphar.util."""
+
+from __future__ import annotations
+
+from typing import Optional
+
+from pyspark.sql import DataFrame
+
+from graphar_pyspark import GraphArSession, _check_session
+
+
+
[docs]class IndexGenerator: + """IndexGenerator is an object to help generate the indices for vertex/edge DataFrames.""" + +
[docs] @staticmethod + def construct_vertex_index_mapping( + vertex_df: DataFrame, + primary_key: str, + ) -> DataFrame: + """Generate a vertex index mapping from the primary key. + + The resulting DataFrame contains two columns: vertex index & primary key. + + :param vertex_df: input vertex DataFrame. + :param primary_key: the primary key of vertex + :returns: a DataFrame that contains two columns: vertex index & primary key. + """ + _check_session() + return DataFrame( + GraphArSession.graphar.util.IndexGenerator.constructVertexIndexMapping( + vertex_df._jdf, + primary_key, + ), + GraphArSession.ss, + )
+ +
[docs] @staticmethod + def generate_vertex_index_column(vertex_df: DataFrame) -> DataFrame: + """Add a column containing the vertex index to the DataFrame. + + :param vertex_df: the input vertex DataFrame. + :returns: DataFrame that contains a new vertex index column. + """ + _check_session() + return DataFrame( + GraphArSession.graphar.util.IndexGenerator.generateVertexIndexColumn( + vertex_df._jdf, + ), + GraphArSession.ss, + )
+ +
[docs] @staticmethod + def generate_vertex_index_column_and_index_mapping( + vertex_df: DataFrame, + primary_key: str = "", + ) -> (DataFrame, DataFrame): + """Add an index column and generate a new index mapping. + + :param vertex_df: the input vertex DataFrame. + :param primary_key: the primary key of vertex. + :returns: the new vertex DataFrame and mapping DataFrame. + """ + _check_session() + jvm_res = GraphArSession.graphar.util.IndexGenerator.generateVertexIndexColumnAndIndexMapping( + vertex_df._jdf, + primary_key, + ) + + return ( + DataFrame(jvm_res._1(), GraphArSession.ss), + DataFrame(jvm_res._2(), GraphArSession.ss), + )
+ +
[docs] @staticmethod + def generate_edge_index_column(edge_df: DataFrame) -> DataFrame: + """Add a column containing the edge index to the input edge DataFrame. + + :param edge_df: DataFrame with edges. + :returns: DataFrame with edges and index. + """ + _check_session() + return DataFrame( + GraphArSession.graphar.util.IndexGenerator.generateEdgeIndexColumn( + edge_df._jdf, + ), + GraphArSession.ss, + )
+ +
[docs] @staticmethod + def generate_src_index_for_edges_from_mapping( + edge_df: DataFrame, + src_column_name: str, + src_index_mapping: DataFrame, + ) -> DataFrame: + """Join the edge table with the vertex index mapping for source column. + + :param edge_df: edges DataFrame + :param src_column_name: join-column + :param src_index_mapping: mapping DataFrame + :returns: DataFrame with index + """ + _check_session() + return DataFrame( + GraphArSession.graphar.util.IndexGenerator.generateSrcIndexForEdgesFromMapping( + edge_df._jdf, + src_column_name, + src_index_mapping._jdf, + ), + GraphArSession.ss, + )
+ +
[docs] @staticmethod + def generate_dst_index_for_edges_from_mapping( + edge_df: DataFrame, + dst_column_name: str, + dst_index_mapping: DataFrame, + ) -> DataFrame: + """Join the edge table with the vertex index mapping for destination column. + + :param edge_df: edges DataFrame + :param dst_column_name: join-column + :param dst_index_mapping: mapping DataFrame + :returns: DataFrame with index + """ + _check_session() + return DataFrame( + GraphArSession.graphar.util.IndexGenerator.generateDstIndexForEdgesFromMapping( + edge_df._jdf, + dst_column_name, + dst_index_mapping._jdf, + ), + GraphArSession.ss, + )
+ +
[docs] @staticmethod + def generate_src_and_dst_index_for_edges_from_mapping( + edge_df: DataFrame, + src_column_name: Optional[str], + dst_column_name: Optional[str], + src_index_mapping: DataFrame, + dst_index_mapping: DataFrame, + ) -> DataFrame: + """Join the edge table with the vertex index mapping for source & destination columns. + + Assumes that the first and second columns are the src and dst columns if they are None. + + + :param edge_df: edge DataFrame + :param src_column_name: src column, optional (the first col from edge_df will be used if None) + :param dst_column_name: dst column, optional (the second col from edge_df will be used if None) + :param src_index_mapping: source mapping DataFrame + :param dst_index_mapping: dest mapping DataFrame + :returns: DataFrame with indices + """ + _check_session() + if src_column_name is None: + src_column_name = edge_df.columns[0] + + if dst_column_name is None: + dst_column_name = edge_df.columns[1] + + return DataFrame( + GraphArSession.graphar.util.IndexGenerator.generateSrcAndDstIndexForEdgesFromMapping( + edge_df._jdf, + src_column_name, + dst_column_name, + src_index_mapping._jdf, + dst_index_mapping._jdf, + ), + GraphArSession.ss, + )
+ +
[docs] @staticmethod + def generate_scr_index_for_edges( + edge_df: DataFrame, + src_column_name: str, + ) -> DataFrame: + """Construct vertex index for source column. + + :param edge_df: edge DataFrame + :param src_column_name: source column + :returns: DataFrame with index + """ + _check_session() + return DataFrame( + GraphArSession.graphar.util.IndexGenerator.generateSrcIndexForEdges( + edge_df._jdf, + src_column_name, + ), + GraphArSession.ss, + )
+ +
[docs] @staticmethod + def generate_dst_index_for_edges( + edge_df: DataFrame, + dst_column_name: str, + ) -> DataFrame: + """Construct vertex index for destination column. + + :param edge_df: edge DataFrame + :param dst_column_name: destination column + :returns: DataFrame with index + """ + _check_session() + return DataFrame( + GraphArSession.graphar.util.IndexGenerator.generateDstIndexForEdges( + edge_df._jdf, + dst_column_name, + ), + GraphArSession.ss, + )
+ +
[docs] @staticmethod + def generate_src_and_dst_index_unitedly_for_edges( + edge_df: DataFrame, + src_column_name: str, + dst_column_name: str, + ) -> DataFrame: + """Union and construct vertex index for source & destination columns. + + :param edge_df: edge DataFrame + :param src_column_name: source column name + :param dst_column_name: destination column name + :returns: DataFrame with index + """ + _check_session() + return DataFrame( + GraphArSession.graphar.util.IndexGenerator.generateSrcAndDstIndexUnitedlyForEdges( + edge_df._jdf, + src_column_name, + dst_column_name, + ), + GraphArSession.ss, + )
+
\ No newline at end of file
diff --git a/_modules/graphar_pyspark/writer.html b/_modules/graphar_pyspark/writer.html
new file mode 100644
index 000000000..c41703b33
--- /dev/null
+++ b/_modules/graphar_pyspark/writer.html
@@ -0,0 +1,477 @@
+ graphar_pyspark.writer - GraphAr
Source code for graphar_pyspark.writer

+# Copyright 2022-2023 Alibaba Group Holding Limited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Bindings to com.alibaba.graphar.writer."""
+
+
+from __future__ import annotations
+
+import os
+from typing import Optional
+
+from py4j.java_gateway import JavaObject
+from pyspark.sql import DataFrame
+
+from graphar_pyspark import GraphArSession, _check_session
+from graphar_pyspark.enums import AdjListType
+from graphar_pyspark.info import EdgeInfo, PropertyGroup, VertexInfo
+
+
+
[docs]class VertexWriter: + """Writer for vertex DataFrame.""" + + def __init__( + self, + prefix: Optional[str], + vertex_info: Optional[VertexInfo], + vertex_df: Optional[DataFrame], + num_vertices: Optional[int], + jvm_obj: Optional[JavaObject], + ) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + if jvm_obj is not None: + self._jvm_vertex_writer_obj = jvm_obj + else: + num_vertices = -1 if num_vertices is None else num_vertices + self._jvm_vertex_writer_obj = GraphArSession.graphar.writer.VertexWriter( + prefix, + vertex_info.to_scala(), + vertex_df._jdf, + num_vertices, + ) + +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_vertex_writer_obj
+ +
[docs] @staticmethod + def from_scala(jvm_obj: JavaObject) -> "VertexWriter": + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return VertexWriter(None, None, None, None, jvm_obj)
+ +
[docs] @staticmethod + def from_python( + prefix: str, + vertex_info: VertexInfo, + vertex_df: DataFrame, + num_vertices: Optional[int], + ) -> "VertexWriter": + """Create an instance of the Class from Python arguments. + + :param prefix: the absolute prefix. + :param vertex_info: the vertex info that describes the vertex type. + :param vertex_df: the input vertex DataFrame. + :param num_vertices: the number of vertices, optional + """ + if not prefix.endswith(os.sep): + prefix += os.sep + return VertexWriter(prefix, vertex_info, vertex_df, num_vertices, None)
+ +
[docs] def write_vertex_properties( + self, + property_group: Optional[PropertyGroup] = None, + ) -> None: + """Generate chunks of the property group (or all property groups) for vertex DataFrame. + + :param property_group: property group (optional, default is None) + if provided, generate chunks of the property group, otherwise generate for all property groups. + """ + if property_group is not None: + self._jvm_vertex_writer_obj.writeVertexProperties(property_group.to_scala()) + else: + self._jvm_vertex_writer_obj.writeVertexProperties()
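A minimal usage sketch for VertexWriter; the paths are hypothetical, `VertexInfo.load_vertex_info` is assumed to mirror `EdgeInfo.load_edge_info`, and `indexed_vertex_df` stands for a vertex DataFrame that already carries a GraphAr vertex index column (see IndexGenerator). The GraphAr session is assumed to be initialized.

from graphar_pyspark.info import VertexInfo

# Hypothetical vertex info YAML and output prefix.
vertex_info = VertexInfo.load_vertex_info("/tmp/graphar/person.vertex.yml")
writer = VertexWriter.from_python("/tmp/graphar/", vertex_info, indexed_vertex_df, None)
writer.write_vertex_properties()  # no property group given: write chunks for all groups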
+ + +
[docs]class EdgeWriter: + """Writer for edge DataFrame.""" + + def __init__( + self, + prefix: Optional[str], + edge_info: Optional[EdgeInfo], + adj_list_type: Optional[AdjListType], + vertex_num: Optional[int], + edge_df: Optional[DataFrame], + jvm_obj: Optional[JavaObject], + ) -> None: + """One should not use this constructor directly, please use `from_scala` or `from_python`.""" + _check_session() + if jvm_obj is not None: + self._jvm_edge_writer_obj = jvm_obj + else: + self._jvm_edge_writer_obj = GraphArSession.graphar.writer.EdgeWriter( + prefix, + edge_info.to_scala(), + adj_list_type.to_scala(), + vertex_num, + edge_df._jdf, + ) + +
[docs] def to_scala(self) -> JavaObject: + """Transform object to JVM representation. + + :returns: JavaObject + """ + return self._jvm_edge_writer_obj
+ +
[docs] @staticmethod + def from_scala(jvm_obj: JavaObject) -> "EdgeWriter": + """Create an instance of the Class from the corresponding JVM object. + + :param jvm_obj: scala object in JVM. + :returns: instance of Python Class. + """ + return EdgeWriter(None, None, None, None, None, jvm_obj)
+ +
[docs] @staticmethod + def from_python( + prefix: str, + edge_info: EdgeInfo, + adj_list_type: AdjListType, + vertex_num: int, + edge_df: DataFrame, + ) -> "EdgeWriter": + """Create an instance of the Class from Python arguments. + + :param prefix: the absolute prefix. + :param edge_info: the edge info that describes the edge type. + :param adj_list_type: the adj list type for the edge. + :param vertex_num: vertex number of the primary vertex label + :param edge_df: the input edge DataFrame. + """ + if not prefix.endswith(os.sep): + prefix += os.sep + return EdgeWriter(prefix, edge_info, adj_list_type, vertex_num, edge_df, None)
+ +
[docs] def write_adj_list(self) -> None: + """Generate the chunks of AdjList from edge DataFrame for this edge type.""" + self._jvm_edge_writer_obj.writeAdjList()
+ +
[docs] def write_edge_properties( + self, + property_group: Optional[PropertyGroup] = None, + ) -> None: + """Generate the chunks of all or selected property groups from edge DataFrame. + + :param property_group: property group (optional, default is None) + if provided, generate the chunks of selected property group, otherwise generate for all groups. + """ + if property_group is not None: + self._jvm_edge_writer_obj.writeEdgeProperties(property_group.to_scala()) + else: + self._jvm_edge_writer_obj.writeEdgeProperties()
+ +
[docs] def write_edges(self) -> None: + """Generate the chunks for the AdjList and all property groups from edge.""" + self._jvm_edge_writer_obj.writeEdges()
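A minimal usage sketch for EdgeWriter; the paths and vertex count are hypothetical, and `indexed_edge_df` stands for an edge DataFrame that already carries GraphAr src/dst index columns (see IndexGenerator). The GraphAr session is assumed to be initialized.

from graphar_pyspark.enums import AdjListType
from graphar_pyspark.info import EdgeInfo

edge_info = EdgeInfo.load_edge_info("/tmp/graphar/person_knows_person.edge.yml")
writer = EdgeWriter.from_python(
    "/tmp/graphar/",
    edge_info,
    AdjListType.ordered_by_source,
    903,              # number of vertices of the primary vertex label (made up)
    indexed_edge_df,
)
writer.write_edges()  # chunks for the adj list and all property groups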
+
\ No newline at end of file
diff --git a/_modules/index.html b/_modules/index.html
new file mode 100644
index 000000000..f15c018b8
--- /dev/null
+++ b/_modules/index.html
@@ -0,0 +1,300 @@
+ Overview: module code - GraphAr
+ + + + + + + + + \ No newline at end of file diff --git a/_panels_static/panels-bootstrap.5fd3999ee7762ccc51105388f4a9d115.css b/_panels_static/panels-bootstrap.5fd3999ee7762ccc51105388f4a9d115.css new file mode 100644 index 000000000..1b057df2f --- /dev/null +++ b/_panels_static/panels-bootstrap.5fd3999ee7762ccc51105388f4a9d115.css @@ -0,0 +1 @@ +.badge{border-radius:.25rem;display:inline-block;font-size:75%;font-weight:700;line-height:1;padding:.25em .4em;text-align:center;vertical-align:baseline;white-space:nowrap}.badge:empty{display:none}.btn .badge{position:relative;top:-1px}.badge-pill{border-radius:10rem;padding-left:.6em;padding-right:.6em}.badge-primary{background-color:#007bff;color:#fff}.badge-primary[href]:focus,.badge-primary[href]:hover{background-color:#0062cc;color:#fff;text-decoration:none}.badge-secondary{background-color:#6c757d;color:#fff}.badge-secondary[href]:focus,.badge-secondary[href]:hover{background-color:#545b62;color:#fff;text-decoration:none}.badge-success{background-color:#28a745;color:#fff}.badge-success[href]:focus,.badge-success[href]:hover{background-color:#1e7e34;color:#fff;text-decoration:none}.badge-info{background-color:#17a2b8;color:#fff}.badge-info[href]:focus,.badge-info[href]:hover{background-color:#117a8b;color:#fff;text-decoration:none}.badge-warning{background-color:#ffc107;color:#212529}.badge-warning[href]:focus,.badge-warning[href]:hover{background-color:#d39e00;color:#212529;text-decoration:none}.badge-danger{background-color:#dc3545;color:#fff}.badge-danger[href]:focus,.badge-danger[href]:hover{background-color:#bd2130;color:#fff;text-decoration:none}.badge-light{background-color:#f8f9fa;color:#212529}.badge-light[href]:focus,.badge-light[href]:hover{background-color:#dae0e5;color:#212529;text-decoration:none}.badge-dark{background-color:#343a40;color:#fff}.badge-dark[href]:focus,.badge-dark[href]:hover{background-color:#1d2124;color:#fff;text-decoration:none}.border-0{border:0 !important}.border-top-0{border-top:0 !important}.border-right-0{border-right:0 !important}.border-bottom-0{border-bottom:0 !important}.border-left-0{border-left:0 !important}.p-0{padding:0 !important}.pt-0,.py-0{padding-top:0 !important}.pr-0,.px-0{padding-right:0 !important}.pb-0,.py-0{padding-bottom:0 !important}.pl-0,.px-0{padding-left:0 !important}.p-1{padding:.25rem !important}.pt-1,.py-1{padding-top:.25rem !important}.pr-1,.px-1{padding-right:.25rem !important}.pb-1,.py-1{padding-bottom:.25rem !important}.pl-1,.px-1{padding-left:.25rem !important}.p-2{padding:.5rem !important}.pt-2,.py-2{padding-top:.5rem !important}.pr-2,.px-2{padding-right:.5rem !important}.pb-2,.py-2{padding-bottom:.5rem !important}.pl-2,.px-2{padding-left:.5rem !important}.p-3{padding:1rem !important}.pt-3,.py-3{padding-top:1rem !important}.pr-3,.px-3{padding-right:1rem !important}.pb-3,.py-3{padding-bottom:1rem !important}.pl-3,.px-3{padding-left:1rem !important}.p-4{padding:1.5rem !important}.pt-4,.py-4{padding-top:1.5rem !important}.pr-4,.px-4{padding-right:1.5rem !important}.pb-4,.py-4{padding-bottom:1.5rem !important}.pl-4,.px-4{padding-left:1.5rem !important}.p-5{padding:3rem !important}.pt-5,.py-5{padding-top:3rem !important}.pr-5,.px-5{padding-right:3rem !important}.pb-5,.py-5{padding-bottom:3rem !important}.pl-5,.px-5{padding-left:3rem !important}.m-0{margin:0 !important}.mt-0,.my-0{margin-top:0 !important}.mr-0,.mx-0{margin-right:0 !important}.mb-0,.my-0{margin-bottom:0 !important}.ml-0,.mx-0{margin-left:0 !important}.m-1{margin:.25rem !important}.mt-1,.my-1{margin-top:.25rem 
!important}.mr-1,.mx-1{margin-right:.25rem !important}.mb-1,.my-1{margin-bottom:.25rem !important}.ml-1,.mx-1{margin-left:.25rem !important}.m-2{margin:.5rem !important}.mt-2,.my-2{margin-top:.5rem !important}.mr-2,.mx-2{margin-right:.5rem !important}.mb-2,.my-2{margin-bottom:.5rem !important}.ml-2,.mx-2{margin-left:.5rem !important}.m-3{margin:1rem !important}.mt-3,.my-3{margin-top:1rem !important}.mr-3,.mx-3{margin-right:1rem !important}.mb-3,.my-3{margin-bottom:1rem !important}.ml-3,.mx-3{margin-left:1rem !important}.m-4{margin:1.5rem !important}.mt-4,.my-4{margin-top:1.5rem !important}.mr-4,.mx-4{margin-right:1.5rem !important}.mb-4,.my-4{margin-bottom:1.5rem !important}.ml-4,.mx-4{margin-left:1.5rem !important}.m-5{margin:3rem !important}.mt-5,.my-5{margin-top:3rem !important}.mr-5,.mx-5{margin-right:3rem !important}.mb-5,.my-5{margin-bottom:3rem !important}.ml-5,.mx-5{margin-left:3rem !important}.btn{background-color:transparent;border:1px solid transparent;border-radius:.25rem;color:#212529;cursor:pointer;display:inline-block;font-size:1rem;font-weight:400;line-height:1.5;padding:.375rem .75rem;text-align:center;transition:color .15s ease-in-out, background-color .15s ease-in-out, border-color .15s ease-in-out, box-shadow .15s ease-in-out;-moz-user-select:none;-ms-user-select:none;-webkit-user-select:none;user-select:none;vertical-align:middle}.btn:hover{color:#212529;text-decoration:none}.btn:visited{color:#212529}.btn.focus,.btn:focus{box-shadow:0 0 0 .2rem rgba(0,123,255,0.25);outline:0}.btn.disabled,.btn:disabled{opacity:.65}@media (prefers-reduced-motion: reduce){.btn{transition:none}}a.btn.disabled,fieldset:disabled a.btn{pointer-events:none}.btn-primary{background-color:#007bff;border-color:#007bff;color:#fff}.btn-primary:visited{color:#fff}.btn-primary:hover{background-color:#0069d9;border-color:#0062cc;color:#fff}.btn-primary.focus,.btn-primary:focus{background-color:#0069d9;border-color:#0062cc;box-shadow:0 0 0 .2rem rgba(0,123,255,0.5);color:#fff}.btn-primary.disabled,.btn-primary:disabled{background-color:#007bff;border-color:#007bff;color:#fff}.btn-primary.active:not(:disabled):not(.disabled),.btn-primary:not(:disabled):not(.disabled):active,.show>.btn-primary.dropdown-toggle{background-color:#0062cc;border-color:#005cbf;color:#fff}.btn-primary.active:not(:disabled):not(.disabled):focus,.btn-primary:not(:disabled):not(.disabled):active:focus,.show>.btn-primary.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(0,123,255,0.5)}.btn-secondary{background-color:#6c757d;border-color:#6c757d;color:#fff}.btn-secondary:visited{color:#fff}.btn-secondary:hover{background-color:#5a6268;border-color:#545b62;color:#fff}.btn-secondary.focus,.btn-secondary:focus{background-color:#5a6268;border-color:#545b62;box-shadow:0 0 0 .2rem rgba(108,117,125,0.5);color:#fff}.btn-secondary.disabled,.btn-secondary:disabled{background-color:#6c757d;border-color:#6c757d;color:#fff}.btn-secondary.active:not(:disabled):not(.disabled),.btn-secondary:not(:disabled):not(.disabled):active,.show>.btn-secondary.dropdown-toggle{background-color:#545b62;border-color:#4e555b;color:#fff}.btn-secondary.active:not(:disabled):not(.disabled):focus,.btn-secondary:not(:disabled):not(.disabled):active:focus,.show>.btn-secondary.dropdown-toggle:focus{box-shadow:0 0 0 .2rem 
rgba(108,117,125,0.5)}.btn-success{background-color:#28a745;border-color:#28a745;color:#fff}.btn-success:visited{color:#fff}.btn-success:hover{background-color:#218838;border-color:#1e7e34;color:#fff}.btn-success.focus,.btn-success:focus{background-color:#218838;border-color:#1e7e34;box-shadow:0 0 0 .2rem rgba(40,167,69,0.5);color:#fff}.btn-success.disabled,.btn-success:disabled{background-color:#28a745;border-color:#28a745;color:#fff}.btn-success.active:not(:disabled):not(.disabled),.btn-success:not(:disabled):not(.disabled):active,.show>.btn-success.dropdown-toggle{background-color:#1e7e34;border-color:#1c7430;color:#fff}.btn-success.active:not(:disabled):not(.disabled):focus,.btn-success:not(:disabled):not(.disabled):active:focus,.show>.btn-success.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(40,167,69,0.5)}.btn-info{background-color:#17a2b8;border-color:#17a2b8;color:#fff}.btn-info:visited{color:#fff}.btn-info:hover{background-color:#138496;border-color:#117a8b;color:#fff}.btn-info.focus,.btn-info:focus{background-color:#138496;border-color:#117a8b;box-shadow:0 0 0 .2rem rgba(23,162,184,0.5);color:#fff}.btn-info.disabled,.btn-info:disabled{background-color:#17a2b8;border-color:#17a2b8;color:#fff}.btn-info.active:not(:disabled):not(.disabled),.btn-info:not(:disabled):not(.disabled):active,.show>.btn-info.dropdown-toggle{background-color:#117a8b;border-color:#10707f;color:#fff}.btn-info.active:not(:disabled):not(.disabled):focus,.btn-info:not(:disabled):not(.disabled):active:focus,.show>.btn-info.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(23,162,184,0.5)}.btn-warning{background-color:#ffc107;border-color:#ffc107;color:#212529}.btn-warning:visited{color:#212529}.btn-warning:hover{background-color:#e0a800;border-color:#d39e00;color:#212529}.btn-warning.focus,.btn-warning:focus{background-color:#e0a800;border-color:#d39e00;box-shadow:0 0 0 .2rem rgba(255,193,7,0.5);color:#212529}.btn-warning.disabled,.btn-warning:disabled{background-color:#ffc107;border-color:#ffc107;color:#212529}.btn-warning.active:not(:disabled):not(.disabled),.btn-warning:not(:disabled):not(.disabled):active,.show>.btn-warning.dropdown-toggle{background-color:#d39e00;border-color:#c69500;color:#212529}.btn-warning.active:not(:disabled):not(.disabled):focus,.btn-warning:not(:disabled):not(.disabled):active:focus,.show>.btn-warning.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(255,193,7,0.5)}.btn-danger{background-color:#dc3545;border-color:#dc3545;color:#fff}.btn-danger:visited{color:#fff}.btn-danger:hover{background-color:#c82333;border-color:#bd2130;color:#fff}.btn-danger.focus,.btn-danger:focus{background-color:#c82333;border-color:#bd2130;box-shadow:0 0 0 .2rem rgba(220,53,69,0.5);color:#fff}.btn-danger.disabled,.btn-danger:disabled{background-color:#dc3545;border-color:#dc3545;color:#fff}.btn-danger.active:not(:disabled):not(.disabled),.btn-danger:not(:disabled):not(.disabled):active,.show>.btn-danger.dropdown-toggle{background-color:#bd2130;border-color:#b21f2d;color:#fff}.btn-danger.active:not(:disabled):not(.disabled):focus,.btn-danger:not(:disabled):not(.disabled):active:focus,.show>.btn-danger.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(220,53,69,0.5)}.btn-light{background-color:#f8f9fa;border-color:#f8f9fa;color:#212529}.btn-light:visited{color:#212529}.btn-light:hover{background-color:#e2e6ea;border-color:#dae0e5;color:#212529}.btn-light.focus,.btn-light:focus{background-color:#e2e6ea;border-color:#dae0e5;box-shadow:0 0 0 .2rem 
rgba(248,249,250,0.5);color:#212529}.btn-light.disabled,.btn-light:disabled{background-color:#f8f9fa;border-color:#f8f9fa;color:#212529}.btn-light.active:not(:disabled):not(.disabled),.btn-light:not(:disabled):not(.disabled):active,.show>.btn-light.dropdown-toggle{background-color:#dae0e5;border-color:#d3d9df;color:#212529}.btn-light.active:not(:disabled):not(.disabled):focus,.btn-light:not(:disabled):not(.disabled):active:focus,.show>.btn-light.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(248,249,250,0.5)}.btn-dark{background-color:#343a40;border-color:#343a40;color:#fff}.btn-dark:visited{color:#fff}.btn-dark:hover{background-color:#23272b;border-color:#1d2124;color:#fff}.btn-dark.focus,.btn-dark:focus{background-color:#23272b;border-color:#1d2124;box-shadow:0 0 0 .2rem rgba(52,58,64,0.5);color:#fff}.btn-dark.disabled,.btn-dark:disabled{background-color:#343a40;border-color:#343a40;color:#fff}.btn-dark.active:not(:disabled):not(.disabled),.btn-dark:not(:disabled):not(.disabled):active,.show>.btn-dark.dropdown-toggle{background-color:#1d2124;border-color:#171a1d;color:#fff}.btn-dark.active:not(:disabled):not(.disabled):focus,.btn-dark:not(:disabled):not(.disabled):active:focus,.show>.btn-dark.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(52,58,64,0.5)}.btn-outline-primary{border-color:#007bff;color:#007bff}.btn-outline-primary:visited{color:#007bff}.btn-outline-primary:hover{background-color:#007bff;border-color:#007bff;color:#fff}.btn-outline-primary.focus,.btn-outline-primary:focus{box-shadow:0 0 0 .2rem rgba(0,123,255,0.5)}.btn-outline-primary.disabled,.btn-outline-primary:disabled{background-color:transparent;color:#007bff}.btn-outline-primary.active:not(:disabled):not(.disabled),.btn-outline-primary:not(:disabled):not(.disabled):active,.show>.btn-outline-primary.dropdown-toggle{background-color:#007bff;border-color:#007bff;color:#fff}.btn-outline-primary.active:not(:disabled):not(.disabled):focus,.btn-outline-primary:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-primary.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(0,123,255,0.5)}.btn-outline-secondary{border-color:#6c757d;color:#6c757d}.btn-outline-secondary:visited{color:#6c757d}.btn-outline-secondary:hover{background-color:#6c757d;border-color:#6c757d;color:#fff}.btn-outline-secondary.focus,.btn-outline-secondary:focus{box-shadow:0 0 0 .2rem rgba(108,117,125,0.5)}.btn-outline-secondary.disabled,.btn-outline-secondary:disabled{background-color:transparent;color:#6c757d}.btn-outline-secondary.active:not(:disabled):not(.disabled),.btn-outline-secondary:not(:disabled):not(.disabled):active,.show>.btn-outline-secondary.dropdown-toggle{background-color:#6c757d;border-color:#6c757d;color:#fff}.btn-outline-secondary.active:not(:disabled):not(.disabled):focus,.btn-outline-secondary:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-secondary.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(108,117,125,0.5)}.btn-outline-success{border-color:#28a745;color:#28a745}.btn-outline-success:visited{color:#28a745}.btn-outline-success:hover{background-color:#28a745;border-color:#28a745;color:#fff}.btn-outline-success.focus,.btn-outline-success:focus{box-shadow:0 0 0 .2rem 
rgba(40,167,69,0.5)}.btn-outline-success.disabled,.btn-outline-success:disabled{background-color:transparent;color:#28a745}.btn-outline-success.active:not(:disabled):not(.disabled),.btn-outline-success:not(:disabled):not(.disabled):active,.show>.btn-outline-success.dropdown-toggle{background-color:#28a745;border-color:#28a745;color:#fff}.btn-outline-success.active:not(:disabled):not(.disabled):focus,.btn-outline-success:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-success.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(40,167,69,0.5)}.btn-outline-info{border-color:#17a2b8;color:#17a2b8}.btn-outline-info:visited{color:#17a2b8}.btn-outline-info:hover{background-color:#17a2b8;border-color:#17a2b8;color:#fff}.btn-outline-info.focus,.btn-outline-info:focus{box-shadow:0 0 0 .2rem rgba(23,162,184,0.5)}.btn-outline-info.disabled,.btn-outline-info:disabled{background-color:transparent;color:#17a2b8}.btn-outline-info.active:not(:disabled):not(.disabled),.btn-outline-info:not(:disabled):not(.disabled):active,.show>.btn-outline-info.dropdown-toggle{background-color:#17a2b8;border-color:#17a2b8;color:#fff}.btn-outline-info.active:not(:disabled):not(.disabled):focus,.btn-outline-info:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-info.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(23,162,184,0.5)}.btn-outline-warning{border-color:#ffc107;color:#ffc107}.btn-outline-warning:visited{color:#ffc107}.btn-outline-warning:hover{background-color:#ffc107;border-color:#ffc107;color:#212529}.btn-outline-warning.focus,.btn-outline-warning:focus{box-shadow:0 0 0 .2rem rgba(255,193,7,0.5)}.btn-outline-warning.disabled,.btn-outline-warning:disabled{background-color:transparent;color:#ffc107}.btn-outline-warning.active:not(:disabled):not(.disabled),.btn-outline-warning:not(:disabled):not(.disabled):active,.show>.btn-outline-warning.dropdown-toggle{background-color:#ffc107;border-color:#ffc107;color:#212529}.btn-outline-warning.active:not(:disabled):not(.disabled):focus,.btn-outline-warning:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-warning.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(255,193,7,0.5)}.btn-outline-danger{border-color:#dc3545;color:#dc3545}.btn-outline-danger:visited{color:#dc3545}.btn-outline-danger:hover{background-color:#dc3545;border-color:#dc3545;color:#fff}.btn-outline-danger.focus,.btn-outline-danger:focus{box-shadow:0 0 0 .2rem rgba(220,53,69,0.5)}.btn-outline-danger.disabled,.btn-outline-danger:disabled{background-color:transparent;color:#dc3545}.btn-outline-danger.active:not(:disabled):not(.disabled),.btn-outline-danger:not(:disabled):not(.disabled):active,.show>.btn-outline-danger.dropdown-toggle{background-color:#dc3545;border-color:#dc3545;color:#fff}.btn-outline-danger.active:not(:disabled):not(.disabled):focus,.btn-outline-danger:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-danger.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(220,53,69,0.5)}.btn-outline-light{border-color:#f8f9fa;color:#f8f9fa}.btn-outline-light:visited{color:#f8f9fa}.btn-outline-light:hover{background-color:#f8f9fa;border-color:#f8f9fa;color:#212529}.btn-outline-light.focus,.btn-outline-light:focus{box-shadow:0 0 0 .2rem 
rgba(248,249,250,0.5)}.btn-outline-light.disabled,.btn-outline-light:disabled{background-color:transparent;color:#f8f9fa}.btn-outline-light.active:not(:disabled):not(.disabled),.btn-outline-light:not(:disabled):not(.disabled):active,.show>.btn-outline-light.dropdown-toggle{background-color:#f8f9fa;border-color:#f8f9fa;color:#212529}.btn-outline-light.active:not(:disabled):not(.disabled):focus,.btn-outline-light:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-light.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(248,249,250,0.5)}.btn-outline-dark{border-color:#343a40;color:#343a40}.btn-outline-dark:visited{color:#343a40}.btn-outline-dark:hover{background-color:#343a40;border-color:#343a40;color:#fff}.btn-outline-dark.focus,.btn-outline-dark:focus{box-shadow:0 0 0 .2rem rgba(52,58,64,0.5)}.btn-outline-dark.disabled,.btn-outline-dark:disabled{background-color:transparent;color:#343a40}.btn-outline-dark.active:not(:disabled):not(.disabled),.btn-outline-dark:not(:disabled):not(.disabled):active,.show>.btn-outline-dark.dropdown-toggle{background-color:#343a40;border-color:#343a40;color:#fff}.btn-outline-dark.active:not(:disabled):not(.disabled):focus,.btn-outline-dark:not(:disabled):not(.disabled):active:focus,.show>.btn-outline-dark.dropdown-toggle:focus{box-shadow:0 0 0 .2rem rgba(52,58,64,0.5)}.btn-link{color:#007bff;font-weight:400;text-decoration:none}.btn-link:hover{color:#0056b3;text-decoration:underline}.btn-link.focus,.btn-link:focus{box-shadow:none;text-decoration:underline}.btn-link.disabled,.btn-link:disabled{color:#6c757d;pointer-events:none}.btn-group-lg>.btn,.btn-lg{border-radius:.3rem;font-size:1.25rem;line-height:1.5;padding:.5rem 1rem}.btn-group-sm>.btn,.btn-sm{border-radius:.2rem;font-size:.875rem;line-height:1.5;padding:.25rem .5rem}.btn-block{display:block;width:100%}.btn-block+.btn-block{margin-top:.5rem}input.btn-block[type=button],input.btn-block[type=reset],input.btn-block[type=submit]{width:100%}.stretched-link::after{background-color:rgba(0,0,0,0);bottom:0;content:'';left:0;pointer-events:auto;position:absolute;right:0;top:0;z-index:1}.text-wrap{white-space:normal !important}.card{background-clip:border-box;background-color:#fff;border:1px solid rgba(0,0,0,0.125);border-radius:.25rem;display:-ms-flexbox;display:flex;-ms-flex-direction:column;flex-direction:column;min-width:0;position:relative;word-wrap:break-word}.card>hr{margin-left:0;margin-right:0}.card>.list-group:first-child .list-group-item:first-child{border-top-left-radius:.25rem;border-top-right-radius:.25rem}.card>.list-group:last-child .list-group-item:last-child{border-bottom-left-radius:.25rem;border-bottom-right-radius:.25rem}.card-body{-ms-flex:1 1 auto;flex:1 1 auto;min-height:1px;padding:1.25rem}.card-title{margin-bottom:.75rem}.card-subtitle{margin-bottom:0;margin-top:-.375rem}.card-text:last-child{margin-bottom:0}.card-link:hover{text-decoration:none}.card-link+.card-link{margin-left:1.25rem}.card-header{background-color:rgba(0,0,0,0.03);border-bottom:1px solid rgba(0,0,0,0.125);margin-bottom:0;padding:.75rem 1.25rem}.card-header:first-child{border-radius:calc(.25rem - 1px) calc(.25rem - 1px) 0 0}.card-header+.list-group .list-group-item:first-child{border-top:0}.card-footer{background-color:rgba(0,0,0,0.03);border-top:1px solid rgba(0,0,0,0.125);padding:.75rem 1.25rem}.card-footer:last-child{border-radius:0 0 calc(.25rem - 1px) calc(.25rem - 
1px)}.card-header-tabs{border-bottom:0;margin-bottom:-.75rem;margin-left:-.625rem;margin-right:-.625rem}.card-header-pills{margin-left:-.625rem;margin-right:-.625rem}.card-img-overlay{bottom:0;left:0;padding:1.25rem;position:absolute;right:0;top:0}.card-img,.card-img-bottom,.card-img-top{-ms-flex-negative:0;flex-shrink:0;width:100%}.card-img,.card-img-top{border-top-left-radius:calc(.25rem - 1px);border-top-right-radius:calc(.25rem - 1px)}.card-img,.card-img-bottom{border-bottom-left-radius:calc(.25rem - 1px);border-bottom-right-radius:calc(.25rem - 1px)}.w-100{width:100% !important}.shadow{box-shadow:0 0.5rem 1rem rgba(0,0,0,0.15) !important}.bg-primary{background-color:#007bff !important}button.bg-primary:focus,button.bg-primary:hover{background-color:#0062cc !important}a.bg-primary:focus,a.bg-primary:hover{background-color:#0062cc !important}a.text-primary:focus,a.text-primary:hover{color:#121416 !important}.bg-secondary{background-color:#6c757d !important}button.bg-secondary:focus,button.bg-secondary:hover{background-color:#545b62 !important}a.bg-secondary:focus,a.bg-secondary:hover{background-color:#545b62 !important}a.text-secondary:focus,a.text-secondary:hover{color:#121416 !important}.bg-success{background-color:#28a745 !important}button.bg-success:focus,button.bg-success:hover{background-color:#1e7e34 !important}a.bg-success:focus,a.bg-success:hover{background-color:#1e7e34 !important}a.text-success:focus,a.text-success:hover{color:#121416 !important}.bg-info{background-color:#17a2b8 !important}button.bg-info:focus,button.bg-info:hover{background-color:#117a8b !important}a.bg-info:focus,a.bg-info:hover{background-color:#117a8b !important}a.text-info:focus,a.text-info:hover{color:#121416 !important}.bg-warning{background-color:#ffc107 !important}button.bg-warning:focus,button.bg-warning:hover{background-color:#d39e00 !important}a.bg-warning:focus,a.bg-warning:hover{background-color:#d39e00 !important}a.text-warning:focus,a.text-warning:hover{color:#121416 !important}.bg-danger{background-color:#dc3545 !important}button.bg-danger:focus,button.bg-danger:hover{background-color:#bd2130 !important}a.bg-danger:focus,a.bg-danger:hover{background-color:#bd2130 !important}a.text-danger:focus,a.text-danger:hover{color:#121416 !important}.bg-light{background-color:#f8f9fa !important}button.bg-light:focus,button.bg-light:hover{background-color:#dae0e5 !important}a.bg-light:focus,a.bg-light:hover{background-color:#dae0e5 !important}a.text-light:focus,a.text-light:hover{color:#121416 !important}.bg-dark{background-color:#343a40 !important}button.bg-dark:focus,button.bg-dark:hover{background-color:#1d2124 !important}a.bg-dark:focus,a.bg-dark:hover{background-color:#1d2124 !important}a.text-dark:focus,a.text-dark:hover{color:#121416 !important}.bg-white{background-color:#fff !important}button.bg-white:focus,button.bg-white:hover{background-color:#e6e6e6 !important}a.bg-white:focus,a.bg-white:hover{background-color:#e6e6e6 !important}a.text-white:focus,a.text-white:hover{color:#121416 !important}.text-primary{color:#007bff !important}.text-secondary{color:#6c757d !important}.text-success{color:#28a745 !important}.text-info{color:#17a2b8 !important}.text-warning{color:#ffc107 !important}.text-danger{color:#dc3545 !important}.text-light{color:#f8f9fa !important}.text-dark{color:#343a40 !important}.text-white{color:#fff !important}.text-body{color:#212529 !important}.text-muted{color:#6c757d !important}.text-black-50{color:rgba(0,0,0,0.5) !important}.text-white-50{color:rgba(255,255,255,0.5) 
!important}.bg-transparent{background-color:transparent !important}.text-justify{text-align:justify !important}.text-left{text-align:left !important}.text-right{text-align:right !important}.text-center{text-align:center !important}.font-weight-light{font-weight:300 !important}.font-weight-lighter{font-weight:lighter !important}.font-weight-normal{font-weight:400 !important}.font-weight-bold{font-weight:700 !important}.font-weight-bolder{font-weight:bolder !important}.font-italic{font-style:italic !important}.container{margin-left:auto;margin-right:auto;padding-left:15px;padding-right:15px;width:100%}@media (min-width: 576px){.container{max-width:540px}}@media (min-width: 768px){.container{max-width:720px}}@media (min-width: 992px){.container{max-width:960px}}@media (min-width: 1200px){.container{max-width:1140px}}.container-fluid,.container-lg,.container-md,.container-sm,.container-xl{margin-left:auto;margin-right:auto;padding-left:15px;padding-right:15px;width:100%}@media (min-width: 576px){.container,.container-sm{max-width:540px}}@media (min-width: 768px){.container,.container-md,.container-sm{max-width:720px}}@media (min-width: 992px){.container,.container-lg,.container-md,.container-sm{max-width:960px}}@media (min-width: 1200px){.container,.container-lg,.container-md,.container-sm,.container-xl{max-width:1140px}}.row{display:-ms-flexbox;display:flex;-ms-flex-wrap:wrap;flex-wrap:wrap;margin-left:-15px;margin-right:-15px}.col-lg,.col-lg-1,.col-lg-10,.col-lg-11,.col-lg-12,.col-lg-2,.col-lg-3,.col-lg-4,.col-lg-5,.col-lg-6,.col-lg-7,.col-lg-8,.col-lg-9,.col-lg-auto,.col-md,.col-md-1,.col-md-10,.col-md-11,.col-md-12,.col-md-2,.col-md-3,.col-md-4,.col-md-5,.col-md-6,.col-md-7,.col-md-8,.col-md-9,.col-md-auto,.col-sm,.col-sm-1,.col-sm-10,.col-sm-11,.col-sm-12,.col-sm-2,.col-sm-3,.col-sm-4,.col-sm-5,.col-sm-6,.col-sm-7,.col-sm-8,.col-sm-9,.col-sm-auto,.col-xl,.col-xl-1,.col-xl-10,.col-xl-11,.col-xl-12,.col-xl-2,.col-xl-3,.col-xl-4,.col-xl-5,.col-xl-6,.col-xl-7,.col-xl-8,.col-xl-9,.col-xl-auto{padding-left:15px;padding-right:15px;position:relative;width:100%}@media (min-width: 576px){.col-sm{flex-basis:0;flex-grow:1;-ms-flex-positive:1;-ms-flex-preferred-size:0;max-width:100%}.col-sm-auto{-ms-flex:0 0 auto;flex:0 0 auto;max-width:100%;width:auto}.col-sm-1{-ms-flex:0 0 8.33333%;flex:0 0 8.33333%;max-width:8.33333%}.col-sm-2{-ms-flex:0 0 16.66667%;flex:0 0 16.66667%;max-width:16.66667%}.col-sm-3{-ms-flex:0 0 25%;flex:0 0 25%;max-width:25%}.col-sm-4{-ms-flex:0 0 33.33333%;flex:0 0 33.33333%;max-width:33.33333%}.col-sm-5{-ms-flex:0 0 41.66667%;flex:0 0 41.66667%;max-width:41.66667%}.col-sm-6{-ms-flex:0 0 50%;flex:0 0 50%;max-width:50%}.col-sm-7{-ms-flex:0 0 58.33333%;flex:0 0 58.33333%;max-width:58.33333%}.col-sm-8{-ms-flex:0 0 66.66667%;flex:0 0 66.66667%;max-width:66.66667%}.col-sm-9{-ms-flex:0 0 75%;flex:0 0 75%;max-width:75%}.col-sm-10{-ms-flex:0 0 83.33333%;flex:0 0 83.33333%;max-width:83.33333%}.col-sm-11{-ms-flex:0 0 91.66667%;flex:0 0 91.66667%;max-width:91.66667%}.col-sm-12{-ms-flex:0 0 100%;flex:0 0 100%;max-width:100%}}@media (min-width: 768px){.col-md{flex-basis:0;flex-grow:1;-ms-flex-positive:1;-ms-flex-preferred-size:0;max-width:100%}.col-md-auto{-ms-flex:0 0 auto;flex:0 0 auto;max-width:100%;width:auto}.col-md-1{-ms-flex:0 0 8.33333%;flex:0 0 8.33333%;max-width:8.33333%}.col-md-2{-ms-flex:0 0 16.66667%;flex:0 0 16.66667%;max-width:16.66667%}.col-md-3{-ms-flex:0 0 25%;flex:0 0 25%;max-width:25%}.col-md-4{-ms-flex:0 0 33.33333%;flex:0 0 
33.33333%;max-width:33.33333%}.col-md-5{-ms-flex:0 0 41.66667%;flex:0 0 41.66667%;max-width:41.66667%}.col-md-6{-ms-flex:0 0 50%;flex:0 0 50%;max-width:50%}.col-md-7{-ms-flex:0 0 58.33333%;flex:0 0 58.33333%;max-width:58.33333%}.col-md-8{-ms-flex:0 0 66.66667%;flex:0 0 66.66667%;max-width:66.66667%}.col-md-9{-ms-flex:0 0 75%;flex:0 0 75%;max-width:75%}.col-md-10{-ms-flex:0 0 83.33333%;flex:0 0 83.33333%;max-width:83.33333%}.col-md-11{-ms-flex:0 0 91.66667%;flex:0 0 91.66667%;max-width:91.66667%}.col-md-12{-ms-flex:0 0 100%;flex:0 0 100%;max-width:100%}}@media (min-width: 992px){.col-lg{flex-basis:0;flex-grow:1;-ms-flex-positive:1;-ms-flex-preferred-size:0;max-width:100%}.col-lg-auto{-ms-flex:0 0 auto;flex:0 0 auto;max-width:100%;width:auto}.col-lg-1{-ms-flex:0 0 8.33333%;flex:0 0 8.33333%;max-width:8.33333%}.col-lg-2{-ms-flex:0 0 16.66667%;flex:0 0 16.66667%;max-width:16.66667%}.col-lg-3{-ms-flex:0 0 25%;flex:0 0 25%;max-width:25%}.col-lg-4{-ms-flex:0 0 33.33333%;flex:0 0 33.33333%;max-width:33.33333%}.col-lg-5{-ms-flex:0 0 41.66667%;flex:0 0 41.66667%;max-width:41.66667%}.col-lg-6{-ms-flex:0 0 50%;flex:0 0 50%;max-width:50%}.col-lg-7{-ms-flex:0 0 58.33333%;flex:0 0 58.33333%;max-width:58.33333%}.col-lg-8{-ms-flex:0 0 66.66667%;flex:0 0 66.66667%;max-width:66.66667%}.col-lg-9{-ms-flex:0 0 75%;flex:0 0 75%;max-width:75%}.col-lg-10{-ms-flex:0 0 83.33333%;flex:0 0 83.33333%;max-width:83.33333%}.col-lg-11{-ms-flex:0 0 91.66667%;flex:0 0 91.66667%;max-width:91.66667%}.col-lg-12{-ms-flex:0 0 100%;flex:0 0 100%;max-width:100%}}@media (min-width: 1200px){.col-xl{flex-basis:0;flex-grow:1;-ms-flex-positive:1;-ms-flex-preferred-size:0;max-width:100%}.col-xl-auto{-ms-flex:0 0 auto;flex:0 0 auto;max-width:100%;width:auto}.col-xl-1{-ms-flex:0 0 8.33333%;flex:0 0 8.33333%;max-width:8.33333%}.col-xl-2{-ms-flex:0 0 16.66667%;flex:0 0 16.66667%;max-width:16.66667%}.col-xl-3{-ms-flex:0 0 25%;flex:0 0 25%;max-width:25%}.col-xl-4{-ms-flex:0 0 33.33333%;flex:0 0 33.33333%;max-width:33.33333%}.col-xl-5{-ms-flex:0 0 41.66667%;flex:0 0 41.66667%;max-width:41.66667%}.col-xl-6{-ms-flex:0 0 50%;flex:0 0 50%;max-width:50%}.col-xl-7{-ms-flex:0 0 58.33333%;flex:0 0 58.33333%;max-width:58.33333%}.col-xl-8{-ms-flex:0 0 66.66667%;flex:0 0 66.66667%;max-width:66.66667%}.col-xl-9{-ms-flex:0 0 75%;flex:0 0 75%;max-width:75%}.col-xl-10{-ms-flex:0 0 83.33333%;flex:0 0 83.33333%;max-width:83.33333%}.col-xl-11{-ms-flex:0 0 91.66667%;flex:0 0 91.66667%;max-width:91.66667%}.col-xl-12{-ms-flex:0 0 100%;flex:0 0 100%;max-width:100%}}.d-flex{display:-ms-flexbox !important;display:flex !important}.sphinx-bs,.sphinx-bs *{-moz-box-sizing:border-box;-webkit-box-sizing:border-box;box-sizing:border-box}.sphinx-bs p{margin-top:0} diff --git a/_panels_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css b/_panels_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css new file mode 100644 index 000000000..fc14abc85 --- /dev/null +++ b/_panels_static/panels-main.c949a650a448cc0ae9fd3441c0e17fb0.css @@ -0,0 +1 @@ +details.dropdown .summary-title{padding-right:3em !important;-moz-user-select:none;-ms-user-select:none;-webkit-user-select:none;user-select:none}details.dropdown:hover{cursor:pointer}details.dropdown .summary-content{cursor:default}details.dropdown summary{list-style:none;padding:1em}details.dropdown summary .octicon.no-title{vertical-align:middle}details.dropdown[open] summary .octicon.no-title{visibility:hidden}details.dropdown summary::-webkit-details-marker{display:none}details.dropdown 
summary:focus{outline:none}details.dropdown summary:hover .summary-up svg,details.dropdown summary:hover .summary-down svg{opacity:1}details.dropdown .summary-up svg,details.dropdown .summary-down svg{display:block;opacity:.6}details.dropdown .summary-up,details.dropdown .summary-down{pointer-events:none;position:absolute;right:1em;top:.75em}details.dropdown[open] .summary-down{visibility:hidden}details.dropdown:not([open]) .summary-up{visibility:hidden}details.dropdown.fade-in[open] summary~*{-moz-animation:panels-fade-in .5s ease-in-out;-webkit-animation:panels-fade-in .5s ease-in-out;animation:panels-fade-in .5s ease-in-out}details.dropdown.fade-in-slide-down[open] summary~*{-moz-animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out;-webkit-animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out;animation:panels-fade-in .5s ease-in-out, panels-slide-down .5s ease-in-out}@keyframes panels-fade-in{0%{opacity:0}100%{opacity:1}}@keyframes panels-slide-down{0%{transform:translate(0, -10px)}100%{transform:translate(0, 0)}}.octicon{display:inline-block;fill:currentColor;vertical-align:text-top}.tabbed-content{box-shadow:0 -.0625rem var(--tabs-color-overline),0 .0625rem var(--tabs-color-underline);display:none;order:99;padding-bottom:.75rem;padding-top:.75rem;width:100%}.tabbed-content>:first-child{margin-top:0 !important}.tabbed-content>:last-child{margin-bottom:0 !important}.tabbed-content>.tabbed-set{margin:0}.tabbed-set{border-radius:.125rem;display:flex;flex-wrap:wrap;margin:1em 0;position:relative}.tabbed-set>input{opacity:0;position:absolute}.tabbed-set>input:checked+label{border-color:var(--tabs-color-label-active);color:var(--tabs-color-label-active)}.tabbed-set>input:checked+label+.tabbed-content{display:block}.tabbed-set>input:focus+label{outline-style:auto}.tabbed-set>input:not(.focus-visible)+label{outline:none;-webkit-tap-highlight-color:transparent}.tabbed-set>label{border-bottom:.125rem solid transparent;color:var(--tabs-color-label-inactive);cursor:pointer;font-size:var(--tabs-size-label);font-weight:700;padding:1em 1.25em .5em;transition:color 250ms;width:auto;z-index:1}html .tabbed-set>label:hover{color:var(--tabs-color-label-active)} diff --git a/_panels_static/panels-variables.06eb56fa6e07937060861dad626602ad.css b/_panels_static/panels-variables.06eb56fa6e07937060861dad626602ad.css new file mode 100644 index 000000000..adc616622 --- /dev/null +++ b/_panels_static/panels-variables.06eb56fa6e07937060861dad626602ad.css @@ -0,0 +1,7 @@ +:root { +--tabs-color-label-active: hsla(231, 99%, 66%, 1); +--tabs-color-label-inactive: rgba(178, 206, 245, 0.62); +--tabs-color-overline: rgb(207, 236, 238); +--tabs-color-underline: rgb(207, 236, 238); +--tabs-size-label: 1rem; +} \ No newline at end of file diff --git a/_sources/cpp/examples/bgl.rst.txt b/_sources/cpp/examples/bgl.rst.txt new file mode 100644 index 000000000..763b7fadb --- /dev/null +++ b/_sources/cpp/examples/bgl.rst.txt @@ -0,0 +1,93 @@ +Co-Work with BGL +============================ + +The `Boost Graph Library (BGL) `_ is the first C++ library to apply the principles of generic programming to the construction of the advanced data structures and algorithms used in graph computations. The BGL graph interface and graph components are generic in the same sense as the Standard Template Library (STL). And it provides some built-in algorithms which cover a core set of algorithm patterns and a larger set of graph algorithms. 
+ +We take calculating CC as an example to demonstrate how BGL works with GraphAr. A weakly connected component is a maximal subgraph of a graph such that for every pair of vertices in it, there is an undirected path connecting them. The CC algorithm identifies all such components in a graph. Learn more about `the CC algorithm `_. + +The source code of CC based on BGL can be found at `bgl_example.cc`_. In this program, the graph information file is first read to get the metadata: + +.. code:: C++ + + std::string path = ... // the path of the graph information file + auto graph_info = graphar::GraphInfo::Load(path).value(); + +Then, the vertex collection and the edge collection are established as handles to access the graph data: + +.. code:: C++ + + auto maybe_vertices = graphar::VerticesCollection::Make(graph_info, "person"); + auto vertices = maybe_vertices.value(); + auto maybe_edges = graphar::EdgesCollection::Make(graph_info, "person", "knows", "person", graphar::AdjListType::ordered_by_source); + auto edges = maybe_edges.value(); + +Next, we construct the in-memory graph data structure for BGL by traversing the vertices and edges via GraphAr's high-level reading interface (the vertex iterator and the edge iterator): + +.. code:: C++ + + // define the Graph type in BGL + typedef boost::adjacency_list<boost::vecS, boost::vecS, boost::undirectedS, boost::property<boost::vertex_name_t, int64_t>, // vertex property + boost::no_property> Graph; // no edge property + // descriptors for vertex in BGL + typedef typename boost::graph_traits<Graph>::vertex_descriptor Vertex; + + // declare a graph object with (num_vertices) vertices and an edge iterator + std::vector<std::pair<graphar::IdType, graphar::IdType>> edges_array; + auto it_begin = edges->begin(), it_end = edges->end(); + for (auto it = it_begin; it != it_end; ++it) + edges_array.push_back(std::make_pair(it.source(), it.destination())); + Graph g(edges_array.begin(), edges_array.end(), num_vertices); + + // define the internal vertex property "id" + boost::property_map<Graph, boost::vertex_name_t>::type id = get(boost::vertex_name_t(), g); + auto v_it_begin = vertices->begin(), v_it_end = vertices->end(); + for (auto it = v_it_begin; it != v_it_end; ++it) { + auto vertex = *it; + boost::put(id, vertex.id(), vertex.property("id").value()); + } + +After that, a built-in CC algorithm provided by BGL is called: + +.. code:: C++ + + // define the external vertex property "component" + std::vector<int> component(num_vertices); + // call algorithm: cc + int cc_num = boost::connected_components(g, &component[0]); + std::cout << "Total number of components: " << cc_num << std::endl; + +Finally, we can use a **VerticesBuilder** of GraphAr to write the results to newly generated GAR files: + +.. 
code:: C++ + + // construct a new property group + graphar::Property cc = {"cc", graphar::int32(), false}; + std::vector<graphar::Property> property_vector = {cc}; + auto group = graphar::CreatePropertyGroup(property_vector, graphar::FileType::PARQUET); + + // construct the new vertex info + std::string vertex_label = "cc_result", vertex_prefix = "result/"; + int chunk_size = 100; + auto new_info = graphar::CreateVertexInfo(vertex_label, chunk_size, {group}, vertex_prefix); + + // access the vertices via the index map and vertex iterator of BGL + typedef boost::property_map<Graph, boost::vertex_index_t>::type IndexMap; + IndexMap index = boost::get(boost::vertex_index, g); + typedef boost::graph_traits<Graph>::vertex_iterator vertex_iter; + std::pair<vertex_iter, vertex_iter> vp; + + // dump the results through the VerticesBuilder + graphar::builder::VerticesBuilder builder(new_info, "/tmp/"); + for (vp = boost::vertices(g); vp.first != vp.second; ++vp.first) { + Vertex v = *vp.first; + graphar::builder::Vertex vertex(index[v]); + vertex.AddProperty(cc.name, component[index[v]]); + builder.AddVertex(vertex); + } + builder.Dump(); + + +.. _bgl_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/bgl_example.cc diff --git a/_sources/cpp/examples/graphscope.rst.txt b/_sources/cpp/examples/graphscope.rst.txt new file mode 100644 index 000000000..43f195c6b --- /dev/null +++ b/_sources/cpp/examples/graphscope.rst.txt @@ -0,0 +1,32 @@ +Integrate into GraphScope +============================ + +`GraphScope `_ is a unified distributed graph computing platform that provides a one-stop environment for performing diverse graph operations on a cluster through a user-friendly Python interface. As an important application case of GraphAr, we have integrated it into GraphScope. + +GraphScope works on a graph G fragmented via a partition strategy picked by the user, and each worker maintains a fragment of G. Given a query, it posts the same query to all the workers and computes following the BSP (Bulk Synchronous Parallel) model. More specifically, each worker first executes processing against its local fragment to compute partial answers in parallel, and then each worker may exchange partial results with the other workers via synchronous message passing. + +To integrate GraphAr into GraphScope, we implemented *ArrowFragmentBuilder* and *ArrowFragmentWriter*. *ArrowFragmentBuilder* establishes the fragments for workers of GraphScope by reading GAR files in parallel. Conversely, *ArrowFragmentWriter* can take the GraphScope fragments and save them as GAR files. If you're interested in knowing more about the implementation, please refer to the `source code `_. + + +Performance Report +------------------------ + +Parameter settings +`````````````````` +The time performance of *ArrowFragmentBuilder* and *ArrowFragmentWriter* in GraphScope is heavily dependent on the partitioning of the graph into GAR files, that is, the *vertex chunk size* and *edge chunk size*, which are specified in the vertex information file and in the edge information file, respectively. See `GraphAr File Format <../user-guide/file-format.html>`_ to understand the chunk size definitions in GAR. + +Generally speaking, larger chunk sizes produce fewer (and larger) chunk files. On small graphs, this can be disadvantageous as it reduces the degree of parallelism, prolonging disk I/O time. On the other hand, splitting the data into too many small files increases the overhead associated with the file system and the file parser.
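+
+As a concrete illustration of where this tuning knob lives, the sketch below sets the *vertex chunk size* while constructing the vertex metadata with the GraphAr C++ library. It mirrors the ``graphar::CreateVertexInfo`` call used in the BGL and SNAP conversion examples of this documentation; the label, prefix and chunk-size value here are illustrative assumptions rather than settings required by GraphScope.
+
+.. code:: C++
+
+   // Minimal sketch (illustrative values only): the vertex chunk size is part of
+   // the vertex metadata, so it is fixed when the vertex information is created.
+   graphar::IdType vertex_chunk_size = 1 << 18;  // an assumed value for illustration
+   auto vertex_info = graphar::CreateVertexInfo(
+       "person",            // vertex label (assumed)
+       vertex_chunk_size,   // the knob being tuned
+       {},                  // property groups omitted in this sketch
+       "vertex/person/");   // path prefix (assumed)
+   // The edge chunk size is specified analogously when creating the edge information.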
+ +We have conducted micro benchmarks to compare the time performance for reading/writing GAR files by *ArrowFragmentBuilder*/*ArrowFragmentWriter*, across different *vertex chunk size* and *edge chunk size* configurations. The settings we recommend for *vertex chunk size* and *edge chunk size* are **2^18** and **2^22**, respectively, which lead to efficient performance in most cases. These settings can be used as the reference values when integrating GraphAr into other systems besides GraphScope. + +Time performance results +```````````````````````` +Here we report the performance results of *ArrowFragmentBuilder*, and compare it with loading the same graph through the default loading strategy of GraphScope (through reading the CSV files in parallel). The execution time reported below includes loading the graph data from the disk into memory, as well as building GraphScope fragments from such data. The experiments are conducted on a cluster of 4 AliCloud ecs.r6.6xlarge instances (24vCPU, 192GB memory), using `com-friendster `_ (a simple graph) and `ldbc-snb-30 `_ (a multi-labeled property graph) as datasets. + ++----------------+---------+-----------------+-----------------+ +| Dataset | Workers | Default Loading | GraphAr Loading | ++================+=========+=================+=================+ +| com-friendster | 4 | 282s | 54s | ++----------------+---------+-----------------+-----------------+ +| ldbc-snb-30 | 4 | 196s | 40s | ++----------------+---------+-----------------+-----------------+ diff --git a/_sources/cpp/examples/index.rst.txt b/_sources/cpp/examples/index.rst.txt new file mode 100644 index 000000000..0d9abdd6c --- /dev/null +++ b/_sources/cpp/examples/index.rst.txt @@ -0,0 +1,10 @@ +Examples +--------- + +.. toctree:: + :maxdepth: 2 + + bgl + graphscope + out-of-core + snap-to-graphar diff --git a/_sources/cpp/examples/out-of-core.rst.txt b/_sources/cpp/examples/out-of-core.rst.txt new file mode 100644 index 000000000..9c395e8a6 --- /dev/null +++ b/_sources/cpp/examples/out-of-core.rst.txt @@ -0,0 +1,122 @@ +Out-of-core Graph Algorithms +============================ + +An important application case of GraphAr is to serve out-of-core graph processing scenarios. With the graph data saved as GAR files on disk, GraphAr provides a set of reading interfaces that allow loading part of the graph data into memory when needed to conduct analytics. While it is more convenient and efficient to store the entirety of the graph in memory (as is done in BGL), out-of-core graph processing makes it possible to complete analytics on large-scale graphs using limited memory/computing resources. + +There are some out-of-core graph analytic algorithms that have been implemented based on GraphAr, including: + +- PageRank (PR) +- Connected Components (CC) +- Breadth First Search (BFS) + +These algorithms represent different compute patterns and are usually building blocks for constructing other graph algorithms. + +PageRank +------------------------ + +`PageRank (PR) `_ is an algorithm used by Google Search to rank web pages in their search engine results. The source code of PageRank based on GraphAr is located at `pagerank_example.cc`_, and the explanations can be found in the `Getting Started <../user-guide/getting-started.html#a-pagerank-example>`_ page. + +Connected Components +------------------------ + +A weakly connected component is a maximal subgraph of a graph such that for every pair of vertices in it, there is an undirected path connecting them.
And `Connected Components (CC) `_ is an algorithm to identify all weakly connected components in a graph. `CC based on BGL `_ is provided in GraphAr; we also implement out-of-core algorithms for this application. + +A typical method for calculating CC is label propagation. In this algorithm, each vertex is attached with a property that represents its component label, which is initially its own vertex id. In the subsequent supersteps (i.e., iterations), a vertex updates its label if it receives a smaller id, and then propagates this id to all its neighbors. + +This algorithm can be implemented based on streaming the edges via GraphAr's reading interface. That is to say, the edges are accessed and processed chunk by chunk, instead of being loaded into memory at once (as in the BGL example). + +.. code:: C++ + + // construct the edge collection in GraphAr + auto edges = ... + auto it_begin = edges->begin(), it_end = edges->end(); + + // initialize for all vertices + std::vector<graphar::IdType> component(num_vertices); + for (graphar::IdType i = 0; i < num_vertices; i++) + component[i] = i; + + // stream all edges for each iteration + for (int iter = 0; ; iter++) { + bool flag = false; + for (auto it = it_begin; it != it_end; ++it) { + graphar::IdType src = it.source(), dst = it.destination(); + // update + if (component[src] < component[dst]) { + component[dst] = component[src]; + flag = true; + } else if (component[src] > component[dst]) { + component[src] = component[dst]; + flag = true; + } + } + // check if it should terminate + if (!flag) break; + } + +The file `cc_stream_example.cc`_ located inside the source tree contains the complete implementation for this algorithm. Alternatively, we can process only the active vertices (the vertices that were updated in the last iteration) and the corresponding edges in each iteration, since an inactive vertex does not need to update its neighbors. Please refer to `cc_push_example.cc`_ for the complete code. + +.. tip:: + + In this example, two kinds of edges are used. The **ordered_by_source** edges are used to access all outgoing edges of an active vertex, and **ordered_by_dest** edges are used to access the incoming edges. In this way, all the neighbors of an active vertex can be accessed and processed. + + Although GraphAr supports getting the outgoing (incoming) edges of a single vertex for all adjList types, it is most efficient when the type is **ordered_by_source** (**ordered_by_dest**) since this avoids reading redundant data. + +Breadth First Search +------------------------ + +`Breadth First Search (BFS) `_ is a traversal algorithm that starts from a selected vertex (the root) and traverses the graph layer by layer, first exploring the neighbor vertices (vertices which are directly connected to the root) and then moving towards the next-level neighbor vertices. + +An out-of-core BFS algorithm could be implemented based on streaming the graph data via GraphAr. For each vertex, a property named *distance* is created and initialized to represent the distance from the root to this vertex. As with the standard BFS algorithms in other graph processing systems, for the iteration/superstep *i* (starting from 0), the active vertices contain all vertices reachable from the root in *i* hops (i.e., *distance[v] = i*). At the beginning of the algorithm, only the root vertex is active. This algorithm terminates when there are no more active vertices. + +.. code:: C++ + + // construct the edge collection in GraphAr + auto edges = ... 
+ auto it_begin = edges->begin(), it_end = edges->end(); + + // initialize for all vertices + graphar::IdType root = 0; // the BFS root + std::vector<graphar::IdType> distance(num_vertices); + for (graphar::IdType i = 0; i < num_vertices; i++) + distance[i] = (i == root ? 0 : -1); + + // stream all edges for each iteration + for (int iter = 0; ; iter++) { + graphar::IdType count = 0; + for (auto it = it_begin; it != it_end; ++it) { + graphar::IdType src = it.source(), dst = it.destination(); + // update + if (distance[src] == iter && distance[dst] == -1) { + distance[dst] = distance[src] + 1; + count++; + } + } + // check if it should terminate + if (count == 0) break; + } + +The above algorithm is implemented based on streaming all edges for each iteration; the source code can be found at `bfs_stream_example.cc`_. + +Meanwhile, BFS could be implemented in a **push**-style which only traverses the edges that start from active vertices in each iteration, which is typically more efficient on real-world graphs. This implementation can be found at `bfs_push_example.cc`_. Similarly, we provide a BFS implementation in a **pull**-style which only traverses the edges that lead to non-visited vertices (i.e., the vertices that have not been traversed), as shown in `bfs_pull_example.cc`_. + +.. tip:: + + In common cases of graph processing, the **push**-style is more efficient when the set of active vertices is very sparse, while the **pull**-style fits when it is dense. + +In some cases, it is required to record the path of BFS, that is, to maintain each vertex's predecessor (also called *father*) in the traversal tree rather than only recording the distance. The implementation of BFS with recording fathers can be found at `bfs_father_example.cc`_. + + +.. _pagerank_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/pagerank_example.cc + +.. _cc_stream_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/cc_stream_example.cc + +.. _cc_push_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/cc_push_example.cc + +.. _bfs_stream_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/bfs_stream_example.cc + +.. _bfs_push_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/bfs_push_example.cc + +.. _bfs_pull_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/bfs_pull_example.cc + +.. _bfs_father_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/bfs_father_example.cc diff --git a/_sources/cpp/examples/snap-to-graphar.rst.txt b/_sources/cpp/examples/snap-to-graphar.rst.txt new file mode 100644 index 000000000..f34cd73d0 --- /dev/null +++ b/_sources/cpp/examples/snap-to-graphar.rst.txt @@ -0,0 +1,81 @@ +Convert SNAP Datasets to GraphAr Format +======================================= + +`SNAP `_ (Stanford Network Analysis Project) is a general-purpose network analysis and graph mining library. It provides a variety of datasets for research and development. In this section, we will show how to convert the SNAP datasets to GraphAr format, showcasing the process with the `ego-Facebook `_ graph as a case study. The conversion leverages GraphInfo constructors and the high-level writer functions from the C++ library. + + +Prepare the SNAP Dataset +------------------------ + +Before converting, download the ego-Facebook dataset from the SNAP website. The dataset is a text file with each line representing an edge in the graph. + +.. 
code:: bash + + cd /path/to/your/dataset + wget https://snap.stanford.edu/data/facebook_combined.txt.gz + gunzip facebook_combined.txt.gz + + +Convert the SNAP Dataset to GraphAr Format +------------------------------------------ + +The initial phase involves constructing VertexInfo, EdgeInfo, and GraphInfo objects, which are subsequently serialized into YAML files. +For instance, the code snippet below illustrates the creation and storage of the vertex information file. + +.. code:: C++ + + auto version = GAR_NAMESPACE::InfoVersion::Parse("gar/v1").value(); + + // meta info + std::string vertex_label = "node", vertex_prefix = "vertex/node/"; + + // create vertex info + auto vertex_info = GAR_NAMESPACE::CreateVertexInfo( + vertex_label, VERTEX_CHUNK_SIZE, {}, vertex_prefix, version); + + // save & dump vertex info + ASSERT(!vertex_info->Dump().has_error()); + ASSERT(vertex_info->Save(save_path + "node.vertex.yml").ok()); + + // create and save edge info file ... + auto edge_info = ... + ASSERT(!edge_info->Dump().has_error()); + ASSERT(edge_info->Save(save_path + "node_links_node.edge.yml").ok()); + + // create and save graph info file ... + auto graph_info = ... + ASSERT(!graph_info->Dump().has_error()); + ASSERT(graph_info->Save(save_path + graph_name + ".graph.yml").ok()); + + +Subsequently, we employ the high-level vertex and edge builders provided by the GraphAr C++ library to generate payload data files with vertex and edge data. +The code snippet that follows demonstrates the generation and preservation of the edge data file. + +.. code:: C++ + + // construct edges builder + auto e_builder = GAR_NAMESPACE::builder::EdgesBuilder::Make( + edge_info, save_path, ADJLIST_TYPE, VERTEX_COUNT) + .value(); + // read edge data from file + std::ifstream file(DATA_PATH); + std::string line; + while (std::getline(file, line)) { + std::istringstream iss(line); + // skip comments + if (line[0] == '#') { + continue; + } + int src, dst; + if (!(iss >> src >> dst)) { + break; + } + GAR_NAMESPACE::builder::Edge e(src, dst); + ASSERT(e_builder->AddEdge(e).ok()); + } + + // dump & clear + ASSERT(e_builder->Dump().ok()); + e_builder->Clear(); + +For comprehensive insights into this example, please consult the accompanying `source code `_. diff --git a/_sources/cpp/getting-started.rst.txt b/_sources/cpp/getting-started.rst.txt new file mode 100644 index 000000000..7d6de0bd1 --- /dev/null +++ b/_sources/cpp/getting-started.rst.txt @@ -0,0 +1,198 @@ +Getting Started +============================ + +This article is a quick guide that explains how to work with GraphAr C++. To begin with, please refer to the `Building Steps`_ to install GraphAr. + + +GAR Information Files +------------------------ + +GAR uses a group of Yaml files to save the meta information for a graph. + +Graph information +````````````````` +The graph information file defines the most basic information of a graph, including its name, the root directory path of the data files, the vertex information and edge information files it contains, and the version of GraphAr. For example, the file "ldbc_sample.graph.yml" defines an example graph named "ldbc_sample", which includes one type of vertices ("person") and one type of edges ("person knows person"). + +.. 
code:: Yaml + + name: ldbc_sample + prefix: ./ + vertices: + - person.vertex.yml + edges: + - person_knows_person.edge.yml + version: gar/v1 + +Vertex information +`````````````````` +Each vertex information file defines a single group of vertices with the same vertex label, e.g., "person" in this case. The vertex chunk size, the relative path for vertex data files and the version of GraphAr are specified. These vertices could have some properties, which are divided into property groups. Each property group has its own file type (CSV, ORC or Parquet) and the prefix of the relative path for its data files; it also lists all properties in this group, with every property containing its name, data type and whether it is the primary key. + +The file `person.vertex.yml`_ located inside the test data contains an example of the vertex information file. In this example, the "person" vertices have two property groups. The first group contains only one property (named "id") and the second group contains three properties ("firstName", "lastName" and "gender"). + +Edge information +```````````````` +Each edge information file defines a single type of edges with specific labels for the source vertex, destination vertex and the edge, e.g., "person_knows_person" in this case. It defines the meta information such as the edge chunk size, the source vertex chunk size, the destination vertex chunk size, whether the edges are directed or not, the relative file path for edge data files, the adjLists and the version of GraphAr. The file `person_knows_person.edge.yml`_ located inside the test data contains an example of the edge information file. + +In GAR format, separate data files are used to store the structure (called adjList) and the properties for edges. The adjList type can be one of **unordered_by_source**, **unordered_by_dest**, **ordered_by_source** or **ordered_by_dest** (see `Edges in GraphAr `_ for more). For a specific type of adjList, the meta information includes its file path prefix, the file type, as well as all the property groups attached. + +.. note:: + + It is allowed to store different types of adjLists for a group of edges at the same time. + + + +GAR Data Files +------------------------ + +Property data +````````````` +The vertex properties are stored in vertex property chunks with the chunk size specified by the vertex information file. Different property groups correspond to individual groups of data files. +In our example, the property group ("first name", "last name", "gender") for vertex chunk 0 of "person" vertices is stored in `./vertex/person/firstName_lastName_gender/chunk0`_. + +In graph processing practice, it is common to query only a subset of the property columns. Thus, column-oriented formats like Apache ORC and Apache Parquet are more efficient, as they eliminate the need to read columns that are not relevant. We also provide data files in ORC and Parquet for the example graph in the `test data`_. + +Similar to vertices, the edge properties are stored in edge property chunks. For each vertex chunk, its associated edges (if the edge type is **ordered_by_source** or **unordered_by_source**, the associated edges are those in which the source vertex is in that chunk; otherwise, if the edge type is **ordered_by_dest** or **unordered_by_dest**, the associated edges are those in which the destination is in that chunk) are maintained in some edge chunks, with the size of each chunk not exceeding the edge chunk size specified in the edge information file. 
+ +For instance, the file `./edge/person_knows_person/ordered_by_source/creationDate/part0/chunk0`_ stores the property group "creationDate" of "person_knows_person" edges for the first edge chunk of the first vertex chunk, and the adjList type of the edges is **ordered_by_source**. + +AdjList data +```````````` +The adjList in GAR describes the topology structure, i.e., the internal id of the source vertex and the destination vertex for each edge in a group of edges. As explained in `Edges in GraphAr `_, the edges are separated into edge chunks, and each edge chunk has its own adjList table and 0 or more property tables. + +For example, the file `./edge/person_knows_person/ordered_by_source/adj_list/part0/chunk0`_ saves the adjList of "person_knows_person" edges for the first edge chunk of the first vertex chunk, and the adjList type of the edges is "ordered_by_source". This adjList table consists of only two columns: one for the source and one for the destination; it can be saved in CSV, ORC, or Parquet files. + +.. note:: + + If the edges are ordered, there may also be offset chunks to construct the index for accessing edges of a single vertex. These chunks store the start offset of each vertex's edges; see `./edge/person_knows_person/ordered_by_source/offset/chunk0`_ as an example. + + +How to Use GAR +------------------------ + +Construct information +````````````````````` +It is convenient to construct the GAR metadata and dump it to generate information files. We provide an `example program`_ located in the source code which shows how to construct and dump the files for graph information, vertex information and edge information. + +Also, the metadata of a graph can be constructed easily by reading the existing information files, as the following code illustrates: + +.. code:: C++ + + // construct graph information from file + std::string path = ... // the path of the graph information file (e.g., ldbc_sample.graph.yml) + auto graph_info = graphar::GraphInfo::Load(path).value(); + + // get vertex information + auto vertex_info = graph_info->GetVertexInfo("person"); + if (vertex_info != nullptr) { + // use vertex_info ... + } + + // get edge information + auto edge_info = graph_info->GetEdgeInfo("person", "knows", "person"); + if (edge_info != nullptr) { + // use edge_info ... + } + + +Read GAR files +`````````````` +GAR supports flexible reading of graph data, e.g., reading the data of a single vertex, a vertex chunk, or all vertices with a specific label. In addition, only the necessary property groups can be selected for reading, which avoids reading all properties from the files. Furthermore, GAR provides convenient and flexible access to adjList, offset and property chunks for edges. + +As a simple case, the following example shows how to read all vertices with label "person" of the graph defined by "graph_info" and output the values of "id" and "firstName" for each vertex. + +.. code:: C++ + + graph_info = ... + auto vertices = graphar::VerticesCollection::Make(graph_info, "person").value(); + + for (auto it = vertices->begin(); it != vertices->end(); ++it) { + // get a vertex and access its data + auto vertex = *it; + std::cout << "id=" << vertex.property("id").value() << ", firstName=" << vertex.property("firstName").value() << std::endl; + } + +The next example reads all edges with label "person_knows_person" from the above graph and outputs the end vertices for each edge. + +.. code:: C++ + + graph_info = ... 
+ auto expect = graphar::EdgesCollection::Make(graph_info, "person", "knows", "person", graphar::AdjListType::ordered_by_source); + auto edges = expect.value(); + + for (auto it = edges->begin(); it != edges->end(); ++it) { + // get an edge and access its data + auto edge = *it; + std::cout << "src=" << edge.source() << ", dst=" << edge.destination() << std::endl; + } + +See also `C++ Reader API Reference `_. + +Write GAR files +``````````````` +As with the readers, the GAR writers provide methods at different levels to output the in-memory graph data into GAR files. + +As the simplest cases, the first example below adds vertices to a **VerticesBuilder**, and then dumps the data to files; the second example constructs a collection of edges and then dumps them. + +.. code:: C++ + + vertex_info = ... + prefix = ... + graphar::builder::VerticesBuilder builder(vertex_info, prefix); + + // add a vertex + graphar::builder::Vertex v; + v.AddProperty("id", 933); + v.AddProperty("firstName", "Alice"); + builder.AddVertex(v); + // add other vertices + // ... + + // write to GAR files + builder.Dump(); + +.. code:: C++ + + edge_info = ... + prefix = ... + vertices_num = ... + graphar::builder::EdgesBuilder builder(edge_info, prefix, graphar::AdjListType::ordered_by_source, vertices_num); + + // add an edge (0 -> 3) + graphar::builder::Edge e(0, 3); + e.AddProperty("creationDate", "2011-07-20T20:02:04.233+0000"); + builder.AddEdge(e); + // add other edges + // ... + + // write to GAR files + builder.Dump(); + +See also `C++ Writer API Reference `_. + +A PageRank Example +`````````````````` +Here we will go through an example of out-of-core graph analytic algorithms based on GAR which calculates PageRank. Please look `here `_ if you want a detailed explanation of the PageRank algorithm. The source code can be found at `pagerank_example.cc`_. + +This program first reads in the graph information file to obtain the metadata; then, it constructs the vertex and edge collections to enable access to the graph. After that, an implementation of the PageRank algorithm is provided, with data for the vertices stored in memory, and the edges streamed through disk I/O. Finally, the vertex information with type "person" is extended to include a new property named "pagerank" (a new vertex information file named *person-new-pagerank.vertex.yml* is saved) and the **VerticesBuilder** is used to write the results to newly generated data chunks. + +Please refer to `more examples `_ to learn about the other available case studies utilizing GraphAr. + +.. _Building Steps: https://github.com/alibaba/GraphAr/blob/main/README.rst#building-libraries + +.. _person.vertex.yml: https://github.com/GraphScope/gar-test/blob/main/ldbc_sample/csv/person.vertex.yml + +.. _person_knows_person.edge.yml: https://github.com/GraphScope/gar-test/blob/main/ldbc_sample/csv/person_knows_person.edge.yml + +.. _./vertex/person/firstName_lastName_gender/chunk0: https://github.com/GraphScope/gar-test/blob/main/ldbc_sample/csv/vertex/person/firstName_lastName_gender/chunk0 + +.. _test data: https://github.com/GraphScope/gar-test/blob/main/ldbc_sample/ + +.. _./edge/person_knows_person/ordered_by_source/creationDate/part0/chunk0: https://github.com/GraphScope/gar-test/blob/main/ldbc_sample/csv/edge/person_knows_person/ordered_by_source/creationDate/part0/chunk0 + +.. 
_./edge/person_knows_person/ordered_by_source/adj_list/part0/chunk0: https://github.com/GraphScope/gar-test/blob/main/ldbc_sample/csv/edge/person_knows_person/ordered_by_source/adj_list/part0/chunk0 + +.. _./edge/person_knows_person/ordered_by_source/offset/chunk0: https://github.com/GraphScope/gar-test/blob/main/ldbc_sample/csv/edge/person_knows_person/ordered_by_source/offset/chunk0 + +.. _example program: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/construct_info_example.cc + +.. _pagerank_example.cc: https://github.com/alibaba/GraphAr/blob/main/cpp/examples/pagerank_example.cc diff --git a/_sources/cpp/index.rst.txt b/_sources/cpp/index.rst.txt new file mode 100644 index 000000000..34b6b6b63 --- /dev/null +++ b/_sources/cpp/index.rst.txt @@ -0,0 +1,9 @@ +C++ Library +============================ + +.. toctree:: + :maxdepth: 2 + + Getting Started + Examples + Reference diff --git a/_sources/cpp/reference/index.rst.txt b/_sources/cpp/reference/index.rst.txt new file mode 100644 index 000000000..44b62a4e8 --- /dev/null +++ b/_sources/cpp/reference/index.rst.txt @@ -0,0 +1,301 @@ +API Reference +================== + +.. _cpp-api: + +.. default-domain:: cpp + +Graph Info +----------- + +.. doxygenclass:: graphar::Property + :members: + :undoc-members: + +.. doxygenclass:: graphar::PropertyGroup + :members: + :undoc-members: + +.. doxygenfunction:: graphar::CreatePropertyGroup + +.. doxygenclass:: graphar::AdjacentList + :members: + :undoc-members: + +.. doxygenfunction:: graphar::CreateAdjacentList + +.. doxygenclass:: graphar::VertexInfo + :members: + :undoc-members: + +.. doxygenfunction:: graphar::CreateVertexInfo + +.. doxygenclass:: graphar::EdgeInfo + :members: + :undoc-members: + +.. doxygenfunction:: graphar::CreateEdgeInfo + +.. doxygenclass:: graphar::GraphInfo + :members: + :undoc-members: + +.. doxygenfunction:: graphar::CreateGraphInfo + +Readers +--------------------- + +Chunk Info Reader +~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::VertexPropertyChunkInfoReader + :members: + :undoc-members: + +.. doxygenclass:: graphar::AdjListChunkInfoReader + :members: + :undoc-members: + +.. doxygenclass:: graphar::AdjListPropertyChunkInfoReader + :members: + :undoc-members: + +Arrow Chunk Reader +~~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::VertexPropertyArrowChunkReader + :members: + :undoc-members: + +.. doxygenclass:: graphar::AdjListArrowChunkReader + :members: + :undoc-members: + +.. doxygenclass:: graphar::AdjListOffsetArrowChunkReader + :members: + :undoc-members: + +Vertices Collection +~~~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::Vertex + :members: + :undoc-members: + +.. doxygenclass:: graphar::VertexIter + :members: + :undoc-members: + +.. doxygenclass:: graphar::VerticesCollection + :members: + :undoc-members: + +Edges Collection +~~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::Edge + :members: + :undoc-members: + +.. doxygenclass:: graphar::EdgeIter + :members: + :undoc-members: + +.. doxygenclass:: graphar::EdgesCollection + :members: + :undoc-members: + +.. doxygenclass:: graphar::OBSEdgeCollection + :members: + :undoc-members: + +.. doxygenclass:: graphar::OBDEdgesCollection + :members: + :undoc-members: + +.. doxygenclass:: graphar::UBSEdgesCollection + :members: + :undoc-members: + +.. doxygenclass:: graphar::UBDEdgesCollection + :members: + :undoc-members: + +Writer and Builder +--------------------- + +Chunk Writer +~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::VertexPropertyWriter + :members: + :undoc-members: + +.. 
doxygenclass:: graphar::EdgeChunkWriter + :members: + :undoc-members: + +Builder +~~~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::builder::Vertex + :members: + :undoc-members: + +.. doxygenclass:: graphar::builder::Edge + :members: + :undoc-members: + +.. doxygenclass:: graphar::builder::VerticesBuilder + :members: + :undoc-members: + +.. doxygenclass:: graphar::builder::EdgesBuilder + :members: + :undoc-members: + + +Types +-------- + +Id Type +~~~~~~~~~~~~~~~~~~~ + +.. doxygentypedef:: graphar::IdType + +Data Type +~~~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::DataType + :members: + :undoc-members: + +.. doxygenfunction:: graphar::boolean +.. doxygenfunction:: graphar::int32 +.. doxygenfunction:: graphar::int64 +.. doxygenfunction:: graphar::float32 +.. doxygenfunction:: graphar::float64 +.. doxygenfunction:: graphar::string +.. doxygenfunction:: graphar::list + +File Type +~~~~~~~~~~~~~~~~~~~ +.. doxygenenum:: graphar::FileType + +Adj List Type +~~~~~~~~~~~~~~~~~~~ +.. doxygenenum:: graphar::AdjListType + +Validate Level +~~~~~~~~~~~~~~~~~~~ +.. doxygenenum:: graphar::ValidateLevel + + +Utilities +--------- + +Result and Status +~~~~~~~~~~~~~~~~~~~ + +.. doxygentypedef:: graphar::Result + +.. doxygenclass:: graphar::Status + :members: + :undoc-members: + +FileSystem +~~~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::FileSystem + :members: + :undoc-members: + +.. doxygenfunction:: graphar::FileSystemFromUriOrPath + +Yaml Parser +~~~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::Yaml + :members: + :undoc-members: + +Info Version +~~~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::InfoVersion + :members: + :undoc-members: + +Expression +~~~~~~~~~~~~~~~~~~~ + +.. doxygenclass:: graphar::Expression + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionProperty + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionLiteral + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionNot + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionUnaryOp + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionBinaryOp + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionEqual + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionNotEqual + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionGreaterThan + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionGreaterEqual + :members: + :undoc-members: + + +.. doxygenclass:: graphar::ExpressionLessThan + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionLessEqual + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionAnd + :members: + :undoc-members: + +.. doxygenclass:: graphar::ExpressionOr + :members: + :undoc-members: + +.. doxygenfunction:: graphar::_Property(const Property&) +.. doxygenfunction:: graphar::_Property(const std::string&) +.. doxygenfunction:: graphar::_Literal +.. doxygenfunction:: graphar::_Not +.. doxygenfunction:: graphar::_Equal +.. doxygenfunction:: graphar::_NotEqual +.. doxygenfunction:: graphar::_GreaterThan +.. doxygenfunction:: graphar::_GreaterEqual +.. doxygenfunction:: graphar::_LessThan +.. doxygenfunction:: graphar::_LessEqual +.. doxygenfunction:: graphar::_And +.. 
doxygenfunction:: graphar::_Or + diff --git a/_sources/developers/community.rst.txt b/_sources/developers/community.rst.txt new file mode 100644 index 000000000..6659c1014 --- /dev/null +++ b/_sources/developers/community.rst.txt @@ -0,0 +1,71 @@ +GraphAr Community +==================== + +We welcome participation from everyone and encourage you to join us, ask questions, and get involved. + +All participation in the GraphAr project is governed by the `GraphAr Code of Conduct`_. + +Questions? +----------- + +Mailing list +^^^^^^^^^^^^^ +The `GraphAr mailing list`_ is for discussion of GraphAr development and usage. + +Send an email to `graphar+subscribe@googlegroups.com `_ +to subscribe to the mailing list. + + +Tag Convention +""""""""""""""""" +The mailing list follows the convention of prefixing subjects with one or more tags in order to +clarify the scope and purpose of messages. For example: + +- [ANNOUNCE] GraphAr 0.1.0 released +- [DISCUSS][C++] How to implement a new API in C++? +- [DISCUSS][Format] How to support partition pruning? +- [Spark][Reader] Support the push-down of filters + +When emailing the mailing list, please prefix the subject line with one or more tags. +Depending on the topic of your email, tags may include one or more of: + +- Supported Environments: e.g., [C++], [Spark], [Java], etc. +- Specifications: e.g., [Format], [Reader], [Writer], [API], etc. + +You may also prefix your subject line with [DISCUSS] if your email is intended to prompt a discussion +rather than get an answer to a specific question. + +Slack +^^^^^^ +If you are not used to communicating via mailing lists, you can also join the +`GraphAr Slack`_ to chat with other GraphAr users and developers. + +GitHub Issues +^^^^^^^^^^^^^ +We use GitHub issues as a way to ask questions and engage with the GraphAr developer community, +to maintain a queue of development work, and as the public record of work on the project. +We use the mailing list for development discussions where a lengthy discussion is required. + +Community Meeting +^^^^^^^^^^^^^^^^^ +We host online meetings to provide spaces for synchronous discussions about the GraphAr project. +These discussions usually focus on topics of interest to developers who are contributing to GraphAr, +but we welcome users of GraphAr to join. + +The community meeting runs weekly, every Tuesday at 7:00 PM (UTC+8). The meeting is hosted on Ali Meeting. + +See the `community meeting notes`_ for the next meeting. + + +Contributing +------------ +As mentioned above, we use `GitHub `_ for our issue tracker and for source control. +See the `contribution guidelines `_ for more. + +.. _GraphAr Code of Conduct: https://github.com/alibaba/GraphAr/blob/main/CODE_OF_CONDUCT.md + +.. _GraphAr mailing list: https://groups.google.com/g/graphar + +.. _GraphAr Slack: https://join.slack.com/t/grapharworkspace/shared_invite/zt-1wh5vo828-yxs0MlXYBPBBNvjOGhL4kQ + +.. _community meeting notes: https://github.com/alibaba/GraphAr/wiki/Community-Meeting-Agenda diff --git a/_sources/developers/contributing.rst.txt b/_sources/developers/contributing.rst.txt new file mode 100644 index 000000000..3dc0f5e37 --- /dev/null +++ b/_sources/developers/contributing.rst.txt @@ -0,0 +1,4 @@ +.. This file is just a placeholder to include the top-level CONTRIBUTING.rst + in the sphinx doc workspace. + +..
include:: ../../CONTRIBUTING.rst diff --git a/_sources/developers/java-dev.rst.txt b/_sources/developers/java-dev.rst.txt new file mode 100644 index 000000000..62cbd26a5 --- /dev/null +++ b/_sources/developers/java-dev.rst.txt @@ -0,0 +1,108 @@ +Java Development +================ + +Introduction +------------ + +The GraphAr Java library is based on the GraphAr C++ library and an efficient FFI +for Java and C++ called +`FastFFI `__. + +Source Code Level +~~~~~~~~~~~~~~~~~ + +- Interface + +- Class + +- JNI code + +- GraphAr C++ library + +If you want to use classes or functions of the GraphAr C++ library through the Java SDK, you only need to write interfaces with +annotations. After the interfaces are ready, the Java code for the interfaces and the C++ code, which includes the JNI +code for native methods, will be automatically generated by FastFFI. For +annotation usage, please refer to +`FastFFI `__. An illustrative sketch of such an annotated interface is shown later on this page. + + + +Runtime Level +~~~~~~~~~~~~~ + +Interfaces and classes will be compiled to bytecode. Usually, JNI code will be compiled to bitcode as part of a +dynamic library which can be called by native methods directly. +If llvm4jni is enabled, suitable JNI methods will be translated to bytecode. + +To decouple the C++ and Java implementations, we use a bridge dynamic library called gar-jni to connect them. It +integrates all C++ dependencies (e.g., JNI code, the GraphAr C++ library and Arrow C++) +and can be called by native methods in Java directly. +Most JNI code is generated by FastFFI, but some is written by hand, such as the JNI code for +transferring a VectorSchemaRoot into an arrow::Table. + +To build the bridge dynamic library, here is the main part of our CMakeLists.txt: + +.. code-block:: cmake + + # set auto-generated JNI code and hand-written JNI code as source files + file(GLOB SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-sources/annotations/*.cc" "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-test-sources/test-annotations/*.cc" + "${CMAKE_CURRENT_SOURCE_DIR}/src/main/cpp/ffi/*.cc") + # remove the auto-generated JNI code for a specific method because we have hand-written JNI code for it + list(REMOVE_ITEM SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/target/generated-sources/annotations/jni_com_alibaba_graphar_arrow_ArrowTable_Static_cxx_0x58c7409.cc") + + set(LIBNAME "gar-jni") + + # find JNI related libraries + find_package(JNI REQUIRED) + include_directories(SYSTEM ${JAVA_INCLUDE_PATH}) + include_directories(SYSTEM ${JAVA_INCLUDE_PATH2}) + + # some JNI code depends on arrow + find_package(Arrow REQUIRED) + # build graphar-cpp at a specific version + include(graphar-cpp) + build_graphar_cpp() + + # build the bridge JNI library + add_library(${LIBNAME} SHARED ${SOURCES}) + # include graphar-cpp headers + target_include_directories(${LIBNAME} SYSTEM BEFORE PRIVATE ${GAR_INCLUDE_DIR}) + # link graphar-cpp and arrow + target_link_libraries(${LIBNAME} ${CMAKE_JNI_LINKER_FLAGS} gar_shared) + target_link_libraries(${LIBNAME} ${CMAKE_JNI_LINKER_FLAGS} Arrow::arrow_static) + +For more about CMake usage, please refer to `CMake's official website `__. + +Building GraphAr Java +--------------------- + +Please refer to `GraphAr Java Library user guide <../user-guide/java-lib.html>`__. + +How To Test +----------- + +.. code-block:: bash + + $ export GAR_TEST_DATA=$PWD/../testing/ + $ mvn clean test + +This will build the GraphAr C++ library internally for Java. If you have already installed the GraphAr C++ library on your system, +you can append this option to skip that step: ``-DbuildGarCPP=OFF``.
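To make the annotation-based workflow from the Source Code Level section more concrete, below is a minimal, illustrative sketch of what a FastFFI-annotated interface might look like. The annotations (``@FFIGen``, ``@FFITypeAlias``, ``@CXXHead``) and the ``CXXPointer`` base interface are assumed to come from FastFFI; the C++ class ``demo::Greeter``, its header path, and its method are hypothetical placeholders. Refer to the FastFFI documentation and the existing GraphAr Java sources for the real declarations.

.. code-block:: java

    // Illustrative sketch only: `demo::Greeter` and its header are hypothetical.
    import com.alibaba.fastffi.CXXHead;
    import com.alibaba.fastffi.CXXPointer;
    import com.alibaba.fastffi.FFIGen;
    import com.alibaba.fastffi.FFITypeAlias;

    // A hand-written interface describing a C++ class. From this declaration,
    // FastFFI's annotation processor generates the implementing Java class and
    // the corresponding JNI .cc file at build time.
    @FFIGen
    @FFITypeAlias("demo::Greeter")   // fully-qualified C++ type the interface maps to
    @CXXHead("demo/greeter.h")       // C++ header that declares the type
    public interface Greeter extends CXXPointer {
        // Abstract methods are forwarded to the C++ member functions of the same name.
        int greetCount();
    }

The generated JNI sources are then compiled into the bridge dynamic library by the CMake logic shown above.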
+ +Code Style +---------- + +We follow `AOSP Java code +style `__. To ensure +the code style CI check passes, please make sure the check below +succeeds: + +.. code-block:: bash + + $ mvn spotless:check + +If there are violations, run the command below to automatically format the code: + +.. code-block:: bash + + $ mvn spotless:apply diff --git a/_sources/format/file-format.rst.txt b/_sources/format/file-format.rst.txt new file mode 100644 index 000000000..197e22696 --- /dev/null +++ b/_sources/format/file-format.rst.txt @@ -0,0 +1,218 @@ +GraphAr File Format +============================ + +Property Graph +--------------- + +GraphAr is designed for representing and storing property graphs. A graph (in discrete mathematics) is a structure made of vertices and edges. +A property graph is a graph model in which the vertices/edges can carry a name (also called a type or label) and some properties. +Since it carries more information than a non-property graph, the property graph is able to represent +connections among data scattered across diverse databases with different schemas. +Compared with the relational database schema, the property graph excels at showing data dependencies. +Therefore, it is widely used in modeling modern applications including social network analytics, data mining, +network routing, scientific computing and so on. + +A property graph consists of vertices and edges, with each vertex containing a unique identifier and: + +- A text label that describes the vertex type. +- A collection of properties, with each property represented as a key-value pair. + +Each edge contains a unique identifier and: + +- The outgoing vertex (source). +- The incoming vertex (destination). +- A text label that describes the relationship between the two vertices. +- A collection of properties. + +The following is an example property graph containing two types of vertices ("person" and "comment") and three types of edges. + +.. image:: ../images/property_graph.png + :width: 700 + :align: center + :alt: property graph + + +Property Data Types +------------------- +GraphAr supports a set of built-in property data types that are common in real use cases and supported by most file types (CSV, ORC, Parquet), including: + +- **Boolean** +- **Int32**: Integer with 32 bits +- **Int64**: Integer with 64 bits +- **Float**: 32-bit floating point values +- **Double**: 64-bit floating point values +- **String**: Textual data +- **Date**: days since the Unix epoch +- **Timestamp**: milliseconds since the Unix epoch +- **Time**: milliseconds since midnight +- **List**: A list of values of the same type + +GraphAr also supports user-defined data types, which can be used to represent complex data structures, +such as the struct, map, and union types. + +Configurations +-------------- + +Vertex Chunk Size +````````````````` +The vertex chunk size is a configuration parameter that determines the number of vertices in a vertex chunk +and is used to partition the logical vertex table into multiple physical vertex tables. + +The vertex chunk size should be set to a value that is large enough to reduce the overhead of reading/writing files, +but small enough to avoid reading/writing too many vertices at once. We recommend setting the vertex chunk size to +the empirical value 2^18 (262,144) for most cases.
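As a concrete illustration of the arithmetic implied by a fixed chunk size, the following sketch (a hypothetical helper, not part of any GraphAr library) computes how many chunks a logical vertex table occupies and which chunk holds a given internal vertex id. The same arithmetic applies to edge chunks with the edge chunk size described next.

.. code-block:: java

    // Illustrative helper, not a GraphAr API: chunk arithmetic for a fixed chunk size.
    public final class VertexChunkMath {

        // Number of chunks needed for `vertexCount` vertices; the last chunk may be partial.
        static long numChunks(long vertexCount, long chunkSize) {
            return (vertexCount + chunkSize - 1) / chunkSize;   // ceiling division
        }

        // Index of the chunk that contains the given internal vertex id.
        static long chunkIndex(long internalVertexId, long chunkSize) {
            return internalVertexId / chunkSize;
        }

        // Row of the vertex inside its chunk.
        static long rowInChunk(long internalVertexId, long chunkSize) {
            return internalVertexId % chunkSize;
        }

        public static void main(String[] args) {
            long chunkSize = 1L << 18;                              // recommended 2^18 = 262,144
            System.out.println(numChunks(1_000_000L, chunkSize));   // 4 chunks
            System.out.println(chunkIndex(300_000L, chunkSize));    // vertex 300000 -> chunk 1
            System.out.println(rowInChunk(300_000L, chunkSize));    // row 37856 within that chunk
        }
    }

Because the mapping from internal vertex id to chunk is a simple division, any chunk can be located and read independently, which is what makes random access by vertex id cheap.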
+ +Edge Chunk Size +```````````````` + +The edge chunk size is a configuration parameter that determines the number of edges in an edge chunk +and is used to partition the logical edge table into multiple physical edge tables. + +The edge chunk size should be set to a value that is large enough to reduce the overhead of reading/writing files, +but small enough to avoid reading/writing too many edges at once. We recommend setting the edge chunk size to +the empirical value 2^22 (4,194,304) for most cases. + +Data File Format +```````````````` +GraphAr supports multiple file formats for storing the actual data of vertices and edges, +including Apache ORC, Apache Parquet, CSV, and JSON. + +The file format should be chosen based on the specific use case and the data processing framework that will be used to +process the graph data. For example, if the graph data will be processed using Apache Spark, +then the Apache Parquet file format is recommended. + +Adjacency List Type +```````````````````` +An adjacency list is a data structure used to represent the edges of a graph. GraphAr supports multiple types of adjacency lists for a given group of edges, including: + +- **ordered_by_source**: all the edges in the logical table are ordered and further partitioned by the internal vertex id of the source, which can be seen as the CSR format. +- **ordered_by_dest**: all the edges in the logical table are ordered and further partitioned by the internal vertex id of the destination, which can be seen as the CSC format. +- **unordered_by_source**: the internal id of the source vertex is used as the partition key to divide the edges into different sub-logical-tables, and the edges in each sub-logical-table are unordered, which can be seen as the COO format. +- **unordered_by_dest**: the internal id of the destination vertex is used as the partition key to divide the edges into different sub-logical-tables, and the edges in each sub-logical-table are unordered, which can also be seen as the COO format. + + +Vertex Chunks in GraphAr +------------------------ + +Logical table of vertices +````````````````````````` +Each type of vertices (those with the same label) constructs a logical vertex table, with each vertex assigned a global index inside this type (called the internal vertex id) starting from 0, corresponding to the row number of the vertex in the logical vertex table. An example layout for a logical table of vertices under the label "person" is provided for reference. + +Given an internal vertex id and the vertex label, a vertex is uniquely identifiable and its respective properties can be accessed from this table. The internal vertex id is further used to identify the source and destination vertices when maintaining the topology of the graph. + +.. image:: ../images/vertex_logical_table.png + :width: 650 + :align: center + :alt: vertex logical table + +.. note:: + + In the logical vertex table, a property can be marked as the primary key, such as the "id" column of the "person" table. + + +Physical table of vertices +`````````````````````````` +The logical vertex table will be partitioned into multiple continuous vertex chunks to enhance reading/writing efficiency. To maintain the ability of random access, the size of vertex chunks for the same label is fixed. To support accessing only the required properties without reading all properties from the files, and to allow adding properties to vertices without modifying the existing files, the columns of the logical table will be divided into several column groups.
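To see how chunking and column grouping interact, here is another small, hypothetical sketch (not a GraphAr API): it counts the physical vertex tables produced for one vertex label and derives an example chunk path under an assumed directory scheme; the actual file layout is defined by the GraphAr format specification. The concrete "person" example follows right after.

.. code-block:: java

    // Illustrative helper, not a GraphAr API: physical-table bookkeeping for one vertex label.
    public final class VertexLayoutSketch {

        static long numChunks(long vertexCount, long chunkSize) {
            return (vertexCount + chunkSize - 1) / chunkSize;  // ceiling division
        }

        // One physical table per (vertex chunk, property group) pair.
        static long numPhysicalTables(long vertexCount, long chunkSize, int numPropertyGroups) {
            return numChunks(vertexCount, chunkSize) * numPropertyGroups;
        }

        // Assumed path scheme, for illustration only; see the format spec for the real layout.
        static String chunkPath(String label, String propertyGroup, long chunkIndex) {
            return "vertex/" + label + "/" + propertyGroup + "/chunk" + chunkIndex;
        }

        public static void main(String[] args) {
            // e.g. 1000 vertices, chunk size 500, 2 property groups -> 4 physical tables
            System.out.println(numPhysicalTables(1000, 500, 2));    // 4
            System.out.println(chunkPath("person", "group0", 1));   // vertex/person/group0/chunk1
        }
    }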
+ +Take the "person" vertex table as an example: if the chunk size is set to 500, the logical table will be separated into sub-logical-tables of 500 rows, with the exception of the last one, which may have fewer than 500 rows. The columns for maintaining properties will also be divided into distinct groups (e.g., 2 for our example). As a result, a total of 4 physical vertex tables are created for storing the example logical table, as can be seen in the following figure. + +.. image:: ../images/vertex_physical_table.png + :width: 650 + :align: center + :alt: vertex physical table + + +**Note**: To efficiently utilize the filter push-down of payload file formats like Parquet, the internal vertex id is stored in the payload file as a column. Since the internal vertex id is continuous, the payload file format can use delta encoding for the internal vertex id column, which does not introduce much storage overhead. + +Edge Chunks in GraphAr +------------------------ + +Logical table of edges +`````````````````````` +For maintaining a type of edges (those with the same triplet of source label, edge label, and destination label), a logical edge table is established. In order to support quickly creating a graph from the graph storage files, the logical edge table can maintain the topology information in a way similar to CSR/CSC (learn more about `CSR/CSC `_), that is, the edges are ordered by the internal vertex id of either the source or the destination. In this way, an offset table is required to store the start offset of each vertex's edges, and the edges with the same source/destination will be stored continuously in the logical table. + +Taking the logical table for "person likes person" edges as an example, the logical edge table looks like: + +.. image:: ../images/edge_logical_table.png + :width: 650 + :align: center + :alt: edge logical table + +Physical table of edges +``````````````````````` +As with the vertex table, the logical edge table is also partitioned into sub-logical-tables, with each sub-logical-table containing edges whose source (or destination) vertices are in the same vertex chunk. According to the partition strategy and the order of the edges, edges can be stored in GraphAr following the specified adjacency list type. + +Each sub-logical-table is then further divided into edge chunks of a predefined, fixed number of rows (referred to as the edge chunk size). Finally, an edge chunk is separated into physical tables in the following way: + +- an adjList table (which contains only two columns: the internal vertex id of the source and the destination). +- 0 or more property group tables (each containing the properties of the edges). + +Additionally, there is an offset table for **ordered_by_source** or **ordered_by_dest** edges. The offset table is used to record the starting point of the edges for each vertex. The partition of the offset table should be in alignment with the partition of the corresponding vertex table. The first row of each offset chunk is always 0, indicating the starting point of the corresponding sub-logical-table for edges. + +Take the "person knows person" edges as an illustration.
Suppose the vertex chunk size is set to 500, the edge chunk size is 1024, and the edges are **ordered_by_source**; then the edges could be saved in the following physical tables: + +.. image:: ../images/edge_physical_table1.png + :width: 650 + :align: center + :alt: edge physical table1 + +.. image:: ../images/edge_physical_table2.png + :width: 650 + :align: center + :alt: edge physical table2 + +.. tip:: + + When the edge type is **ordered_by_source**, the sorted adjList table together with the offset table can be used as CSR, supporting fast access to the outgoing edges of a given vertex. Similarly, a CSC view can be constructed by sorting the edges by destination and recording the corresponding offsets, supporting fast access to the incoming edges of a given vertex. + +Information files +------------------ +GraphAr uses two kinds of files to store a graph: a group of YAML files that describe the metadata, and data files that store the actual data of vertices and edges. +A graph information file, named "<name>.graph.yml", describes the meta information for a graph whose name is <name>. The content of this file includes: + +- the graph name; +- the root directory path of the data files; +- the vertex information and edge information files included; +- the version of GraphAr; +- extra information for the graph, which can be used for user-defined information. + +A vertex information file which named "