From e853a54bba3feaa8458c7820b7128ef978278b0c Mon Sep 17 00:00:00 2001 From: Dandelion <49650772+aroundabout@users.noreply.github.com> Date: Mon, 20 Nov 2023 21:13:54 +0800 Subject: [PATCH] doc: add cassandra docker usage of server & enhance loader desc (#299) --- .../cn/docs/clients/restful-api/metrics.md | 2 +- content/cn/docs/clients/restful-api/other.md | 2 +- .../cn/docs/quickstart/hugegraph-loader.md | 195 ++++++++++++++--- .../cn/docs/quickstart/hugegraph-server.md | 68 +++++- .../en/docs/clients/restful-api/metrics.md | 2 +- content/en/docs/clients/restful-api/other.md | 2 +- .../en/docs/quickstart/hugegraph-loader.md | 199 +++++++++++++++--- .../en/docs/quickstart/hugegraph-server.md | 66 +++++- 8 files changed, 480 insertions(+), 56 deletions(-) diff --git a/content/cn/docs/clients/restful-api/metrics.md b/content/cn/docs/clients/restful-api/metrics.md index f4cf772b5..e984d2039 100644 --- a/content/cn/docs/clients/restful-api/metrics.md +++ b/content/cn/docs/clients/restful-api/metrics.md @@ -1,7 +1,7 @@ --- title: "Metrics API" linkTitle: "Metrics" -weight: 1 +weight: 17 --- diff --git a/content/cn/docs/clients/restful-api/other.md b/content/cn/docs/clients/restful-api/other.md index eb00992f1..8f394e439 100644 --- a/content/cn/docs/clients/restful-api/other.md +++ b/content/cn/docs/clients/restful-api/other.md @@ -1,7 +1,7 @@ --- title: "Other API" linkTitle: "Other" -weight: 17 +weight: 18 --- ### 11.1 Other diff --git a/content/cn/docs/quickstart/hugegraph-loader.md b/content/cn/docs/quickstart/hugegraph-loader.md index 72baf3a8d..2c8c2f823 100644 --- a/content/cn/docs/quickstart/hugegraph-loader.md +++ b/content/cn/docs/quickstart/hugegraph-loader.md @@ -24,10 +24,43 @@ HugeGraph-Loader 是 HugeGraph 的数据导入组件,能够将多种数据源 有两种方式可以获取 HugeGraph-Loader: +- 使用 Docker 镜像 (推荐) - 下载已编译的压缩包 - 克隆源码编译安装 -#### 2.1 下载已编译的压缩包 +#### 2.1 使用 Docker 镜像 + +我们可以使用 `docker run -itd --name loader hugegraph/loader`部署 loader 服务。对于需要加载的数据,则可以通过挂载 `-v /path/to/data/file:/loader/file` 或者`docker cp`的方式将文件复制到 loader 容器内部。 + +或者使用 docker-compose 启动 loader, 启动命令为 `docker-compose up -d`, 样例的 docker-compose.yml 如下所示: + +```yaml +version: '3' + +services: + server: + image: hugegraph/hugegraph + container_name: graph + ports: + - 8080:8080 + + hubble: + image: hugegraph/hubble + container_name: hubble + ports: + - 8088:8088 + + loader: + image: hugegraph/loader + container_name: loader + # mount your own data here + # volumes: + # - /path/to/data/file:/loader/file +``` + +具体的数据导入流程可以参考 [4.5 使用 docker 导入](#45-使用-docker-导入) + +#### 2.2 下载已编译的压缩包 下载最新版本的 HugeGraph-Toolchain Release 包,里面包含了 loader + tool + hubble 全套工具,如果你已经下载,可跳过重复步骤 @@ -36,7 +69,7 @@ wget https://downloads.apache.org/incubator/hugegraph/1.0.0/apache-hugegraph-too tar zxf *hugegraph*.tar.gz ``` -#### 2.2 克隆源码编译安装 +#### 2.3 克隆源码编译安装 克隆最新版本的 HugeGraph-Loader 源码包: @@ -49,21 +82,11 @@ wget https://downloads.apache.org/incubator/hugegraph/1.0.0/apache-hugegraph-too ``` 由于 Oracle ojdbc license 的限制,需要手动安装 ojdbc 到本地 maven 仓库。 -访问[Oracle jdbc 下载](https://www.oracle.com/database/technologies/appdev/jdbc-downloads.html) 页面。选择 Oracle Database 12c Release 2 (12.2.0.1) drivers,如下图所示。 +访问[Oracle jdbc 下载](https://www.oracle.com/database/technologies/appdev/jdbc-drivers-archive.html) 页面。选择 Oracle Database 12c Release 2 (12.2.0.1) drivers,如下图所示。 -
- image -
- - -打开链接后,选择“ojdbc8.jar”, 如下图所示。 +打开链接后,选择“ojdbc8.jar” -
- image -
- - - 把 ojdbc8 安装到本地 maven 仓库,进入`ojdbc8.jar`所在目录,执行以下命令。 +把 ojdbc8 安装到本地 maven 仓库,进入`ojdbc8.jar`所在目录,执行以下命令。 ``` mvn install:install-file -Dfile=./ojdbc8.jar -DgroupId=com.oracle -DartifactId=ojdbc8 -Dversion=12.2.0.1 -Dpackaging=jar ``` @@ -253,7 +276,9 @@ Office,388 > 注意:0.11.0 版本以前的映射文件与 0.11.0 以后的格式变化较大,为表述方便,下面称 0.11.0 以前的映射文件(格式)为 1.0 版本,0.11.0 以后的为 2.0 版本。并且若无特殊说明,“映射文件”表示的是 2.0 版本的。 -2.0 版本的映射文件的框架为: + +
+点击展开/折叠 2.0 版本的映射文件的框架 ```json { @@ -276,9 +301,13 @@ Office,388 } ``` +
+
+ 这里直接给出两个版本的映射文件(描述了上面图模型和数据文件) -2.0 版本的映射文件: +
+点击展开/折叠 2.0 版本的映射文件 ```json { @@ -482,7 +511,11 @@ Office,388 } ``` -1.0 版本的映射文件: +
+
+ +
+点击展开/折叠 1.0 版本的映射文件 ```json { @@ -539,6 +572,9 @@ Office,388 } ``` +
+
+ 映射文件 1.0 版本是以顶点和边为中心,设置输入源;而 2.0 版本是以输入源为中心,设置顶点和边映射。有些输入源(比如一个文件)既能生成顶点,也能生成边,如果用 1.0 版的格式写,就需要在 vertex 和 edge 映射块中各写一次 input 块,这两次的 input 块是完全一样的;而 2.0 版本只需要写一次 input。所以 2.0 版相比于 1.0 版,能省掉一些 input 的重复书写。 在 hugegraph-loader-{version} 的 bin 目录下,有一个脚本工具 `mapping-convert.sh` 能直接将 1.0 版本的映射文件转换为 2.0 版本的,使用方式如下: @@ -655,7 +691,7 @@ schema: 必填 - skipped_line:想跳过的行,复合结构,目前只能配置要跳过的行的正则表达式,用子节点 regex 描述,默认不跳过任何行,选填; - early_stop:某次从 Kafka broker 拉取的记录为空,停止任务,默认为 false,仅用于调试,选填; -##### 3.3.1 顶点和边映射 +##### 3.3.3 顶点和边映射 顶点和边映射的节点(JSON 文件中的一个 key)有很多相同的部分,下面先介绍相同部分,再分别介绍`顶点映射`和`边映射`的特有节点。 @@ -837,14 +873,14 @@ id|name|lang|price|ISBN 边文件:`example/file/edge_knows.json` -``` +```json {"source_name": "marko", "target_name": "vadas", "date": "20160110", "weight": 0.5} {"source_name": "marko", "target_name": "josh", "date": "20130220", "weight": 1.0} ``` 边文件:`example/file/edge_created.json` -``` +```json {"aname": "marko", "bname": "lop", "date": "20171210", "weight": 0.4} {"aname": "josh", "bname": "lop", "date": "20091111", "weight": 0.4} {"aname": "josh", "bname": "ripple", "date": "20171210", "weight": 1.0} @@ -853,7 +889,8 @@ id|name|lang|price|ISBN #### 4.2 编写 schema -schema 文件:`example/file/schema.groovy` +
+点击展开/折叠 schema 文件:example/file/schema.groovy ```groovy schema.propertyKey("name").asText().ifNotExist().create(); @@ -879,9 +916,13 @@ schema.indexLabel("createdByDate").onE("created").by("date").secondary().ifNotEx schema.indexLabel("createdByWeight").onE("created").by("weight").range().ifNotExist().create(); schema.indexLabel("knowsByWeight").onE("knows").by("weight").range().ifNotExist().create(); ``` +
#### 4.3 编写输入源映射文件`example/file/struct.json` +
+点击展开/折叠 源映射文件 example/file/struct.json + ```json { "vertices": [ @@ -945,6 +986,7 @@ schema.indexLabel("knowsByWeight").onE("knows").by("weight").range().ifNotExist( ] } ``` +
#### 4.4 执行命令导入 @@ -954,7 +996,7 @@ sh bin/hugegraph-loader.sh -g hugegraph -f example/file/struct.json -s example/f 导入结束后,会出现类似如下统计信息: -``` +```bash vertices/edges has been loaded this time : 8/6 -------------------------------------------------- count metrics @@ -970,7 +1012,112 @@ count metrics edge insert failure : 0 ``` -#### 4.5 使用 spark-loader 导入 +#### 4.5 使用 docker 导入 + +##### 4.5.1 使用 docker exec 直接导入数据 + +###### 4.5.1.1 数据准备 + +如果仅仅尝试使用 loader, 我们可以使用内置的 example 数据集进行导入,无需自己额外准备数据 + +如果使用自定义的数据,则在使用 loader 导入数据之前,我们需要将数据复制到容器内部。 + +首先我们可以根据 [4.1-4.3](#41-准备数据) 的步骤准备数据,将准备好的数据通过 `docker cp` 复制到 loader 容器内部。 + +假设我们已经按照上述的步骤准备好了对应的数据集,存放在 `hugegraph-dataset` 文件夹下,文件结构如下: + +```bash +tree -f hugegraph-dataset/ + +hugegraph-dataset +├── hugegraph-dataset/edge_created.json +├── hugegraph-dataset/edge_knows.json +├── hugegraph-dataset/schema.groovy +├── hugegraph-dataset/struct.json +├── hugegraph-dataset/vertex_person.csv +└── hugegraph-dataset/vertex_software.txt +``` + +将文件复制到容器内部 + +```bash +docker cp hugegraph-dataset loader:/loader/dataset +docker exec -it loader ls /loader/dataset + +edge_created.json edge_knows.json schema.groovy struct.json vertex_person.csv vertex_software.txt +``` + +###### 4.5.1.2 数据导入 + +以内置的 example 数据集为例,我们可以使用以下的命令对数据进行导入。 + +如果需要导入自己准备的数据集,则只需要修改 `-f` 配置脚本的路径 以及 `-s` schema 文件路径即可。 + +其他的参数可以参照 [3.4.1 参数说明](#341-参数说明) + +```bash +docker exec -it loader bin/hugegraph-loader.sh -g hugegraph -f example/file/struct.json -s example/file/schema.groovy -h graph -p 8080 +``` + +如果导入用户自定义的数据集,按照刚才的例子,则使用: + +```bash +docker exec -it loader bin/hugegraph-loader.sh -g hugegraph -f /loader/dataset/struct.json -s /loader/dataset/schema.groovy -h graph -p 8080 +``` + + +> 如果 `loader` 和 `server`位于同一 docker 网络,则可以指定 `-h {server_container_name}`, 否则需要指定 `server`的宿主机的 ip (在我们的例子中, `server_container_name` 为 `graph`). 
+ +然后我们可以观察到结果: + +```bash +HugeGraphLoader worked in NORMAL MODE +vertices/edges loaded this time : 8/6 +-------------------------------------------------- +count metrics + input read success : 14 + input read failure : 0 + vertex parse success : 8 + vertex parse failure : 0 + vertex insert success : 8 + vertex insert failure : 0 + edge parse success : 6 + edge parse failure : 0 + edge insert success : 6 + edge insert failure : 0 +-------------------------------------------------- +meter metrics + total time : 0.199s + read time : 0.046s + load time : 0.153s + vertex load time : 0.077s + vertex load rate(vertices/s) : 103 + edge load time : 0.112s + edge load rate(edges/s) : 53 +``` + +也可以使用 `curl` 或者 `hubble`观察导入结果,此处以 `curl` 为例: + +```bash +> curl "http://localhost:8080/graphs/hugegraph/graph/vertices" | gunzip +{"vertices":[{"id":1,"label":"software","type":"vertex","properties":{"name":"lop","lang":"java","price":328.0}},{"id":2,"label":"software","type":"vertex","properties":{"name":"ripple","lang":"java","price":199.0}},{"id":"1:tom","label":"person","type":"vertex","properties":{"name":"tom"}},{"id":"1:josh","label":"person","type":"vertex","properties":{"name":"josh","age":32,"city":"Beijing"}},{"id":"1:marko","label":"person","type":"vertex","properties":{"name":"marko","age":29,"city":"Beijing"}},{"id":"1:peter","label":"person","type":"vertex","properties":{"name":"peter","age":35,"city":"Shanghai"}},{"id":"1:vadas","label":"person","type":"vertex","properties":{"name":"vadas","age":27,"city":"Hongkong"}},{"id":"1:li,nary","label":"person","type":"vertex","properties":{"name":"li,nary","age":26,"city":"Wu,han"}}]} +``` + +如果想检查边的导入结果,可以使用 `curl "http://localhost:8080/graphs/hugegraph/graph/edges" | gunzip` + +##### 4.5.2 进入 docker 容器进行导入 + +除了直接使用 `docker exec` 导入数据,我们也可以进入容器进行数据导入,基本流程与 [4.5.1](#451-使用-docker-exec-直接导入数据) 相同 + +使用 `docker exec -it loader bash`进入容器内部,并执行命令 + +```bash +sh bin/hugegraph-loader.sh -g hugegraph -f example/file/struct.json -s example/file/schema.groovy -h graph -p 8080 +``` + +执行的结果如 [4.5.1](#451-使用-docker-exec-直接导入数据) 所示 + +#### 4.6 使用 spark-loader 导入 > Spark 版本:Spark 3+,其他版本未测试。 > HugeGraph Toolchain 版本:toolchain-1.0.0 > diff --git a/content/cn/docs/quickstart/hugegraph-server.md b/content/cn/docs/quickstart/hugegraph-server.md index 7e434395f..9b39ac081 100644 --- a/content/cn/docs/quickstart/hugegraph-server.md +++ b/content/cn/docs/quickstart/hugegraph-server.md @@ -143,9 +143,73 @@ bin/hugegraph deploy -v {hugegraph-version} -p {install-path} [-u {download-path #### 5.1 使用 Docker -在 [3.1 使用 Docker 容器](#31-使用-docker-容器-推荐)中,我们已经介绍了如何使用 `docker` 部署 `hugegraph-server`, 我们还可以设置参数在 sever 启动的时候加载样例图 +在 [3.1 使用 Docker 容器](#31-使用-docker-容器-推荐)中,我们已经介绍了如何使用 `docker` 部署 `hugegraph-server`, 我们还可以使用其他的后端存储或者设置参数在 sever 启动的时候加载样例图 -##### 5.1.1 启动 server 的时候创建示例图 +##### 5.1.1 使用 Cassandra 作为后端 + +
+<summary>点击展开/折叠 Cassandra 配置及启动方法</summary>
+
+在使用 Docker 的时候，我们可以使用 Cassandra 作为后端存储。我们更推荐直接使用 docker-compose 对 server 以及 Cassandra 进行统一管理。
+
+样例的 `docker-compose.yml` 可以在 [github](https://github.com/apache/incubator-hugegraph/blob/master/hugegraph-dist/docker/example/docker-compose-cassandra.yml) 中获取，使用 `docker-compose up -d` 启动。(如果使用 Cassandra 4.0 版本作为后端存储，则需要大约两分钟初始化，请耐心等待)
+
+```yaml
+version: "3"
+
+services:
+  graph:
+    image: hugegraph/hugegraph
+    container_name: cas-graph
+    ports:
+      - 8080:8080
+    environment:
+      hugegraph.backend: cassandra
+      hugegraph.serializer: cassandra
+      hugegraph.cassandra.host: cas-cassandra
+      hugegraph.cassandra.port: 9042
+    networks:
+      - ca-network
+    depends_on:
+      - cassandra
+    healthcheck:
+      test: ["CMD", "bin/gremlin-console.sh", "--", "-e", "scripts/remote-connect.groovy"]
+      interval: 10s
+      timeout: 30s
+      retries: 3
+
+  cassandra:
+    image: cassandra:4
+    container_name: cas-cassandra
+    ports:
+      - 7000:7000
+      - 9042:9042
+    security_opt:
+      - seccomp:unconfined
+    networks:
+      - ca-network
+    healthcheck:
+      test: ["CMD", "cqlsh", "--execute", "describe keyspaces;"]
+      interval: 10s
+      timeout: 30s
+      retries: 5
+
+networks:
+  ca-network:
+
+volumes:
+  hugegraph-data:
+```
+
+在这个 yaml 中，需要在环境变量中以 `hugegraph.` 开头的形式进行参数传递，配置 Cassandra 相关的参数。
+
+具体来说，在 `hugegraph.properties` 配置文件中，提供了 `backend=xxx`, `cassandra.host=xxx` 等配置项，为了配置这些配置项，在传递环境变量的过程之中，我们需要在这些配置项前加上 `hugegraph.`，即 `hugegraph.backend` 和 `hugegraph.cassandra.host`。
+
+其他配置可以参照 [4 配置](#4-配置)。
+
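+作为参考，下面给出不使用 docker-compose、直接通过 `docker run` 传递同样环境变量的一种大致写法（仅为示意，假设已通过 `docker network create ca-network` 创建好网络，并等待 Cassandra 初始化完成）：
+
+```bash
+# 先启动 Cassandra 容器（Cassandra 4.0 初始化大约需要两分钟）
+docker run -itd --name cas-cassandra --network ca-network cassandra:4
+
+# 再通过 hugegraph. 前缀的环境变量指定后端存储，启动 server
+docker run -itd --name cas-graph --network ca-network -p 8080:8080 \
+    -e hugegraph.backend=cassandra \
+    -e hugegraph.serializer=cassandra \
+    -e hugegraph.cassandra.host=cas-cassandra \
+    -e hugegraph.cassandra.port=9042 \
+    hugegraph/hugegraph
+```
+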
+ +##### 5.1.2 启动 server 的时候创建示例图 在 docker 启动的时候设置环境变量 `PRELOAD=true`, 从而实现启动脚本的时候加载数据。 diff --git a/content/en/docs/clients/restful-api/metrics.md b/content/en/docs/clients/restful-api/metrics.md index cd67890d3..16255b248 100644 --- a/content/en/docs/clients/restful-api/metrics.md +++ b/content/en/docs/clients/restful-api/metrics.md @@ -1,7 +1,7 @@ --- title: "Metrics API" linkTitle: "Metrics" -weight: 1 +weight: 17 --- diff --git a/content/en/docs/clients/restful-api/other.md b/content/en/docs/clients/restful-api/other.md index 0dd617653..ed5135388 100644 --- a/content/en/docs/clients/restful-api/other.md +++ b/content/en/docs/clients/restful-api/other.md @@ -1,7 +1,7 @@ --- title: "Other API" linkTitle: "Other" -weight: 17 +weight: 18 --- ### 11.1 Other diff --git a/content/en/docs/quickstart/hugegraph-loader.md b/content/en/docs/quickstart/hugegraph-loader.md index 7b018147e..23908f6ec 100644 --- a/content/en/docs/quickstart/hugegraph-loader.md +++ b/content/en/docs/quickstart/hugegraph-loader.md @@ -23,10 +23,43 @@ It will be explained in detail below. There are two ways to get HugeGraph-Loader: +- User docker image (Recommended) - Download the compiled tarball - Clone source code then compile and install -#### 2.1 Download the compiled archive +#### 2.1 Use Docker image + +We can deploy the loader service using `docker run -itd --name loader hugegraph/loader`. For the data that needs to be loaded, it can be copied into the loader container either by mounting `-v /path/to/data/file:/loader/file` or by using `docker cp`. + +Alternatively, to start the loader using docker-compose, the command is `docker-compose up -d`. An example of the docker-compose.yml is as follows: + +```yaml +version: '3' + +services: + server: + image: hugegraph/hugegraph + container_name: graph + ports: + - 8080:8080 + + hubble: + image: hugegraph/hubble + container_name: hubble + ports: + - 8088:8088 + + loader: + image: hugegraph/loader + container_name: loader + # mount your own data here + # volumes: + # - /path/to/data/file:/loader/file +``` + +The specific data loading process can be referenced under [4.5 User Docker to load data](#45-use-docker-to-load-data) + +#### 2.2 Download the compiled archive Download the latest version of the HugeGraph-Toolchain release package: @@ -35,7 +68,7 @@ wget https://downloads.apache.org/incubator/hugegraph/1.0.0/apache-hugegraph-too tar zxf *hugegraph*.tar.gz ``` -#### 2.2 Clone source code to compile and install +#### 2.3 Clone source code to compile and install Clone the latest version of HugeGraph-Loader source package: @@ -48,21 +81,11 @@ wget https://downloads.apache.org/incubator/hugegraph/1.0.0/apache-hugegraph-too ``` Due to the license limitation of the `Oracle OJDBC`, you need to manually install ojdbc to the local maven repository. -Visit the [Oracle jdbc downloads](https://www.oracle.com/database/technologies/appdev/jdbc-downloads.html) page. Select Oracle Database 12c Release 2 (12.2.0.1) drivers, as shown in the following figure. - -
- image -
+Visit the [Oracle jdbc downloads](https://www.oracle.com/database/technologies/appdev/jdbc-drivers-archive.html) page. Select Oracle Database 12c Release 2 (12.2.0.1) drivers, as shown in the following figure. +After opening the link, select "ojdbc8.jar". -After opening the link, select "ojdbc8.jar" as shown below. - -
- image -
- - - Install ojdbc8 to the local maven repository, enter the directory where `ojdbc8.jar` is located, and execute the following command. +Install ojdbc8 to the local maven repository, enter the directory where `ojdbc8.jar` is located, and execute the following command. ``` mvn install:install-file -Dfile=./ojdbc8.jar -DgroupId=com.oracle -DartifactId=ojdbc8 -Dversion=12.2.0.1 -Dpackaging=jar ``` @@ -250,7 +273,10 @@ In the simplest terms, each mapping block describes: where is the file to be imp > Note: The format of the mapping file before version 0.11.0 and the format after 0.11.0 has changed greatly. For the convenience of expression, the mapping file (format) before 0.11.0 is called version 1.0, and the version after 0.11.0 is version 2.0 . And unless otherwise specified, the "map file" refers to version 2.0. -The skeleton of the map file for version 2.0 is: + + +
+Click to expand/collapse the skeleton of the map file for version 2.0 ```json { @@ -273,9 +299,13 @@ The skeleton of the map file for version 2.0 is: } ``` +
+
+ Two versions of the mapping file are given directly here (the above graph model and data file are described) -Mapping file for version 2.0: +
+Click to expand/collapse mapping file for version 2.0 ```json { @@ -479,7 +509,11 @@ Mapping file for version 2.0: } ``` -Mapping file for version 1.0: +
+
+ +
+Click to expand/collapse mapping file for version 1.0 ```json { @@ -536,6 +570,9 @@ Mapping file for version 1.0: } ``` +
+
+ The 1.0 version of the mapping file is centered on the vertex and edge, and sets the input source; while the 2.0 version is centered on the input source, and sets the vertex and edge mapping. Some input sources (such as a file) can generate both vertices and edges. If you write in the 1.0 format, you need to write an input block in each of the vertex and edge mapping blocks. The two input blocks are exactly the same ; and the 2.0 version only needs to write input once. Therefore, compared with version 1.0, version 2.0 can save some repetitive writing of input. In the bin directory of hugegraph-loader-{version}, there is a script tool `mapping-convert.sh` that can directly convert the mapping file of version 1.0 to version 2.0. The usage is as follows: @@ -652,7 +689,7 @@ schema: required - skipped_line: the line you want to skip, composite structure, currently can only configure the regular expression of the line to be skipped, described by the child node regex, the default is not to skip any line, optional; - early_stop: the record pulled from Kafka broker at a certain time is empty, stop the task, default is false, only for debugging, optional; -##### 3.3.1 Vertex and Edge Mapping +##### 3.3.3 Vertex and Edge Mapping The nodes of vertex and edge mapping (a key in the JSON file) have a lot of the same parts. The same parts are introduced first, and then the unique nodes of `vertex map` and `edge map` are introduced respectively. @@ -680,7 +717,7 @@ The nodes of vertex and edge mapping (a key in the JSON file) have a lot of the **Note:** If the newly imported attribute value is empty, the existing old data will be used instead of the empty value. For the effect, please refer to the following example -```javascript +```json // The update strategy is specified in the JSON file as follows { "vertices": [ @@ -833,14 +870,14 @@ id|name|lang|price|ISBN Edge file: `example/file/edge_knows.json` -``` +```json {"source_name": "marko", "target_name": "vadas", "date": "20160110", "weight": 0.5} {"source_name": "marko", "target_name": "josh", "date": "20130220", "weight": 1.0} ``` Edge file: `example/file/edge_created.json` -``` +```json {"aname": "marko", "bname": "lop", "date": "20171210", "weight": 0.4} {"aname": "josh", "bname": "lop", "date": "20091111", "weight": 0.4} {"aname": "josh", "bname": "ripple", "date": "20171210", "weight": 1.0} @@ -849,7 +886,8 @@ Edge file: `example/file/edge_created.json` #### 4.2 Write schema -schema file: `example/file/schema.groovy` +
+Click to expand/collapse schema file: example/file/schema.groovy ```groovy schema.propertyKey("name").asText().ifNotExist().create(); @@ -876,8 +914,13 @@ schema.indexLabel("createdByWeight").onE("created").by("weight").range().ifNotEx schema.indexLabel("knowsByWeight").onE("knows").by("weight").range().ifNotExist().create(); ``` +
+ #### 4.3 Write the input source mapping file `example/file/struct.json` +
+Click to expand/collapse the input source mapping file example/file/struct.json + ```json { "vertices": [ @@ -942,6 +985,8 @@ schema.indexLabel("knowsByWeight").onE("knows").by("weight").range().ifNotExist( } ``` +
+ #### 4.4 Command to import ```bash @@ -950,7 +995,7 @@ sh bin/hugegraph-loader.sh -g hugegraph -f example/file/struct.json -s example/f After the import is complete, statistics similar to the following will appear: -``` +```bash vertices/edges has been loaded this time : 8/6 -------------------------------------------------- count metrics @@ -966,7 +1011,111 @@ count metrics edge insert failure : 0 ``` -#### 4.5 Import data by spark-loader +#### 4.5 Use Docker to load data + +##### 4.5.1 Use docker exec to load data directly + +###### 4.5.1.1 Prepare data + +If you just want to try out the loader, you can import the built-in example dataset without needing to prepare additional data yourself. + +If using custom data, before importing data with the loader, we need to copy the data into the container. + +First, following the steps in [4.1-4.3](#41-prepare-data), we can prepare the data and then use `docker cp` to copy the prepared data into the loader container. + +Suppose we've prepared the corresponding dataset following the above steps, stored in the `hugegraph-dataset` folder with the following file structure: + +```bash +tree -f hugegraph-dataset/ + +hugegraph-dataset +├── hugegraph-dataset/edge_created.json +├── hugegraph-dataset/edge_knows.json +├── hugegraph-dataset/schema.groovy +├── hugegraph-dataset/struct.json +├── hugegraph-dataset/vertex_person.csv +└── hugegraph-dataset/vertex_software.txt +``` + +Copy the files into the container. + +```bash +docker cp hugegraph-dataset loader:/loader/dataset +docker exec -it loader ls /loader/dataset + +edge_created.json edge_knows.json schema.groovy struct.json vertex_person.csv vertex_software.txt +``` + +###### 4.5.1.2 Data loading + +Taking the built-in example dataset as an example, we can use the following command to load the data. + +If you need to import your custom dataset, you just need to modify the paths for `-f` (data script) and `-s` (schema) configurations. + +"You can refer to [3.4.1 Parameter description](#341-parameter-description) for the rest of the parameters. + +```bash +docker exec -it loader bin/hugegraph-loader.sh -g hugegraph -f example/file/struct.json -s example/file/schema.groovy -h graph -p 8080 +``` + +If loading a custom dataset, following the previous example, you would use: + +```bash +docker exec -it loader bin/hugegraph-loader.sh -g hugegraph -f /loader/dataset/struct.json -s /loader/dataset/schema.groovy -h graph -p 8080 +``` + +> If `loader` and `server` are in the same Docker network, you can specify `-h {server_container_name}`; otherwise, you need to specify the IP of the `server` host (in our example, `server_container_name` is `graph`). + +Then we can obverse the result: + +```bash +HugeGraphLoader worked in NORMAL MODE +vertices/edges loaded this time : 8/6 +-------------------------------------------------- +count metrics + input read success : 14 + input read failure : 0 + vertex parse success : 8 + vertex parse failure : 0 + vertex insert success : 8 + vertex insert failure : 0 + edge parse success : 6 + edge parse failure : 0 + edge insert success : 6 + edge insert failure : 0 +-------------------------------------------------- +meter metrics + total time : 0.199s + read time : 0.046s + load time : 0.153s + vertex load time : 0.077s + vertex load rate(vertices/s) : 103 + edge load time : 0.112s + edge load rate(edges/s) : 53 +``` + +You can also use `curl` or `hubble` to observe the import result. 
Here's an example using `curl`: + +```bash +> curl "http://localhost:8080/graphs/hugegraph/graph/vertices" | gunzip +{"vertices":[{"id":1,"label":"software","type":"vertex","properties":{"name":"lop","lang":"java","price":328.0}},{"id":2,"label":"software","type":"vertex","properties":{"name":"ripple","lang":"java","price":199.0}},{"id":"1:tom","label":"person","type":"vertex","properties":{"name":"tom"}},{"id":"1:josh","label":"person","type":"vertex","properties":{"name":"josh","age":32,"city":"Beijing"}},{"id":"1:marko","label":"person","type":"vertex","properties":{"name":"marko","age":29,"city":"Beijing"}},{"id":"1:peter","label":"person","type":"vertex","properties":{"name":"peter","age":35,"city":"Shanghai"}},{"id":"1:vadas","label":"person","type":"vertex","properties":{"name":"vadas","age":27,"city":"Hongkong"}},{"id":"1:li,nary","label":"person","type":"vertex","properties":{"name":"li,nary","age":26,"city":"Wu,han"}}]} +``` + +If you want to check the import result of edges, you can use `curl "http://localhost:8080/graphs/hugegraph/graph/edges" | gunzip`. + +##### 4.5.2 Enter the docker container to load data + +Besides using `docker exec` directly for data import, we can also enter the container for data loading. The basic process is similar to [4.5.1](#451-use-docker-exec-to-load-data-directly). + +Enter the container by `docker exec -it loader bash` and execute the command: + +```bash +sh bin/hugegraph-loader.sh -g hugegraph -f example/file/struct.json -s example/file/schema.groovy -h graph -p 8080 +``` + +The results of the execution will be similar to those shown in [4.5.1](#451-use-docker-exec-to-load-data-directly). + +#### 4.6 Import data by spark-loader > Spark version: Spark 3+, other versions has not been tested. > HugeGraph Toolchain version: toolchain-1.0.0 > diff --git a/content/en/docs/quickstart/hugegraph-server.md b/content/en/docs/quickstart/hugegraph-server.md index 7cd4cc0fa..644d24dd5 100644 --- a/content/en/docs/quickstart/hugegraph-server.md +++ b/content/en/docs/quickstart/hugegraph-server.md @@ -157,7 +157,71 @@ for detailed configuration introduction, please refer to [configuration document In [3.1 Use Docker container](#31-use-docker-container-recommended), we have introduced how to use docker to deploy `hugegraph-server`. `server` can also preload an example graph by setting the parameter. -##### 5.1.1 Create example graph when starting server +##### 5.1.1 Use Cassandra as the storage + +
+<summary>Click to expand/collapse Cassandra configuration and startup methods</summary>
+
+When using Docker, we can use Cassandra as the backend storage. We highly recommend using docker-compose directly to manage both the server and Cassandra.
+
+The sample `docker-compose.yml` can be obtained on [github](https://github.com/apache/incubator-hugegraph/blob/master/hugegraph-dist/docker/example/docker-compose-cassandra.yml), and you can start it with `docker-compose up -d`. (If using Cassandra 4.0 as the backend storage, it takes approximately two minutes to initialize. Please be patient.)
+
+```yaml
+version: "3"
+
+services:
+  graph:
+    image: hugegraph/hugegraph
+    container_name: cas-graph
+    ports:
+      - 8080:8080
+    environment:
+      hugegraph.backend: cassandra
+      hugegraph.serializer: cassandra
+      hugegraph.cassandra.host: cas-cassandra
+      hugegraph.cassandra.port: 9042
+    networks:
+      - ca-network
+    depends_on:
+      - cassandra
+    healthcheck:
+      test: ["CMD", "bin/gremlin-console.sh", "--", "-e", "scripts/remote-connect.groovy"]
+      interval: 10s
+      timeout: 30s
+      retries: 3
+
+  cassandra:
+    image: cassandra:4
+    container_name: cas-cassandra
+    ports:
+      - 7000:7000
+      - 9042:9042
+    security_opt:
+      - seccomp:unconfined
+    networks:
+      - ca-network
+    healthcheck:
+      test: ["CMD", "cqlsh", "--execute", "describe keyspaces;"]
+      interval: 10s
+      timeout: 30s
+      retries: 5
+
+networks:
+  ca-network:
+
+volumes:
+  hugegraph-data:
+```
+
+In this yaml file, configuration parameters related to Cassandra need to be passed as environment variables in the format of `hugegraph.`.
+
+Specifically, in the configuration file `hugegraph.properties`, there are settings like `backend=xxx` and `cassandra.host=xxx`. To configure these settings when passing environment variables, we need to prepend `hugegraph.` to these configurations, like `hugegraph.backend` and `hugegraph.cassandra.host`.
+
+The rest of the configurations can be referenced under [4 config](#4-config).
+
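+For reference, a rough equivalent using plain `docker run` instead of docker-compose might look like the following (a sketch only, assuming you have created the network with `docker network create ca-network` and waited for Cassandra to finish initializing):
+
+```bash
+# Start the Cassandra container first (Cassandra 4.0 takes about two minutes to initialize)
+docker run -itd --name cas-cassandra --network ca-network cassandra:4
+
+# Then start the server, passing the backend settings as hugegraph.-prefixed environment variables
+docker run -itd --name cas-graph --network ca-network -p 8080:8080 \
+    -e hugegraph.backend=cassandra \
+    -e hugegraph.serializer=cassandra \
+    -e hugegraph.cassandra.host=cas-cassandra \
+    -e hugegraph.cassandra.port=9042 \
+    hugegraph/hugegraph
+```
+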
+ +##### 5.1.2 Create example graph when starting server Set the environment variable `PRELOAD=true` when starting Docker in order to load data during the execution of the startup script.
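+
+For example, a minimal startup command with preloading enabled could look like the following (a sketch only, assuming the default image and port):
+
+```bash
+# PRELOAD=true makes the startup script load the built-in example graph
+docker run -itd --name graph -p 8080:8080 -e PRELOAD=true hugegraph/hugegraph
+```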