From 019e17dfe4e71361844fe724868c2b899c99a9dc Mon Sep 17 00:00:00 2001
From: Reid
Date: Wed, 7 Sep 2022 15:16:21 +0800
Subject: [PATCH 1/5] chore: reteTimeOut => rateTimeOut

---
 .../main/scala/com/vesoft/nebula/connector/NebulaOptions.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala
index 55d9a929..f6844160 100644
--- a/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala
+++ b/nebula-spark-connector/src/main/scala/com/vesoft/nebula/connector/NebulaOptions.scala
@@ -220,7 +220,7 @@ object NebulaOptions {
   val TIMEOUT: String = "timeout"
   val CONNECTION_RETRY: String = "connectionRetry"
   val EXECUTION_RETRY: String = "executionRetry"
-  val RATE_TIME_OUT: String = "reteTimeOut"
+  val RATE_TIME_OUT: String = "rateTimeOut"
   val USER_NAME: String = "user"
   val PASSWD: String = "passwd"
   val ENABLE_GRAPH_SSL: String = "enableGraphSSL"

From d524665cbfd1778d92bd7da7dd75a85d15278288 Mon Sep 17 00:00:00 2001
From: Reid
Date: Wed, 7 Sep 2022 16:00:10 +0800
Subject: [PATCH 2/5] doc effect: add python write vertex and edge code demo

---
 README.md    | 37 ++++++++++++++++++++++++++++++++++++
 README_CN.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+)

diff --git a/README.md b/README.md
index caad4442..438ba575 100644
--- a/README.md
+++ b/README.md
@@ -251,6 +251,7 @@ spark = SparkSession.builder.config(
     "/path_to/nebula-spark-connector-3.0.0.jar").appName(
     "nebula-connector").getOrCreate()
+# read vertex
 df = spark.read.format(
     "com.vesoft.nebula.connector.NebulaDataSource").option(
     "type", "vertex").option(
     "spaceName", "basketballplayer").option(
     "label", "player").option(
     "returnCols", "name,age").option(
     "metaAddress", "metad0:9559").option(
     "partitionNumber", 1).load()
+
+# write vertex
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
+    .mode("overwrite")\
+    .option("timeout", 300000)\
+    .option("connectionRetry", 1)\
+    .option("executionRetry", 2)\
+    .option("vidPolicy", "")\
+    .option("metaAddress", "metad0:9559")\
+    .option("graphAddress", "graphd:9669")\
+    .option("user", "root")\
+    .option("passwd", "nebula")\
+    .option("type", "vertex")\
+    .option("spaceName", "basketballplayer")\
+    .option("label", "player")\
+    .option("vertexField", "vid")\
+    .option("batch", 3000)\
+    .option("writeMode", "insert").save()
+
+# write edge
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
+    .mode("overwrite")\
+    .option("srcPolicy", "")\
+    .option("dstPolicy", "")\
+    .option("metaAddress", "metad0:9559")\
+    .option("graphAddress", "graphd:9669")\
+    .option("user", "root")\
+    .option("passwd", "nebula")\
+    .option("type", "edge")\
+    .option("spaceName", "basketballplayer")\
+    .option("label", "server")\
+    .option("srcVertexField", "srcid")\
+    .option("dstVertexField", "dstid")\
+    .option("rankFiled", "")\
+    .option("batch", 100)\
+    .option("writeMode", "insert").save() # delete to delete edge, update to update edge
 ```
 ## Version match
diff --git a/README_CN.md b/README_CN.md
index d044134a..0d49cc53 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -260,6 +260,59 @@ df = spark.read.format(
     "returnCols", "name,age").option(
     "metaAddress", "metad0:9559").option(
     "partitionNumber", 1).load()
+
+from pyspark.sql import SparkSession
+
+spark = SparkSession.builder.config(
+    "nebula-spark-connector-3.0.0.jar",
+    "/path_to/nebula-spark-connector-3.0.0.jar").appName(
+    "nebula-connector").getOrCreate()
+
+# read vertex
+df = spark.read.format(
+    "com.vesoft.nebula.connector.NebulaDataSource").option(
+    "type", "vertex").option(
+    "spaceName", "basketballplayer").option(
+    "label", "player").option(
+    "returnCols", "name,age").option(
+    "metaAddress", "metad0:9559").option(
+    "partitionNumber", 1).load()
+
+# write vertex
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
+    .mode("overwrite")\
+    .option("timeout", 300000)\
+    .option("connectionRetry", 1)\
+    .option("executionRetry", 2)\
+    .option("vidPolicy", "")\
+    .option("metaAddress", "metad0:9559")\
+    .option("graphAddress", "graphd:9669")\
+    .option("user", "root")\
+    .option("passwd", "nebula")\
+    .option("type", "vertex")\
+    .option("spaceName", "basketballplayer")\
+    .option("label", "player")\
+    .option("vertexField", "vid")\
+    .option("batch", 3000)\
+    .option("writeMode", "insert").save()
+
+# write edge
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
+    .mode("overwrite")\
+    .option("srcPolicy", "")\
+    .option("dstPolicy", "")\
+    .option("metaAddress", "metad0:9559")\
+    .option("graphAddress", "graphd:9669")\
+    .option("user", "root")\
+    .option("passwd", "nebula")\
+    .option("type", "edge")\
+    .option("spaceName", "basketballplayer")\
+    .option("label", "server")\
+    .option("srcVertexField", "srcid")\
+    .option("dstVertexField", "dstid")\
+    .option("rankFiled", "")\
+    .option("batch", 100)\
+    .option("writeMode", "insert").save() # delete to delete edge, update to update edge
 ```
 ## 版本匹配

From d12259ce4b7b1dd34dba770b67c55bb78d455012 Mon Sep 17 00:00:00 2001
From: Reid
Date: Wed, 7 Sep 2022 16:22:58 +0800
Subject: [PATCH 3/5] docs: delete duplicated code

---
 README_CN.md | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/README_CN.md b/README_CN.md
index 0d49cc53..a30f9379 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -247,22 +247,6 @@ df.write.format("com.vesoft.nebula.connector.NebulaDataSource").option(
 ```
 from pyspark.sql import SparkSession
-spark = SparkSession.builder.config(
-    "nebula-spark-connector-3.0.0.jar",
-    "/path_to/nebula-spark-connector-3.0.0.jar").appName(
-    "nebula-connector").getOrCreate()
-
-df = spark.read.format(
-    "com.vesoft.nebula.connector.NebulaDataSource").option(
-    "type", "vertex").option(
-    "spaceName", "basketballplayer").option(
-    "label", "player").option(
-    "returnCols", "name,age").option(
-    "metaAddress", "metad0:9559").option(
-    "partitionNumber", 1).load()
-
-from pyspark.sql import SparkSession
-
 spark = SparkSession.builder.config(
     "nebula-spark-connector-3.0.0.jar",
     "/path_to/nebula-spark-connector-3.0.0.jar").appName(
     "nebula-connector").getOrCreate()

From 79b972f4fcb8c278f83dc9212bfc24b21c607269 Mon Sep 17 00:00:00 2001
From: Reid
Date: Wed, 7 Sep 2022 17:37:43 +0800
Subject: [PATCH 4/5] docs: move the section of demo

---
 README.md    | 78 +++++++++++++++++++++++++++-------------------------
 README_CN.md | 77 ++++++++++++++++++++++++++------------------------
 2 files changed, 80 insertions(+), 75 deletions(-)

diff --git a/README.md b/README.md
index 438ba575..8d914d14 100644
--- a/README.md
+++ b/README.md
@@ -176,7 +176,7 @@ only showing top 2 rows
 ### Write in PySpark
 Let's try a write example, by default, the `writeMode` is `insert`
-
+#### write vertex
 ```python
 df.write.format("com.vesoft.nebula.connector.NebulaDataSource").option(
     "type", "vertex").option(
     "spaceName", "basketballplayer").option(
     "label", "player").option(
     "vidPolicy", "").option(
     "vertexField", "vid").option(
     "batch", 1).option(
     "metaAddress", "metad0:9559").option(
     "graphAddress", "graphd:9669").option(
     "passwd", "nebula").option(
     "user", "root").save()
 ```
-
+#### delete vertex
 For delete or update write mode, we could(for instance)specify with `writeMode` as `delete` like:
 ```python
 df.write.format("com.vesoft.nebula.connector.NebulaDataSource").option(
     "type", "vertex").option(
     "spaceName", "basketballplayer").option(
     "label", "player").option(
     "vidPolicy", "").option(
     "vertexField", "vid").option(
     "batch", 1).option(
     "metaAddress", "metad0:9559").option(
     "graphAddress", "graphd:9669").option(
     "passwd", "nebula").option(
     "writeMode", "delete").option(
     "user", "root").save()
 ```
+#### write edge
+```python
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
+    .mode("overwrite")\
+    .option("srcPolicy", "")\
+    .option("dstPolicy", "")\
+    .option("metaAddress", "metad0:9559")\
+    .option("graphAddress", "graphd:9669")\
+    .option("user", "root")\
+    .option("passwd", "nebula")\
+    .option("type", "edge")\
+    .option("spaceName", "basketballplayer")\
+    .option("label", "server")\
+    .option("srcVertexField", "srcid")\
+    .option("dstVertexField", "dstid")\
+    .option("rankFiled", "")\
+    .option("batch", 100)\
+    .option("writeMode", "insert").save() # delete to delete edge, update to update edge
+```
+#### delete edge
+```python
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
+    .mode("overwrite")\
+    .option("srcPolicy", "")\
+    .option("dstPolicy", "")\
+    .option("metaAddress", "metad0:9559")\
+    .option("graphAddress", "graphd:9669")\
+    .option("user", "root")\
+    .option("passwd", "nebula")\
+    .option("type", "edge")\
+    .option("spaceName", "basketballplayer")\
+    .option("label", "server")\
+    .option("srcVertexField", "srcid")\
+    .option("dstVertexField", "dstid")\
+    .option("rankFiled", "")\
+    .option("batch", 100)\
+    .option("writeMode", "delete").save() # delete to delete edge, update to update edge
+```
 ### Options in PySpark
@@ -260,42 +298,6 @@ df = spark.read.format(
     "returnCols", "name,age").option(
     "metaAddress", "metad0:9559").option(
     "partitionNumber", 1).load()
-
-# write vertex
-df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
-    .mode("overwrite")\
-    .option("timeout", 300000)\
-    .option("connectionRetry", 1)\
-    .option("executionRetry", 2)\
-    .option("vidPolicy", "")\
-    .option("metaAddress", "metad0:9559")\
-    .option("graphAddress", "graphd:9669")\
-    .option("user", "root")\
-    .option("passwd", "nebula")\
-    .option("type", "vertex")\
-    .option("spaceName", "basketballplayer")\
-    .option("label", "player")\
-    .option("vertexField", "vid")\
-    .option("batch", 3000)\
-    .option("writeMode", "insert").save()
-
-# write edge
-df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
-    .mode("overwrite")\
-    .option("srcPolicy", "")\
-    .option("dstPolicy", "")\
-    .option("metaAddress", "metad0:9559")\
-    .option("graphAddress", "graphd:9669")\
-    .option("user", "root")\
-    .option("passwd", "nebula")\
-    .option("type", "edge")\
-    .option("spaceName", "basketballplayer")\
-    .option("label", "server")\
-    .option("srcVertexField", "srcid")\
-    .option("dstVertexField", "dstid")\
-    .option("rankFiled", "")\
-    .option("batch", 100)\
-    .option("writeMode", "insert").save() # delete to delete edge, update to update edge
 ```
 ## Version match
diff --git a/README_CN.md b/README_CN.md
index a30f9379..e59cb58c 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -176,7 +176,7 @@ only showing top 2 rows
 ### PySpark 中写 NebulaGraph 中数据
 再试一试写入数据的例子，默认不指定的情况下 `writeMode` 是 `insert`:
-
+#### 写入点
 ```python
 df.write.format("com.vesoft.nebula.connector.NebulaDataSource").option(
     "type", "vertex").option(
     "spaceName", "basketballplayer").option(
     "label", "player").option(
     "vidPolicy", "").option(
     "vertexField", "vid").option(
     "batch", 1).option(
     "metaAddress", "metad0:9559").option(
     "graphAddress", "graphd:9669").option(
     "passwd", "nebula").option(
     "user", "root").save()
 ```
+#### 删除点
 如果想指定 `delete` 或者 `update` 的非默认写入模式，增加 `writeMode` 的配置，比如 `delete` 的例子:
 ```python
 df.write.format("com.vesoft.nebula.connector.NebulaDataSource").option(
     "type", "vertex").option(
     "spaceName", "basketballplayer").option(
     "label", "player").option(
     "vidPolicy", "").option(
     "vertexField", "vid").option(
     "batch", 1).option(
     "metaAddress", "metad0:9559").option(
     "graphAddress", "graphd:9669").option(
     "passwd", "nebula").option(
     "writeMode", "delete").option(
     "user", "root").save()
 ```
+#### 写入边
+```python
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
+    .mode("overwrite")\
+    .option("srcPolicy", "")\
+    .option("dstPolicy", "")\
+    .option("metaAddress", "metad0:9559")\
+    .option("graphAddress", "graphd:9669")\
+    .option("user", "root")\
+    .option("passwd", "nebula")\
+    .option("type", "edge")\
+    .option("spaceName", "basketballplayer")\
+    .option("label", "server")\
+    .option("srcVertexField", "srcid")\
+    .option("dstVertexField", "dstid")\
+    .option("rankFiled", "")\
+    .option("batch", 100)\
+    .option("writeMode", "insert").save() # delete to delete edge, update to update edge
+```
+#### 删除边
+```python
+df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
+    .mode("overwrite")\
+    .option("srcPolicy", "")\
+    .option("dstPolicy", "")\
+    .option("metaAddress", "metad0:9559")\
+    .option("graphAddress", "graphd:9669")\
+    .option("user", "root")\
+    .option("passwd", "nebula")\
+    .option("type", "edge")\
+    .option("spaceName", "basketballplayer")\
+    .option("label", "server")\
+    .option("srcVertexField", "srcid")\
+    .option("dstVertexField", "dstid")\
+    .option("rankFiled", "")\
+    .option("batch", 100)\
+    .option("writeMode", "delete").save() # delete to delete edge, update to update edge
+```
 ### 关于 PySpark 读写的 option
     "returnCols", "name,age").option(
     "metaAddress", "metad0:9559").option(
     "partitionNumber", 1).load()
-
-# write vertex
-df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
-    .mode("overwrite")\
-    .option("timeout", 300000)\
-    .option("connectionRetry", 1)\
-    .option("executionRetry", 2)\
-    .option("vidPolicy", "")\
-    .option("metaAddress", "metad0:9559")\
-    .option("graphAddress", "graphd:9669")\
-    .option("user", "root")\
-    .option("passwd", "nebula")\
-    .option("type", "vertex")\
-    .option("spaceName", "basketballplayer")\
-    .option("label", "player")\
-    .option("vertexField", "vid")\
-    .option("batch", 3000)\
-    .option("writeMode", "insert").save()
-
-# write edge
-df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
-    .mode("overwrite")\
-    .option("srcPolicy", "")\
-    .option("dstPolicy", "")\
-    .option("metaAddress", "metad0:9559")\
-    .option("graphAddress", "graphd:9669")\
-    .option("user", "root")\
-    .option("passwd", "nebula")\
-    .option("type", "edge")\
-    .option("spaceName", "basketballplayer")\
-    .option("label", "server")\
-    .option("srcVertexField", "srcid")\
-    .option("dstVertexField", "dstid")\
-    .option("rankFiled", "")\
-    .option("batch", 100)\
-    .option("writeMode", "insert").save() # delete to delete edge, update to update edge
 ```
 ## 版本匹配

From feaca8adda140c728d77b34dfd98e21af2958301 Mon Sep 17 00:00:00 2001
From: Reid
Date: Thu, 15 Sep 2022 13:54:03 +0800
Subject: [PATCH 5/5] docs: typo updated

---
 README.md    | 6 +++---
 README_CN.md | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 8d914d14..ea313424 100644
--- a/README.md
+++ b/README.md
@@ -221,7 +221,7 @@ df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
     .option("label", "server")\
     .option("srcVertexField", "srcid")\
     .option("dstVertexField", "dstid")\
-    .option("rankFiled", "")\
+    .option("randkField", "")\
     .option("batch", 100)\
     .option("writeMode", "insert").save() # delete to delete edge, update to update edge
 ```
@@ -240,7 +240,7 @@ df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
     .option("label", "server")\
     .option("srcVertexField", "srcid")\
     .option("dstVertexField", "dstid")\
-    .option("rankFiled", "")\
+    .option("randkField", "")\
     .option("batch", 100)\
     .option("writeMode", "delete").save() # delete to delete edge, update to update edge
 ```
@@ -259,7 +259,7 @@ For more options, i.e. delete edge with vertex being deleted, refer to [nebula/c
   val VERTEX_FIELD = "vertexField"
   val SRC_VERTEX_FIELD = "srcVertexField"
   val DST_VERTEX_FIELD = "dstVertexField"
-  val RANK_FIELD = "rankFiled"
+  val RANK_FIELD = "randkField"
   val BATCH: String = "batch"
   val VID_AS_PROP: String = "vidAsProp"
   val SRC_AS_PROP: String = "srcAsProp"
diff --git a/README_CN.md b/README_CN.md
index e59cb58c..a612f6bb 100644
--- a/README_CN.md
+++ b/README_CN.md
@@ -222,7 +222,7 @@ df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
     .option("label", "server")\
     .option("srcVertexField", "srcid")\
     .option("dstVertexField", "dstid")\
-    .option("rankFiled", "")\
+    .option("randkField", "")\
     .option("batch", 100)\
     .option("writeMode", "insert").save() # delete to delete edge, update to update edge
 ```
@@ -241,7 +241,7 @@ df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
     .option("label", "server")\
     .option("srcVertexField", "srcid")\
     .option("dstVertexField", "dstid")\
-    .option("rankFiled", "")\
+    .option("randkField", "")\
     .option("batch", 100)\
     .option("writeMode", "delete").save() # delete to delete edge, update to update edge
 ```
@@ -261,7 +261,7 @@ df.write.format("com.vesoft.nebula.connector.NebulaDataSource")\
   val VERTEX_FIELD = "vertexField"
   val SRC_VERTEX_FIELD = "srcVertexField"
   val DST_VERTEX_FIELD = "dstVertexField"
-  val RANK_FIELD = "rankFiled"
+  val RANK_FIELD = "randkField"
   val BATCH: String = "batch"
   val VID_AS_PROP: String = "vidAsProp"
   val SRC_AS_PROP: String = "srcAsProp"
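
Since the write calls above only appear inside diff hunks, here is a compact, self-contained sketch of the flow these patches document — build a session with the connector jar attached, create a tiny DataFrame, and insert it as `player` vertices. It is an illustration, not part of the patch series: the jar path, addresses, credentials and the `basketballplayer`/`player` names are the placeholder values from the README examples, the sample row is made up, and `spark.jars` is just one standard way to ship the jar (the README examples pass the jar path to `SparkSession.builder.config` directly).

```python
from pyspark.sql import SparkSession

# Placeholder jar path and app name, as in the README examples.
spark = (
    SparkSession.builder
    .appName("nebula-connector")
    .config("spark.jars", "/path_to/nebula-spark-connector-3.0.0.jar")
    .getOrCreate()
)

# A made-up row whose columns match the `player` tag: vertex id + name + age.
df = spark.createDataFrame(
    [("player999", "Oliver", 30)],
    ["vid", "name", "age"],
)

# Insert the rows as vertices of tag `player` in space `basketballplayer`.
(
    df.write.format("com.vesoft.nebula.connector.NebulaDataSource")
    .mode("overwrite")
    .option("metaAddress", "metad0:9559")
    .option("graphAddress", "graphd:9669")
    .option("user", "root")
    .option("passwd", "nebula")
    .option("type", "vertex")
    .option("spaceName", "basketballplayer")
    .option("label", "player")
    .option("vertexField", "vid")   # DataFrame column that holds the vertex id
    .option("batch", 3000)
    .option("writeMode", "insert")  # or "delete" / "update", as documented above
    .save()
)
```

Reading the data back uses the read options shown in the patches (`type`, `spaceName`, `label`, `returnCols`, `metaAddress`, `partitionNumber`); for edge writes, note that the rank option is spelled `randkField`, matching the option listing updated in PATCH 5/5.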