From 680e3d8860d31a4b9181ac588b3769ed6d00a4f5 Mon Sep 17 00:00:00 2001 From: Weibin Zeng Date: Thu, 1 Aug 2024 16:50:57 +0800 Subject: [PATCH] chore(spark): Add reference document about how to integrate cloud storage and remove related dependencies (#572) Signed-off-by: acezen --- docs/libraries/spark/spark.md | 7 ++ maven-projects/spark/graphar/pom.xml | 28 ------ maven-projects/spark/pom.xml | 143 --------------------------- 3 files changed, 7 insertions(+), 171 deletions(-) diff --git a/docs/libraries/spark/spark.md b/docs/libraries/spark/spark.md index 7043005df..eaf79b8b9 100644 --- a/docs/libraries/spark/spark.md +++ b/docs/libraries/spark/spark.md @@ -225,6 +225,13 @@ For more information on usage, please refer to the examples: - [TransformExample.scala][transform-example] shows an example for graph data conversion between different file types or different adjList types. - [Neo4j2GraphAr.scala][neo4j2graphar] and [GraphAr2Neo4j.scala][graphar2neo4j] are examples to conduct data importing/exporting for Neo4j. +### Working with Cloud Storage (AWS S3, Aliyun OSS) + +The Spark library for GraphAr supports reading and writing data from/to cloud storage services such as AWS S3. To do so, you need to include the Hadoop AWS dependency in your project. See the reference documentation for more details. 
+ +- [AWS S3](https://spark.apache.org/docs/latest/cloud-integration.html) +- [Aliyun OSS](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html) + [test-graph-info]: https://github.com/apache/incubator-graphar/blob/main/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/TestGraphInfo.scala [test-index-generator]: https://github.com/apache/incubator-graphar/blob/main/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/TestIndexGenerator.scala diff --git a/maven-projects/spark/graphar/pom.xml b/maven-projects/spark/graphar/pom.xml index f9cb86583..7e2901c2b 100644 --- a/maven-projects/spark/graphar/pom.xml +++ b/maven-projects/spark/graphar/pom.xml @@ -88,34 +88,6 @@ snakeyaml 2.0 - - com.aliyun.odps - hadoop-fs-oss - ${cupid.sdk.version} - - - org.apache.hadoop - hadoop-common - - - - - com.aliyun.odps - odps-spark-datasource_2.11 - ${cupid.sdk.version} - - - net.jpountz.lz4 - lz4 - - - - - com.aliyun.odps - cupid-sdk - ${cupid.sdk.version} - provided - org.neo4j neo4j-connector-apache-spark_2.12 diff --git a/maven-projects/spark/pom.xml b/maven-projects/spark/pom.xml index 5e5df9a3b..e69de29bb 100644 --- a/maven-projects/spark/pom.xml +++ b/maven-projects/spark/pom.xml @@ -1,143 +0,0 @@ - - - - - 4.0.0 - - - org.apache.graphar - graphar-root - ${graphar.version} - ../pom.xml - - - spark - pom - ${graphar.version} - - - - datasources-32 - - graphar - UTF-8 - UTF-8 - 2.12.10 - 2.12 - 512m - 1024m - 3.2.2 - 11 - 11 - 3.3.8-public - - - graphar - datasources-32 - - - true - - - - datasources-33 - - graphar - UTF-8 - UTF-8 - 2.12.12 - 2.12 - 512m - 1024m - 3.3.4 - 11 - 11 - 3.3.8-public - - - graphar - datasources-33 - - - - - - - com.diffplug.spotless - spotless-maven-plugin - 2.20.0 - - - - - - - 1.13.0 - - - - - - ${project.basedir}/.scalafmt.conf - - - - - - org.scala-tools - maven-scala-plugin - 2.15.2 - - ${scala.version} - - -target:jvm-1.8 - - - -Xss4096K - - - - - scala-compile - - compile - - - - 
META-INF/*.SF - META-INF/*.DSA - META-INF/*.RSA - - - - - scala-test-compile - - testCompile - - - - - - -