From 680e3d8860d31a4b9181ac588b3769ed6d00a4f5 Mon Sep 17 00:00:00 2001
From: Weibin Zeng <qiaozi.zwb@alibaba-inc.com>
Date: Thu, 1 Aug 2024 16:50:57 +0800
Subject: [PATCH] chore(spark): Add reference document about how to integrate
 cloud storage and remove related dependencies (#572)

Signed-off-by: acezen <qiaozi.zwb@alibaba-inc.com>
---
 docs/libraries/spark/spark.md        |   7 ++
 maven-projects/spark/graphar/pom.xml |  28 ------
 maven-projects/spark/pom.xml         | 143 ---------------------------
 3 files changed, 7 insertions(+), 171 deletions(-)
diff --git a/docs/libraries/spark/spark.md b/docs/libraries/spark/spark.md
index 7043005df..eaf79b8b9 100644
--- a/docs/libraries/spark/spark.md
+++ b/docs/libraries/spark/spark.md
@@ -225,6 +225,13 @@ For more information on usage, please refer to the examples:
 - [TransformExample.scala][transform-example] shows an example for graph data conversion between different file types or different adjList types.
 - [Neo4j2GraphAr.scala][neo4j2graphar] and [GraphAr2Neo4j.scala][graphar2neo4j] are examples to conduct data importing/exporting for Neo4j.
 
+### Working with Cloud Storage (AWS S3, Aliyun OSS)
+
+The Spark library for GraphAr supports reading and writing data from/to cloud storage services such as AWS S3 and Aliyun OSS. To do so, you need to include the corresponding Hadoop connector dependency (for example, Hadoop AWS for S3 or Hadoop Aliyun for OSS) in your project. See the reference documentation below for more details.
+
+- [AWS S3](https://spark.apache.org/docs/latest/cloud-integration.html)
+- [Aliyun OSS](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html)
+
 
 [test-graph-info]: https://github.com/apache/incubator-graphar/blob/main/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/TestGraphInfo.scala
 [test-index-generator]: https://github.com/apache/incubator-graphar/blob/main/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/TestIndexGenerator.scala
diff --git a/maven-projects/spark/graphar/pom.xml b/maven-projects/spark/graphar/pom.xml
index f9cb86583..7e2901c2b 100644
--- a/maven-projects/spark/graphar/pom.xml
+++ b/maven-projects/spark/graphar/pom.xml
@@ -88,34 +88,6 @@
             <artifactId>snakeyaml</artifactId>
             <version>2.0</version>
         </dependency>
-        <dependency>
-            <groupId>com.aliyun.odps</groupId>
-            <artifactId>hadoop-fs-oss</artifactId>
-            <version>${cupid.sdk.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.apache.hadoop</groupId>
-                    <artifactId>hadoop-common</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>com.aliyun.odps</groupId>
-            <artifactId>odps-spark-datasource_2.11</artifactId>
-            <version>${cupid.sdk.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>net.jpountz.lz4</groupId>
-                    <artifactId>lz4</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>com.aliyun.odps</groupId>
-            <artifactId>cupid-sdk</artifactId>
-            <version>${cupid.sdk.version}</version>
-            <scope>provided</scope>
-        </dependency>
         <dependency>
             <groupId>org.neo4j</groupId>
             <artifactId>neo4j-connector-apache-spark_2.12</artifactId>
diff --git a/maven-projects/spark/pom.xml b/maven-projects/spark/pom.xml
--- a/maven-projects/spark/pom.xml
+++ b/maven-projects/spark/pom.xml
@@ -49,7 +49,6 @@
                 <spark.version>3.2.2</spark.version>
                 <maven.compiler.source>11</maven.compiler.source>
                 <maven.compiler.target>11</maven.compiler.target>
-                <cupid.sdk.version>3.3.8-public</cupid.sdk.version>
             </properties>
             <modules>
                 <module>graphar</module>
@@ -72,7 +71,6 @@
                 <spark.version>3.3.4</spark.version>
                 <maven.compiler.source>11</maven.compiler.source>
                 <maven.compiler.target>11</maven.compiler.target>
-                <cupid.sdk.version>3.3.8-public</cupid.sdk.version>
             </properties>
             <modules>
                 <module>graphar</module>