Skip to content

Commit

Permalink
chore(spark): Add reference document about how to integrate cloud sto…
Browse files Browse the repository at this point in the history
…rage and remove related dependencies (apache#572)

Signed-off-by: acezen <qiaozi.zwb@alibaba-inc.com>
  • Loading branch information
acezen authored and Elssky committed Oct 8, 2024
1 parent 35371ea commit 680e3d8
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 171 deletions.
7 changes: 7 additions & 0 deletions docs/libraries/spark/spark.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,13 @@ For more information on usage, please refer to the examples:
- [TransformExample.scala][transform-example] shows an example for graph data conversion between different file types or different adjList types.
- [Neo4j2GraphAr.scala][neo4j2graphar] and [GraphAr2Neo4j.scala][graphar2neo4j] are examples to conduct data importing/exporting for Neo4j.

### Working with Cloud Storage (AWS S3, Aliyun OSS)

The Spark library for GraphAr supports reading and writing data from/to cloud storage services such as AWS S3 and Aliyun OSS. To do so, you need to include the corresponding Hadoop cloud storage dependency (for example, the Hadoop AWS dependency for S3) in your project. See the reference documentation below for more details.

- [AWS S3](https://spark.apache.org/docs/latest/cloud-integration.html)
- [Aliyun OSS](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html)


[test-graph-info]: https://github.com/apache/incubator-graphar/blob/main/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/TestGraphInfo.scala
[test-index-generator]: https://github.com/apache/incubator-graphar/blob/main/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/TestIndexGenerator.scala
Expand Down
28 changes: 0 additions & 28 deletions maven-projects/spark/graphar/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -88,34 +88,6 @@
<artifactId>snakeyaml</artifactId>
<version>2.0</version>
</dependency>
<!-- Aliyun OSS filesystem connector; hadoop-common is excluded from its transitive dependencies. -->
<dependency>
  <groupId>com.aliyun.odps</groupId>
  <artifactId>hadoop-fs-oss</artifactId>
  <version>${cupid.sdk.version}</version>
  <exclusions>
    <exclusion>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
    </exclusion>
  </exclusions>
</dependency>
<!-- ODPS Spark datasource; the lz4 artifact is excluded from its transitive dependencies. -->
<!-- NOTE(review): the artifact id carries a Scala 2.11 suffix; confirm it matches the Scala binary version of this build. -->
<dependency>
  <groupId>com.aliyun.odps</groupId>
  <artifactId>odps-spark-datasource_2.11</artifactId>
  <version>${cupid.sdk.version}</version>
  <exclusions>
    <exclusion>
      <groupId>net.jpountz.lz4</groupId>
      <artifactId>lz4</artifactId>
    </exclusion>
  </exclusions>
</dependency>
<!-- Cupid SDK, declared with provided scope. -->
<dependency>
  <groupId>com.aliyun.odps</groupId>
  <artifactId>cupid-sdk</artifactId>
  <version>${cupid.sdk.version}</version>
  <scope>provided</scope>
</dependency>
<dependency>
<groupId>org.neo4j</groupId>
<artifactId>neo4j-connector-apache-spark_2.12</artifactId>
Expand Down
143 changes: 0 additions & 143 deletions maven-projects/spark/pom.xml
Original file line number Diff line number Diff line change
@@ -1,143 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Inherits from the graphar-root POM located one directory up. -->
  <parent>
    <groupId>org.apache.graphar</groupId>
    <artifactId>graphar-root</artifactId>
    <version>${graphar.version}</version>
    <relativePath>../pom.xml</relativePath>
  </parent>

  <!-- Aggregator POM (packaging "pom"): the modules to build are selected by the profiles below. -->
  <artifactId>spark</artifactId>
  <packaging>pom</packaging>
  <version>${graphar.version}</version>

  <profiles>
    <!-- Active by default: Spark 3.2.2 with Scala 2.12.10; builds graphar and datasources-32. -->
    <profile>
      <id>datasources-32</id>
      <properties>
        <sbt.project.name>graphar</sbt.project.name>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <scala.version>2.12.10</scala.version>
        <scala.binary.version>2.12</scala.binary.version>
        <PermGen>512m</PermGen>
        <MaxPermGen>1024m</MaxPermGen>
        <spark.version>3.2.2</spark.version>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <cupid.sdk.version>3.3.8-public</cupid.sdk.version>
      </properties>
      <modules>
        <module>graphar</module>
        <module>datasources-32</module>
      </modules>
      <activation>
        <activeByDefault>true</activeByDefault>
      </activation>
    </profile>
    <!-- Opt-in alternative: Spark 3.3.4 with Scala 2.12.12; builds graphar and datasources-33. -->
    <profile>
      <id>datasources-33</id>
      <properties>
        <sbt.project.name>graphar</sbt.project.name>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
        <scala.version>2.12.12</scala.version>
        <scala.binary.version>2.12</scala.binary.version>
        <PermGen>512m</PermGen>
        <MaxPermGen>1024m</MaxPermGen>
        <spark.version>3.3.4</spark.version>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <cupid.sdk.version>3.3.8-public</cupid.sdk.version>
      </properties>
      <modules>
        <module>graphar</module>
        <module>datasources-33</module>
      </modules>
    </profile>
  </profiles>

  <build>
    <plugins>
      <!-- Source formatting through Spotless: google-java-format for Java, scalafmt for Scala. -->
      <plugin>
        <groupId>com.diffplug.spotless</groupId>
        <artifactId>spotless-maven-plugin</artifactId>
        <version>2.20.0</version>
        <configuration>
          <!-- Java formatting rules -->
          <java>
            <!-- Files are not listed here; the plugin infers them unless told otherwise. -->
            <!-- google-java-format pinned to 1.13.0, using the AOSP style. -->
            <googleJavaFormat>
              <version>1.13.0</version>
              <style>AOSP</style>
            </googleJavaFormat>
          </java>
          <!-- Scala formatting rules -->
          <scala>
            <scalafmt>
              <file>${project.basedir}/.scalafmt.conf</file> <!-- optional -->
            </scalafmt>
          </scala>
        </configuration>
      </plugin>
      <!-- Scala compilation of main and test sources, using the Scala version chosen by the active profile. -->
      <plugin>
        <groupId>org.scala-tools</groupId>
        <artifactId>maven-scala-plugin</artifactId>
        <version>2.15.2</version>
        <configuration>
          <scalaVersion>${scala.version}</scalaVersion>
          <!-- Arguments passed to the Scala compiler -->
          <args>
            <arg>-target:jvm-1.8</arg>
          </args>
          <!-- Arguments passed to the JVM that runs the compiler -->
          <jvmArgs>
            <jvmArg>-Xss4096K</jvmArg>
          </jvmArgs>
        </configuration>
        <executions>
          <!-- Main sources; signature files under META-INF are listed as excludes. -->
          <execution>
            <id>scala-compile</id>
            <goals>
              <goal>compile</goal>
            </goals>
            <configuration>
              <excludes>
                <exclude>META-INF/*.SF</exclude>
                <exclude>META-INF/*.DSA</exclude>
                <exclude>META-INF/*.RSA</exclude>
              </excludes>
            </configuration>
          </execution>
          <!-- Test sources -->
          <execution>
            <id>scala-test-compile</id>
            <goals>
              <goal>testCompile</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

0 comments on commit 680e3d8

Please sign in to comment.